diff --git a/kernel/Makefile b/kernel/Makefile index f9ecb06..39380c9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -20,13 +20,6 @@ CFLAGS += -I. \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -DPICO_MOP2 \ - -DPICO_SUPPORT_TCP \ - -DPICO_SUPPORT_UDP \ - -DPICO_SUPPORT_ETH \ - -DPICO_SUPPORT_ICMP4 \ - -DPICO_SUPPORT_IPV4 \ - -DPICO_SUPPORT_MUTEX \ include arch/$(ARCH)/$(ARCH).mk include extconf/extra.mk @@ -61,15 +54,10 @@ SRCFILES += $(call GRABSRC, \ path \ rbuf \ ipc/pipe \ - ipc/netsock \ dev \ randcrypto \ time \ diskpart \ - netdev \ - port_picotcp \ - port_picotcp/modules \ - picotcp/stack \ ) CFILES := $(call GET_CFILES, $(SRCFILES)) diff --git a/kernel/ipc/netsock/netsock.c b/kernel/ipc/netsock/netsock.c deleted file mode 100644 index 684da47..0000000 --- a/kernel/ipc/netsock/netsock.c +++ /dev/null @@ -1,178 +0,0 @@ -#include -#include -#include "hal/hal.h" -#include "spinlock/spinlock.h" -#include "netsock.h" -#include "errors.h" -#include "pico_socket.h" -#include "dlmalloc/malloc.h" -#include "util/util.h" -#include "sysdefs/ipcnetsock.h" -#include "kprintf.h" -#include "proc/proc.h" - -IpcNetSocks IPC_NETSOCKS; - -void ipc_netsockinit(void) { - hal_memset(&IPC_NETSOCKS, 0, sizeof(IPC_NETSOCKS)); - spinlock_init(&IPC_NETSOCKS.spinlock); -} - -void ipc_netsock_event(uint16_t ev, struct pico_socket *sock1) { - IpcNetSock *netsock, *netsocktmp; - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - LL_FOREACH_SAFE(IPC_NETSOCKS.netsocks, netsock, netsocktmp) { - if (netsock->picosock == sock1) - break; - } - spinlock_release(&IPC_NETSOCKS.spinlock); - - if (netsock == NULL) { - return; - } - - spinlock_acquire(&netsock->spinlock); - - /* if (ev & PICO_SOCK_EV_RD) { */ - /* uint8_t *buf = dlmalloc(IPC_PIPE_MAX); */ - /* int32_t len = 0; */ - /* int32_t read; */ - /* do { */ - /* read = pico_socket_read(netsock->picosock, buf + len, IPC_PIPE_MAX - len); */ - /* if (read > 0) { */ - /* len += read; */ - /* } */ - /* } while(read > 0); */ - /* 
ipc_pipewrite(netsock->datapipe, buf, len); */ - /* dlfree(buf); */ - /* } */ - - /* if (ev & PICO_SOCK_EV_WR) { */ - /* uint8_t *buf = dlmalloc(IPC_PIPE_MAX); */ - /* int32_t read = ipc_piperead(netsock->datapipe, buf, IPC_PIPE_MAX); */ - /* if (read > 0) { */ - /* pico_socket_write(netsock->picosock, buf, read); */ - /* } */ - /* dlfree(buf); */ - /* } */ - - /* if (ev & PICO_SOCK_EV_FIN) { */ - /* // normal close */ - /* } */ - - - - /* if (ev & PICO_SOCK_EV_CLOSE) { */ - /* pico_socket_shutdown(netsock->picosock, PICO_SHUT_WR); */ - /* } */ - - rbuft_push(&netsock->eventbuffer, &ev); - - spinlock_release(&netsock->spinlock); -} - -IpcNetSock *ipc_netsockmake(uint16_t net, uint16_t proto, uint64_t pid) { - IpcNetSock *netsock = dlmalloc(sizeof(*netsock)); - if (netsock == NULL) { - return NULL; - } - - netsock->picosock = pico_socket_open(net, proto, &ipc_netsock_event); - if (netsock->picosock == NULL) { - goto err_sock_open; - } - - spinlock_init(&netsock->spinlock); - netsock->datapipe = dlmalloc(sizeof(*netsock->datapipe)); - ipc_pipeinit(netsock->datapipe, (uint64_t)-1); - - uint8_t *eventbuffer = dlmalloc(sizeof(IpcNetSockEventBuffer) * IPC_NETSOCK_EVENTBUFFER_MAX); - rbuft_init(&netsock->eventbuffer, eventbuffer, sizeof(IpcNetSockEventBuffer), IPC_NETSOCK_EVENTBUFFER_MAX); - - netsock->ownerpid = pid; - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - LL_APPEND(IPC_NETSOCKS.netsocks, netsock); - spinlock_release(&IPC_NETSOCKS.spinlock); - - return netsock; - -err_sock_open: - dlfree(netsock); - return NULL; -} - -int32_t ipc_netsocklisten(IpcNetSock *netsock, size_t maxlisteners) { - spinlock_acquire(&netsock->spinlock); - int32_t r = pico_socket_listen(netsock->picosock, (int)maxlisteners); - spinlock_release(&netsock->spinlock); - return r == 0 ? 
E_OK : E_NETSOCKLISTEN; -} - -int32_t ipc_netsockdelete_nolock(IpcNetSock *netsock) { - LL_REMOVE(IPC_NETSOCKS.netsocks, netsock); - - spinlock_acquire(&netsock->spinlock); - pico_socket_del(netsock->picosock); - ipc_pipefree(netsock->datapipe); - dlfree(netsock->datapipe); - dlfree(netsock->eventbuffer.buffer); - spinlock_release(&netsock->spinlock); - - dlfree(netsock); - return E_OK; -} - -int32_t ipc_netsockdelete(IpcNetSock *netsock) { - spinlock_acquire(&IPC_NETSOCKS.spinlock); - LL_REMOVE(IPC_NETSOCKS.netsocks, netsock); - spinlock_release(&IPC_NETSOCKS.spinlock); - - spinlock_acquire(&netsock->spinlock); - pico_socket_del(netsock->picosock); - ipc_pipefree(netsock->datapipe); - dlfree(netsock->datapipe); - dlfree(netsock->eventbuffer.buffer); - spinlock_release(&netsock->spinlock); - - dlfree(netsock); - return E_OK; -} - -int32_t ipc_netsockbindport(IpcNetSock *netsock, uint16_t port) { - uint16_t port_be = short_be(port); - struct pico_ip4 inaddr_any = {0}; - spinlock_acquire(&netsock->spinlock); - int32_t r = pico_socket_bind(netsock->picosock, &inaddr_any, &port_be); - spinlock_release(&netsock->spinlock); - if (r != 0) { - return E_NETSOCKBIND; - } - return E_OK; -} - -void ipc_netsock_cleanup_dangling(void) { - IpcNetSock *ns, *nstmp; - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - spinlock_acquire(&PROCS.spinlock); - - LL_FOREACH_SAFE(IPC_NETSOCKS.netsocks, ns, nstmp) { - bool foundowner = false; - Proc *proc, *proctmp; - LL_FOREACH_SAFE(PROCS.procs, proc, proctmp) { - if (ns->ownerpid == proc->pid) { - foundowner = true; - break; - } - } - - if (!foundowner) { - ipc_netsockdelete_nolock(ns); - } - } - - spinlock_release(&PROCS.spinlock); - spinlock_release(&IPC_NETSOCKS.spinlock); -} diff --git a/kernel/ipc/netsock/netsock.h b/kernel/ipc/netsock/netsock.h deleted file mode 100644 index 668758b..0000000 --- a/kernel/ipc/netsock/netsock.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef NETSOCK_NETSOCK_H_ -#define NETSOCK_NETSOCK_H_ - -#include -#include 
"spinlock/spinlock.h" -#include "pico_socket.h" -#include "ipc/pipe/pipe.h" -#include "rbuf/rbuf.h" - -typedef uint16_t IpcNetSockEventBuffer; -#define IPC_NETSOCK_EVENTBUFFER_MAX 512 - -typedef struct IpcNetSock { - struct IpcNetSock *next; - struct pico_socket *picosock; - IpcPipe *datapipe; - RBufT eventbuffer; - SpinLock spinlock; - uint64_t ownerpid; -} IpcNetSock; - -typedef struct { - IpcNetSock *netsocks; - SpinLock spinlock; -} IpcNetSocks; - -extern IpcNetSocks IPC_NETSOCKS; - -void ipc_netsockinit(void); -IpcNetSock *ipc_netsockmake(uint16_t net, uint16_t proto, uint64_t pid); -int32_t ipc_netsocklisten(IpcNetSock *netsock, size_t maxlisteners); -int32_t ipc_netsockdelete(IpcNetSock *netsock); -int32_t ipc_netsockbindport(IpcNetSock *netsock, uint16_t port); -void ipc_netsock_cleanup_dangling(void); - -#endif // NETSOCK_NETSOCK_H_ diff --git a/kernel/kmain.c b/kernel/kmain.c index 5af8082..b78fa20 100644 --- a/kernel/kmain.c +++ b/kernel/kmain.c @@ -15,9 +15,6 @@ #include "randcrypto/randcrypto.h" #include "time/time.h" #include "diskpart/diskpart.h" -#include "netdev/netdev.h" -#include "ipc/netsock/netsock.h" -#include "pico_stack.h" void log_bootinfo(void) { char buf[100]; @@ -53,9 +50,6 @@ void kmain(void) { diskpart_init(); baseimg_init(); vfs_init(); - pico_stack_init(); - netdev_init(); - ipc_netsockinit(); proc_init(); for(;;); diff --git a/kernel/netdev/netdev.c b/kernel/netdev/netdev.c deleted file mode 100644 index 33409fd..0000000 --- a/kernel/netdev/netdev.c +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include "netdev.h" -#include "spinlock/spinlock.h" -#include "kprintf.h" -#include "dlmalloc/malloc.h" -#include "errors.h" -#include "util/util.h" -#include "hal/hal.h" -#include "pico_device.h" -#include "pico_dev_loop.h" -#include "pico_ipv4.h" - -NetDevList NETDEV_LIST; - -void netdev_init(void) { - spinlock_init(&NETDEV_LIST.spinlock); - NETDEV_LIST.head = NULL; - - LOG("netdev", "init\n"); - - netdev_create(NETDEV_LOOPBACK, 
"127.0.0.1", "255.255.255.0"); -} - -NetDev *netdev_create(int32_t ndtype, const char *ipaddrstring, const char *netmaskstring) { - NetDev *nd = dlmalloc(sizeof(*nd)); - if (nd == NULL) { - return NULL; - } - - spinlock_acquire(&NETDEV_LIST.spinlock); - - nd->_magic = NETDEV_MAGIC; - spinlock_init(&nd->spinlock); - - switch (ndtype) { - case NETDEV_LOOPBACK: { - nd->picodev = pico_loop_create(); - } break; - default: - dlfree(nd); - spinlock_release(&NETDEV_LIST.spinlock); - return NULL; - } - - pico_string_to_ipv4(ipaddrstring, &nd->ipaddr4.addr); - pico_string_to_ipv4(netmaskstring, &nd->netmask4.addr); - pico_ipv4_link_add(nd->picodev, nd->ipaddr4, nd->netmask4); - - LL_APPEND(NETDEV_LIST.head, nd); - spinlock_release(&NETDEV_LIST.spinlock); - - return nd; -} - -// TODO: delete diff --git a/kernel/netdev/netdev.h b/kernel/netdev/netdev.h deleted file mode 100644 index 88c08fb..0000000 --- a/kernel/netdev/netdev.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef NETDEV_NETDEV_H_ -#define NETDEV_NETDEV_H_ - -#include -#include -#include "spinlock/spinlock.h" -#include "compiler/attr.h" -#include "pico_device.h" -#include "pico_ipv4.h" - -enum { - NETDEV_LOOPBACK, -}; - -#define NETDEV_MAGIC 0xB00B - -typedef struct NetDev { - struct pico_device *picodev; - uint32_t _magic; - struct NetDev *next; - SpinLock spinlock; - struct pico_ip4 ipaddr4, netmask4; -} NetDev; - -typedef struct { - NetDev *head; - SpinLock spinlock; -} NetDevList; - -extern NetDevList NETDEV_LIST; - -void netdev_init(void); - -NetDev *netdev_create(int32_t ndtype, const char *ipaddrstring, const char *netmaskstring); - -#endif // NETDEV_NETDEV_H_ diff --git a/kernel/picotcp/.gitignore b/kernel/picotcp/.gitignore deleted file mode 100644 index 0a9bc6b..0000000 --- a/kernel/picotcp/.gitignore +++ /dev/null @@ -1,28 +0,0 @@ -*.d -*.o -*.a -*.out -*.swp -tags -build -UNIT_* -core -core.* -.DS_Store -cscope.files -cscope.out -*.so -*.aux -*.pdf -*.toc -*.gz -*.log -*.pyc -*.elf -*.gcov -*.gcda -*.gcno 
-*.expand -*.pcap -.ycm_extra_conf.py -.clang_complete diff --git a/kernel/picotcp/.travis.yml b/kernel/picotcp/.travis.yml deleted file mode 100644 index 0a87dd4..0000000 --- a/kernel/picotcp/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -before_install: - - sudo apt-get update -qq - - sudo apt-get install -y vde2 check libvdeplug2-dev libpcap-dev psmisc - - sudo pip install cpp-coveralls - - make clean - - rm -f *.gc* - -install: make GCOV=1 && make units ARCH=faulty GCOV=1 && make test GCOV=1 -language: c -script: - - ./test/coverage.sh -after_success: - - coveralls --exclude test/ --exclude modules/ptsocket --exclude build --exclude modules/pico_dev_mock.c --exclude modules/pico_dev_null.c --exclude modules/pico_dev_pcap.c --exclude modules/pico_dev_tap_windows.c --exclude modules/pico_dev_tun.c --gcov-options='\-lp' diff --git a/kernel/picotcp/CONTRIBUTING.md b/kernel/picotcp/CONTRIBUTING.md deleted file mode 100755 index a61f72f..0000000 --- a/kernel/picotcp/CONTRIBUTING.md +++ /dev/null @@ -1,8 +0,0 @@ - -External contributions to picoTCP are very welcome. We do, however, ask that you sign the Contributor License Agreement. -We don't ask you to sign away your copyright. The CLA simply grants us an additional license on the code you wrote. This allows us to also use picoTCP in commercial projects, which enables us to keep investing time and money in creating a better TCP/IP stack. - -Please read the [Agreement](https://docs.google.com/forms/d/1-z6lsT75l6ZIrgHGEWrWdHylJ6xxpjc7FwGfL2ilDFU/viewform), and if you agree with it, fill in your information. -You will receive a mail with a timestamp. Please modify our [CLA confirmation page](https://github.com/tass-belgium/picotcp/wiki/picoTCP-CLA-Confirmation-Page), adding the timestamp and your github username. This way we can be sure that nobody else filled in your info in the form. - -Pull requests by people who haven't signed the CLA will, unfortunately, have to be rejected. 
diff --git a/kernel/picotcp/COPYING b/kernel/picotcp/COPYING deleted file mode 100644 index 9556096..0000000 --- a/kernel/picotcp/COPYING +++ /dev/null @@ -1,8 +0,0 @@ -PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. - -Released under the GNU General Public License, version 2, or (at your option) -version 3. -See LICENSE.GPLv2 and LICENSE.GPLv3 for details. - -Different licensing models may exist, at the sole discretion of -the Copyright holders. diff --git a/kernel/picotcp/LICENSE.GPLv2 b/kernel/picotcp/LICENSE.GPLv2 deleted file mode 100644 index d159169..0000000 --- a/kernel/picotcp/LICENSE.GPLv2 +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. 
- - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. 
The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. 
- - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. 
- -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. 
However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. 
-You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. 
diff --git a/kernel/picotcp/LICENSE.GPLv3 b/kernel/picotcp/LICENSE.GPLv3 deleted file mode 100644 index 94a9ed0..0000000 --- a/kernel/picotcp/LICENSE.GPLv3 +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. 
And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. 
- - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. 
- - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. 
Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. 
- - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<https://www.gnu.org/licenses/why-not-lgpl.html>. 
diff --git a/kernel/picotcp/MODTREE b/kernel/picotcp/MODTREE deleted file mode 100644 index 27f9448..0000000 --- a/kernel/picotcp/MODTREE +++ /dev/null @@ -1,27 +0,0 @@ -RTOS: -IPV4: ETH -IPV6: -DEVLOOP: -CRC: -ETH: -TCP: IPV4 -UDP: IPV4 -IPV4FRAG: IPV4 -NAT: IPV4 UDP -ICMP4: IPV4 -MCAST: UDP -PING: ICMP4 -DHCP_CLIENT: UDP -DHCP_SERVER: UDP -DNS_CLIENT: UDP -IPFILTER: IPV4 -OLSR: MCAST -SLAACV4: IPV4 -SNTP_CLIENT: DNS_CLIENT -TFTP: UDP -MDNS: MCAST -DNS_SD: MDNS -AODV: IPV4 UDP -PPP: IPV4 -6LOWPAN: IPV6 -IEEE802154: 6LOWPAN diff --git a/kernel/picotcp/Makefile b/kernel/picotcp/Makefile deleted file mode 100644 index 78ec25b..0000000 --- a/kernel/picotcp/Makefile +++ /dev/null @@ -1,508 +0,0 @@ --include ../../config.mk --include ../../tools/kconfig/.config - -OS:=$(shell uname) -CC:=$(CROSS_COMPILE)gcc -LD:=$(CROSS_COMPILE)ld -AR:=$(CROSS_COMPILE)ar -RANLIB:=$(CROSS_COMPILE)ranlib -SIZE:=$(CROSS_COMPILE)size -STRIP_BIN:=$(CROSS_COMPILE)strip -TEST_LDFLAGS=-pthread $(PREFIX)/modules/*.o $(PREFIX)/lib/*.o -lvdeplug -UNIT_LDFLAGS=-lcheck -lm -pthread -lrt -lsubunit -UNIT_CFLAGS= $(CFLAGS) -Wno-missing-braces - -LIBNAME:="libpicotcp.a" - -PREFIX?=$(PWD)/build -DEBUG?=1 -PROFILE?=0 -PERF?=0 -ENDIAN?=little -STRIP?=0 -RTOS?=0 -GENERIC?=0 -PTHREAD?=0 -ADDRESS_SANITIZER?=1 -GCOV?=0 - -# Default compiled-in protocols -# -TCP?=1 -UDP?=1 -ETH?=1 -IPV4?=1 -IPV4FRAG?=1 -IPV6FRAG?=0 -NAT?=1 -ICMP4?=1 -MCAST?=1 -DEVLOOP?=1 -PING?=1 -DHCP_CLIENT?=1 -DHCP_SERVER?=1 -DNS_CLIENT?=1 -MDNS?=1 -DNS_SD?=1 -SNTP_CLIENT?=1 -IPFILTER?=1 -CRC?=1 -OLSR?=0 -SLAACV4?=1 -TFTP?=1 -AODV?=1 -MEMORY_MANAGER?=0 -MEMORY_MANAGER_PROFILING?=0 -TUN?=0 -TAP?=0 -PCAP?=0 -PPP?=1 -6LOWPAN?=0 -IEEE802154?=0 -IPC?=0 -CYASSL?=0 -WOLFSSL?=0 -POLARSSL?=0 - -#IPv6 related -IPV6?=1 - -TEST?=0 -ifeq ($(TEST),1) - 6LOWPAN=1 - IEEE802154=1 -endif - -UNITS?=0 -ifeq ($(UNITS),1) - 6LOWPAN=1 - IEEE802154=1 - ARCH=faulty -endif - -UNITS_MM?=0 -ifeq ($(UNITS_MM),1) - 6LOWPAN=1 - IEEE802154=1 - MEMORY_MANAGER=1 -endif - 
-EXTRA_CFLAGS+=-DPICO_COMPILE_TIME=`date +%s` -EXTRA_CFLAGS+=$(PLATFORM_CFLAGS) - -CFLAGS=-I$(PREFIX)/include -Iinclude -Imodules $(EXTRA_CFLAGS) -# options for adding warnings -CFLAGS+= -Wall -W -Wextra -Wshadow -Wcast-qual -Wwrite-strings -Wundef -Wdeclaration-after-statement -CFLAGS+= -Wconversion -Wcast-align -Wmissing-prototypes -# options for supressing warnings -CFLAGS+= -Wno-missing-field-initializers - -ifeq ($(CC),clang) -CFLAGS+= -Wunreachable-code-break -Wpointer-bool-conversion -Wmissing-variable-declarations -endif - -ifeq ($(OS),Darwin) - LIBSIZE=stat -f%z - ifeq ($(SIZE),size) - SUMSIZE=$(SIZE) - else - SUMSIZE=$(SIZE) -t - endif -else - LIBSIZE=du -b - SUMSIZE=$(SIZE) -t -endif - -ifeq ($(DEBUG),1) - CFLAGS+=-ggdb -else - ifeq ($(PERF), 1) - CFLAGS+=-O3 - else - CFLAGS+=-Os - endif -endif - -ifeq ($(PROFILE),1) - CFLAGS+=-pg -endif - -ifeq ($(TFTP),1) - MOD_OBJ+=$(LIBBASE)modules/pico_strings.o $(LIBBASE)modules/pico_tftp.o - OPTIONS+=-DPICO_SUPPORT_TFTP -endif - -ifeq ($(AODV),1) - MOD_OBJ+=$(LIBBASE)modules/pico_aodv.o - OPTIONS+=-DPICO_SUPPORT_AODV -endif - -ifeq ($(GENERIC),1) - CFLAGS+=-DGENERIC -endif - -ifeq ($(PTHREAD),1) - CFLAGS+=-DPICO_SUPPORT_PTHREAD -endif - - -ifneq ($(ENDIAN),little) - CFLAGS+=-DPICO_BIGENDIAN -endif - -ifneq ($(RTOS),0) - OPTIONS+=-DPICO_SUPPORT_RTOS -endif - -ifeq ($(ARCH),cortexm4-hardfloat) - CFLAGS+=-DCORTEX_M4_HARDFLOAT -mcpu=cortex-m4 -mthumb -mlittle-endian -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb-interwork -fsingle-precision-constant -endif - -ifeq ($(ARCH),cortexm4-softfloat) - CFLAGS+=-DCORTEX_M4_SOFTFLOAT -mcpu=cortex-m4 -mthumb -mlittle-endian -mfloat-abi=soft -mthumb-interwork -endif - -ifeq ($(ARCH),cortexm3) - CFLAGS+=-DCORTEX_M3 -mcpu=cortex-m3 -mthumb -mlittle-endian -mthumb-interwork -endif - -ifeq ($(ARCH),cortexm0plus) - CFLAGS+=-DCORTEX_M0PLUS -mcpu=cortex-m0plus -mthumb -mlittle-endian -mthumb-interwork -endif - -ifeq ($(ARCH),arm9) - CFLAGS+=-DARM9 -mcpu=arm9e -march=armv5te -gdwarf-2 -Wall 
-marm -mthumb-interwork -fpack-struct -endif - -ifeq ($(ADDRESS_SANITIZER),1) - TEST_LDFLAGS+=-fsanitize=address -fno-omit-frame-pointer -endif - -ifeq ($(GCOV),1) - TEST_LDFLAGS+=-lgcov --coverage - CFLAGS+=-fprofile-arcs -ftest-coverage -endif - -ifeq ($(ARCH),faulty) - CFLAGS+=-DFAULTY -DUNIT_TEST - ifeq ($(ADDRESS_SANITIZER),1) - CFLAGS+=-fsanitize=address - endif - CFLAGS+=-fno-omit-frame-pointer - UNITS_OBJ+=test/pico_faulty.o - TEST_OBJ+=test/pico_faulty.o - DUMMY_EXTRA+=test/pico_faulty.o -endif - -ifeq ($(ARCH),msp430) - CFLAGS+=-DMSP430 -endif - -ifeq ($(ARCH),esp8266) - CFLAGS+=-DESP8266 -Wl,-EL -fno-inline-functions -nostdlib -mlongcalls -mtext-section-literals -endif - -ifeq ($(ARCH),mt7681) - CFLAGS+=-DMT7681 -fno-builtin -ffunction-sections -fno-strict-aliasing -m16bit -mabi=2 -mbaseline=V2 -mcpu=n9 -mno-div -mel -mmw-count=8 -mno-ext-mac -mno-dx-regs -endif - -ifeq ($(ARCH),pic24) - CFLAGS+=-DPIC24 -c -mcpu=24FJ256GA106 -MMD -MF -g -omf=elf \ - -mlarge-code -mlarge-data -msmart-io=1 -msfr-warn=off -endif - -ifeq ($(ARCH),pic32) - CFLAGS+=-DPIC32 -endif - -ifeq ($(ARCH),atmega128) - CFLAGS+=-Wall -mmcu=atmega128 -DAVR -endif - -ifeq ($(ARCH),none) - CFLAGS+=-DARCHNONE -endif - -ifeq ($(ARCH),shared) - CFLAGS+=-fPIC -endif - -%.o:%.c deps - $(CC) -c $(CFLAGS) -o $@ $< - -CORE_OBJ= stack/pico_stack.o \ - stack/pico_frame.o \ - stack/pico_device.o \ - stack/pico_protocol.o \ - stack/pico_socket.o \ - stack/pico_socket_multicast.o \ - stack/pico_tree.o \ - stack/pico_md5.o - -POSIX_OBJ+= modules/pico_dev_vde.o \ - modules/pico_dev_tun.o \ - modules/pico_dev_ipc.o \ - modules/pico_dev_tap.o \ - modules/pico_dev_mock.o - -include rules/debug.mk - -ifneq ($(ETH),0) - include rules/eth.mk -endif -ifneq ($(IPV4),0) - include rules/ipv4.mk -endif -ifneq ($(IPV4FRAG),0) - include rules/ipv4frag.mk -endif -ifneq ($(ICMP4),0) - include rules/icmp4.mk -endif -ifneq ($(TCP),0) - include rules/tcp.mk -endif -ifneq ($(UDP),0) - include rules/udp.mk -endif -ifneq 
($(MCAST),0) - include rules/mcast.mk - include rules/igmp.mk - include rules/mld.mk -endif -ifneq ($(NAT),0) - include rules/nat.mk -endif -ifneq ($(DEVLOOP),0) - include rules/devloop.mk -endif -ifneq ($(DHCP_CLIENT),0) - include rules/dhcp_client.mk -endif -ifneq ($(DHCP_SERVER),0) - include rules/dhcp_server.mk -endif -ifneq ($(DNS_CLIENT),0) - include rules/dns_client.mk -endif -ifneq ($(MDNS),0) - include rules/mdns.mk -endif -ifneq ($(DNS_SD),0) - include rules/dns_sd.mk -endif -ifneq ($(IPFILTER),0) - include rules/ipfilter.mk -endif -ifneq ($(CRC),0) - include rules/crc.mk -endif -ifneq ($(OLSR),0) - include rules/olsr.mk -endif -ifneq ($(SLAACV4),0) - include rules/slaacv4.mk -endif -ifneq ($(IPV6),0) - include rules/ipv6.mk -endif -ifneq ($(MEMORY_MANAGER),0) - include rules/memory_manager.mk -endif -ifneq ($(MEMORY_MANAGER_PROFILING),0) - OPTIONS+=-DPICO_SUPPORT_MM_PROFILING -endif -ifneq ($(SNTP_CLIENT),0) - include rules/sntp_client.mk -endif -ifneq ($(TUN),0) - include rules/tun.mk -endif -ifneq ($(TAP),0) - include rules/tap.mk -endif -ifneq ($(PCAP),0) - include rules/pcap.mk -endif -ifneq ($(PPP),0) - include rules/ppp.mk -endif -ifneq ($(6LOWPAN), 0) - include rules/6lowpan.mk -endif -ifneq ($(IPC),0) - include rules/ipc.mk -endif -ifneq ($(CYASSL),0) - include rules/cyassl.mk -endif -ifneq ($(WOLFSSL),0) - include rules/wolfssl.mk -endif -ifneq ($(POLARSSL),0) - include rules/polarssl.mk -endif - -all: mod core lib - -core: $(CORE_OBJ) - @mkdir -p $(PREFIX)/lib - @mv stack/*.o $(PREFIX)/lib - -mod: $(MOD_OBJ) - @mkdir -p $(PREFIX)/modules - @mv modules/*.o $(PREFIX)/modules || echo - -posix: all $(POSIX_OBJ) - @mv modules/*.o $(PREFIX)/modules || echo - - -TEST_ELF= test/picoapp.elf -TEST6_ELF= test/picoapp6.elf - - -test: posix - @if [ $(TEST) -eq 0 ]; then \ - echo "\n\nsmoke tests should be compiled with TEST=1 from now on!"; \ - exit 1; \ - fi - @mkdir -p $(PREFIX)/test/ - @make -C test/examples PREFIX=$(PREFIX) - @echo -e "\t[CC] picoapp.o" 
- @$(CC) -c -o $(PREFIX)/examples/picoapp.o test/picoapp.c $(CFLAGS) -Itest/examples - @echo -e "\t[LD] $@" - @$(CC) -g -o $(TEST_ELF) -I include -I modules -I $(PREFIX)/include -Wl,--start-group $(TEST_LDFLAGS) $(TEST_OBJ) $(PREFIX)/examples/*.o -Wl,--end-group - @mv test/*.elf $(PREFIX)/test - @install $(PREFIX)/$(TEST_ELF) $(PREFIX)/$(TEST6_ELF) - -tst: test - -$(PREFIX)/include/pico_defines.h: - @mkdir -p $(PREFIX)/lib - @mkdir -p $(PREFIX)/include - @bash ./mkdeps.sh $(PREFIX) $(OPTIONS) - - -deps: $(PREFIX)/include/pico_defines.h - - - -lib: mod core - @cp -f include/*.h $(PREFIX)/include - @cp -fa include/arch $(PREFIX)/include - @cp -f modules/*.h $(PREFIX)/include - @echo -e "\t[AR] $(PREFIX)/lib/$(LIBNAME)" - @$(AR) cru $(PREFIX)/lib/$(LIBNAME) $(PREFIX)/modules/*.o $(PREFIX)/lib/*.o \ - || $(AR) cru $(PREFIX)/lib/$(LIBNAME) $(PREFIX)/lib/*.o - @echo -e "\t[RANLIB] $(PREFIX)/lib/$(LIBNAME)" - @$(RANLIB) $(PREFIX)/lib/$(LIBNAME) - @test $(STRIP) -eq 1 && (echo -e "\t[STRIP] $(PREFIX)/lib/$(LIBNAME)" \ - && $(STRIP_BIN) $(PREFIX)/lib/$(LIBNAME)) \ - || echo -e "\t[KEEP SYMBOLS] $(PREFIX)/lib/$(LIBNAME)" - @echo -e "\t[LIBSIZE] `$(LIBSIZE) $(PREFIX)/lib/$(LIBNAME)`" - @echo -e "`$(SUMSIZE) $(PREFIX)/lib/$(LIBNAME)`" - -loop: mod core - mkdir -p $(PREFIX)/test - @$(CC) -c -o $(PREFIX)/modules/pico_dev_loop.o modules/pico_dev_loop.c $(CFLAGS) - @$(CC) -c -o $(PREFIX)/loop_ping.o test/loop_ping.c $(CFLAGS) -ggdb - -units: mod core lib $(UNITS_OBJ) $(MOD_OBJ) - @if [ $(UNITS) -eq 0 ]; then \ - echo "\n\nunit tests should be compiled with UNITS=1 from now on!"; \ - exit 1; \ - fi - @echo -e "\n\t[UNIT TESTS SUITE]" - @mkdir -p $(PREFIX)/test - @echo -e "\t[CC] units.o" - @$(CC) -g -c -o $(PREFIX)/test/units.o test/units.c $(UNIT_CFLAGS) -I stack -I modules -I includes -I test/unit -DUNIT_TEST - @echo -e "\t[LD] $(PREFIX)/test/units" - @$(CC) -o $(PREFIX)/test/units $(UNIT_CFLAGS) $(PREFIX)/test/units.o $(UNIT_LDFLAGS) \ - $(UNITS_OBJ) 
$(PREFIX)/modules/pico_aodv.o \ - $(PREFIX)/modules/pico_fragments.o - @$(CC) -o $(PREFIX)/test/modunit_pico_protocol.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_protocol.c stack/pico_tree.c $(UNIT_LDFLAGS) $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_pico_frame.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_frame.c stack/pico_tree.c $(UNIT_LDFLAGS) $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_seq.elf $(UNIT_CFLAGS) -I. test/unit/modunit_seq.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_tcp.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_tcp.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dns_client.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_dns_client.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dns_common.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_dns_common.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_mdns.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_mdns.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dns_sd.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_dns_sd.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dev_loop.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_dev_loop.c $(UNIT_LDFLAGS) $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_ipv6_nd.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_ipv6_nd.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_ethernet.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_ethernet.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_pico_stack.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_stack.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_tftp.elf $(UNIT_CFLAGS) -I. 
test/unit/modunit_pico_tftp.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_sntp_client.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_sntp_client.c $(UNIT_LDFLAGS) $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_ipfilter.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_ipfilter.c stack/pico_tree.c $(UNIT_LDFLAGS) $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_aodv.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_aodv.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_fragments.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_fragments.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_queue.elf $(UNIT_CFLAGS) -I. test/unit/modunit_queue.c $(UNIT_LDFLAGS) $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_dev_ppp.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_dev_ppp.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_mld.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_mld.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_igmp.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_igmp.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_hotplug_detection.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_hotplug_detection.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_802154.elf $(UNIT_CFLAGS) -I. test/unit/modunit_pico_802154.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_6lowpan.elf $(UNIT_CFLAGS) -I. -I test/examples test/unit/modunit_pico_6lowpan.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_strings.elf $(UNIT_CFLAGS) -I. 
test/unit/modunit_pico_strings.c $(UNIT_LDFLAGS) $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - -devunits: mod core lib - @echo -e "\n\t[UNIT TESTS SUITE: device drivers]" - @mkdir -p $(PREFIX)/test/unit/device/ - @echo -e "\t[CC] picotcp_mock.o" - @$(CC) -c -o $(PREFIX)/test/unit/device/picotcp_mock.o $(CFLAGS) -I stack -I modules -I includes -I test/unit test/unit/device/picotcp_mock.c - @$(CC) -c -o $(PREFIX)/test/unit/device/unit_dev_vde.o $(CFLAGS) -I stack -I modules -I includes -I test/unit test/unit/device/unit_dev_vde.c - @echo -e "\t[LD] $(PREFIX)/test/devunits" - @$(CC) -o $(PREFIX)/test/devunits $(CFLAGS) -I stack $(PREFIX)/test/unit/device/*.o -lcheck -lm -pthread -lrt - -units_mm: mod core lib - @if [ $(UNITS_MM) -eq 0 ]; then \ - echo "\n\nMM unit tests should be compiled with UNITS_MM=1 from now on!"; \ - exit 1; \ - fi - @echo -e "\n\t[UNIT TESTS SUITE]" - @mkdir -p $(PREFIX)/test - @echo -e "\t[CC] units_mm.o" - @$(CC) -c -o $(PREFIX)/test/units_mm.o test/unit/unit_mem_manager.c $(CFLAGS) -I stack -I modules -I includes -I test/unit - @echo -e "\t[LD] $(PREFIX)/test/units" - @$(CC) -o $(PREFIX)/test/units_mm $(CFLAGS) $(PREFIX)/test/units_mm.o -lcheck -lm -pthread -lrt - - -clean: - @echo -e "\t[CLEAN] $(PREFIX)/" - @rm -rf $(PREFIX) tags - -mbed: - @echo -e "\t[Creating PicoTCP.zip]" - @rm -f PicoTCP.zip - @cp include/pico_socket.h include/socket.tmp - @echo "#define MBED\n" > include/mbed.tmp - @cat include/mbed.tmp include/socket.tmp > include/pico_socket.h - @zip -0 PicoTCP.zip -r include modules stack -x include/arch/ include/arch/* include/pico_config.h include/*.tmp modules/pico_dev_* - @rm include/pico_socket.h include/mbed.tmp - @mv include/socket.tmp include/pico_socket.h - - -style: - @find . -iname "*.[c|h]" | xargs uncrustify --replace -l C -c uncrustify.cfg || true - @find . -iname "*unc-backup*" |xargs rm || true - -dummy: mod core lib $(DUMMY_EXTRA) - @echo testing configuration... 
- @$(CC) -c -o test/dummy.o test/dummy.c $(CFLAGS) - @$(CC) -o dummy test/dummy.o $(DUMMY_EXTRA) $(PREFIX)/lib/libpicotcp.a $(LDFLAGS) $(CFLAGS) - @echo done. - @rm -f test/dummy.o dummy - -ppptest: test/ppp.c lib - gcc -ggdb -c -o ppp.o test/ppp.c -I $(PREFIX)/include/ -I $(PREFIX)/modules/ $(CFLAGS) - gcc -o ppp ppp.o $(PREFIX)/lib/libpicotcp.a $(LDFLAGS) $(CFLAGS) - rm -f ppp.o - -.PHONY: coverity -coverity: - @make clean - @cov-build --dir $(PREFIX)/cov-int make - @tar czvf $(PREFIX)/coverity.tgz -C $(PREFIX) cov-int - -FORCE: diff --git a/kernel/picotcp/Makefile.watcom b/kernel/picotcp/Makefile.watcom deleted file mode 100644 index cbe04d7..0000000 --- a/kernel/picotcp/Makefile.watcom +++ /dev/null @@ -1,403 +0,0 @@ --include ../../config.mk --include ../../tools/kconfig/.config - -WATCOM_PATH:=/opt/watcom -CC:=$(WATCOM_PATH)/binl/$(CROSS_COMPILE)wcc386 -LD:=$(WATCOM_PATH)/binl/$(CROSS_COMPILE)wcl386 -AR:=$(WATCOM_PATH)/binl/$(CROSS_COMPILE)wlib -RANLIB:=$(WATCOM_PATH)/binl/$(CROSS_COMPILE)ranlib -SIZE:=$(CROSS_COMPILE)size -STRIP_BIN:=$(CROSS_COMPILE)strip -TEST_LDFLAGS=-pthread $(PREFIX)/modules/*.o $(PREFIX)/lib/*.o -lvdeplug -LIBNAME:=libpicotcp.a - -PREFIX?=$(PWD)/build -DEBUG?=1 -PROFILE?=0 -PERF?=0 -ENDIAN?=little -STRIP?=0 -RTOS?=0 -GENERIC?=0 -PTHREAD?=0 -ADDRESS_SANITIZER?=1 - -# Default compiled-in protocols -# -TCP?=1 -UDP?=1 -ETH?=1 -IPV4?=1 -IPV4FRAG?=1 -IPV6FRAG?=0 -NAT?=1 -ICMP4?=1 -MCAST?=1 -DEVLOOP?=1 -PING?=1 -DHCP_CLIENT?=1 -DHCP_SERVER?=1 -DNS_CLIENT?=1 -MDNS?=1 -DNS_SD?=1 -SNTP_CLIENT?=1 -IPFILTER?=1 -CRC?=1 -OLSR?=0 -SLAACV4?=1 -TFTP?=1 -AODV?=1 -MEMORY_MANAGER?=0 -MEMORY_MANAGER_PROFILING?=0 -TUN?=0 -TAP?=0 -PCAP?=0 -PPP?=0 -CYASSL?=0 -WOLFSSL?=0 -POLARSSL?=0 - -#IPv6 related -IPV6?=1 - -EXTRA_CFLAGS+=-dPICO_COMPILE_TIME=`date +%s` -EXTRA_CFLAGS+=$(PLATFORM_CFLAGS) - -CFLAGS=-i=$(WATCOM_PATH)/h -i=$(PREFIX)/include -i=include -i=modules $(EXTRA_CFLAGS) -q - - -ifeq ($(DEBUG),1) - CFLAGS+=-od -of -d9 -else - ifeq ($(PERF), 1) - CFLAGS+= 
- else - CFLAGS+= - endif -endif - -ifeq ($(TFTP),1) - MOD_OBJ+=$(LIBBASE)modules/pico_strings.o $(LIBBASE)modules/pico_tftp.o - OPTIONS+=-dPICO_SUPPORT_TFTP -endif - -ifeq ($(AODV),1) - MOD_OBJ+=$(LIBBASE)modules/pico_aodv.o - OPTIONS+=-dPICO_SUPPORT_AODV -endif - -ifeq ($(GENERIC),1) - CFLAGS+=-dGENERIC -endif - -ifeq ($(PTHREAD),1) - CFLAGS+=-dPICO_SUPPORT_PTHREAD -endif - - -ifneq ($(ENDIAN),little) - CFLAGS+=-dPICO_BIGENDIAN -endif - -ifneq ($(RTOS),0) - OPTIONS+=-dPICO_SUPPORT_RTOS -endif - -ifeq ($(ARCH),cortexm4-hardfloat) - CFLAGS+=-dCORTEX_M4_HARDFLOAT -mcpu=cortex-m4 -mthumb -mlittle-endian -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb-interwork -fsingle-precision-constant -endif - -ifeq ($(ARCH),cortexm4-softfloat) - CFLAGS+=-dCORTEX_M4_SOFTFLOAT -mcpu=cortex-m4 -mthumb -mlittle-endian -mfloat-abi=soft -mthumb-interwork -endif - -ifeq ($(ARCH),cortexm3) - CFLAGS+=-dCORTEX_M3 -mcpu=cortex-m3 -mthumb -mlittle-endian -mthumb-interwork -endif - -ifeq ($(ARCH),arm9) - CFLAGS+=-dARM9 -mcpu=arm9e -march=armv5te -gdwarf-2 -Wall -marm -mthumb-interwork -fpack-struct -endif - -ifeq ($(ADDRESS_SANITIZER),1) - TEST_LDFLAGS+=-fsanitize=address -fno-omit-frame-pointer -endif - -ifeq ($(ARCH),faulty) - CFLAGS+=-dFAULTY -dUNIT_TEST - CFLAGS+=-fsanitize=address -fno-omit-frame-pointer - UNITS_OBJ+=test/pico_faulty.o - TEST_OBJ+=test/pico_faulty.o - DUMMY_EXTRA+=test/pico_faulty.o -endif - -ifeq ($(ARCH),msp430) - CFLAGS+=-dMSP430 -endif - -ifeq ($(ARCH),esp8266) - CFLAGS+=-dESP8266 -Wl,-EL -fno-inline-functions -nostdlib -mlongcalls -mtext-section-literals -endif - -ifeq ($(ARCH),mt7681) - CFLAGS+=-dMT7681 -fno-builtin -ffunction-sections -fno-strict-aliasing -m16bit -mabi=2 -mbaseline=V2 -mcpu=n9 -mno-div -mel -mmw-count=8 -mno-ext-mac -mno-dx-regs -endif - -ifeq ($(ARCH),pic24) - CFLAGS+=-dPIC24 -mcpu=24FJ256GA106 -MMD -MF -g -omf=elf \ - -mlarge-code -mlarge-data -msmart-io=1 -msfr-warn=off -endif - -ifeq ($(ARCH),atmega128) - CFLAGS+=-Wall -mmcu=atmega128 -dAVR -endif 
- -ifeq ($(ARCH),none) - CFLAGS+=-dARCHNONE -endif - -ifeq ($(ARCH),shared) - CFLAGS+=-fPIC -endif - -%.o:%.c deps - $(CC) $(CFLAGS) -fo=$@ $< - -CORE_OBJ= stack/pico_stack.o \ - stack/pico_frame.o \ - stack/pico_device.o \ - stack/pico_protocol.o \ - stack/pico_socket.o \ - stack/pico_socket_multicast.o \ - stack/pico_tree.o \ - stack/pico_md5.o - -POSIX_OBJ+= modules/pico_dev_vde.o \ - modules/pico_dev_tun.o \ - modules/pico_dev_tap.o \ - modules/pico_dev_mock.o - -ifneq ($(ETH),0) - include rules/eth.mk -endif -ifneq ($(IPV4),0) - include rules/ipv4.mk -endif -ifneq ($(IPV4FRAG),0) - include rules/ipv4frag.mk -endif -ifneq ($(ICMP4),0) - include rules/icmp4.mk -endif -ifneq ($(TCP),0) - include rules/tcp.mk -endif -ifneq ($(UDP),0) - include rules/udp.mk -endif -ifneq ($(MCAST),0) - include rules/mcast.mk - include rules/igmp.mk -endif -ifneq ($(NAT),0) - include rules/nat.mk -endif -ifneq ($(DEVLOOP),0) - include rules/devloop.mk -endif -ifneq ($(DHCP_CLIENT),0) - include rules/dhcp_client.mk -endif -ifneq ($(DHCP_SERVER),0) - include rules/dhcp_server.mk -endif -ifneq ($(DNS_CLIENT),0) - include rules/dns_client.mk -endif -ifneq ($(MDNS),0) - include rules/mdns.mk -endif -ifneq ($(DNS_SD),0) - include rules/dns_sd.mk -endif -ifneq ($(IPFILTER),0) - include rules/ipfilter.mk -endif -ifneq ($(CRC),0) - include rules/crc.mk -endif -ifneq ($(OLSR),0) - include rules/olsr.mk -endif -ifneq ($(SLAACV4),0) - include rules/slaacv4.mk -endif -ifneq ($(IPV6),0) - include rules/ipv6.mk -endif -ifneq ($(MEMORY_MANAGER),0) - include rules/memory_manager.mk -endif -ifneq ($(MEMORY_MANAGER_PROFILING),0) - OPTIONS+=-dPICO_SUPPORT_MM_PROFILING -endif -ifneq ($(SNTP_CLIENT),0) - include rules/sntp_client.mk -endif -ifneq ($(TUN),0) - include rules/tun.mk -endif -ifneq ($(TAP),0) - include rules/tap.mk -endif -ifneq ($(PCAP),0) - include rules/pcap.mk -endif -ifneq ($(PPP),0) - include rules/ppp.mk -endif -ifneq ($(CYASSL),0) - include rules/cyassl.mk -endif -ifneq ($(WOLFSSL),0) 
- include rules/wolfssl.mk -endif -ifneq ($(POLARSSL),0) - include rules/polarssl.mk -endif - -all: mod core lib - -core: $(CORE_OBJ) - @mkdir -p $(PREFIX)/lib - @mv stack/*.o $(PREFIX)/lib - -mod: $(MOD_OBJ) - @mkdir -p $(PREFIX)/modules - @mv modules/*.o $(PREFIX)/modules || echo - -posix: all $(POSIX_OBJ) - @mv modules/*.o $(PREFIX)/modules || echo - - -TEST_ELF= test/picoapp.elf -TEST6_ELF= test/picoapp6.elf - - -test: posix - @mkdir -p $(PREFIX)/test/ - @make -C test/examples PREFIX=$(PREFIX) - @echo -e "\t[CC] picoapp.o" - @$(CC) -c -o $(PREFIX)/examples/picoapp.o test/picoapp.c $(CFLAGS) -Itest/examples - @echo -e "\t[LD] $@" - @$(CC) -g -o $(TEST_ELF) -I include -I modules -I $(PREFIX)/include -Wl,--start-group $(TEST_LDFLAGS) $(TEST_OBJ) $(PREFIX)/examples/*.o -Wl,--end-group - @mv test/*.elf $(PREFIX)/test - @install $(PREFIX)/$(TEST_ELF) $(PREFIX)/$(TEST6_ELF) - -tst: test - -$(PREFIX)/include/pico_defines.h: - @mkdir -p $(PREFIX)/lib - @mkdir -p $(PREFIX)/include - @bash ./mkdeps.sh $(PREFIX) $(OPTIONS) - - -deps: $(PREFIX)/include/pico_defines.h - - - -lib: mod core - @cp -f include/*.h $(PREFIX)/include - @cp -fa include/arch $(PREFIX)/include - @cp -f modules/*.h $(PREFIX)/include - @echo -e "\t[AR] $(PREFIX)/lib/$(LIBNAME)" - $(AR) -q -b -n -fag -o=$(PREFIX)/lib/$(LIBNAME) $(PREFIX)/modules/*.o $(PREFIX)/lib/*.o - @echo || $(AR) cru $(PREFIX)/lib/$(LIBNAME) $(PREFIX)/lib/*.o - @echo -e "\t[RANLIB] $(PREFIX)/lib/$(LIBNAME)" - @$(RANLIB) $(PREFIX)/lib/$(LIBNAME) - @echo -e "\t[LIBSIZE] `du -b $(PREFIX)/lib/$(LIBNAME)`" - -loop: mod core - mkdir -p $(PREFIX)/test - @$(CC) -c -o $(PREFIX)/modules/pico_dev_loop.o modules/pico_dev_loop.c $(CFLAGS) - @$(CC) -c -o $(PREFIX)/loop_ping.o test/loop_ping.c $(CFLAGS) -ggdb - -units: mod core lib $(UNITS_OBJ) $(MOD_OBJ) - @echo -e "\n\t[UNIT TESTS SUITE]" - @mkdir -p $(PREFIX)/test - @echo -e "\t[CC] units.o" - @$(CC) -c -o $(PREFIX)/test/units.o test/units.c $(CFLAGS) -I stack -I modules -I includes -I test/unit 
-dUNIT_TEST - @echo -e "\t[LD] $(PREFIX)/test/units" - @$(CC) -o $(PREFIX)/test/units $(CFLAGS) $(PREFIX)/test/units.o -lcheck -lm -pthread -lrt \ - $(UNITS_OBJ) $(PREFIX)/modules/pico_aodv.o \ - $(PREFIX)/modules/pico_fragments.o - @$(CC) -o $(PREFIX)/test/modunit_pico_protocol.elf $(CFLAGS) -I. test/unit/modunit_pico_protocol.c stack/pico_tree.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_pico_frame.elf $(CFLAGS) -I. test/unit/modunit_pico_frame.c stack/pico_tree.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_seq.elf $(CFLAGS) -I. test/unit/modunit_seq.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_tcp.elf $(CFLAGS) -I. test/unit/modunit_pico_tcp.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dns_client.elf $(CFLAGS) -I. test/unit/modunit_pico_dns_client.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dns_common.elf $(CFLAGS) -I. test/unit/modunit_pico_dns_common.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_mdns.elf $(CFLAGS) -I. test/unit/modunit_pico_mdns.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dns_sd.elf $(CFLAGS) -I. test/unit/modunit_pico_dns_sd.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_dev_loop.elf $(CFLAGS) -I. test/unit/modunit_pico_dev_loop.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_ipv6_nd.elf $(CFLAGS) -I. test/unit/modunit_pico_ipv6_nd.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_pico_stack.elf $(CFLAGS) -I. test/unit/modunit_pico_stack.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_tftp.elf $(CFLAGS) -I. 
test/unit/modunit_pico_tftp.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_sntp_client.elf $(CFLAGS) -I. test/unit/modunit_pico_sntp_client.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_ipfilter.elf $(CFLAGS) -I. test/unit/modunit_pico_ipfilter.c stack/pico_tree.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_aodv.elf $(CFLAGS) -I. test/unit/modunit_pico_aodv.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_fragments.elf $(CFLAGS) -I. test/unit/modunit_pico_fragments.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - @$(CC) -o $(PREFIX)/test/modunit_queue.elf $(CFLAGS) -I. test/unit/modunit_queue.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) - @$(CC) -o $(PREFIX)/test/modunit_dev_ppp.elf $(CFLAGS) -I. test/unit/modunit_pico_dev_ppp.c -lcheck -lm -pthread -lrt $(UNITS_OBJ) $(PREFIX)/lib/libpicotcp.a - -devunits: mod core lib - @echo -e "\n\t[UNIT TESTS SUITE: device drivers]" - @mkdir -p $(PREFIX)/test/unit/device/ - @echo -e "\t[CC] picotcp_mock.o" - @$(CC) -c -o $(PREFIX)/test/unit/device/picotcp_mock.o $(CFLAGS) -I stack -I modules -I includes -I test/unit test/unit/device/picotcp_mock.c - @$(CC) -c -o $(PREFIX)/test/unit/device/unit_dev_vde.o $(CFLAGS) -I stack -I modules -I includes -I test/unit test/unit/device/unit_dev_vde.c - @echo -e "\t[LD] $(PREFIX)/test/devunits" - @$(CC) -o $(PREFIX)/test/devunits $(CFLAGS) -I stack $(PREFIX)/test/unit/device/*.o -lcheck -lm -pthread -lrt - -units_mm: mod core lib - @echo -e "\n\t[UNIT TESTS SUITE]" - @mkdir -p $(PREFIX)/test - @echo -e "\t[CC] units_mm.o" - @$(CC) -c -o $(PREFIX)/test/units_mm.o test/unit/unit_mem_manager.c $(CFLAGS) -I stack -I modules -I includes -I test/unit - @echo -e "\t[LD] $(PREFIX)/test/units" - @$(CC) -o $(PREFIX)/test/units_mm $(CFLAGS) $(PREFIX)/test/units_mm.o -lcheck -lm -pthread -lrt - - -clean: - @echo -e 
"\t[CLEAN] $(PREFIX)/" - @rm -rf $(PREFIX) tags - -mbed: - @echo -e "\t[Creating PicoTCP.zip]" - @rm -f PicoTCP.zip - @cp include/pico_socket.h include/socket.tmp - @echo "#define MBED\n" > include/mbed.tmp - @cat include/mbed.tmp include/socket.tmp > include/pico_socket.h - @zip -0 PicoTCP.zip -r include modules stack -x include/arch/ include/arch/* include/pico_config.h include/*.tmp modules/pico_dev_* - @rm include/pico_socket.h include/mbed.tmp - @mv include/socket.tmp include/pico_socket.h - - -style: - @find . -iname "*.[c|h]" | xargs -x uncrustify --replace -l C -c uncrustify.cfg || true - @find . -iname "*unc-backup*" |xargs -x rm || true - -dummy: mod core lib $(DUMMY_EXTRA) - @echo testing configuration... - @$(CC) -c -o test/dummy.o test/dummy.c $(CFLAGS) - @$(CC) -o dummy test/dummy.o $(DUMMY_EXTRA) $(PREFIX)/lib/libpicotcp.a $(LDFLAGS) $(CFLAGS) - @echo done. - @rm -f test/dummy.o dummy - -ppptest: test/ppp.c lib - gcc -ggdb -c -o ppp.o test/ppp.c -I build/include/ -I build/modules/ $(CFLAGS) - gcc -o ppp ppp.o build/lib/libpicotcp.a $(LDFLAGS) $(CFLAGS) - rm -f ppp.o - - -FORCE: diff --git a/kernel/picotcp/README.md b/kernel/picotcp/README.md deleted file mode 100644 index 55ab359..0000000 --- a/kernel/picotcp/README.md +++ /dev/null @@ -1,234 +0,0 @@ -# picoTCP - ---------------- - -Welcome to the one and only picoTCP repository. - -picoTCP is a small-footprint, modular TCP/IP stack designed for embedded systems and the Internet of Things. It's actively being developed by *[Altran Intelligent Systems](http://intelligent-systems.altran.com/)*. - -This code is released under the terms of GNU GPL v2 and GNU GPL v3. Some rights reserved. -Other licenses may apply at the sole discretion of the copyright holders. - -Learn how to use picoTCP in your project by going through the **Getting Started guide** on our [GitHub wiki](https://github.com/tass-belgium/picotcp/wiki). 
- -For more information send us an email or contact us on [Twitter](https://twitter.com/picotcp), [Facebook](https://www.facebook.com/picoTCP) or [Reddit](http://www.reddit.com/r/picotcp/). - -Wondering about picoTCP's code quality? Check [our TiCS score](http://162.13.112.57:42506/tiobeweb/TICS/TqiDashboard.html#axes=Project()&metric=tqi&sel=Project(PicoTCP_rel)) - ---------------- - -## Continuous integration - -Functional tests: -[![Jenkins autotest](http://162.13.84.104:8080/buildStatus/icon?job=picoTCP_Rel/PicoTCP_rel_autotest)](http://162.13.84.104:8080/job/picoTCP_Rel/job/PicoTCP_rel_autotest) - -Unit tests : -[![Jenkins unit tests](http://162.13.84.104:8080/buildStatus/icon?job=picoTCP_Rel/PicoTCP_rel_unit_tests)](http://162.13.84.104:8080/job/picoTCP_Rel/job/PicoTCP_rel_unit_tests) - -RFC compliance : -[![Jenkins RFC Compliance](http://162.13.84.104:8080/buildStatus/icon?job=picoTCP_Rel/PicoTCP_rel_RF_mbed)](http://162.13.84.104:8080/job/picoTCP_Rel/job/PicoTCP_rel_RF_mbed) - -TICS quality : -[![Jenkins TICS](http://162.13.84.104:8080/buildStatus/icon?job=picoTCP_Rel/PicoTCP_rel_TICS)](http://162.13.84.104:8080/job/picoTCP_Rel/job/PicoTCP_rel_TICS/) -Coverity Scan Build status: -[![Coverity Scan Build Status](https://scan.coverity.com/projects/7944/badge.svg)](https://scan.coverity.com/projects/7944) - ---------------- - -## It runs on (pretty much) everything - -By keeping interfaces simple, the porting effort to new platforms and OSses are very low. To give you an indication: porting to a new platform can be done in 3 days or less, a new OS in a single day and if you really go crazy, you can do an initial port in a single evening. Different platforms, mean different compilers, that’s why we continuously compile our stack with a bunch of them. The following list shows some of the currently supported platforms, device drivers and compilers. 
- -### PicoTCP has been used with - -**Platforms picoTCP runs on**: -ARM Cortex-M series (ST Micro STM, NXP LPC, TI Stellaris, Freescale K64F), -ARM ARM9-series (ST Micro STR9), -Texas Instruments (MSP430), -Microchip (PIC24, PIC32), -Atmel (AVR 8bit), -Linux (User space (TUN/TAP), Kernel space), -Windows (User space (TAP)) - -**Network devices picoTCP has worked with**: -BCM43362 (IEEE 802.11), MRF24WG (IEEE 802.11), LPC Ethernet ENET/EMAC (IEEE 802.3), Stellaris Ethernet (IEEE 802.3), STM32 Ethernet (IEEE 802.3), Wiznet W5100 (IEEE 802.3), USB CDC-ECM (CDC1.2), PPP, Virtual drivers ( TUN/TAP, VDE, Libpcap) - -**(RT)OSes picoTCP has been integrated into**: -No OS / Bare metal, FreeRTOS, mbed-RTOS, Frosted, linux / POSIX, MS DOS, MS Windows - -**Libraries picoTCP has been integrated with**: -wolfSSL, mbedTLS, Mongoose RESTful library, MicroPython - -**Compilers picoTCP compiles under**: -GCC, Clang, TCC, ARM-RCVT, IAR, XC-16, XC-32, MSP-GCC, AVR-GCC - -Unfortunately we can't release all the code, a.o. because some parts depend on code or binaries that aren't GPL compatible, some parts were developed under a commercial contract, and some consist of very rough proof-of-concept code. -If you want to know more about the availability under the commercial license, or the possibility of using our expert services for porting or driver development, feel free to contact us at picotcp@altran.com. - -Your favorite not in the list? Check out the wiki for information and examples on how to port picoTCP to a new platform! - ---------------- - -## Highly configurable and modular design - -Features are developed as modules in picoTCP, allowing you to pick the features you want in your application. This results in the smallest possible stack that remains compliant with the internet standards. The schematic below provides an overview of all implemented protocols. 
- -![modular](https://s1.postimg.org/139xbnv7lb/image.png) - ---------------- - - -## Simple example - -### Preparations -This example uses Ubuntu 14.04. It works on other linux distibutions as well, though you may need to change some package names. See [setting up the environment](https://github.com/tass-belgium/picotcp/wiki/Setting-up-the-environment#prerequisite-packages) for some more info. - -```bash -sudo apt-get install git check vde2 libvdeplug2-dev libpcap0.8-dev openvpn wireshark -git clone https://github.com/tass-belgium/picotcp -cd picotcp -make TAP=1 -cd .. -``` - -### The code - -Then make a new directory, e.g. `example`, and create a file with the following content : -[//]: # (The code below is pulled through our CI - please leave the code extractor comments intact!) -[//]: # (code extractor start) -```C -#include -#include -#include -#include -#include - -#define NUM_PING 10 - -static int finished = 0; - -/* gets called when the ping receives a reply, or encounters a problem */ -void cb_ping(struct pico_icmp4_stats *s) -{ - char host[30]; - pico_ipv4_to_string(host, s->dst.addr); - if (s->err == 0) { - /* if all is well, print some pretty info */ - printf("%lu bytes from %s: icmp_req=%lu ttl=%lu time=%lu ms\n", s->size, - host, s->seq, s->ttl, (long unsigned int)s->time); - if (s->seq >= NUM_PING) - finished = 1; - } else { - /* if something went wrong, print it and signal we want to stop */ - printf("PING %lu to %s: Error %d\n", s->seq, host, s->err); - finished = 1; - } -} - - -int main(void){ - int id; - struct pico_ip4 ipaddr, netmask; - struct pico_device* dev; - - /* initialise the stack. Super important if you don't want ugly stuff like - * segfaults and such! 
*/ - pico_stack_init(); - - /* create the tap device */ - dev = pico_tap_create("tap0"); - if (!dev) - return -1; - - /* assign the IP address to the tap interface */ - pico_string_to_ipv4("192.168.5.4", &ipaddr.addr); - pico_string_to_ipv4("255.255.255.0", &netmask.addr); - pico_ipv4_link_add(dev, ipaddr, netmask); - - printf("starting ping\n"); - id = pico_icmp4_ping("192.168.5.5", NUM_PING, 1000, 10000, 64, cb_ping); - - if (id == -1) - return -1; - - /* keep running stack ticks to have picoTCP do its network magic. Note that - * you can do other stuff here as well, or sleep a little. This will impact - * your network performance, but everything should keep working (provided - * you don't go overboard with the delays). */ - while (finished != 1) - { - usleep(1000); - pico_stack_tick(); - } - - printf("finished !\n"); - return 0; -} - -``` - -[//]: # (code extractor stop) - -### Building and running - -Now we can compile this and link it, by running -```bash -gcc -c -o main.o -I../picotcp/build/include main.c -gcc -o main.elf main.o ../picotcp/build/lib/libpicotcp.a -``` - -Next we'll create a persistent tap device - a virtual network port. You don't need to repeat this each time, the device will exist until you reboot, or until you go `sudo tunctl -d tap0` -```bash -sudo tunctl -u -sudo ifconfig tap0 192.168.5.5 -``` - -Now, you should be able to run `./main.elf`, and see output like -``` -Protocol ethernet registered (layer: 2). -Protocol ipv4 registered (layer: 3). -Protocol ipv6 registered (layer: 3). -Protocol icmp4 registered (layer: 4). -Protocol icmp6 registered (layer: 4). -Protocol igmp registered (layer: 4). -Protocol udp registered (layer: 4). -Protocol tcp registered (layer: 4). -Device tap0 created. 
-Assigned ipv4 192.168.5.4 to device tap0 -starting ping -64 bytes from 192.168.5.5: icmp_req=1 ttl=64 time=5 ms -64 bytes from 192.168.5.5: icmp_req=2 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=3 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=4 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=5 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=6 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=7 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=8 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=9 ttl=64 time=0 ms -64 bytes from 192.168.5.5: icmp_req=10 ttl=64 time=0 ms -finished ! -``` - -While the application is running, you can also run -``` -ping 192.168.5.4 -``` -to send pings in the other direction. - -### Investigating what happened - -Run wireshark, and sniff the tap0 interface. Then run the `./main.elf` again, and see what happens. You should see an ARP request from picoTCP to Linux, and a reply. After that you should see the ping requests and replies going back and forth. - -Note, sometimes you may see lots of other stuff, IPv6 router solicitations, various broadcasts, mDNS, DNS-SD, etc - this is when your Linux notices the new network interface is up, and starts all sorts of discoveries. With the persistent TAP device, this usually only happens the first time you start the application. Start a new wireshark capture, and start the application again, it should be much cleaner now. - -Now you could make some changes to the `main.c` file, and experiment a bit! Keep some statistics of your pings (max, min, avg time). Open a UDP socket, send some stuff to a netcat instance on your linux. Or build a rudimentary port scanner, see what ports are open on your machine. - - -This is just a very quick overview, more info can be found in our [wiki](https://github.com/tass-belgium/picotcp/wiki). - ---------------- - -## Contributors - -Contributors are very welcome.
Report a bug, suggest a way to improve our documentation, or write some new code. - -Note however that, before accepting your code, we would ask you to sign our [Contributors License Agreement](https://docs.google.com/forms/d/1-z6lsT75l6ZIrgHGEWrWdHylJ6xxpjc7FwGfL2ilDFU/viewform). Your code remains under your copyright, and will always be available under GPLv2 and GPLv3. However, this CLA enables us to use picoTCP (including code from external contributors like you) under other licenses, including our commercial license. By doing commercial projects, we can keep investing in the quality and features of picoTCP. - - diff --git a/kernel/picotcp/RFC/get_all_rfc b/kernel/picotcp/RFC/get_all_rfc deleted file mode 100755 index 701bba4..0000000 --- a/kernel/picotcp/RFC/get_all_rfc +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -wget -O rfc4614.txt http://tools.ietf.org/rfc/rfc4614.txt - - -for RFC in `grep "\[RFC" rfc4614.txt | sed -e "s/^.*RFC/rfc/" | grep -v "rfc \|rfc$" | sed -e "s/\].*$/.txt/g" |sort |uniq`; do - wget -O ${RFC} http://tools.ietf.org/rfc/${RFC} -done - -wget -O rfc3927.txt http://tools.ietf.org/rfc/rfc3927.txt - -# Get PPP related RFC's -for RFC in $(echo 1332 1334 1661 1662 1877 1994 | sed -r "s/[^ ]+/rfc&.txt/g"); do - wget -O ${RFC} http://tools.ietf.org/rfc/${RFC} -done diff --git a/kernel/picotcp/RFC/rfc0793.txt b/kernel/picotcp/RFC/rfc0793.txt deleted file mode 100644 index 603a78c..0000000 --- a/kernel/picotcp/RFC/rfc0793.txt +++ /dev/null @@ -1,5247 +0,0 @@ - - -RFC: 793 - - - - - - - - TRANSMISSION CONTROL PROTOCOL - - - DARPA INTERNET PROGRAM - - PROTOCOL SPECIFICATION - - - - September 1981 - - - - - - - - - - - - - - prepared for - - Defense Advanced Research Projects Agency - Information Processing Techniques Office - 1400 Wilson Boulevard - Arlington, Virginia 22209 - - - - - - - - by - - Information Sciences Institute - University of Southern California - 4676 Admiralty Way - Marina del Rey, California 90291 - - - -September 1981 - 
Transmission Control Protocol - - - - TABLE OF CONTENTS - - PREFACE ........................................................ iii - -1. INTRODUCTION ..................................................... 1 - - 1.1 Motivation .................................................... 1 - 1.2 Scope ......................................................... 2 - 1.3 About This Document ........................................... 2 - 1.4 Interfaces .................................................... 3 - 1.5 Operation ..................................................... 3 - -2. PHILOSOPHY ....................................................... 7 - - 2.1 Elements of the Internetwork System ........................... 7 - 2.2 Model of Operation ............................................ 7 - 2.3 The Host Environment .......................................... 8 - 2.4 Interfaces .................................................... 9 - 2.5 Relation to Other Protocols ................................... 9 - 2.6 Reliable Communication ........................................ 9 - 2.7 Connection Establishment and Clearing ........................ 10 - 2.8 Data Communication ........................................... 12 - 2.9 Precedence and Security ...................................... 13 - 2.10 Robustness Principle ......................................... 13 - -3. FUNCTIONAL SPECIFICATION ........................................ 15 - - 3.1 Header Format ................................................ 15 - 3.2 Terminology .................................................. 19 - 3.3 Sequence Numbers ............................................. 24 - 3.4 Establishing a connection .................................... 30 - 3.5 Closing a Connection ......................................... 37 - 3.6 Precedence and Security ...................................... 40 - 3.7 Data Communication ........................................... 
40 - 3.8 Interfaces ................................................... 44 - 3.9 Event Processing ............................................. 52 - -GLOSSARY ............................................................ 79 - -REFERENCES .......................................................... 85 - - - - - - - - - - - - [Page i] - - - September 1981 -Transmission Control Protocol - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page ii] - - -September 1981 - Transmission Control Protocol - - - - PREFACE - - - -This document describes the DoD Standard Transmission Control Protocol -(TCP). There have been nine earlier editions of the ARPA TCP -specification on which this standard is based, and the present text -draws heavily from them. There have been many contributors to this work -both in terms of concepts and in terms of text. This edition clarifies -several details and removes the end-of-letter buffer-size adjustments, -and redescribes the letter mechanism as a push function. - - Jon Postel - - Editor - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page iii] - - - - -RFC: 793 -Replaces: RFC 761 -IENs: 129, 124, 112, 81, -55, 44, 40, 27, 21, 5 - - TRANSMISSION CONTROL PROTOCOL - - DARPA INTERNET PROGRAM - PROTOCOL SPECIFICATION - - - - 1. INTRODUCTION - -The Transmission Control Protocol (TCP) is intended for use as a highly -reliable host-to-host protocol between hosts in packet-switched computer -communication networks, and in interconnected systems of such networks. - -This document describes the functions to be performed by the -Transmission Control Protocol, the program that implements it, and its -interface to programs or users that require its services. - -1.1. Motivation - - Computer communication systems are playing an increasingly important - role in military, government, and civilian environments. 
This - document focuses its attention primarily on military computer - communication requirements, especially robustness in the presence of - communication unreliability and availability in the presence of - congestion, but many of these problems are found in the civilian and - government sector as well. - - As strategic and tactical computer communication networks are - developed and deployed, it is essential to provide means of - interconnecting them and to provide standard interprocess - communication protocols which can support a broad range of - applications. In anticipation of the need for such standards, the - Deputy Undersecretary of Defense for Research and Engineering has - declared the Transmission Control Protocol (TCP) described herein to - be a basis for DoD-wide inter-process communication protocol - standardization. - - TCP is a connection-oriented, end-to-end reliable protocol designed to - fit into a layered hierarchy of protocols which support multi-network - applications. The TCP provides for reliable inter-process - communication between pairs of processes in host computers attached to - distinct but interconnected computer communication networks. Very few - assumptions are made as to the reliability of the communication - protocols below the TCP layer. TCP assumes it can obtain a simple, - potentially unreliable datagram service from the lower level - protocols. In principle, the TCP should be able to operate above a - wide spectrum of communication systems ranging from hard-wired - connections to packet-switched or circuit-switched networks. - - - [Page 1] - - - September 1981 -Transmission Control Protocol -Introduction - - - - TCP is based on concepts first described by Cerf and Kahn in [1]. The - TCP fits into a layered protocol architecture just above a basic - Internet Protocol [2] which provides a way for the TCP to send and - receive variable-length segments of information enclosed in internet - datagram "envelopes". 
The internet datagram provides a means for - addressing source and destination TCPs in different networks. The - internet protocol also deals with any fragmentation or reassembly of - the TCP segments required to achieve transport and delivery through - multiple networks and interconnecting gateways. The internet protocol - also carries information on the precedence, security classification - and compartmentation of the TCP segments, so this information can be - communicated end-to-end across multiple networks. - - Protocol Layering - - +---------------------+ - | higher-level | - +---------------------+ - | TCP | - +---------------------+ - | internet protocol | - +---------------------+ - |communication network| - +---------------------+ - - Figure 1 - - Much of this document is written in the context of TCP implementations - which are co-resident with higher level protocols in the host - computer. Some computer systems will be connected to networks via - front-end computers which house the TCP and internet protocol layers, - as well as network specific software. The TCP specification describes - an interface to the higher level protocols which appears to be - implementable even for the front-end case, as long as a suitable - host-to-front end protocol is implemented. - -1.2. Scope - - The TCP is intended to provide a reliable process-to-process - communication service in a multinetwork environment. The TCP is - intended to be a host-to-host protocol in common use in multiple - networks. - -1.3. About this Document - - This document represents a specification of the behavior required of - any TCP implementation, both in its interactions with higher level - protocols and in its interactions with other TCPs. The rest of this - - -[Page 2] - - -September 1981 - Transmission Control Protocol - Introduction - - - - section offers a very brief view of the protocol interfaces and - operation. Section 2 summarizes the philosophical basis for the TCP - design. 
Section 3 offers both a detailed description of the actions - required of TCP when various events occur (arrival of new segments, - user calls, errors, etc.) and the details of the formats of TCP - segments. - -1.4. Interfaces - - The TCP interfaces on one side to user or application processes and on - the other side to a lower level protocol such as Internet Protocol. - - The interface between an application process and the TCP is - illustrated in reasonable detail. This interface consists of a set of - calls much like the calls an operating system provides to an - application process for manipulating files. For example, there are - calls to open and close connections and to send and receive data on - established connections. It is also expected that the TCP can - asynchronously communicate with application programs. Although - considerable freedom is permitted to TCP implementors to design - interfaces which are appropriate to a particular operating system - environment, a minimum functionality is required at the TCP/user - interface for any valid implementation. - - The interface between TCP and lower level protocol is essentially - unspecified except that it is assumed there is a mechanism whereby the - two levels can asynchronously pass information to each other. - Typically, one expects the lower level protocol to specify this - interface. TCP is designed to work in a very general environment of - interconnected networks. The lower level protocol which is assumed - throughout this document is the Internet Protocol [2]. - -1.5. Operation - - As noted above, the primary purpose of the TCP is to provide reliable, - securable logical circuit or connection service between pairs of - processes. 
To provide this service on top of a less reliable internet - communication system requires facilities in the following areas: - - Basic Data Transfer - Reliability - Flow Control - Multiplexing - Connections - Precedence and Security - - The basic operation of the TCP in each of these areas is described in - the following paragraphs. - - - [Page 3] - - - September 1981 -Transmission Control Protocol -Introduction - - - - Basic Data Transfer: - - The TCP is able to transfer a continuous stream of octets in each - direction between its users by packaging some number of octets into - segments for transmission through the internet system. In general, - the TCPs decide when to block and forward data at their own - convenience. - - Sometimes users need to be sure that all the data they have - submitted to the TCP has been transmitted. For this purpose a push - function is defined. To assure that data submitted to a TCP is - actually transmitted the sending user indicates that it should be - pushed through to the receiving user. A push causes the TCPs to - promptly forward and deliver data up to that point to the receiver. - The exact push point might not be visible to the receiving user and - the push function does not supply a record boundary marker. - - Reliability: - - The TCP must recover from data that is damaged, lost, duplicated, or - delivered out of order by the internet communication system. This - is achieved by assigning a sequence number to each octet - transmitted, and requiring a positive acknowledgment (ACK) from the - receiving TCP. If the ACK is not received within a timeout - interval, the data is retransmitted. At the receiver, the sequence - numbers are used to correctly order segments that may be received - out of order and to eliminate duplicates. Damage is handled by - adding a checksum to each segment transmitted, checking it at the - receiver, and discarding damaged segments. 
- - As long as the TCPs continue to function properly and the internet - system does not become completely partitioned, no transmission - errors will affect the correct delivery of data. TCP recovers from - internet communication system errors. - - Flow Control: - - TCP provides a means for the receiver to govern the amount of data - sent by the sender. This is achieved by returning a "window" with - every ACK indicating a range of acceptable sequence numbers beyond - the last segment successfully received. The window indicates an - allowed number of octets that the sender may transmit before - receiving further permission. - - - - - - - -[Page 4] - - -September 1981 - Transmission Control Protocol - Introduction - - - - Multiplexing: - - To allow for many processes within a single Host to use TCP - communication facilities simultaneously, the TCP provides a set of - addresses or ports within each host. Concatenated with the network - and host addresses from the internet communication layer, this forms - a socket. A pair of sockets uniquely identifies each connection. - That is, a socket may be simultaneously used in multiple - connections. - - The binding of ports to processes is handled independently by each - Host. However, it proves useful to attach frequently used processes - (e.g., a "logger" or timesharing service) to fixed sockets which are - made known to the public. These services can then be accessed - through the known addresses. Establishing and learning the port - addresses of other processes may involve more dynamic mechanisms. - - Connections: - - The reliability and flow control mechanisms described above require - that TCPs initialize and maintain certain status information for - each data stream. The combination of this information, including - sockets, sequence numbers, and window sizes, is called a connection. - Each connection is uniquely specified by a pair of sockets - identifying its two sides. 
- - When two processes wish to communicate, their TCP's must first - establish a connection (initialize the status information on each - side). When their communication is complete, the connection is - terminated or closed to free the resources for other uses. - - Since connections must be established between unreliable hosts and - over the unreliable internet communication system, a handshake - mechanism with clock-based sequence numbers is used to avoid - erroneous initialization of connections. - - Precedence and Security: - - The users of TCP may indicate the security and precedence of their - communication. Provision is made for default values to be used when - these features are not needed. - - - - - - - - - - [Page 5] - - - September 1981 -Transmission Control Protocol - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 6] - - -September 1981 - Transmission Control Protocol - - - - 2. PHILOSOPHY - -2.1. Elements of the Internetwork System - - The internetwork environment consists of hosts connected to networks - which are in turn interconnected via gateways. It is assumed here - that the networks may be either local networks (e.g., the ETHERNET) or - large networks (e.g., the ARPANET), but in any case are based on - packet switching technology. The active agents that produce and - consume messages are processes. Various levels of protocols in the - networks, the gateways, and the hosts support an interprocess - communication system that provides two-way data flow on logical - connections between process ports. - - The term packet is used generically here to mean the data of one - transaction between a host and its network. The format of data blocks - exchanged within the a network will generally not be of concern to us. - - Hosts are computers attached to a network, and from the communication - network's point of view, are the sources and destinations of packets. 
- Processes are viewed as the active elements in host computers (in - accordance with the fairly common definition of a process as a program - in execution). Even terminals and files or other I/O devices are - viewed as communicating with each other through the use of processes. - Thus, all communication is viewed as inter-process communication. - - Since a process may need to distinguish among several communication - streams between itself and another process (or processes), we imagine - that each process may have a number of ports through which it - communicates with the ports of other processes. - -2.2. Model of Operation - - Processes transmit data by calling on the TCP and passing buffers of - data as arguments. The TCP packages the data from these buffers into - segments and calls on the internet module to transmit each segment to - the destination TCP. The receiving TCP places the data from a segment - into the receiving user's buffer and notifies the receiving user. The - TCPs include control information in the segments which they use to - ensure reliable ordered data transmission. - - The model of internet communication is that there is an internet - protocol module associated with each TCP which provides an interface - to the local network. This internet module packages TCP segments - inside internet datagrams and routes these datagrams to a destination - internet module or intermediate gateway. To transmit the datagram - through the local network, it is embedded in a local network packet. - - The packet switches may perform further packaging, fragmentation, or - - - [Page 7] - - - September 1981 -Transmission Control Protocol -Philosophy - - - - other operations to achieve the delivery of the local packet to the - destination internet module. - - At a gateway between networks, the internet datagram is "unwrapped" - from its local packet and examined to determine through which network - the internet datagram should travel next. 
The internet datagram is - then "wrapped" in a local packet suitable to the next network and - routed to the next gateway, or to the final destination. - - A gateway is permitted to break up an internet datagram into smaller - internet datagram fragments if this is necessary for transmission - through the next network. To do this, the gateway produces a set of - internet datagrams; each carrying a fragment. Fragments may be - further broken into smaller fragments at subsequent gateways. The - internet datagram fragment format is designed so that the destination - internet module can reassemble fragments into internet datagrams. - - A destination internet module unwraps the segment from the datagram - (after reassembling the datagram, if necessary) and passes it to the - destination TCP. - - This simple model of the operation glosses over many details. One - important feature is the type of service. This provides information - to the gateway (or internet module) to guide it in selecting the - service parameters to be used in traversing the next network. - Included in the type of service information is the precedence of the - datagram. Datagrams may also carry security information to permit - host and gateways that operate in multilevel secure environments to - properly segregate datagrams for security considerations. - -2.3. The Host Environment - - The TCP is assumed to be a module in an operating system. The users - access the TCP much like they would access the file system. The TCP - may call on other operating system functions, for example, to manage - data structures. The actual interface to the network is assumed to be - controlled by a device driver module. The TCP does not call on the - network device driver directly, but rather calls on the internet - datagram protocol module which may in turn call on the device driver. - - The mechanisms of TCP do not preclude implementation of the TCP in a - front-end processor. 
However, in such an implementation, a - host-to-front-end protocol must provide the functionality to support - the type of TCP-user interface described in this document. - - - - - - -[Page 8] - - -September 1981 - Transmission Control Protocol - Philosophy - - - -2.4. Interfaces - - The TCP/user interface provides for calls made by the user on the TCP - to OPEN or CLOSE a connection, to SEND or RECEIVE data, or to obtain - STATUS about a connection. These calls are like other calls from user - programs on the operating system, for example, the calls to open, read - from, and close a file. - - The TCP/internet interface provides calls to send and receive - datagrams addressed to TCP modules in hosts anywhere in the internet - system. These calls have parameters for passing the address, type of - service, precedence, security, and other control information. - -2.5. Relation to Other Protocols - - The following diagram illustrates the place of the TCP in the protocol - hierarchy: - - - +------+ +-----+ +-----+ +-----+ - |Telnet| | FTP | |Voice| ... | | Application Level - +------+ +-----+ +-----+ +-----+ - | | | | - +-----+ +-----+ +-----+ - | TCP | | RTP | ... | | Host Level - +-----+ +-----+ +-----+ - | | | - +-------------------------------+ - | Internet Protocol & ICMP | Gateway Level - +-------------------------------+ - | - +---------------------------+ - | Local Network Protocol | Network Level - +---------------------------+ - - Protocol Relationships - - Figure 2. - - It is expected that the TCP will be able to support higher level - protocols efficiently. It should be easy to interface higher level - protocols like the ARPANET Telnet or AUTODIN II THP to the TCP. - -2.6. Reliable Communication - - A stream of data sent on a TCP connection is delivered reliably and in - order at the destination. 
- - - - [Page 9] - - - September 1981 -Transmission Control Protocol -Philosophy - - - - Transmission is made reliable via the use of sequence numbers and - acknowledgments. Conceptually, each octet of data is assigned a - sequence number. The sequence number of the first octet of data in a - segment is transmitted with that segment and is called the segment - sequence number. Segments also carry an acknowledgment number which - is the sequence number of the next expected data octet of - transmissions in the reverse direction. When the TCP transmits a - segment containing data, it puts a copy on a retransmission queue and - starts a timer; when the acknowledgment for that data is received, the - segment is deleted from the queue. If the acknowledgment is not - received before the timer runs out, the segment is retransmitted. - - An acknowledgment by TCP does not guarantee that the data has been - delivered to the end user, but only that the receiving TCP has taken - the responsibility to do so. - - To govern the flow of data between TCPs, a flow control mechanism is - employed. The receiving TCP reports a "window" to the sending TCP. - This window specifies the number of octets, starting with the - acknowledgment number, that the receiving TCP is currently prepared to - receive. - -2.7. Connection Establishment and Clearing - - To identify the separate data streams that a TCP may handle, the TCP - provides a port identifier. Since port identifiers are selected - independently by each TCP they might not be unique. To provide for - unique addresses within each TCP, we concatenate an internet address - identifying the TCP with a port identifier to create a socket which - will be unique throughout all networks connected together. - - A connection is fully specified by the pair of sockets at the ends. A - local socket may participate in many connections to different foreign - sockets. 
A connection can be used to carry data in both directions, - that is, it is "full duplex". - - TCPs are free to associate ports with processes however they choose. - However, several basic concepts are necessary in any implementation. - There must be well-known sockets which the TCP associates only with - the "appropriate" processes by some means. We envision that processes - may "own" ports, and that processes can initiate connections only on - the ports they own. (Means for implementing ownership is a local - issue, but we envision a Request Port user command, or a method of - uniquely allocating a group of ports to a given process, e.g., by - associating the high order bits of a port name with a given process.) - - A connection is specified in the OPEN call by the local port and - foreign socket arguments. In return, the TCP supplies a (short) local - - -[Page 10] - - -September 1981 - Transmission Control Protocol - Philosophy - - - - connection name by which the user refers to the connection in - subsequent calls. There are several things that must be remembered - about a connection. To store this information we imagine that there - is a data structure called a Transmission Control Block (TCB). One - implementation strategy would have the local connection name be a - pointer to the TCB for this connection. The OPEN call also specifies - whether the connection establishment is to be actively pursued, or to - be passively waited for. - - A passive OPEN request means that the process wants to accept incoming - connection requests rather than attempting to initiate a connection. - Often the process requesting a passive OPEN will accept a connection - request from any caller. In this case a foreign socket of all zeros - is used to denote an unspecified socket. Unspecified foreign sockets - are allowed only on passive OPENs. 
- - A service process that wished to provide services for unknown other - processes would issue a passive OPEN request with an unspecified - foreign socket. Then a connection could be made with any process that - requested a connection to this local socket. It would help if this - local socket were known to be associated with this service. - - Well-known sockets are a convenient mechanism for a priori associating - a socket address with a standard service. For instance, the - "Telnet-Server" process is permanently assigned to a particular - socket, and other sockets are reserved for File Transfer, Remote Job - Entry, Text Generator, Echoer, and Sink processes (the last three - being for test purposes). A socket address might be reserved for - access to a "Look-Up" service which would return the specific socket - at which a newly created service would be provided. The concept of a - well-known socket is part of the TCP specification, but the assignment - of sockets to services is outside this specification. (See [4].) - - Processes can issue passive OPENs and wait for matching active OPENs - from other processes and be informed by the TCP when connections have - been established. Two processes which issue active OPENs to each - other at the same time will be correctly connected. This flexibility - is critical for the support of distributed computing in which - components act asynchronously with respect to each other. - - There are two principal cases for matching the sockets in the local - passive OPENs and an foreign active OPENs. In the first case, the - local passive OPENs has fully specified the foreign socket. In this - case, the match must be exact. In the second case, the local passive - OPENs has left the foreign socket unspecified. In this case, any - foreign socket is acceptable as long as the local sockets match. - Other possibilities include partially restricted matches. 
- - - - [Page 11] - - - September 1981 -Transmission Control Protocol -Philosophy - - - - If there are several pending passive OPENs (recorded in TCBs) with the - same local socket, a foreign active OPEN will be matched to a TCB - with the specific foreign socket in the foreign active OPEN, if such a - TCB exists, before selecting a TCB with an unspecified foreign socket. - - The procedures to establish connections utilize the synchronize (SYN) - control flag and involve an exchange of three messages. This - exchange has been termed a three-way hand shake [3]. - - A connection is initiated by the rendezvous of an arriving segment - containing a SYN and a waiting TCB entry each created by a user OPEN - command. The matching of local and foreign sockets determines when a - connection has been initiated. The connection becomes "established" - when sequence numbers have been synchronized in both directions. - - The clearing of a connection also involves the exchange of segments, - in this case carrying the FIN control flag. - -2.8. Data Communication - - The data that flows on a connection may be thought of as a stream of - octets. The sending user indicates in each SEND call whether the data - in that call (and any preceding calls) should be immediately pushed - through to the receiving user by the setting of the PUSH flag. - - A sending TCP is allowed to collect data from the sending user and to - send that data in segments at its own convenience, until the push - function is signaled, then it must send all unsent data. When a - receiving TCP sees the PUSH flag, it must not wait for more data from - the sending TCP before passing the data to the receiving process. - - There is no necessary relationship between push functions and segment - boundaries. The data in any particular segment may be the result of a - single SEND call, in whole or part, or of multiple SEND calls.
- - The purpose of push function and the PUSH flag is to push data through - from the sending user to the receiving user. It does not provide a - record service. - - There is a coupling between the push function and the use of buffers - of data that cross the TCP/user interface. Each time a PUSH flag is - associated with data placed into the receiving user's buffer, the - buffer is returned to the user for processing even if the buffer is - not filled. If data arrives that fills the user's buffer before a - PUSH is seen, the data is passed to the user in buffer size units. - - TCP also provides a means to communicate to the receiver of data that - at some point further along in the data stream than the receiver is - - -[Page 12] - - -September 1981 - Transmission Control Protocol - Philosophy - - - - currently reading there is urgent data. TCP does not attempt to - define what the user specifically does upon being notified of pending - urgent data, but the general notion is that the receiving process will - take action to process the urgent data quickly. - -2.9. Precedence and Security - - The TCP makes use of the internet protocol type of service field and - security option to provide precedence and security on a per connection - basis to TCP users. Not all TCP modules will necessarily function in - a multilevel secure environment; some may be limited to unclassified - use only, and others may operate at only one security level and - compartment. Consequently, some TCP implementations and services to - users may be limited to a subset of the multilevel secure case. - - TCP modules which operate in a multilevel secure environment must - properly mark outgoing segments with the security, compartment, and - precedence. Such TCP modules must also provide to their users or - higher level protocols such as Telnet or THP an interface to allow - them to specify the desired security level, compartment, and - precedence of connections. - -2.10. 
Robustness Principle - - TCP implementations will follow a general principle of robustness: be - conservative in what you do, be liberal in what you accept from - others. - - - - - - - - - - - - - - - - - - - - - - - - [Page 13] - - - September 1981 -Transmission Control Protocol - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 14] - - -September 1981 - Transmission Control Protocol - - - - 3. FUNCTIONAL SPECIFICATION - -3.1. Header Format - - TCP segments are sent as internet datagrams. The Internet Protocol - header carries several information fields, including the source and - destination host addresses [2]. A TCP header follows the internet - header, supplying information specific to the TCP protocol. This - division allows for the existence of host level protocols other than - TCP. - - TCP Header Format - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Source Port | Destination Port | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Sequence Number | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Acknowledgment Number | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data | |U|A|P|R|S|F| | - | Offset| Reserved |R|C|S|S|Y|I| Window | - | | |G|K|H|T|N|N| | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Checksum | Urgent Pointer | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Options | Padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | data | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - TCP Header Format - - Note that one tick mark represents one bit position. - - Figure 3. - - Source Port: 16 bits - - The source port number. - - Destination Port: 16 bits - - The destination port number. 
- - - - - [Page 15] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - Sequence Number: 32 bits - - The sequence number of the first data octet in this segment (except - when SYN is present). If SYN is present the sequence number is the - initial sequence number (ISN) and the first data octet is ISN+1. - - Acknowledgment Number: 32 bits - - If the ACK control bit is set this field contains the value of the - next sequence number the sender of the segment is expecting to - receive. Once a connection is established this is always sent. - - Data Offset: 4 bits - - The number of 32 bit words in the TCP Header. This indicates where - the data begins. The TCP header (even one including options) is an - integral number of 32 bits long. - - Reserved: 6 bits - - Reserved for future use. Must be zero. - - Control Bits: 6 bits (from left to right): - - URG: Urgent Pointer field significant - ACK: Acknowledgment field significant - PSH: Push Function - RST: Reset the connection - SYN: Synchronize sequence numbers - FIN: No more data from sender - - Window: 16 bits - - The number of data octets beginning with the one indicated in the - acknowledgment field which the sender of this segment is willing to - accept. - - Checksum: 16 bits - - The checksum field is the 16 bit one's complement of the one's - complement sum of all 16 bit words in the header and text. If a - segment contains an odd number of header and text octets to be - checksummed, the last octet is padded on the right with zeros to - form a 16 bit word for checksum purposes. The pad is not - transmitted as part of the segment. While computing the checksum, - the checksum field itself is replaced with zeros. - - The checksum also covers a 96 bit pseudo header conceptually - - -[Page 16] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - prefixed to the TCP header. 
This pseudo header contains the Source - Address, the Destination Address, the Protocol, and TCP length. - This gives the TCP protection against misrouted segments. This - information is carried in the Internet Protocol and is transferred - across the TCP/Network interface in the arguments or results of - calls by the TCP on the IP. - - +--------+--------+--------+--------+ - | Source Address | - +--------+--------+--------+--------+ - | Destination Address | - +--------+--------+--------+--------+ - | zero | PTCL | TCP Length | - +--------+--------+--------+--------+ - - The TCP Length is the TCP header length plus the data length in - octets (this is not an explicitly transmitted quantity, but is - computed), and it does not count the 12 octets of the pseudo - header. - - Urgent Pointer: 16 bits - - This field communicates the current value of the urgent pointer as a - positive offset from the sequence number in this segment. The - urgent pointer points to the sequence number of the octet following - the urgent data. This field is only to be interpreted in segments with - the URG control bit set. - - Options: variable - - Options may occupy space at the end of the TCP header and are a - multiple of 8 bits in length. All options are included in the - checksum. An option may begin on any octet boundary. There are two - cases for the format of an option: - - Case 1: A single octet of option-kind. - - Case 2: An octet of option-kind, an octet of option-length, and - the actual option-data octets. - - The option-length counts the two octets of option-kind and - option-length as well as the option-data octets. - - Note that the list of options may be shorter than the data offset - field might imply. The content of the header beyond the - End-of-Option option must be header padding (i.e., zero). - - A TCP must implement all options.
- - - [Page 17] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - Currently defined options include (kind indicated in octal): - - Kind Length Meaning - ---- ------ ------- - 0 - End of option list. - 1 - No-Operation. - 2 4 Maximum Segment Size. - - - Specific Option Definitions - - End of Option List - - +--------+ - |00000000| - +--------+ - Kind=0 - - This option code indicates the end of the option list. This - might not coincide with the end of the TCP header according to - the Data Offset field. This is used at the end of all options, - not the end of each option, and need only be used if the end of - the options would not otherwise coincide with the end of the TCP - header. - - No-Operation - - +--------+ - |00000001| - +--------+ - Kind=1 - - This option code may be used between options, for example, to - align the beginning of a subsequent option on a word boundary. - There is no guarantee that senders will use this option, so - receivers must be prepared to process options even if they do - not begin on a word boundary. - - Maximum Segment Size - - +--------+--------+---------+--------+ - |00000010|00000100| max seg size | - +--------+--------+---------+--------+ - Kind=2 Length=4 - - - - - - -[Page 18] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - Maximum Segment Size Option Data: 16 bits - - If this option is present, then it communicates the maximum - receive segment size at the TCP which sends this segment. - This field must only be sent in the initial connection request - (i.e., in segments with the SYN control bit set). If this - option is not used, any segment size is allowed. - - Padding: variable - - The TCP header padding is used to ensure that the TCP header ends - and data begins on a 32 bit boundary. The padding is composed of - zeros. - -3.2. 
Terminology - - Before we can discuss very much about the operation of the TCP we need - to introduce some detailed terminology. The maintenance of a TCP - connection requires the remembering of several variables. We conceive - of these variables being stored in a connection record called a - Transmission Control Block or TCB. Among the variables stored in the - TCB are the local and remote socket numbers, the security and - precedence of the connection, pointers to the user's send and receive - buffers, pointers to the retransmit queue and to the current segment. - In addition several variables relating to the send and receive - sequence numbers are stored in the TCB. - - Send Sequence Variables - - SND.UNA - send unacknowledged - SND.NXT - send next - SND.WND - send window - SND.UP - send urgent pointer - SND.WL1 - segment sequence number used for last window update - SND.WL2 - segment acknowledgment number used for last window - update - ISS - initial send sequence number - - Receive Sequence Variables - - RCV.NXT - receive next - RCV.WND - receive window - RCV.UP - receive urgent pointer - IRS - initial receive sequence number - - - - - - - [Page 19] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - The following diagrams may help to relate some of these variables to - the sequence space. - - Send Sequence Space - - 1 2 3 4 - ----------|----------|----------|---------- - SND.UNA SND.NXT SND.UNA - +SND.WND - - 1 - old sequence numbers which have been acknowledged - 2 - sequence numbers of unacknowledged data - 3 - sequence numbers allowed for new data transmission - 4 - future sequence numbers which are not yet allowed - - Send Sequence Space - - Figure 4. - - - - The send window is the portion of the sequence space labeled 3 in - figure 4. 
- - Receive Sequence Space - - 1 2 3 - ----------|----------|---------- - RCV.NXT RCV.NXT - +RCV.WND - - 1 - old sequence numbers which have been acknowledged - 2 - sequence numbers allowed for new reception - 3 - future sequence numbers which are not yet allowed - - Receive Sequence Space - - Figure 5. - - - - The receive window is the portion of the sequence space labeled 2 in - figure 5. - - There are also some variables used frequently in the discussion that - take their values from the fields of the current segment. - - - - -[Page 20] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - Current Segment Variables - - SEG.SEQ - segment sequence number - SEG.ACK - segment acknowledgment number - SEG.LEN - segment length - SEG.WND - segment window - SEG.UP - segment urgent pointer - SEG.PRC - segment precedence value - - A connection progresses through a series of states during its - lifetime. The states are: LISTEN, SYN-SENT, SYN-RECEIVED, - ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, - TIME-WAIT, and the fictional state CLOSED. CLOSED is fictional - because it represents the state when there is no TCB, and therefore, - no connection. Briefly the meanings of the states are: - - LISTEN - represents waiting for a connection request from any remote - TCP and port. - - SYN-SENT - represents waiting for a matching connection request - after having sent a connection request. - - SYN-RECEIVED - represents waiting for a confirming connection - request acknowledgment after having both received and sent a - connection request. - - ESTABLISHED - represents an open connection, data received can be - delivered to the user. The normal state for the data transfer phase - of the connection. - - FIN-WAIT-1 - represents waiting for a connection termination request - from the remote TCP, or an acknowledgment of the connection - termination request previously sent. 
- - FIN-WAIT-2 - represents waiting for a connection termination request - from the remote TCP. - - CLOSE-WAIT - represents waiting for a connection termination request - from the local user. - - CLOSING - represents waiting for a connection termination request - acknowledgment from the remote TCP. - - LAST-ACK - represents waiting for an acknowledgment of the - connection termination request previously sent to the remote TCP - (which includes an acknowledgment of its connection termination - request). - - - - [Page 21] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - TIME-WAIT - represents waiting for enough time to pass to be sure - the remote TCP received the acknowledgment of its connection - termination request. - - CLOSED - represents no connection state at all. - - A TCP connection progresses from one state to another in response to - events. The events are the user calls, OPEN, SEND, RECEIVE, CLOSE, - ABORT, and STATUS; the incoming segments, particularly those - containing the SYN, ACK, RST and FIN flags; and timeouts. - - The state diagram in figure 6 illustrates only state changes, together - with the causing events and resulting actions, but addresses neither - error conditions nor actions which are not connected with state - changes. In a later section, more detail is offered with respect to - the reaction of the TCP to events. - - NOTA BENE: this diagram is only a summary and must not be taken as - the total specification.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 22] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - - +---------+ ---------\ active OPEN - | CLOSED | \ ----------- - +---------+<---------\ \ create TCB - | ^ \ \ snd SYN - passive OPEN | | CLOSE \ \ - ------------ | | ---------- \ \ - create TCB | | delete TCB \ \ - V | \ \ - +---------+ CLOSE | \ - | LISTEN | ---------- | | - +---------+ delete TCB | | - rcv SYN | | SEND | | - ----------- | | ------- | V - +---------+ snd SYN,ACK / \ snd SYN +---------+ - | |<----------------- ------------------>| | - | SYN | rcv SYN | SYN | - | RCVD |<-----------------------------------------------| SENT | - | | snd ACK | | - | |------------------ -------------------| | - +---------+ rcv ACK of SYN \ / rcv SYN,ACK +---------+ - | -------------- | | ----------- - | x | | snd ACK - | V V - | CLOSE +---------+ - | ------- | ESTAB | - | snd FIN +---------+ - | CLOSE | | rcv FIN - V ------- | | ------- - +---------+ snd FIN / \ snd ACK +---------+ - | FIN |<----------------- ------------------>| CLOSE | - | WAIT-1 |------------------ | WAIT | - +---------+ rcv FIN \ +---------+ - | rcv ACK of FIN ------- | CLOSE | - | -------------- snd ACK | ------- | - V x V snd FIN V - +---------+ +---------+ +---------+ - |FINWAIT-2| | CLOSING | | LAST-ACK| - +---------+ +---------+ +---------+ - | rcv ACK of FIN | rcv ACK of FIN | - | rcv FIN -------------- | Timeout=2MSL -------------- | - | ------- x V ------------ x V - \ snd ACK +---------+delete TCB +---------+ - ------------------------>|TIME WAIT|------------------>| CLOSED | - +---------+ +---------+ - - TCP Connection State Diagram - Figure 6. - - - [Page 23] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - -3.3. Sequence Numbers - - A fundamental notion in the design is that every octet of data sent - over a TCP connection has a sequence number. 
Since every octet is - sequenced, each of them can be acknowledged. The acknowledgment - mechanism employed is cumulative so that an acknowledgment of sequence - number X indicates that all octets up to but not including X have been - received. This mechanism allows for straight-forward duplicate - detection in the presence of retransmission. Numbering of octets - within a segment is that the first data octet immediately following - the header is the lowest numbered, and the following octets are - numbered consecutively. - - It is essential to remember that the actual sequence number space is - finite, though very large. This space ranges from 0 to 2**32 - 1. - Since the space is finite, all arithmetic dealing with sequence - numbers must be performed modulo 2**32. This unsigned arithmetic - preserves the relationship of sequence numbers as they cycle from - 2**32 - 1 to 0 again. There are some subtleties to computer modulo - arithmetic, so great care should be taken in programming the - comparison of such values. The symbol "=<" means "less than or equal" - (modulo 2**32). - - The typical kinds of sequence number comparisons which the TCP must - perform include: - - (a) Determining that an acknowledgment refers to some sequence - number sent but not yet acknowledged. - - (b) Determining that all sequence numbers occupied by a segment - have been acknowledged (e.g., to remove the segment from a - retransmission queue). - - (c) Determining that an incoming segment contains sequence numbers - which are expected (i.e., that the segment "overlaps" the - receive window). - - - - - - - - - - - - - - -[Page 24] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - In response to sending data the TCP will receive acknowledgments. The - following comparisons are needed to process the acknowledgments. 
- - SND.UNA = oldest unacknowledged sequence number - - SND.NXT = next sequence number to be sent - - SEG.ACK = acknowledgment from the receiving TCP (next sequence - number expected by the receiving TCP) - - SEG.SEQ = first sequence number of a segment - - SEG.LEN = the number of octets occupied by the data in the segment - (counting SYN and FIN) - - SEG.SEQ+SEG.LEN-1 = last sequence number of a segment - - A new acknowledgment (called an "acceptable ack"), is one for which - the inequality below holds: - - SND.UNA < SEG.ACK =< SND.NXT - - A segment on the retransmission queue is fully acknowledged if the sum - of its sequence number and length is less or equal than the - acknowledgment value in the incoming segment. - - When data is received the following comparisons are needed: - - RCV.NXT = next sequence number expected on an incoming segments, and - is the left or lower edge of the receive window - - RCV.NXT+RCV.WND-1 = last sequence number expected on an incoming - segment, and is the right or upper edge of the receive window - - SEG.SEQ = first sequence number occupied by the incoming segment - - SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming - segment - - A segment is judged to occupy a portion of valid receive sequence - space if - - RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - - or - - RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND - - - - [Page 25] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - The first part of this test checks to see if the beginning of the - segment falls in the window, the second part of the test checks to see - if the end of the segment falls in the window; if the segment passes - either part of the test it contains data in the window. - - Actually, it is a little more complicated than this. 
Due to zero - windows and zero length segments, we have four cases for the - acceptability of an incoming segment: - - Segment Receive Test - Length Window - ------- ------- ------------------------------------------- - - 0 0 SEG.SEQ = RCV.NXT - - 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - - >0 0 not acceptable - - >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND - - Note that when the receive window is zero no segments should be - acceptable except ACK segments. Thus, it is possible for a TCP to - maintain a zero receive window while transmitting data and receiving - ACKs. However, even when the receive window is zero, a TCP must - process the RST and URG fields of all incoming segments. - - We have taken advantage of the numbering scheme to protect certain - control information as well. This is achieved by implicitly including - some control flags in the sequence space so they can be retransmitted - and acknowledged without confusion (i.e., one and only one copy of the - control will be acted upon). Control information is not physically - carried in the segment data space. Consequently, we must adopt rules - for implicitly assigning sequence numbers to control. The SYN and FIN - are the only controls requiring this protection, and these controls - are used only at connection opening and closing. For sequence number - purposes, the SYN is considered to occur before the first actual data - octet of the segment in which it occurs, while the FIN is considered - to occur after the last actual data octet in a segment in which it - occurs. The segment length (SEG.LEN) includes both data and sequence - space occupying controls. When a SYN is present then SEG.SEQ is the - sequence number of the SYN.
- - - - - - - -[Page 26] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - Initial Sequence Number Selection - - The protocol places no restriction on a particular connection being - used over and over again. A connection is defined by a pair of - sockets. New instances of a connection will be referred to as - incarnations of the connection. The problem that arises from this is - -- "how does the TCP identify duplicate segments from previous - incarnations of the connection?" This problem becomes apparent if the - connection is being opened and closed in quick succession, or if the - connection breaks with loss of memory and is then reestablished. - - To avoid confusion we must prevent segments from one incarnation of a - connection from being used while the same sequence numbers may still - be present in the network from an earlier incarnation. We want to - assure this, even if a TCP crashes and loses all knowledge of the - sequence numbers it has been using. When new connections are created, - an initial sequence number (ISN) generator is employed which selects a - new 32 bit ISN. The generator is bound to a (possibly fictitious) 32 - bit clock whose low order bit is incremented roughly every 4 - microseconds. Thus, the ISN cycles approximately every 4.55 hours. - Since we assume that segments will stay in the network no more than - the Maximum Segment Lifetime (MSL) and that the MSL is less than 4.55 - hours we can reasonably assume that ISN's will be unique. - - For each connection there is a send sequence number and a receive - sequence number. The initial send sequence number (ISS) is chosen by - the data sending TCP, and the initial receive sequence number (IRS) is - learned during the connection establishing procedure. - - For a connection to be established or initialized, the two TCPs must - synchronize on each other's initial sequence numbers. 
This is done in - an exchange of connection establishing segments carrying a control bit - called "SYN" (for synchronize) and the initial sequence numbers. As a - shorthand, segments carrying the SYN bit are also called "SYNs". - Hence, the solution requires a suitable mechanism for picking an - initial sequence number and a slightly involved handshake to exchange - the ISN's. - - The synchronization requires each side to send its own initial - sequence number and to receive a confirmation of it in acknowledgment - from the other side. Each side must also receive the other side's - initial sequence number and send a confirming acknowledgment. - - 1) A --> B SYN my sequence number is X - 2) A <-- B ACK your sequence number is X - 3) A <-- B SYN my sequence number is Y - 4) A --> B ACK your sequence number is Y - - - - [Page 27] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - Because steps 2 and 3 can be combined in a single message this is - called the three way (or three message) handshake. - - A three way handshake is necessary because sequence numbers are not - tied to a global clock in the network, and TCPs may have different - mechanisms for picking the ISN's. The receiver of the first SYN has - no way of knowing whether the segment was an old delayed one or not, - unless it remembers the last sequence number used on the connection - (which is not always possible), and so it must ask the sender to - verify this SYN. The three way handshake and the advantages of a - clock-driven scheme are discussed in [3]. - - Knowing When to Keep Quiet - - To be sure that a TCP does not create a segment that carries a - sequence number which may be duplicated by an old segment remaining in - the network, the TCP must keep quiet for a maximum segment lifetime - (MSL) before assigning any sequence numbers upon starting up or - recovering from a crash in which memory of sequence numbers in use was - lost.
For this specification the MSL is taken to be 2 minutes. This - is an engineering choice, and may be changed if experience indicates - it is desirable to do so. Note that if a TCP is reinitialized in some - sense, yet retains its memory of sequence numbers in use, then it need - not wait at all; it must only be sure to use sequence numbers larger - than those recently used. - - The TCP Quiet Time Concept - - This specification provides that hosts which "crash" without - retaining any knowledge of the last sequence numbers transmitted on - each active (i.e., not closed) connection shall delay emitting any - TCP segments for at least the agreed Maximum Segment Lifetime (MSL) - in the internet system of which the host is a part. In the - paragraphs below, an explanation for this specification is given. - TCP implementors may violate the "quiet time" restriction, but only - at the risk of causing some old data to be accepted as new or new - data rejected as old duplicated by some receivers in the internet - system. - - TCPs consume sequence number space each time a segment is formed and - entered into the network output queue at a source host. The - duplicate detection and sequencing algorithm in the TCP protocol - relies on the unique binding of segment data to sequence space to - the extent that sequence numbers will not cycle through all 2**32 - values before the segment data bound to those sequence numbers has - been delivered and acknowledged by the receiver and all duplicate - copies of the segments have "drained" from the internet. Without - such an assumption, two distinct TCP segments could conceivably be - - -[Page 28] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - assigned the same or overlapping sequence numbers, causing confusion - at the receiver as to which data is new and which is old. Remember - that each segment is bound to as many consecutive sequence numbers - as there are octets of data in the segment. 
- - Under normal conditions, TCPs keep track of the next sequence number - to emit and the oldest awaiting acknowledgment so as to avoid - mistakenly using a sequence number over before its first use has - been acknowledged. This alone does not guarantee that old duplicate - data is drained from the net, so the sequence space has been made - very large to reduce the probability that a wandering duplicate will - cause trouble upon arrival. At 2 megabits/sec. it takes 4.5 hours - to use up 2**32 octets of sequence space. Since the maximum segment - lifetime in the net is not likely to exceed a few tens of seconds, - this is deemed ample protection for foreseeable nets, even if data - rates escalate to 10's of megabits/sec. At 100 megabits/sec, the - cycle time is 5.4 minutes which may be a little short, but still - within reason. - - The basic duplicate detection and sequencing algorithm in TCP can be - defeated, however, if a source TCP does not have any memory of the - sequence numbers it last used on a given connection. For example, if - the TCP were to start all connections with sequence number 0, then - upon crashing and restarting, a TCP might re-form an earlier - connection (possibly after half-open connection resolution) and emit - packets with sequence numbers identical to or overlapping with - packets still in the network which were emitted on an earlier - incarnation of the same connection. In the absence of knowledge - about the sequence numbers used on a particular connection, the TCP - specification recommends that the source delay for MSL seconds - before emitting segments on the connection, to allow time for - segments from the earlier connection incarnation to drain from the - system. - - Even hosts which can remember the time of day and use it to select - initial sequence number values are not immune from this problem - (i.e., even if time of day is used to select an initial sequence - number for each new connection incarnation).
- - Suppose, for example, that a connection is opened starting with - sequence number S. Suppose that this connection is not used much - and that eventually the initial sequence number function (ISN(t)) - takes on a value equal to the sequence number, say S1, of the last - segment sent by this TCP on a particular connection. Now suppose, - at this instant, the host crashes, recovers, and establishes a new - incarnation of the connection. The initial sequence number chosen is - S1 = ISN(t) -- last used sequence number on old incarnation of - connection! If the recovery occurs quickly enough, any old - - - [Page 29] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - duplicates in the net bearing sequence numbers in the neighborhood - of S1 may arrive and be treated as new packets by the receiver of - the new incarnation of the connection. - - The problem is that the recovering host may not know for how long it - crashed nor does it know whether there are still old duplicates in - the system from earlier connection incarnations. - - One way to deal with this problem is to deliberately delay emitting - segments for one MSL after recovery from a crash -- this is the "quiet - time" specification. Hosts which prefer to avoid waiting and are - willing to risk possible confusion of old and new packets at a given - destination may choose not to wait for the "quiet time". - Implementors may provide TCP users with the ability to select on a - connection by connection basis whether to wait after a crash, or may - informally implement the "quiet time" for all connections. - Obviously, even where a user selects to "wait," this is not - necessary after the host has been "up" for at least MSL seconds.
- - To summarize: every segment emitted occupies one or more sequence - numbers in the sequence space, the numbers occupied by a segment are - "busy" or "in use" until MSL seconds have passed, upon crashing a - block of space-time is occupied by the octets of the last emitted - segment, if a new connection is started too soon and uses any of the - sequence numbers in the space-time footprint of the last segment of - the previous connection incarnation, there is a potential sequence - number overlap area which could cause confusion at the receiver. - -3.4. Establishing a connection - - The "three-way handshake" is the procedure used to establish a - connection. This procedure normally is initiated by one TCP and - responded to by another TCP. The procedure also works if two TCP - simultaneously initiate the procedure. When simultaneous attempt - occurs, each TCP receives a "SYN" segment which carries no - acknowledgment after it has sent a "SYN". Of course, the arrival of - an old duplicate "SYN" segment can potentially make it appear, to the - recipient, that a simultaneous connection initiation is in progress. - Proper use of "reset" segments can disambiguate these cases. - - Several examples of connection initiation follow. Although these - examples do not show connection synchronization using data-carrying - segments, this is perfectly legitimate, so long as the receiving TCP - doesn't deliver the data to the user until it is clear the data is - valid (i.e., the data must be buffered at the receiver until the - connection reaches the ESTABLISHED state). The three-way handshake - reduces the possibility of false connections. It is the - - - -[Page 30] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - implementation of a trade-off between memory and messages to provide - information for this checking. - - The simplest three-way handshake is shown in figure 7 below. The - figures should be interpreted in the following way. 
Each line is - numbered for reference purposes. Right arrows (-->) indicate - departure of a TCP segment from TCP A to TCP B, or arrival of a - segment at B from A. Left arrows (<--) indicate the reverse. - Ellipsis (...) indicates a segment which is still in the network - (delayed). An "XXX" indicates a segment which is lost or rejected. - Comments appear in parentheses. TCP states represent the state AFTER - the departure or arrival of the segment (whose contents are shown in - the center of each line). Segment contents are shown in abbreviated - form, with sequence number, control flags, and ACK field. Other - fields such as window, addresses, lengths, and text have been left out - in the interest of clarity. - - - - TCP A TCP B - - 1. CLOSED LISTEN - - 2. SYN-SENT --> <SEQ=100><CTL=SYN> --> SYN-RECEIVED - - 3. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED - - 4. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK> --> ESTABLISHED - - 5. ESTABLISHED --> <SEQ=101><ACK=301><CTL=ACK><DATA> --> ESTABLISHED - - Basic 3-Way Handshake for Connection Synchronization - - Figure 7. - - In line 2 of figure 7, TCP A begins by sending a SYN segment - indicating that it will use sequence numbers starting with sequence - number 100. In line 3, TCP B sends a SYN and acknowledges the SYN it - received from TCP A. Note that the acknowledgment field indicates TCP - B is now expecting to hear sequence 101, acknowledging the SYN which - occupied sequence 100. - - At line 4, TCP A responds with an empty segment containing an ACK for - TCP B's SYN; and in line 5, TCP A sends some data. Note that the - sequence number of the segment in line 5 is the same as in line 4 - because the ACK does not occupy sequence number space (if it did, we - would wind up ACKing ACK's!). - - - - [Page 31] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - Simultaneous initiation is only slightly more complex, as is shown in - figure 8. Each TCP cycles from CLOSED to SYN-SENT to SYN-RECEIVED to - ESTABLISHED. - - - - TCP A TCP B - - 1. CLOSED CLOSED - - 2.
SYN-SENT --> <SEQ=100><CTL=SYN> ... - - 3. SYN-RECEIVED <-- <SEQ=300><CTL=SYN> <-- SYN-SENT - - 4. ... <SEQ=100><CTL=SYN> --> SYN-RECEIVED - - 5. SYN-RECEIVED --> <SEQ=100><ACK=301><CTL=SYN,ACK> ... - - 6. ESTABLISHED <-- <SEQ=300><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED - - 7. ... <SEQ=101><ACK=301><CTL=ACK> --> ESTABLISHED - - Simultaneous Connection Synchronization - - Figure 8. - - The principal reason for the three-way handshake is to prevent old - duplicate connection initiations from causing confusion. To deal with - this, a special control message, reset, has been devised. If the - receiving TCP is in a non-synchronized state (i.e., SYN-SENT, - SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset. - If the TCP is in one of the synchronized states (ESTABLISHED, - FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), it - aborts the connection and informs its user. We discuss this latter - case under "half-open" connections below. - - - - - - - - - - - - - - - -[Page 32] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - - - TCP A TCP B - - 1. CLOSED LISTEN - - 2. SYN-SENT --> <SEQ=100><CTL=SYN> ... - - 3. (duplicate) ... <SEQ=90><CTL=SYN> --> SYN-RECEIVED - - 4. SYN-SENT <-- <SEQ=300><ACK=91><CTL=SYN,ACK> <-- SYN-RECEIVED - - 5. SYN-SENT --> <SEQ=91><CTL=RST> --> LISTEN - - - 6. ... <SEQ=100><CTL=SYN> --> SYN-RECEIVED - - 7. SYN-SENT <-- <SEQ=400><ACK=101><CTL=SYN,ACK> <-- SYN-RECEIVED - - 8. ESTABLISHED --> <SEQ=101><ACK=401><CTL=ACK> --> ESTABLISHED - - Recovery from Old Duplicate SYN - - Figure 9. - - As a simple example of recovery from old duplicates, consider - figure 9. At line 3, an old duplicate SYN arrives at TCP B. TCP B - cannot tell that this is an old duplicate, so it responds normally - (line 4). TCP A detects that the ACK field is incorrect and returns a - RST (reset) with its SEQ field selected to make the segment - believable. TCP B, on receiving the RST, returns to the LISTEN state. - When the original SYN (pun intended) finally arrives at line 6, the - synchronization proceeds normally. If the SYN at line 6 had arrived - before the RST, a more complex exchange might have occurred with RST's - sent in both directions.
- - Half-Open Connections and Other Anomalies - - An established connection is said to be "half-open" if one of the - TCPs has closed or aborted the connection at its end without the - knowledge of the other, or if the two ends of the connection have - become desynchronized owing to a crash that resulted in loss of - memory. Such connections will automatically become reset if an - attempt is made to send data in either direction. However, half-open - connections are expected to be unusual, and the recovery procedure is - mildly involved. - - If at site A the connection no longer exists, then an attempt by the - - - [Page 33] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - user at site B to send any data on it will result in the site B TCP - receiving a reset control message. Such a message indicates to the - site B TCP that something is wrong, and it is expected to abort the - connection. - - Assume that two user processes A and B are communicating with one - another when a crash occurs causing loss of memory to A's TCP. - Depending on the operating system supporting A's TCP, it is likely - that some error recovery mechanism exists. When the TCP is up again, - A is likely to start again from the beginning or from a recovery - point. As a result, A will probably try to OPEN the connection again - or try to SEND on the connection it believes open. In the latter - case, it receives the error message "connection not open" from the - local (A's) TCP. In an attempt to establish the connection, A's TCP - will send a segment containing SYN. This scenario leads to the - example shown in figure 10. After TCP A crashes, the user attempts to - re-open the connection. TCP B, in the meantime, thinks the connection - is open. - - - - TCP A TCP B - - 1. (CRASH) (send 300,receive 100) - - 2. CLOSED ESTABLISHED - - 3. SYN-SENT --> --> (??) - - 4. (!!) <-- <-- ESTABLISHED - - 5. SYN-SENT --> --> (Abort!!) - - 6. SYN-SENT CLOSED - - 7. 
SYN-SENT --> --> - - Half-Open Connection Discovery - - Figure 10. - - When the SYN arrives at line 3, TCP B, being in a synchronized state, - and the incoming segment outside the window, responds with an - acknowledgment indicating what sequence it next expects to hear (ACK - 100). TCP A sees that this segment does not acknowledge anything it - sent and, being unsynchronized, sends a reset (RST) because it has - detected a half-open connection. TCP B aborts at line 5. TCP A will - - - -[Page 34] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - continue to try to establish the connection; the problem is now - reduced to the basic 3-way handshake of figure 7. - - An interesting alternative case occurs when TCP A crashes and TCP B - tries to send data on what it thinks is a synchronized connection. - This is illustrated in figure 11. In this case, the data arriving at - TCP A from TCP B (line 2) is unacceptable because no such connection - exists, so TCP A sends a RST. The RST is acceptable so TCP B - processes it and aborts the connection. - - - - TCP A TCP B - - 1. (CRASH) (send 300,receive 100) - - 2. (??) <-- <-- ESTABLISHED - - 3. --> --> (ABORT!!) - - Active Side Causes Half-Open Connection Discovery - - Figure 11. - - In figure 12, we find the two TCPs A and B with passive connections - waiting for SYN. An old duplicate arriving at TCP B (line 2) stirs B - into action. A SYN-ACK is returned (line 3) and causes TCP A to - generate a RST (the ACK in line 3 is not acceptable). TCP B accepts - the reset and returns to its passive LISTEN state. - - - - TCP A TCP B - - 1. LISTEN LISTEN - - 2. ... --> SYN-RECEIVED - - 3. (??) <-- <-- SYN-RECEIVED - - 4. --> --> (return to LISTEN!) - - 5. LISTEN LISTEN - - Old Duplicate SYN Initiates a Reset on two Passive Sockets - - Figure 12. 
- - - - [Page 35] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - A variety of other cases are possible, all of which are accounted for - by the following rules for RST generation and processing. - - Reset Generation - - As a general rule, reset (RST) must be sent whenever a segment arrives - which apparently is not intended for the current connection. A reset - must not be sent if it is not clear that this is the case. - - There are three groups of states: - - 1. If the connection does not exist (CLOSED) then a reset is sent - in response to any incoming segment except another reset. In - particular, SYNs addressed to a non-existent connection are rejected - by this means. - - If the incoming segment has an ACK field, the reset takes its - sequence number from the ACK field of the segment, otherwise the - reset has sequence number zero and the ACK field is set to the sum - of the sequence number and segment length of the incoming segment. - The connection remains in the CLOSED state. - - 2. If the connection is in any non-synchronized state (LISTEN, - SYN-SENT, SYN-RECEIVED), and the incoming segment acknowledges - something not yet sent (the segment carries an unacceptable ACK), or - if an incoming segment has a security level or compartment which - does not exactly match the level and compartment requested for the - connection, a reset is sent. - - If our SYN has not been acknowledged and the precedence level of the - incoming segment is higher than the precedence level requested then - either raise the local precedence level (if allowed by the user and - the system) or send a reset; or if the precedence level of the - incoming segment is lower than the precedence level requested then - continue as if the precedence matched exactly (if the remote TCP - cannot raise the precedence level to match ours this will be - detected in the next segment it sends, and the connection will be - terminated then). 
If our SYN has been acknowledged (perhaps in this - incoming segment) the precedence level of the incoming segment must - match the local precedence level exactly; if it does not, a reset - must be sent. - - If the incoming segment has an ACK field, the reset takes its - sequence number from the ACK field of the segment, otherwise the - reset has sequence number zero and the ACK field is set to the sum - of the sequence number and segment length of the incoming segment. - The connection remains in the same state. - - - -[Page 36] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - 3. If the connection is in a synchronized state (ESTABLISHED, - FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), - any unacceptable segment (out of window sequence number or - unacceptable acknowledgment number) must elicit only an empty - acknowledgment segment containing the current send-sequence number - and an acknowledgment indicating the next sequence number expected - to be received, and the connection remains in the same state. - - If an incoming segment has a security level, or compartment, or - precedence which does not exactly match the level, and compartment, - and precedence requested for the connection, a reset is sent and - the connection goes to the CLOSED state. The reset takes its sequence - number from the ACK field of the incoming segment. - - Reset Processing - - In all states except SYN-SENT, all reset (RST) segments are validated - by checking their SEQ-fields. A reset is valid if its sequence number - is in the window. In the SYN-SENT state (a RST received in response - to an initial SYN), the RST is acceptable if the ACK field - acknowledges the SYN. - - The receiver of a RST first validates it, then changes state. If the - receiver was in the LISTEN state, it ignores it.
If the receiver was - in SYN-RECEIVED state and had previously been in the LISTEN state, - then the receiver returns to the LISTEN state, otherwise the receiver - aborts the connection and goes to the CLOSED state. If the receiver - was in any other state, it aborts the connection and advises the user - and goes to the CLOSED state. - -3.5. Closing a Connection - - CLOSE is an operation meaning "I have no more data to send." The - notion of closing a full-duplex connection is subject to ambiguous - interpretation, of course, since it may not be obvious how to treat - the receiving side of the connection. We have chosen to treat CLOSE - in a simplex fashion. The user who CLOSEs may continue to RECEIVE - until he is told that the other side has CLOSED also. Thus, a program - could initiate several SENDs followed by a CLOSE, and then continue to - RECEIVE until signaled that a RECEIVE failed because the other side - has CLOSED. We assume that the TCP will signal a user, even if no - RECEIVEs are outstanding, that the other side has closed, so the user - can terminate his side gracefully. A TCP will reliably deliver all - buffers SENT before the connection was CLOSED so a user who expects no - data in return need only wait to hear the connection was CLOSED - successfully to know that all his data was received at the destination - TCP. Users must keep reading connections they close for sending until - the TCP says no more data. - - - [Page 37] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - There are essentially three cases: - - 1) The user initiates by telling the TCP to CLOSE the connection - - 2) The remote TCP initiates by sending a FIN control signal - - 3) Both users CLOSE simultaneously - - Case 1: Local user initiates the close - - In this case, a FIN segment can be constructed and placed on the - outgoing segment queue. No further SENDs from the user will be - accepted by the TCP, and it enters the FIN-WAIT-1 state. 
RECEIVEs - are allowed in this state. All segments preceding and including FIN - will be retransmitted until acknowledged. When the other TCP has - both acknowledged the FIN and sent a FIN of its own, the first TCP - can ACK this FIN. Note that a TCP receiving a FIN will ACK but not - send its own FIN until its user has CLOSED the connection also. - - Case 2: TCP receives a FIN from the network - - If an unsolicited FIN arrives from the network, the receiving TCP - can ACK it and tell the user that the connection is closing. The - user will respond with a CLOSE, upon which the TCP can send a FIN to - the other TCP after sending any remaining data. The TCP then waits - until its own FIN is acknowledged whereupon it deletes the - connection. If an ACK is not forthcoming, after the user timeout - the connection is aborted and the user is told. - - Case 3: both users close simultaneously - - A simultaneous CLOSE by users at both ends of a connection causes - FIN segments to be exchanged. When all segments preceding the FINs - have been processed and acknowledged, each TCP can ACK the FIN it - has received. Both will, upon receiving these ACKs, delete the - connection. - - - - - - - - - - - - - - -[Page 38] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - - - TCP A TCP B - - 1. ESTABLISHED ESTABLISHED - - 2. (Close) - FIN-WAIT-1 --> --> CLOSE-WAIT - - 3. FIN-WAIT-2 <-- <-- CLOSE-WAIT - - 4. (Close) - TIME-WAIT <-- <-- LAST-ACK - - 5. TIME-WAIT --> --> CLOSED - - 6. (2 MSL) - CLOSED - - Normal Close Sequence - - Figure 13. - - - - TCP A TCP B - - 1. ESTABLISHED ESTABLISHED - - 2. (Close) (Close) - FIN-WAIT-1 --> ... FIN-WAIT-1 - <-- <-- - ... --> - - 3. CLOSING --> ... CLOSING - <-- <-- - ... --> - - 4. TIME-WAIT TIME-WAIT - (2 MSL) (2 MSL) - CLOSED CLOSED - - Simultaneous Close Sequence - - Figure 14. - - - - - - [Page 39] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - -3.6. 
Precedence and Security - - The intent is that a connection be allowed only between ports operating - with exactly the same security and compartment values and at the - higher of the precedence levels requested by the two ports. - - The precedence and security parameters used in TCP are exactly those - defined in the Internet Protocol (IP) [2]. Throughout this TCP - specification the term "security/compartment" is intended to indicate - the security parameters used in IP including security, compartment, - user group, and handling restriction. - - A connection attempt with mismatched security/compartment values or a - lower precedence value must be rejected by sending a reset. Rejecting - a connection due to too low a precedence only occurs after an - acknowledgment of the SYN has been received. - - Note that TCP modules which operate only at the default value of - precedence will still have to check the precedence of incoming - segments and possibly raise the precedence level they use on the - connection. - - The security parameters may be used even in a non-secure environment - (the values would indicate unclassified data), thus hosts in - non-secure environments must be prepared to receive the security - parameters, though they need not send them. - -3.7. Data Communication - - Once the connection is established data is communicated by the - exchange of segments. Because segments may be lost due to errors - (checksum test failure), or network congestion, TCP uses - retransmission (after a timeout) to ensure delivery of every segment. - Duplicate segments may arrive due to network or TCP retransmission. - As discussed in the section on sequence numbers the TCP performs - certain tests on the sequence and acknowledgment numbers in the - segments to verify their acceptability. - - The sender of data keeps track of the next sequence number to use in - the variable SND.NXT. The receiver of data keeps track of the next - sequence number to expect in the variable RCV.NXT.
The sender of data - keeps track of the oldest unacknowledged sequence number in the - variable SND.UNA. If the data flow is momentarily idle and all data - sent has been acknowledged then the three variables will be equal. - - When the sender creates a segment and transmits it the sender advances - SND.NXT. When the receiver accepts a segment it advances RCV.NXT and - sends an acknowledgment. When the data sender receives an - - -[Page 40] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - acknowledgment it advances SND.UNA. The extent to which the values of - these variables differ is a measure of the delay in the communication. - The amount by which the variables are advanced is the length of the - data in the segment. Note that once in the ESTABLISHED state all - segments must carry current acknowledgment information. - - The CLOSE user call implies a push function, as does the FIN control - flag in an incoming segment. - - Retransmission Timeout - - Because of the variability of the networks that compose an - internetwork system and the wide range of uses of TCP connections the - retransmission timeout must be dynamically determined. One procedure - for determining a retransmission time out is given here as an - illustration. - - An Example Retransmission Timeout Procedure - - Measure the elapsed time between sending a data octet with a - particular sequence number and receiving an acknowledgment that - covers that sequence number (segments sent do not have to match - segments received). This measured elapsed time is the Round Trip - Time (RTT). 
Next compute a Smoothed Round Trip Time (SRTT) as: - - SRTT = ( ALPHA * SRTT ) + ((1-ALPHA) * RTT) - - and based on this, compute the retransmission timeout (RTO) as: - - RTO = min[UBOUND,max[LBOUND,(BETA*SRTT)]] - - where UBOUND is an upper bound on the timeout (e.g., 1 minute), - LBOUND is a lower bound on the timeout (e.g., 1 second), ALPHA is - a smoothing factor (e.g., .8 to .9), and BETA is a delay variance - factor (e.g., 1.3 to 2.0). - - The Communication of Urgent Information - - The objective of the TCP urgent mechanism is to allow the sending user - to stimulate the receiving user to accept some urgent data and to - permit the receiving TCP to indicate to the receiving user when all - the currently known urgent data has been received by the user. - - This mechanism permits a point in the data stream to be designated as - the end of urgent information. Whenever this point is in advance of - the receive sequence number (RCV.NXT) at the receiving TCP, that TCP - must tell the user to go into "urgent mode"; when the receive sequence - number catches up to the urgent pointer, the TCP must tell user to go - - - [Page 41] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - into "normal mode". If the urgent pointer is updated while the user - is in "urgent mode", the update will be invisible to the user. - - The method employs a urgent field which is carried in all segments - transmitted. The URG control flag indicates that the urgent field is - meaningful and must be added to the segment sequence number to yield - the urgent pointer. The absence of this flag indicates that there is - no urgent data outstanding. - - To send an urgent indication the user must also send at least one data - octet. If the sending user also indicates a push, timely delivery of - the urgent information to the destination process is enhanced. 
- - Managing the Window - - The window sent in each segment indicates the range of sequence - numbers the sender of the window (the data receiver) is currently - prepared to accept. There is an assumption that this is related to - the currently available data buffer space available for this - connection. - - Indicating a large window encourages transmissions. If more data - arrives than can be accepted, it will be discarded. This will result - in excessive retransmissions, adding unnecessarily to the load on the - network and the TCPs. Indicating a small window may restrict the - transmission of data to the point of introducing a round trip delay - between each new segment transmitted. - - The mechanisms provided allow a TCP to advertise a large window and to - subsequently advertise a much smaller window without having accepted - that much data. This, so called "shrinking the window," is strongly - discouraged. The robustness principle dictates that TCPs will not - shrink the window themselves, but will be prepared for such behavior - on the part of other TCPs. - - The sending TCP must be prepared to accept from the user and send at - least one octet of new data even if the send window is zero. The - sending TCP must regularly retransmit to the receiving TCP even when - the window is zero. Two minutes is recommended for the retransmission - interval when the window is zero. This retransmission is essential to - guarantee that when either TCP has a zero window the re-opening of the - window will be reliably reported to the other. - - When the receiving TCP has a zero window and a segment arrives it must - still send an acknowledgment showing its next expected sequence number - and current window (zero). - - The sending TCP packages the data to be transmitted into segments - - -[Page 42] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - which fit the current window, and may repackage segments on the - retransmission queue. 
Such repackaging is not required, but may be - helpful. - - In a connection with a one-way data flow, the window information will - be carried in acknowledgment segments that all have the same sequence - number so there will be no way to reorder them if they arrive out of - order. This is not a serious problem, but it will allow the window - information to be on occasion temporarily based on old reports from - the data receiver. A refinement to avoid this problem is to act on - the window information from segments that carry the highest - acknowledgment number (that is, segments with acknowledgment number - equal to or greater than the highest previously received). - - The window management procedure has significant influence on the - communication performance. The following comments are suggestions to - implementers. - - Window Management Suggestions - - Allocating a very small window causes data to be transmitted in - many small segments when better performance is achieved using - fewer large segments. - - One suggestion for avoiding small windows is for the receiver to - defer updating a window until the additional allocation is at - least X percent of the maximum allocation possible for the - connection (where X might be 20 to 40). - - Another suggestion is for the sender to avoid sending small - segments by waiting until the window is large enough before - sending data. If the user signals a push function then the - data must be sent even if it is a small segment. - - Note that the acknowledgments should not be delayed or unnecessary - retransmissions will result. One strategy would be to send an - acknowledgment when a small segment arrives (without updating the - window information), and then to send another acknowledgment with - new window information when the window is larger. - - The segment sent to probe a zero window may also begin a break up - of transmitted data into smaller and smaller segments.
If a - segment containing a single data octet sent to probe a zero window - is accepted, it consumes one octet of the window now available. - If the sending TCP simply sends as much as it can whenever the - window is non zero, the transmitted data will be broken into - alternating big and small segments. As time goes on, occasional - pauses in the receiver making window allocation available will - - - [Page 43] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - result in breaking the big segments into a small and not quite so - big pair. And after a while the data transmission will be in - mostly small segments. - - The suggestion here is that the TCP implementations need to - actively attempt to combine small window allocations into larger - windows, since the mechanisms for managing the window tend to lead - to many small windows in the simplest minded implementations. - -3.8. Interfaces - - There are of course two interfaces of concern: the user/TCP interface - and the TCP/lower-level interface. We have a fairly elaborate model - of the user/TCP interface, but the interface to the lower level - protocol module is left unspecified here, since it will be specified - in detail by the specification of the lower level protocol. For the - case that the lower level is IP we note some of the parameter values - that TCPs might use. - - User/TCP Interface - - The following functional description of user commands to the TCP is, - at best, fictional, since every operating system will have different - facilities. Consequently, we must warn readers that different TCP - implementations may have different user interfaces. However, all - TCPs must provide a certain minimum set of services to guarantee - that all TCP implementations can support the same protocol - hierarchy. This section specifies the functional interfaces - required of all TCP implementations.
- - TCP User Commands - - The following sections functionally characterize a USER/TCP - interface. The notation used is similar to most procedure or - function calls in high level languages, but this usage is not - meant to rule out trap type service calls (e.g., SVCs, UUOs, - EMTs). - - The user commands described below specify the basic functions the - TCP must perform to support interprocess communication. - Individual implementations must define their own exact format, and - may provide combinations or subsets of the basic functions in - single calls. In particular, some implementations may wish to - automatically OPEN a connection on the first SEND or RECEIVE - issued by the user for a given connection. - - - - - -[Page 44] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - In providing interprocess communication facilities, the TCP must - not only accept commands, but must also return information to the - processes it serves. The latter consists of: - - (a) general information about a connection (e.g., interrupts, - remote close, binding of unspecified foreign socket). - - (b) replies to specific user commands indicating success or - various types of failure. - - Open - - Format: OPEN (local port, foreign socket, active/passive - [, timeout] [, precedence] [, security/compartment] [, options]) - -> local connection name - - We assume that the local TCP is aware of the identity of the - processes it serves and will check the authority of the process - to use the connection specified. Depending upon the - implementation of the TCP, the local network and TCP identifiers - for the source address will either be supplied by the TCP or the - lower level protocol (e.g., IP). These considerations are the - result of concern about security, to the extent that no TCP be - able to masquerade as another one, and so on. Similarly, no - process can masquerade as another without the collusion of the - TCP. 
- - If the active/passive flag is set to passive, then this is a - call to LISTEN for an incoming connection. A passive open may - have either a fully specified foreign socket to wait for a - particular connection or an unspecified foreign socket to wait - for any call. A fully specified passive call can be made active - by the subsequent execution of a SEND. - - A transmission control block (TCB) is created and partially - filled in with data from the OPEN command parameters. - - On an active OPEN command, the TCP will begin the procedure to - synchronize (i.e., establish) the connection at once. - - The timeout, if present, permits the caller to set up a timeout - for all data submitted to TCP. If data is not successfully - delivered to the destination within the timeout period, the TCP - will abort the connection. The present global default is five - minutes. - - The TCP or some component of the operating system will verify - the user's authority to open a connection with the specified - - - [Page 45] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - precedence or security/compartment. The absence of precedence - or security/compartment specification in the OPEN call indicates - the default values must be used. - - TCP will accept incoming requests as matching only if the - security/compartment information is exactly the same and only if - the precedence is equal to or higher than the precedence - requested in the OPEN call. - - The precedence for the connection is the higher of the values - requested in the OPEN call and received from the incoming - request, and fixed at that value for the life of the - connection. Implementers may want to give the user control of - this precedence negotiation. For example, the user might be - allowed to specify that the precedence must be exactly matched, - or that any attempt to raise the precedence be confirmed by the - user.
- - A local connection name will be returned to the user by the TCP. - The local connection name can then be used as a short hand term - for the connection defined by the <local socket, foreign socket> - pair. - - Send - - Format: SEND (local connection name, buffer address, byte - count, PUSH flag, URGENT flag [,timeout]) - - This call causes the data contained in the indicated user buffer - to be sent on the indicated connection. If the connection has - not been opened, the SEND is considered an error. Some - implementations may allow users to SEND first; in which case, an - automatic OPEN would be done. If the calling process is not - authorized to use this connection, an error is returned. - - If the PUSH flag is set, the data must be transmitted promptly - to the receiver, and the PUSH bit will be set in the last TCP - segment created from the buffer. If the PUSH flag is not set, - the data may be combined with data from subsequent SENDs for - transmission efficiency. - - If the URGENT flag is set, segments sent to the destination TCP - will have the urgent pointer set. The receiving TCP will signal - the urgent condition to the receiving process if the urgent - pointer indicates that data preceding the urgent pointer has not - been consumed by the receiving process. The purpose of urgent - is to stimulate the receiver to process the urgent data and to - indicate to the receiver when all the currently known urgent - - -[Page 46] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - data has been received. The number of times the sending user's - TCP signals urgent will not necessarily be equal to the number - of times the receiving user will be notified of the presence of - urgent data. - - If no foreign socket was specified in the OPEN, but the - connection is established (e.g., because a LISTENing connection - has become specific due to a foreign segment arriving for the - local socket), then the designated buffer is sent to the implied - foreign socket.
Users who make use of OPEN with an unspecified - foreign socket can make use of SEND without ever explicitly - knowing the foreign socket address. - - However, if a SEND is attempted before the foreign socket - becomes specified, an error will be returned. Users can use the - STATUS call to determine the status of the connection. In some - implementations the TCP may notify the user when an unspecified - socket is bound. - - If a timeout is specified, the current user timeout for this - connection is changed to the new one. - - In the simplest implementation, SEND would not return control to - the sending process until either the transmission was complete - or the timeout had been exceeded. However, this simple method - is both subject to deadlocks (for example, both sides of the - connection might try to do SENDs before doing any RECEIVEs) and - offers poor performance, so it is not recommended. A more - sophisticated implementation would return immediately to allow - the process to run concurrently with network I/O, and, - furthermore, to allow multiple SENDs to be in progress. - Multiple SENDs are served in first come, first served order, so - the TCP will queue those it cannot service immediately. - - We have implicitly assumed an asynchronous user interface in - which a SEND later elicits some kind of SIGNAL or - pseudo-interrupt from the serving TCP. An alternative is to - return a response immediately. For instance, SENDs might return - immediate local acknowledgment, even if the segment sent had not - been acknowledged by the distant TCP. We could optimistically - assume eventual success. If we are wrong, the connection will - close anyway due to the timeout. In implementations of this - kind (synchronous), there will still be some asynchronous - signals, but these will deal with the connection itself, and not - with specific segments or buffers. 
- - In order for the process to distinguish among error or success - indications for different SENDs, it might be appropriate for the - - - [Page 47] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - buffer address to be returned along with the coded response to - the SEND request. TCP-to-user signals are discussed below, - indicating the information which should be returned to the - calling process. - - Receive - - Format: RECEIVE (local connection name, buffer address, byte - count) -> byte count, urgent flag, push flag - - This command allocates a receiving buffer associated with the - specified connection. If no OPEN precedes this command or the - calling process is not authorized to use this connection, an - error is returned. - - In the simplest implementation, control would not return to the - calling program until either the buffer was filled, or some - error occurred, but this scheme is highly subject to deadlocks. - A more sophisticated implementation would permit several - RECEIVEs to be outstanding at once. These would be filled as - segments arrive. This strategy permits increased throughput at - the cost of a more elaborate scheme (possibly asynchronous) to - notify the calling program that a PUSH has been seen or a buffer - filled. - - If enough data arrive to fill the buffer before a PUSH is seen, - the PUSH flag will not be set in the response to the RECEIVE. - The buffer will be filled with as much data as it can hold. If - a PUSH is seen before the buffer is filled the buffer will be - returned partially filled and PUSH indicated. - - If there is urgent data the user will have been informed as soon - as it arrived via a TCP-to-user signal. The receiving user - should thus be in "urgent mode". If the URGENT flag is on, - additional urgent data remains. If the URGENT flag is off, this - call to RECEIVE has returned all the urgent data, and the user - may now leave "urgent mode". 
Note that data following the - urgent pointer (non-urgent data) cannot be delivered to the user - in the same buffer with preceding urgent data unless the - boundary is clearly marked for the user. - - To distinguish among several outstanding RECEIVEs and to take - care of the case that a buffer is not completely filled, the - return code is accompanied by both a buffer pointer and a byte - count indicating the actual length of the data received. - - Alternative implementations of RECEIVE might have the TCP - - - -[Page 48] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - allocate buffer storage, or the TCP might share a ring buffer - with the user. - - Close - - Format: CLOSE (local connection name) - - This command causes the connection specified to be closed. If - the connection is not open or the calling process is not - authorized to use this connection, an error is returned. - Closing connections is intended to be a graceful operation in - the sense that outstanding SENDs will be transmitted (and - retransmitted), as flow control permits, until all have been - serviced. Thus, it should be acceptable to make several SEND - calls, followed by a CLOSE, and expect all the data to be sent - to the destination. It should also be clear that users should - continue to RECEIVE on CLOSING connections, since the other side - may be trying to transmit the last of its data. Thus, CLOSE - means "I have no more to send" but does not mean "I will not - receive any more." It may happen (if the user level protocol is - not well thought out) that the closing side is unable to get rid - of all its data before timing out. In this event, CLOSE turns - into ABORT, and the closing TCP gives up. - - The user may CLOSE the connection at any time on his own - initiative, or in response to various prompts from the TCP - (e.g., remote close executed, transmission timeout exceeded, - destination inaccessible).
- - Because closing a connection requires communication with the - foreign TCP, connections may remain in the closing state for a - short time. Attempts to reopen the connection before the TCP - replies to the CLOSE command will result in error responses. - - Close also implies push function. - - Status - - Format: STATUS (local connection name) -> status data - - This is an implementation dependent user command and could be - excluded without adverse effect. Information returned would - typically come from the TCB associated with the connection. - - This command returns a data block containing the following - information: - - local socket, - - - [Page 49] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - - foreign socket, - local connection name, - receive window, - send window, - connection state, - number of buffers awaiting acknowledgment, - number of buffers pending receipt, - urgent state, - precedence, - security/compartment, - and transmission timeout. - - Depending on the state of the connection, or on the - implementation itself, some of this information may not be - available or meaningful. If the calling process is not - authorized to use this connection, an error is returned. This - prevents unauthorized processes from gaining information about a - connection. - - Abort - - Format: ABORT (local connection name) - - This command causes all pending SENDs and RECEIVES to be - aborted, the TCB to be removed, and a special RESET message to - be sent to the TCP on the other side of the connection. - Depending on the implementation, users may receive abort - indications for each outstanding SEND or RECEIVE, or may simply - receive an ABORT-acknowledgment. - - TCP-to-User Messages - - It is assumed that the operating system environment provides a - means for the TCP to asynchronously signal the user program. When - the TCP does signal a user program, certain information is passed - to the user. 
Often in the specification the information will be - an error message. In other cases there will be information - relating to the completion of processing a SEND or RECEIVE or - other user call. - - The following information is provided: - - Local Connection Name Always - Response String Always - Buffer Address Send & Receive - Byte count (counts bytes received) Receive - Push flag Receive - Urgent flag Receive - - -[Page 50] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - TCP/Lower-Level Interface - - The TCP calls on a lower level protocol module to actually send and - receive information over a network. One case is that of the ARPA - internetwork system where the lower level module is the Internet - Protocol (IP) [2]. - - If the lower level protocol is IP it provides arguments for a type - of service and for a time to live. TCP uses the following settings - for these parameters: - - Type of Service = Precedence: routine, Delay: normal, Throughput: - normal, Reliability: normal; or 00000000. - - Time to Live = one minute, or 00111100. - - Note that the assumed maximum segment lifetime is two minutes. - Here we explicitly ask that a segment be destroyed if it cannot - be delivered by the internet system within one minute. - - If the lower level is IP (or other protocol that provides this - feature) and source routing is used, the interface must allow the - route information to be communicated. This is especially important - so that the source and destination addresses used in the TCP - checksum be the originating source and ultimate destination. It is - also important to preserve the return route to answer connection - requests. - - Any lower level protocol will have to provide the source address, - destination address, and protocol fields, and some way to determine - the "TCP length", both to provide the functional equivalent service - of IP and to be used in the TCP checksum.
- - - - - - - - - - - - - - - - - - - [Page 51] - - - September 1981 -Transmission Control Protocol -Functional Specification - - - -3.9. Event Processing - - The processing depicted in this section is an example of one possible - implementation. Other implementations may have slightly different - processing sequences, but they should differ from those in this - section only in detail, not in substance. - - The activity of the TCP can be characterized as responding to events. - The events that occur can be cast into three categories: user calls, - arriving segments, and timeouts. This section describes the - processing the TCP does in response to each of the events. In many - cases the processing required depends on the state of the connection. - - Events that occur: - - User Calls - - OPEN - SEND - RECEIVE - CLOSE - ABORT - STATUS - - Arriving Segments - - SEGMENT ARRIVES - - Timeouts - - USER TIMEOUT - RETRANSMISSION TIMEOUT - TIME-WAIT TIMEOUT - - The model of the TCP/user interface is that user commands receive an - immediate return and possibly a delayed response via an event or - pseudo interrupt. In the following descriptions, the term "signal" - means cause a delayed response. - - Error responses are given as character strings. For example, user - commands referencing connections that do not exist receive "error: - connection not open". - - Please note in the following that all arithmetic on sequence numbers, - acknowledgment numbers, windows, et cetera, is modulo 2**32 the size - of the sequence number space. Also note that "=<" means less than or - equal to (modulo 2**32). 
- - - -[Page 52] - - -September 1981 - Transmission Control Protocol - Functional Specification - - - - A natural way to think about processing incoming segments is to - imagine that they are first tested for proper sequence number (i.e., - that their contents lie in the range of the expected "receive window" - in the sequence number space) and then that they are generally queued - and processed in sequence number order. - - When a segment overlaps other already received segments we reconstruct - the segment to contain just the new data, and adjust the header fields - to be consistent. - - Note that if no state change is mentioned the TCP stays in the same - state. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page 53] - - - September 1981 -Transmission Control Protocol -Functional Specification - OPEN Call - - - - OPEN Call - - CLOSED STATE (i.e., TCB does not exist) - - Create a new transmission control block (TCB) to hold connection - state information. Fill in local socket identifier, foreign - socket, precedence, security/compartment, and user timeout - information. Note that some parts of the foreign socket may be - unspecified in a passive OPEN and are to be filled in by the - parameters of the incoming SYN segment. Verify the security and - precedence requested are allowed for this user, if not return - "error: precedence not allowed" or "error: security/compartment - not allowed." If passive enter the LISTEN state and return. If - active and the foreign socket is unspecified, return "error: - foreign socket unspecified"; if active and the foreign socket is - specified, issue a SYN segment. An initial send sequence number - (ISS) is selected. A SYN segment of the form <SEQ=ISS><CTL=SYN> - is sent. Set SND.UNA to ISS, SND.NXT to ISS+1, enter SYN-SENT - state, and return. - - If the caller does not have access to the local socket specified, - return "error: connection illegal for this process".
If there is - no room to create a new connection, return "error: insufficient - resources". - - LISTEN STATE - - If active and the foreign socket is specified, then change the - connection from passive to active, select an ISS. Send a SYN - segment, set SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT - state. Data associated with SEND may be sent with SYN segment or - queued for transmission after entering ESTABLISHED state. The - urgent bit if requested in the command must be sent with the data - segments sent as a result of this command. If there is no room to - queue the request, respond with "error: insufficient resources". - If Foreign socket was not specified, then return "error: foreign - socket unspecified". - - - - - - - - - - - - -[Page 54] - - -September 1981 - Transmission Control Protocol - Functional Specification -OPEN Call - - - - SYN-SENT STATE - SYN-RECEIVED STATE - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - CLOSE-WAIT STATE - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - Return "error: connection already exists". - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page 55] - - - September 1981 -Transmission Control Protocol -Functional Specification - SEND Call - - - - SEND Call - - CLOSED STATE (i.e., TCB does not exist) - - If the user does not have access to such a connection, then return - "error: connection illegal for this process". - - Otherwise, return "error: connection does not exist". - - LISTEN STATE - - If the foreign socket is specified, then change the connection - from passive to active, select an ISS. Send a SYN segment, set - SND.UNA to ISS, SND.NXT to ISS+1. Enter SYN-SENT state. Data - associated with SEND may be sent with SYN segment or queued for - transmission after entering ESTABLISHED state. The urgent bit if - requested in the command must be sent with the data segments sent - as a result of this command. 
If there is no room to queue the - request, respond with "error: insufficient resources". If - Foreign socket was not specified, then return "error: foreign - socket unspecified". - - SYN-SENT STATE - SYN-RECEIVED STATE - - Queue the data for transmission after entering ESTABLISHED state. - If no space to queue, respond with "error: insufficient - resources". - - ESTABLISHED STATE - CLOSE-WAIT STATE - - Segmentize the buffer and send it with a piggybacked - acknowledgment (acknowledgment value = RCV.NXT). If there is - insufficient space to remember this buffer, simply return "error: - insufficient resources". - - If the urgent flag is set, then SND.UP <- SND.NXT-1 and set the - urgent pointer in the outgoing segments. - - - - - - - - - - -[Page 56] - - -September 1981 - Transmission Control Protocol - Functional Specification -SEND Call - - - - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - Return "error: connection closing" and do not service request. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page 57] - - - September 1981 -Transmission Control Protocol -Functional Specification - RECEIVE Call - - - - RECEIVE Call - - CLOSED STATE (i.e., TCB does not exist) - - If the user does not have access to such a connection, return - "error: connection illegal for this process". - - Otherwise return "error: connection does not exist". - - LISTEN STATE - SYN-SENT STATE - SYN-RECEIVED STATE - - Queue for processing after entering ESTABLISHED state. If there - is no room to queue this request, respond with "error: - insufficient resources". - - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - - If insufficient incoming segments are queued to satisfy the - request, queue the request. If there is no queue space to - remember the RECEIVE, respond with "error: insufficient - resources". - - Reassemble queued incoming segments into receive buffer and return - to user. 
Mark "push seen" (PUSH) if this is the case. - - If RCV.UP is in advance of the data currently being passed to the - user notify the user of the presence of urgent data. - - When the TCP takes responsibility for delivering data to the user - that fact must be communicated to the sender via an - acknowledgment. The formation of such an acknowledgment is - described below in the discussion of processing an incoming - segment. - - - - - - - - - - - - -[Page 58] - - -September 1981 - Transmission Control Protocol - Functional Specification -RECEIVE Call - - - - CLOSE-WAIT STATE - - Since the remote side has already sent FIN, RECEIVEs must be - satisfied by text already on hand, but not yet delivered to the - user. If no text is awaiting delivery, the RECEIVE will get a - "error: connection closing" response. Otherwise, any remaining - text can be used to satisfy the RECEIVE. - - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - Return "error: connection closing". - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page 59] - - - September 1981 -Transmission Control Protocol -Functional Specification - CLOSE Call - - - - CLOSE Call - - CLOSED STATE (i.e., TCB does not exist) - - If the user does not have access to such a connection, return - "error: connection illegal for this process". - - Otherwise, return "error: connection does not exist". - - LISTEN STATE - - Any outstanding RECEIVEs are returned with "error: closing" - responses. Delete TCB, enter CLOSED state, and return. - - SYN-SENT STATE - - Delete the TCB and return "error: closing" responses to any - queued SENDs, or RECEIVEs. - - SYN-RECEIVED STATE - - If no SENDs have been issued and there is no pending data to send, - then form a FIN segment and send it, and enter FIN-WAIT-1 state; - otherwise queue for processing after entering ESTABLISHED state. - - ESTABLISHED STATE - - Queue this until all preceding SENDs have been segmentized, then - form a FIN segment and send it. 
In any case, enter FIN-WAIT-1 - state. - - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - - Strictly speaking, this is an error and should receive a "error: - connection closing" response. An "ok" response would be - acceptable, too, as long as a second FIN is not emitted (the first - FIN may be retransmitted though). - - - - - - - - - - - -[Page 60] - - -September 1981 - Transmission Control Protocol - Functional Specification -CLOSE Call - - - - CLOSE-WAIT STATE - - Queue this request until all preceding SENDs have been - segmentized; then send a FIN segment, enter CLOSING state. - - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - Respond with "error: connection closing". - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page 61] - - - September 1981 -Transmission Control Protocol -Functional Specification - ABORT Call - - - - ABORT Call - - CLOSED STATE (i.e., TCB does not exist) - - If the user should not have access to such a connection, return - "error: connection illegal for this process". - - Otherwise return "error: connection does not exist". - - LISTEN STATE - - Any outstanding RECEIVEs should be returned with "error: - connection reset" responses. Delete TCB, enter CLOSED state, and - return. - - SYN-SENT STATE - - All queued SENDs and RECEIVEs should be given "connection reset" - notification, delete the TCB, enter CLOSED state, and return. - - SYN-RECEIVED STATE - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - CLOSE-WAIT STATE - - Send a reset segment: - - <SEQ=SND.NXT><CTL=RST> - - All queued SENDs and RECEIVEs should be given "connection reset" - notification; all segments queued for transmission (except for the - RST formed above) or retransmission should be flushed, delete the - TCB, enter CLOSED state, and return. - - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - Respond with "ok" and delete the TCB, enter CLOSED state, and - return.
- - - - - - - - -[Page 62] - - -September 1981 - Transmission Control Protocol - Functional Specification -STATUS Call - - - - STATUS Call - - CLOSED STATE (i.e., TCB does not exist) - - If the user should not have access to such a connection, return - "error: connection illegal for this process". - - Otherwise return "error: connection does not exist". - - LISTEN STATE - - Return "state = LISTEN", and the TCB pointer. - - SYN-SENT STATE - - Return "state = SYN-SENT", and the TCB pointer. - - SYN-RECEIVED STATE - - Return "state = SYN-RECEIVED", and the TCB pointer. - - ESTABLISHED STATE - - Return "state = ESTABLISHED", and the TCB pointer. - - FIN-WAIT-1 STATE - - Return "state = FIN-WAIT-1", and the TCB pointer. - - FIN-WAIT-2 STATE - - Return "state = FIN-WAIT-2", and the TCB pointer. - - CLOSE-WAIT STATE - - Return "state = CLOSE-WAIT", and the TCB pointer. - - CLOSING STATE - - Return "state = CLOSING", and the TCB pointer. - - LAST-ACK STATE - - Return "state = LAST-ACK", and the TCB pointer. - - - - - - [Page 63] - - - September 1981 -Transmission Control Protocol -Functional Specification - STATUS Call - - - - TIME-WAIT STATE - - Return "state = TIME-WAIT", and the TCB pointer. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 64] - - -September 1981 - Transmission Control Protocol - Functional Specification -SEGMENT ARRIVES - - - - SEGMENT ARRIVES - - If the state is CLOSED (i.e., TCB does not exist) then - - all data in the incoming segment is discarded. An incoming - segment containing a RST is discarded. An incoming segment not - containing a RST causes a RST to be sent in response. The - acknowledgment and sequence field values are selected to make the - reset sequence acceptable to the TCP that sent the offending - segment. - - If the ACK bit is off, sequence number zero is used, - - <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> - - If the ACK bit is on, - - <SEQ=SEG.ACK><CTL=RST> - - Return.
- - If the state is LISTEN then - - first check for an RST - - An incoming RST should be ignored. Return. - - second check for an ACK - - Any acknowledgment is bad if it arrives on a connection still in - the LISTEN state. An acceptable reset segment should be formed - for any arriving ACK-bearing segment. The RST should be - formatted as follows: - - <SEQ=SEG.ACK><CTL=RST> - - Return. - - third check for a SYN - - If the SYN bit is set, check the security. If the - security/compartment on the incoming segment does not exactly - match the security/compartment in the TCB then send a reset and - return. - - <SEQ=SEG.ACK><CTL=RST> - - - - [Page 65] - - - September 1981 -Transmission Control Protocol -Functional Specification - SEGMENT ARRIVES - - - - If the SEG.PRC is greater than the TCB.PRC then if allowed by - the user and the system set TCB.PRC<-SEG.PRC, if not allowed - send a reset and return. - - <SEQ=SEG.ACK><CTL=RST> - - If the SEG.PRC is less than the TCB.PRC then continue. - - Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ and any other - control or text should be queued for processing later. ISS - should be selected and a SYN segment sent of the form: - - <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK> - - SND.NXT is set to ISS+1 and SND.UNA to ISS. The connection - state should be changed to SYN-RECEIVED. Note that any other - incoming control or data (combined with SYN) will be processed - in the SYN-RECEIVED state, but processing of SYN and ACK should - not be repeated. If the listen was not fully specified (i.e., - the foreign socket was not fully specified), then the - unspecified fields should be filled in now. - - fourth other text or control - - Any other control or text-bearing segment (not containing SYN) - must have an ACK and thus would be discarded by the ACK - processing. An incoming RST segment could not be valid, since - it could not have been sent in response to anything sent by this - incarnation of the connection. So you are unlikely to get here, - but if you do, drop the segment, and return.
- - If the state is SYN-SENT then - - first check the ACK bit - - If the ACK bit is set - - If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset (unless - the RST bit is set, if so drop the segment and return) - - <SEQ=SEG.ACK><CTL=RST> - - and discard the segment. Return. - - If SND.UNA =< SEG.ACK =< SND.NXT then the ACK is acceptable. - - second check the RST bit - - -[Page 66] - - -September 1981 - Transmission Control Protocol - Functional Specification -SEGMENT ARRIVES - - - - If the RST bit is set - - If the ACK was acceptable then signal the user "error: - connection reset", drop the segment, enter CLOSED state, - delete TCB, and return. Otherwise (no ACK) drop the segment - and return. - - third check the security and precedence - - If the security/compartment in the segment does not exactly - match the security/compartment in the TCB, send a reset - - If there is an ACK - - <SEQ=SEG.ACK><CTL=RST> - - Otherwise - - <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> - - If there is an ACK - - The precedence in the segment must match the precedence in the - TCB, if not, send a reset - - <SEQ=SEG.ACK><CTL=RST> - - If there is no ACK - - If the precedence in the segment is higher than the precedence - in the TCB then if allowed by the user and the system raise - the precedence in the TCB to that in the segment, if not - allowed to raise the prec then send a reset. - - <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> - - If the precedence in the segment is lower than the precedence - in the TCB continue. - - If a reset was sent, discard the segment and return. - - fourth check the SYN bit - - This step should be reached only if the ACK is ok, or there is - no ACK, and the segment did not contain a RST. - - If the SYN bit is on and the security/compartment and precedence - - - [Page 67] - - - September 1981 -Transmission Control Protocol -Functional Specification - SEGMENT ARRIVES - - - - are acceptable then, RCV.NXT is set to SEG.SEQ+1, IRS is set to - SEG.SEQ. SND.UNA should be advanced to equal SEG.ACK (if there - is an ACK), and any segments on the retransmission queue which - are thereby acknowledged should be removed.
- - If SND.UNA > ISS (our SYN has been ACKed), change the connection - state to ESTABLISHED, form an ACK segment - - <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK> - - and send it. Data or controls which were queued for - transmission may be included. If there are other controls or - text in the segment then continue processing at the sixth step - below where the URG bit is checked, otherwise return. - - Otherwise enter SYN-RECEIVED, form a SYN,ACK segment - - <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK> - - and send it. If there are other controls or text in the - segment, queue them for processing after the ESTABLISHED state - has been reached, return. - - fifth, if neither of the SYN or RST bits is set then drop the - segment and return. - - - - - - - - - - - - - - - - - - - - - - - - -[Page 68] - - -September 1981 - Transmission Control Protocol - Functional Specification -SEGMENT ARRIVES - - - - Otherwise, - - first check sequence number - - SYN-RECEIVED STATE - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - CLOSE-WAIT STATE - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - Segments are processed in sequence. Initial tests on arrival - are used to discard old duplicates, but further processing is - done in SEG.SEQ order. If a segment's contents straddle the - boundary between old and new, only the new parts should be - processed. - - There are four cases for the acceptability test for an incoming - segment: - - Segment Receive Test - Length Window - ------- ------- ------------------------------------------- - - 0 0 SEG.SEQ = RCV.NXT - - 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - - >0 0 not acceptable - - >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND - or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND - - If the RCV.WND is zero, no segments will be acceptable, but - special allowance should be made to accept valid ACKs, URGs and - RSTs.
- - If an incoming segment is not acceptable, an acknowledgment - should be sent in reply (unless the RST bit is set, if so drop - the segment and return): - - - - After sending the acknowledgment, drop the unacceptable segment - and return. - - - [Page 69] - - - September 1981 -Transmission Control Protocol -Functional Specification - SEGMENT ARRIVES - - - - In the following it is assumed that the segment is the idealized - segment that begins at RCV.NXT and does not exceed the window. - One could tailor actual segments to fit this assumption by - trimming off any portions that lie outside the window (including - SYN and FIN), and only processing further if the segment then - begins at RCV.NXT. Segments with higher begining sequence - numbers may be held for later processing. - - second check the RST bit, - - SYN-RECEIVED STATE - - If the RST bit is set - - If this connection was initiated with a passive OPEN (i.e., - came from the LISTEN state), then return this connection to - LISTEN state and return. The user need not be informed. If - this connection was initiated with an active OPEN (i.e., came - from SYN-SENT state) then the connection was refused, signal - the user "connection refused". In either case, all segments - on the retransmission queue should be removed. And in the - active OPEN case, enter the CLOSED state and delete the TCB, - and return. - - ESTABLISHED - FIN-WAIT-1 - FIN-WAIT-2 - CLOSE-WAIT - - If the RST bit is set then, any outstanding RECEIVEs and SEND - should receive "reset" responses. All segment queues should be - flushed. Users should also receive an unsolicited general - "connection reset" signal. Enter the CLOSED state, delete the - TCB, and return. - - CLOSING STATE - LAST-ACK STATE - TIME-WAIT - - If the RST bit is set then, enter the CLOSED state, delete the - TCB, and return. 
- - - - - - - - -[Page 70] - - -September 1981 - Transmission Control Protocol - Functional Specification -SEGMENT ARRIVES - - - - third check security and precedence - - SYN-RECEIVED - - If the security/compartment and precedence in the segment do not - exactly match the security/compartment and precedence in the TCB - then send a reset, and return. - - ESTABLISHED STATE - - If the security/compartment and precedence in the segment do not - exactly match the security/compartment and precedence in the TCB - then send a reset, any outstanding RECEIVEs and SEND should - receive "reset" responses. All segment queues should be - flushed. Users should also receive an unsolicited general - "connection reset" signal. Enter the CLOSED state, delete the - TCB, and return. - - Note this check is placed following the sequence check to prevent - a segment from an old connection between these ports with a - different security or precedence from causing an abort of the - current connection. - - fourth, check the SYN bit, - - SYN-RECEIVED - ESTABLISHED STATE - FIN-WAIT STATE-1 - FIN-WAIT STATE-2 - CLOSE-WAIT STATE - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - If the SYN is in the window it is an error, send a reset, any - outstanding RECEIVEs and SEND should receive "reset" responses, - all segment queues should be flushed, the user should also - receive an unsolicited general "connection reset" signal, enter - the CLOSED state, delete the TCB, and return. - - If the SYN is not in the window this step would not be reached - and an ack would have been sent in the first step (sequence - number check). - - - - - - - [Page 71] - - - September 1981 -Transmission Control Protocol -Functional Specification - SEGMENT ARRIVES - - - - fifth check the ACK field, - - if the ACK bit is off drop the segment and return - - if the ACK bit is on - - SYN-RECEIVED STATE - - If SND.UNA =< SEG.ACK =< SND.NXT then enter ESTABLISHED state - and continue processing. 
- - If the segment acknowledgment is not acceptable, form a - reset segment, - - - - and send it. - - ESTABLISHED STATE - - If SND.UNA < SEG.ACK =< SND.NXT then, set SND.UNA <- SEG.ACK. - Any segments on the retransmission queue which are thereby - entirely acknowledged are removed. Users should receive - positive acknowledgments for buffers which have been SENT and - fully acknowledged (i.e., SEND buffer should be returned with - "ok" response). If the ACK is a duplicate - (SEG.ACK < SND.UNA), it can be ignored. If the ACK acks - something not yet sent (SEG.ACK > SND.NXT) then send an ACK, - drop the segment, and return. - - If SND.UNA < SEG.ACK =< SND.NXT, the send window should be - updated. If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and - SND.WL2 =< SEG.ACK)), set SND.WND <- SEG.WND, set - SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK. - - Note that SND.WND is an offset from SND.UNA, that SND.WL1 - records the sequence number of the last segment used to update - SND.WND, and that SND.WL2 records the acknowledgment number of - the last segment used to update SND.WND. The check here - prevents using old segments to update the window. - - - - - - - - - -[Page 72] - - -September 1981 - Transmission Control Protocol - Functional Specification -SEGMENT ARRIVES - - - - FIN-WAIT-1 STATE - - In addition to the processing for the ESTABLISHED state, if - our FIN is now acknowledged then enter FIN-WAIT-2 and continue - processing in that state. - - FIN-WAIT-2 STATE - - In addition to the processing for the ESTABLISHED state, if - the retransmission queue is empty, the user's CLOSE can be - acknowledged ("ok") but do not delete the TCB. - - CLOSE-WAIT STATE - - Do the same processing as for the ESTABLISHED state. - - CLOSING STATE - - In addition to the processing for the ESTABLISHED state, if - the ACK acknowledges our FIN then enter the TIME-WAIT state, - otherwise ignore the segment. 
- - LAST-ACK STATE - - The only thing that can arrive in this state is an - acknowledgment of our FIN. If our FIN is now acknowledged, - delete the TCB, enter the CLOSED state, and return. - - TIME-WAIT STATE - - The only thing that can arrive in this state is a - retransmission of the remote FIN. Acknowledge it, and restart - the 2 MSL timeout. - - sixth, check the URG bit, - - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - - If the URG bit is set, RCV.UP <- max(RCV.UP,SEG.UP), and signal - the user that the remote side has urgent data if the urgent - pointer (RCV.UP) is in advance of the data consumed. If the - user has already been signaled (or is still in the "urgent - mode") for this continuous sequence of urgent data, do not - signal the user again. - - - - [Page 73] - - - September 1981 -Transmission Control Protocol -Functional Specification - SEGMENT ARRIVES - - - - CLOSE-WAIT STATE - CLOSING STATE - LAST-ACK STATE - TIME-WAIT - - This should not occur, since a FIN has been received from the - remote side. Ignore the URG. - - seventh, process the segment text, - - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - - Once in the ESTABLISHED state, it is possible to deliver segment - text to user RECEIVE buffers. Text from segments can be moved - into buffers until either the buffer is full or the segment is - empty. If the segment empties and carries an PUSH flag, then - the user is informed, when the buffer is returned, that a PUSH - has been received. - - When the TCP takes responsibility for delivering the data to the - user it must also acknowledge the receipt of the data. - - Once the TCP takes responsibility for the data it advances - RCV.NXT over the data accepted, and adjusts RCV.WND as - apporopriate to the current buffer availability. The total of - RCV.NXT and RCV.WND should not be reduced. - - Please note the window management suggestions in section 3.7. 
- - Send an acknowledgment of the form: - - - - This acknowledgment should be piggybacked on a segment being - transmitted if possible without incurring undue delay. - - - - - - - - - - - - -[Page 74] - - -September 1981 - Transmission Control Protocol - Functional Specification -SEGMENT ARRIVES - - - - CLOSE-WAIT STATE - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - This should not occur, since a FIN has been received from the - remote side. Ignore the segment text. - - eighth, check the FIN bit, - - Do not process the FIN if the state is CLOSED, LISTEN or SYN-SENT - since the SEG.SEQ cannot be validated; drop the segment and - return. - - If the FIN bit is set, signal the user "connection closing" and - return any pending RECEIVEs with same message, advance RCV.NXT - over the FIN, and send an acknowledgment for the FIN. Note that - FIN implies PUSH for any segment text not yet delivered to the - user. - - SYN-RECEIVED STATE - ESTABLISHED STATE - - Enter the CLOSE-WAIT state. - - FIN-WAIT-1 STATE - - If our FIN has been ACKed (perhaps in this segment), then - enter TIME-WAIT, start the time-wait timer, turn off the other - timers; otherwise enter the CLOSING state. - - FIN-WAIT-2 STATE - - Enter the TIME-WAIT state. Start the time-wait timer, turn - off the other timers. - - CLOSE-WAIT STATE - - Remain in the CLOSE-WAIT state. - - CLOSING STATE - - Remain in the CLOSING state. - - LAST-ACK STATE - - Remain in the LAST-ACK state. - - - [Page 75] - - - September 1981 -Transmission Control Protocol -Functional Specification - SEGMENT ARRIVES - - - - TIME-WAIT STATE - - Remain in the TIME-WAIT state. Restart the 2 MSL time-wait - timeout. - - and return. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 76] - - -September 1981 - Transmission Control Protocol - Functional Specification -USER TIMEOUT - - - - USER TIMEOUT - - For any state if the user timeout expires, flush all queues, signal - the user "error: connection aborted due to user timeout" in general - and for any outstanding calls, delete the TCB, enter the CLOSED - state and return. - - RETRANSMISSION TIMEOUT - - For any state if the retransmission timeout expires on a segment in - the retransmission queue, send the segment at the front of the - retransmission queue again, reinitialize the retransmission timer, - and return. - - TIME-WAIT TIMEOUT - - If the time-wait timeout expires on a connection delete the TCB, - enter the CLOSED state and return. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page 77] - - - September 1981 -Transmission Control Protocol - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -[Page 78] - - -September 1981 - Transmission Control Protocol - - - - GLOSSARY - - - -1822 - BBN Report 1822, "The Specification of the Interconnection of - a Host and an IMP". The specification of interface between a - host and the ARPANET. - -ACK - A control bit (acknowledge) occupying no sequence space, which - indicates that the acknowledgment field of this segment - specifies the next sequence number the sender of this segment - is expecting to receive, hence acknowledging receipt of all - previous sequence numbers. - -ARPANET message - The unit of transmission between a host and an IMP in the - ARPANET. The maximum size is about 1012 octets (8096 bits). - -ARPANET packet - A unit of transmission used internally in the ARPANET between - IMPs. The maximum size is about 126 octets (1008 bits). - -connection - A logical communication path identified by a pair of sockets. 
- -datagram - A message sent in a packet switched computer communications - network. - -Destination Address - The destination address, usually the network and host - identifiers. - -FIN - A control bit (finis) occupying one sequence number, which - indicates that the sender will send no more data or control - occupying sequence space. - -fragment - A portion of a logical unit of data, in particular an internet - fragment is a portion of an internet datagram. - -FTP - A file transfer protocol. - - - - - - [Page 79] - - - September 1981 -Transmission Control Protocol -Glossary - - - -header - Control information at the beginning of a message, segment, - fragment, packet or block of data. - -host - A computer. In particular a source or destination of messages - from the point of view of the communication network. - -Identification - An Internet Protocol field. This identifying value assigned - by the sender aids in assembling the fragments of a datagram. - -IMP - The Interface Message Processor, the packet switch of the - ARPANET. - -internet address - A source or destination address specific to the host level. - -internet datagram - The unit of data exchanged between an internet module and the - higher level protocol together with the internet header. - -internet fragment - A portion of the data of an internet datagram with an internet - header. - -IP - Internet Protocol. - -IRS - The Initial Receive Sequence number. The first sequence - number used by the sender on a connection. - -ISN - The Initial Sequence Number. The first sequence number used - on a connection, (either ISS or IRS). Selected on a clock - based procedure. - -ISS - The Initial Send Sequence number. The first sequence number - used by the sender on a connection. - -leader - Control information at the beginning of a message or block of - data. In particular, in the ARPANET, the control information - on an ARPANET message at the host-IMP interface. 
- - - -[Page 80] - - -September 1981 - Transmission Control Protocol - Glossary - - - -left sequence - This is the next sequence number to be acknowledged by the - data receiving TCP (or the lowest currently unacknowledged - sequence number) and is sometimes referred to as the left edge - of the send window. - -local packet - The unit of transmission within a local network. - -module - An implementation, usually in software, of a protocol or other - procedure. - -MSL - Maximum Segment Lifetime, the time a TCP segment can exist in - the internetwork system. Arbitrarily defined to be 2 minutes. - -octet - An eight bit byte. - -Options - An Option field may contain several options, and each option - may be several octets in length. The options are used - primarily in testing situations; for example, to carry - timestamps. Both the Internet Protocol and TCP provide for - options fields. - -packet - A package of data with a header which may or may not be - logically complete. More often a physical packaging than a - logical packaging of data. - -port - The portion of a socket that specifies which logical input or - output channel of a process is associated with the data. - -process - A program in execution. A source or destination of data from - the point of view of the TCP or other host-to-host protocol. - -PUSH - A control bit occupying no sequence space, indicating that - this segment contains data that must be pushed through to the - receiving user. - -RCV.NXT - receive next sequence number - - - - [Page 81] - - - September 1981 -Transmission Control Protocol -Glossary - - - -RCV.UP - receive urgent pointer - -RCV.WND - receive window - -receive next sequence number - This is the next sequence number the local TCP is expecting to - receive. - -receive window - This represents the sequence numbers the local (receiving) TCP - is willing to receive. 
Thus, the local TCP considers that - segments overlapping the range RCV.NXT to - RCV.NXT + RCV.WND - 1 carry acceptable data or control. - Segments containing sequence numbers entirely outside of this - range are considered duplicates and discarded. - -RST - A control bit (reset), occupying no sequence space, indicating - that the receiver should delete the connection without further - interaction. The receiver can determine, based on the - sequence number and acknowledgment fields of the incoming - segment, whether it should honor the reset command or ignore - it. In no case does receipt of a segment containing RST give - rise to a RST in response. - -RTP - Real Time Protocol: A host-to-host protocol for communication - of time critical information. - -SEG.ACK - segment acknowledgment - -SEG.LEN - segment length - -SEG.PRC - segment precedence value - -SEG.SEQ - segment sequence - -SEG.UP - segment urgent pointer field - - - - - -[Page 82] - - -September 1981 - Transmission Control Protocol - Glossary - - - -SEG.WND - segment window field - -segment - A logical unit of data, in particular a TCP segment is the - unit of data transfered between a pair of TCP modules. - -segment acknowledgment - The sequence number in the acknowledgment field of the - arriving segment. - -segment length - The amount of sequence number space occupied by a segment, - including any controls which occupy sequence space. - -segment sequence - The number in the sequence field of the arriving segment. - -send sequence - This is the next sequence number the local (sending) TCP will - use on the connection. It is initially selected from an - initial sequence number curve (ISN) and is incremented for - each octet of data or sequenced control transmitted. - -send window - This represents the sequence numbers which the remote - (receiving) TCP is willing to receive. It is the value of the - window field specified in segments from the remote (data - receiving) TCP. 
The range of new sequence numbers which may - be emitted by a TCP lies between SND.NXT and - SND.UNA + SND.WND - 1. (Retransmissions of sequence numbers - between SND.UNA and SND.NXT are expected, of course.) - -SND.NXT - send sequence - -SND.UNA - left sequence - -SND.UP - send urgent pointer - -SND.WL1 - segment sequence number at last window update - -SND.WL2 - segment acknowledgment number at last window update - - - - [Page 83] - - - September 1981 -Transmission Control Protocol -Glossary - - - -SND.WND - send window - -socket - An address which specifically includes a port identifier, that - is, the concatenation of an Internet Address with a TCP port. - -Source Address - The source address, usually the network and host identifiers. - -SYN - A control bit in the incoming segment, occupying one sequence - number, used at the initiation of a connection, to indicate - where the sequence numbering will start. - -TCB - Transmission control block, the data structure that records - the state of a connection. - -TCB.PRC - The precedence of the connection. - -TCP - Transmission Control Protocol: A host-to-host protocol for - reliable communication in internetwork environments. - -TOS - Type of Service, an Internet Protocol field. - -Type of Service - An Internet Protocol field which indicates the type of service - for this internet fragment. - -URG - A control bit (urgent), occupying no sequence space, used to - indicate that the receiving user should be notified to do - urgent processing as long as there is data to be consumed with - sequence numbers less than the value indicated in the urgent - pointer. - -urgent pointer - A control field meaningful only when the URG bit is on. This - field communicates the value of the urgent pointer which - indicates the data octet associated with the sending user's - urgent call. - - - - - -[Page 84] - - -September 1981 - Transmission Control Protocol - - - - REFERENCES - - - -[1] Cerf, V., and R. 
Kahn, "A Protocol for Packet Network - Intercommunication", IEEE Transactions on Communications, - Vol. COM-22, No. 5, pp 637-648, May 1974. - -[2] Postel, J. (ed.), "Internet Protocol - DARPA Internet Program - Protocol Specification", RFC 791, USC/Information Sciences - Institute, September 1981. - -[3] Dalal, Y. and C. Sunshine, "Connection Management in Transport - Protocols", Computer Networks, Vol. 2, No. 6, pp. 454-473, - December 1978. - -[4] Postel, J., "Assigned Numbers", RFC 790, USC/Information Sciences - Institute, September 1981. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - [Page 85] - diff --git a/kernel/picotcp/RFC/rfc0813.txt b/kernel/picotcp/RFC/rfc0813.txt deleted file mode 100644 index 5817050..0000000 --- a/kernel/picotcp/RFC/rfc0813.txt +++ /dev/null @@ -1,1167 +0,0 @@ - -RFC: 813 - - - - WINDOW AND ACKNOWLEDGEMENT STRATEGY IN TCP - - David D. Clark - MIT Laboratory for Computer Science - Computer Systems and Communications Group - July, 1982 - - - 1. Introduction - - - This document describes implementation strategies to deal with two - -mechanisms in TCP, the window and the acknowledgement. These mechanisms - -are described in the specification document, but it is possible, while - -complying with the specification, to produce implementations which yield - -very bad performance. Happily, the pitfalls possible in window and - -acknowledgement strategies are very easy to avoid. - - - It is a much more difficult exercise to verify the performance of a - -specification than the correctness. Certainly, we have less experience - -in this area, and we certainly lack any useful formal technique. - -Nonetheless, it is important to attempt a specification in this area, - -because different implementors might otherwise choose superficially - -reasonable algorithms which interact poorly with each other. 
This - -document presents a particular set of algorithms which have received - -testing in the field, and which appear to work properly with each other. - -With more experience, these algorithms may become part of the formal - -specification: until such time their use is recommended. - - 2 - - -2. The Mechanisms - - - The acknowledgement mechanism is at the heart of TCP. Very simply, - -when data arrives at the recipient, the protocol requires that it send - -back an acknowledgement of this data. The protocol specifies that the - -bytes of data are sequentially numbered, so that the recipient can - -acknowledge data by naming the highest numbered byte of data it has - -received, which also acknowledges the previous bytes (actually, it - -identifies the first byte of data which it has not yet received, but - -this is a small detail). The protocol contains only a general assertion - -that data should be acknowledged promptly, but gives no more specific - -indication as to how quickly an acknowledgement must be sent, or how - -much data should be acknowledged in each separate acknowledgement. - - - The window mechanism is a flow control tool. Whenever appropriate, - -the recipient of data returns to the sender a number, which is (more or - -less) the size of the buffer which the receiver currently has available - -for additional data. This number of bytes, called the window, is the - -maximum which the sender is permitted to transmit until the receiver - -returns some additional window. Sometimes, the receiver will have no - -buffer space available, and will return a window value of zero. Under - -these circumstances,the protocol requires the sender to send a small - -segment to the receiver now and then, to see if more data is accepted. 
- -If the window remains closed at zero for some substantial period, and - -the sender can obtain no response from the receiver, the protocol - -requires the sender to conclude that the receiver has failed, and to - -close the connection. Again, there is very little performance - - 3 - - -information in the specification, describing under what circumstances - -the window should be increased, and how the sender should respond to - -such revised information. - - - A bad implementation of the window algorithm can lead to extremely - -poor performance overall. The degradations which occur in throughput - -and CPU utilizations can easily be several factors of ten, not just a - -fractional increase. This particular phenomenon is specific enough that - -it has been given the name of Silly Window Syndrome, or SWS. Happily - -SWS is easy to avoid if a few simple rules are observed. The most - -important function of this memo is to describe SWS, so that implementors - -will understand the general nature of the problem, and to describe - -algorithms which will prevent its occurrence. This document also - -describes performance enhancing algorithms which relate to - -acknowledgement, and discusses the way acknowledgement and window - -algorithms interact as part of SWS. - - - 3. SILLY WINDOW SYNDROME - - - In order to understand SWS, we must first define two new terms. - -Superficially, the window mechanism is very simple: there is a number, - -called "the window", which is returned from the receiver to the sender. - -However, we must have a more detailed way of talking about the meaning - -of this number. The receiver of data computes a value which we will - -call the "offered window". In a simple case, the offered window - -corresponds to the amount of buffer space available in the receiver. - -This correspondence is not necessarily exact, but is a suitable model - -for the discussion to follow. 
It is the offered window which is - - 4 - - -actually transmitted back from the receiver to the sender. The sender - -uses the offered window to compute a different value, the "usable - -window", which is the offered window minus the amount of outstanding - -unacknowledged data. The usable window is less than or equal to the - -offered window, and can be much smaller. - - - Consider the following simple example. The receiver initially - -provides an offered window of 1,000. The sender uses up this window by - -sending five segments of 200 bytes each. The receiver, on processing - -the first of these segments, returns an acknowledgement which also - -contains an updated window value. Let us assume that the receiver of - -the data has removed the first 200 bytes from the buffer, so that the - -receiver once again has 1,000 bytes of available buffer. Therefore, the - -receiver would return, as before, an offered window of 1,000 bytes. The - -sender, on receipt of this first acknowledgement, now computes the - -additional number of bytes which may be sent. In fact, of the 1,000 - -bytes which the recipient is prepared to receive at this time, 800 are - -already in transit, having been sent in response to the previous offered - -window. In this case, the usable window is only 200 bytes. - - - Let us now consider how SWS arises. To continue the previous - -example, assume that at some point, when the sender computes a useable - -window of 200 bytes, it has only 50 bytes to send until it reaches a - -"push" point. It thus sends 50 bytes in one segment, and 150 bytes in - -the next segment. Sometime later, this 50-byte segment will arrive at - -the recipient, which will process and remove the 50 bytes and once again - -return an offered window of 1,000 bytes. However, the sender will now - - 5 - - -compute that there are 950 bytes in transit in the network, so that the - -useable window is now only 50 bytes. 
Thus, the sender will once again - -send a 50 byte segment, even though there is no longer a natural - -boundary to force it. - - - In fact, whenever the acknowledgement of a small segment comes - -back, the useable window associated with that acknowledgement will cause - -another segment of the same small size to be sent, until some - -abnormality breaks the pattern. It is easy to see how small segments - -arise, because natural boundaries in the data occasionally cause the - -sender to take a computed useable window and divide it up between two - -segments. Once that division has occurred, there is no natural way for - -those useable window allocations to be recombined; thus the breaking up - -of the useable window into small pieces will persist. - - - Thus, SWS is a degeneration in the throughput which develops over - -time, during a long data transfer. If the sender ever stops, as for - -example when it runs out of data to send, the receiver will eventually - -acknowledge all the outstanding data, so that the useable window - -computed by the sender will equal the full offered window of the - -receiver. At this point the situation will have healed, and further - -data transmission over the link will occur efficiently. However, in - -large file transfers, which occur without interruption, SWS can cause - -appalling performance. The network between the sender and the receiver - -becomes clogged with many small segments, and an equal number of - -acknowledgements, which in turn causes lost segments, which triggers - -massive retransmission. Bad cases of SWS have been seen in which the - - 6 - - -average segment size was one-tenth of the size the sender and receiver - -were prepared to deal with, and the average number of retransmission per - -successful segments sent was five. - - - Happily, SWS is trivial to avoid. The following sections describe - -two algorithms, one executed by the sender, and one by the receiver, - -which appear to eliminate SWS completely. 
Actually, either algorithm by - -itself is sufficient to prevent SWS, and thus protect a host from a - -foreign implementation which has failed to deal properly with this - -problem. The two algorithms taken together produce an additional - -reduction in CPU consumption, observed in practice to be as high as a - -factor of four. - - - 4. Improved Window Algorithms - - - The receiver of data can take a very simple step to eliminate SWS. - -When it disposes of a small amount of data, it can artificially reduce - -the offered window in subsequent acknowledgements, so that the useable - -window computed by the sender does not permit the sending of any further - -data. At some later time, when the receiver has processed a - -substantially larger amount of incoming data, the artificial limitation - -on the offered window can be removed all at once, so that the sender - -computes a sudden large jump rather than a sequence of small jumps in - -the useable window. - - - At this level, the algorithm is quite simple, but in order to - -determine exactly when the window should be opened up again, it is - -necessary to look at some of the other details of the implementation. - - 7 - - -Depending on whether the window is held artificially closed for a short - -or long time, two problems will develop. The one we have already - -discussed -- never closing the window artificially -- will lead to SWS. - -On the other hand, if the window is only opened infrequently, the - -pipeline of data in the network between the sender and the receiver may - -have emptied out while the sender was being held off, so that a delay is - -introduced before additional data arrives from the sender. This delay - -does reduce throughput, but it does not consume network resources or CPU - -resources in the process, as does SWS. Thus, it is in this direction - -that one ought to overcompensate. 
For a simple implementation, a rule - -of thumb that seems to work in practice is to artificially reduce the - -offered window until the reduction constitutes one half of the available - -space, at which point increase the window to advertise the entire space - -again. In any event, one ought to make the chunk by which the window is - -opened at least permit one reasonably large segment. (If the receiver - -is so short of buffers that it can never advertise a large enough buffer - -to permit at least one large segment, it is hopeless to expect any sort - -of high throughput.) - - - There is an algorithm that the sender can use to achieve the same - -effect described above: a very simple and elegant rule first described - -by Michael Greenwald at MIT. The sender of the data uses the offered - -window to compute a useable window, and then compares the useable window - -to the offered window, and refrains from sending anything if the ratio - -of useable to offered is less than a certain fraction. Clearly, if the - -computed useable window is small compared to the offered window, this - -means that a substantial amount of previously sent information is still - - 8 - - -in the pipeline from the sender to the receiver, which in turn means - -that the sender can count on being granted a larger useable window in - -the future. Until the useable window reaches a certain amount, the - -sender should simply refuse to send anything. - - - Simple experiments suggest that the exact value of the ratio is not - -very important, but that a value of about 25 percent is sufficient to - -avoid SWS and achieve reasonable throughput, even for machines with a - -small offered window. An additional enhancement which might help - -throughput would be to attempt to hold off sending until one can send a - -maximum size segment. 
Another enhancement would be to send anyway, even - -if the ratio is small, if the useable window is sufficient to hold the - -data available up to the next "push point". - - - This algorithm at the sender end is very simple. Notice that it is - -not necessary to set a timer to protect against protocol lockup when - -postponing the send operation. Further acknowledgements, as they - -arrive, will inevitably change the ratio of offered to useable window. - -(To see this, note that when all the data in the catanet pipeline has - -arrived at the receiver, the resulting acknowledgement must yield an - -offered window and useable window that equal each other.) If the - -expected acknowledgements do not arrive, the retransmission mechanism - -will come into play to assure that something finally happens. Thus, to - -add this algorithm to an existing TCP implementation usually requires - -one line of code. As part of the send algorithm it is already necessary - -to compute the useable window from the offered window. It is a simple - -matter to add a line of code which, if the ratio is less than a certain - - 9 - - -percent, sets the useable window to zero. The results of SWS are so - -devastating that no sender should be without this simple piece of - -insurance. - - - 5. Improved Acknowledgement Algorithms - - - In the beginning of this paper, an overly simplistic implementation - -of TCP was described, which led to SWS. One of the characteristics of - -this implementation was that the recipient of data sent a separate - -acknowledgement for every segment that it received. This compulsive - -acknowledgement was one of the causes of SWS, because each - -acknowledgement provided some new useable window, but even if one of the - -algorithms described above is used to eliminate SWS, overly frequent - -acknowledgement still has a substantial problem, which is that it - -greatly increases the processing time at the sender's end. 
Measurement - -of TCP implementations, especially on large operating systems, indicate - -that most of the overhead of dealing with a segment is not in the - -processing at the TCP or IP level, but simply in the scheduling of the - -handler which is required to deal with the segment. A steady dribble of - -acknowledgements causes a high overhead in scheduling, with very little - -to show for it. This waste is to be avoided if possible. - - - There are two reasons for prompt acknowledgement. One is to - -prevent retransmission. We will discuss later how to determine whether - -unnecessary retransmission is occurring. The other reason one - -acknowledges promptly is to permit further data to be sent. However, - -the previous section makes quite clear that it is not always desirable - -to send a little bit of data, even though the receiver may have room for - - 10 - - -it. Therefore, one can state a general rule that under normal - -operation, the receiver of data need not, and for efficiency reasons - -should not, acknowledge the data unless either the acknowledgement is - -intended to produce an increased useable window, is necessary in order - -to prevent retransmission or is being sent as part of a reverse - -direction segment being sent for some other reason. We will consider an - -algorithm to achieve these goals. - - - Only the recipient of the data can control the generation of - -acknowledgements. Once an acknowledgement has been sent from the - -receiver back to the sender, the sender must process it. Although the - -extra overhead is incurred at the sender's end, it is entirely under the - -receiver's control. Therefore, we must now describe an algorithm which - -occurs at the receiver's end. 
Obviously, the algorithm must have the - -following general form; sometimes the receiver of data, upon processing - -a segment, decides not to send an acknowledgement now, but to postpone - -the acknowledgement until some time in the future, perhaps by setting a - -timer. The peril of this approach is that on many large operating - -systems it is extremely costly to respond to a timer event, almost as - -costly as to respond to an incoming segment. Clearly, if the receiver - -of the data, in order to avoid extra overhead at the sender end, spends - -a great deal of time responding to timer interrupts, no overall benefit - -has been achieved, for efficiency at the sender end is achieved by great - -thrashing at the receiver end. We must find an algorithm that avoids - -both of these perils. - - - The following scheme seems a good compromise. The receiver of data - - 11 - - -will refrain from sending an acknowledgement under certain - -circumstances, in which case it must set a timer which will cause the - -acknowledgement to be sent later. However, the receiver should do this - -only where it is a reasonable guess that some other event will intervene - -and prevent the necessity of the timer interrupt. The most obvious - -event on which to depend is the arrival of another segment. So, if a - -segment arrives, postpone sending an acknowledgement if both of the - -following conditions hold. First, the push bit is not set in the - -segment, since it is a reasonable assumption that there is more data - -coming in a subsequent segment. Second, there is no revised window - -information to be sent back. - - - This algorithm will insure that the timer, although set, is seldom - -used. The interval of the timer is related to the expected inter- - -segment delay, which is in turn a function of the particular network - -through which the data is flowing. For the Arpanet, a reasonable - -interval seems to be 200 to 300 milliseconds. 
Appendix A describes an - -adaptive algorithm for measuring this delay. - - - The section on improved window algorithms described both a receiver - -algorithm and a sender algorithm, and suggested that both should be - -used. The reason for this is now clear. While the sender algorithm is - -extremely simple, and useful as insurance, the receiver algorithm is - -required in order that this improved acknowledgement strategy work. If - -the receipt of every segment causes a new window value to be returned, - -then of necessity an acknowledgement will be sent for every data - -segment. When, according to the strategy of the previous section, the - - 12 - - -receiver determines to artificially reduce the offered window, that is - -precisely the circumstance under which an acknowledgement need not be - -sent. When the receiver window algorithm and the receiver - -acknowledgement algorithm are used together, it will be seen that - -sending an acknowledgement will be triggered by one of the following - -events. First, a push bit has been received. Second, a temporary pause - -in the data stream is detected. Third, the offered window has been - -artificially reduced to one-half its actual value. - - - In the beginning of this section, it was pointed out that there are - -two reasons why one must acknowledge data. Our consideration at this - -point has been concerned only with the first, that an acknowledgement - -must be returned as part of triggering the sending of new data. It is - -also necessary to acknowledge whenever the failure to do so would - -trigger retransmission by the sender. Since the retransmission interval - -is selected by the sender, the receiver of the data cannot make a - -precise determination of when the acknowledgement must be sent. - -However, there is a rough rule the sender can use to avoid - -retransmission, provided that the receiver is reasonably well behaved. 
- - - We will assume that the sender of the data uses the optional algorithm - -described in the TCP specification, in which the roundtrip delay is - -measured using an exponential decay smoothing algorithm. Retransmission - -of a segment occurs if the measured delay for that segment exceeds the - -smoothed average by some factor. To see how retransmission might be - -triggered, one must consider the pattern of segment arrivals at the - -receiver. The goal of our strategy was that the sender should send off - - 13 - - -a number of segments in close sequence, and receive one acknowledgement - -for the whole burst. The acknowledgement will be generated by the - -receiver at the time that the last segment in the burst arrives at the - -receiver. (To ensure the prompt return of the acknowledgement, the - -sender could turn on the "push" bit in the last segment of the burst.) - -The delay observed at the sender between the initial transmission of a - -segment and the receipt of the acknowledgement will include both the - -network transit time, plus the holding time at the receiver. The - -holding time will be greatest for the first segments in the burst, and - -smallest for the last segments in the burst. Thus, the smoothing - -algorithm will measure a delay which is roughly proportional to the - -average roundtrip delay for all the segments in the burst. Problems - -will arise if the average delay is substantially smaller than the - -maximum delay and the smoothing algorithm used has a very small - -threshold for triggering retransmission. The widest variation between - -average and maximum delay will occur when network transit time is - -negligible, and all delay is processing time. In this case, the maximum - -will be twice the average (by simple algebra) so the threshold that - -controls retransmission should be somewhat more than a factor of two.
- - - In practice, retransmission of the first segments of a burst has - -not been a problem because the delay measured consists of the network - -roundtrip delay, as well as the delay due to withholding the - -acknowledgement, and the roundtrip tends to dominate except in very low - -roundtrip time situations (such as when sending to one's self for test - -purposes). This low roundtrip situation can be covered very simply by - -including a minimum value below which the roundtrip estimate is not - -permitted to drop. - - 14 - - - In our experiments with this algorithm, retransmission due to - -faulty calculation of the roundtrip delay occurred only once, when the - -parameters of the exponential smoothing algorithm had been misadjusted - -so that they were only taking into account the last two or three - -segments sent. Clearly, this will cause trouble since the last two or - -three segments of any burst are the ones whose holding time at the - -receiver is minimal, so the resulting total estimate was much lower than - -appropriate. Once the parameters of the algorithm had been adjusted so - -that the number of segments taken into account was approximately twice - -the number of segments in a burst of average size, with a threshold - -factor of 1.5, no further retransmission has ever been identified due to - -this problem, including when sending to ourself and when sending over - -high delay nets. - - - 6. Conservative Vs. Optimistic Windows - - - According to the TCP specification, the offered window is presumed - -to have some relationship to the amount of data which the receiver is - -actually prepared to receive. However, it is not necessarily an exact - -correspondence. We will use the term "conservative window" to describe - -the case where the offered window is precisely no larger than the actual - -buffering available. The drawback to conservative window algorithms is - -that they can produce very low throughput in long delay situations. 
It - -is easy to see that the maximum input of a conservative window algorithm - -is one bufferfull every roundtrip delay in the net, since the next - -bufferfull cannot be launched until the updated window/acknowledgement - -information from the previous transmission has made the roundtrip. - - 15 - - - In certain cases, it may be possible to increase the overall - -throughput of the transmission by increasing the offered window over the - -actual buffer available at the receiver. Such a strategy we will call - -an "optimistic window" strategy. The optimistic strategy works if the - -network delivers the data to the recipient sufficiently slowly that it - -can process the data fast enough to keep the buffer from overflowing. - -If the receiver is faster than the sender, one could, with luck, permit - -an infinitely optimistic window, in which the sender is simply permitted - -to send full-speed. If the sender is faster than the receiver, however, - -and the window is too optimistic, then some segments will cause a buffer - -overflow, and will be discarded. Therefore, the correct strategy to - -implement an optimistic window is to increase the window size until - -segments start to be lost. This only works if it is possible to detect - -that the segment has been lost. In some cases, it is easy to do, - -because the segment is partially processed inside the receiving host - -before it is thrown away. In other cases, overflows may actually cause - -the network interface to be clogged, which will cause the segments to be - -lost elsewhere in the net. It is inadvisable to attempt an optimistic - -window strategy unless one is certain that the algorithm can detect the - -resulting lost segments. However, the increase in throughput which is - -possible from optimistic windows is quite substantial. Any systems with - -small buffer space should seriously consider the merit of optimistic - -windows. 
- - - The selection of an appropriate window algorithm is actually more - -complicated than even the above discussion suggests. The following - -considerations are not presented with the intention that they be - - 16 - - -incorporated in current implementations of TCP, but as background for - -the sophisticated designer who is attempting to understand how his TCP - -will respond to a variety of networks, with different speed and delay - -characteristics. The particular pattern of windows and acknowledgements - -sent from receiver to sender influences two characteristics of the data - -being sent. First, they control the average data rate. Clearly, the - -average rate of the sender cannot exceed the average rate of the - -receiver, or long-term buffer overflow will occur. Second, they - -influence the burstiness of the data coming from the sender. Burstiness - -has both advantages and disadvantages. The advantage of burstiness is - -that it reduces the CPU processing necessary to send the data. This - -follows from the observed fact, especially on large machines, that most - -of the cost of sending a segment is not the TCP or IP processing, but - -the scheduling overhead of getting started. - - - On the other hand, the disadvantage of burstiness is that it may - -cause buffers to overflow, either in the eventual recipient, which was - -discussed above, or in an intermediate gateway, a problem ignored in - -this paper. The algorithms described above attempt to strike a balance - -between excessive burstiness, which in the extreme cases can cause - -delays because a burst is not requested soon enough, and excessive - -fragmentation of the data stream into small segments, which we - -identified as Silly Window Syndrome. - - - Under conditions of extreme delay in the network, none of the - -algorithms described above will achieve adequate throughput.
- -Conservative window algorithms have a predictable throughput limit, - - 17 - - -which is one windowfull per roundtrip delay. Attempts to solve this by - -optimistic window strategies may cause buffer overflows due to the - -bursty nature of the arriving data. A very sophisticated way to solve - -this is for the receiver, having measured by some means the roundtrip - -delay and intersegment arrival rate of the actual connection, to open - -his window, not in one optimistic increment of gigantic proportion, but - -in a number of smaller optimistic increments, which have been carefully - -spaced using a timer so that the resulting smaller bursts which arrive - -are each sufficiently small to fit into the existing buffers. One could - -visualize this as a number of requests flowing backwards through the net - -which trigger in return a number of bursts which flow back spaced evenly - -from the sender to the receiver. The overall result is that the - -receiver uses the window mechanism to control the burstiness of the - -arrivals, and the average rate. - - - To my knowledge, no such strategy has been implemented in any TCP. - -First, we do not normally have delays high enough to require this kind - -of treatment. Second, the strategy described above is probably not - -stable unless it is very carefully balanced. Just as buses on a single - -bus route tend to bunch up, bursts which start out equally spaced could - -well end up piling into each other, and forming the single large burst - -which the receiver was hoping to avoid. It is important to understand - -this extreme case, however, in order to understand the limits beyond - -which TCP, as normally implemented, with either conservative or simple - -optimistic windows can be expected to deliver throughput which is a - -reasonable percentage of the actual network capacity. - - 18 - - - 7. 
Conclusions - - - This paper describes three simple algorithms for performance - -enhancement in TCP, one at the sender end and two at the receiver. The - -sender algorithm is to refrain from sending if the useable window is - -smaller than 25 percent of the offered window. The receiver algorithms - -are first, to artificially reduce the offered window when processing new - -data if the resulting reduction does not represent more than some - -fraction, say 50 percent, of the actual space available, and second, to - -refrain from sending an acknowledgment at all if two simple conditions - -hold. - - - Either of these algorithms will prevent the worst aspects of Silly - -Window Syndrome, and when these algorithms are used together, they will - -produce substantial improvement in CPU utilization, by eliminating the - -process of excess acknowledgements. - - - Preliminary experiments with these algorithms suggest that they - -work, and work very well. Both the sender and receiver algorithms have - -been shown to eliminate SWS, even when talking to fairly silly - -algorithms at the other end. The Multics mailer, in particular, had - -suffered substantial attacks of SWS while sending large mail to a number - -of hosts. We believe that implementation of the sender side algorithm - -has eliminated every known case of SWS detected in our mailer. - -Implementation of the receiver side algorithm produced substantial - -improvements of CPU time when Multics was the sending system. Multics - -is a typical large operating system, with scheduling costs which are - -large compared to the actual processing time for protocol handlers. - - 19 - - -Tests were done sending from Multics to a host which implemented the SWS - -suppression algorithm, and which could either refrain or not from - -sending acknowledgements on each segment. 
As predicted, suppressing the - -return acknowledgements did not influence the throughput for large data - -transfer at all, since the throttling effect was elsewhere. However, - -the CPU time required to process the data at the Multics end was cut by - -a factor of four (In this experiment, the bursts of data which were - -being sent were approximately eight segments. Thus, the number of - -acknowledgements in the two experiments differed by a factor of eight.) - - - An important consideration in evaluating these algorithms is that - -they must not cause the protocol implementations to deadlock. All of - -the recommendations in this document have the characteristic that they - -suggest one refrain from doing something even though the protocol - -specification permits one to do it. The possibility exists that if one - -refrains from doing something now one may never get to do it later, and - -both ends will halt, even though it would appear superficially that the - -transaction can continue. - - - Formally, the idea that things continue to work is referred to as - -"liveness". One of the defects of ad hoc solutions to performance - -problems is the possibility that two different approaches will interact - -to prevent liveness. It is believed that the algorithms described in - -this paper are always live, and that is one of the reasons why there is - -a strong advantage in uniform use of this particular proposal, except in - -cases where it is explicitly demonstrated not to work. - - - The argument for liveness in these solutions proceeds as follows. - - 20 - - -First, the sender algorithm can only be stopped by one thing, a refusal - -of the receiver to acknowledge sent data. As long as the receiver - -continues to acknowledge data, the ratio of useable window to offered - -window will approach one, and eventually the sender must continue to - -send. However, notice that the receiver algorithm we have advocated - -involves refraining from acknowledging. 
Therefore, we certainly do have - -a situation where improper operation of this algorithm can prevent - -liveness. - - - What we must show is that the receiver of the data, if it chooses - -to refrain from acknowledging, will do so only for a short time, and not - -forever. The design of the algorithm described above was intended to - -achieve precisely this goal: whenever the receiver of data refrained - -from sending an acknowledgement it was required to set a timer. The - -only event that was permitted to clear that timer was the receipt of - -another segment, which essentially reset the timer, and started it going - -again. Thus, an acknowledgement will be sent as soon as no data has - -been received. This has precisely the effect desired: if the data flow - -appears to be disrupted for any reason, the receiver responds by sending - -an up-to-date acknowledgement. In fact, the receiver algorithm is - -designed to be more robust than this, for transmission of an - -acknowledgment is triggered by two events, either a cessation of data or - -a reduction in the amount of offered window to 50 percent of the actual - -value. This is the condition which will normally trigger the - -transmission of this acknowledgement. - - 21 - - - - - - APPENDIX A - - - Dynamic Calculation of Acknowledgement Delay - - - The text suggested that when setting a timer to postpone the - -sending of an acknowledgement, a fixed interval of 200 to 300 - -milliseconds would work properly in practice. This has not been - -verified over a wide variety of network delays, and clearly if there is - -a very slow net which stretches out the intersegment arrival time, a - -fixed interval will fail. In a sophisticated TCP, which is expected to - -adjust dynamically (rather than manually) to changing network - -conditions, it would be appropriate to measure this interval and respond - -dynamically. 
The following algorithm, which has been relegated to an - -Appendix, because it has not been tested, seems sensible. Whenever a - -segment arrives which does not have the push bit on in it, start a - -timer, which runs until the next segment arrives. Average these - -interarrival intervals, using an exponential decay smoothing function - -tuned to take into account perhaps the last ten or twenty segments that - -have come in. Occasionally, there will be a long interarrival period, - -even for a segment which does not terminate a piece of data being - -pushed, perhaps because a window has gone to zero or some glitch in the - -sender or the network has held up the data. Therefore, examine each - -interarrival interval, and discard it from the smoothing algorithm if it - -exceeds the current estimate by some amount, perhaps a ratio of two or - -four times. By rejecting the larger intersegment arrival intervals, one - -should obtain a smoothed estimate of the interarrival of segments inside - - 22 - - -a burst. The number need not be exact, since the timer which triggers - -acknowledgement can add a fairly generous fudge factor to this without - -causing trouble with the sender's estimate of the retransmission - -interval, so long as the fudge factor is constant. - - diff --git a/kernel/picotcp/RFC/rfc0814.txt b/kernel/picotcp/RFC/rfc0814.txt deleted file mode 100644 index b82819e..0000000 --- a/kernel/picotcp/RFC/rfc0814.txt +++ /dev/null @@ -1,763 +0,0 @@ - -RFC: 814 - - - - NAME, ADDRESSES, PORTS, AND ROUTES - - David D. Clark - MIT Laboratory for Computer Science - Computer Systems and Communications Group - July, 1982 - - - 1. Introduction - - - It has been said that the principal function of an operating system - -is to define a number of different names for the same object, so that it - -can busy itself keeping track of the relationship between all of the - -different names. Network protocols seem to have somewhat the same - -characteristic.
In TCP/IP, there are several ways of referring to - -things. At the human visible interface, there are character string - -"names" to identify networks, hosts, and services. Host names are - -translated into network "addresses", 32-bit values that identify the - -network to which a host is attached, and the location of the host on - -that net. Service names are translated into a "port identifier", which - -in TCP is a 16-bit value. Finally, addresses are translated into - -"routes", which are the sequence of steps a packet must take to reach - -the specified addresses. Routes show up explicitly in the form of the - -internet routing options, and also implicitly in the address to route - -translation tables which all hosts and gateways maintain. - - - This RFC gives suggestions and guidance for the design of the - -tables and algorithms necessary to keep track of these various sorts of - -identifiers inside a host implementation of TCP/IP. - - 2 - - - 2. The Scope of the Problem - - - One of the first questions one can ask about a naming mechanism is - -how many names one can expect to encounter. In order to answer this, it - -is necessary to know something about the expected maximum size of the - -internet. Currently, the internet is fairly small. It contains no more - -than 25 active networks, and no more than a few hundred hosts. This - -makes it possible to install tables which exhaustively list all of these - -elements. However, any implementation undertaken now should be based on - -an assumption of a much larger internet. The guidelines currently - -recommended are an upper limit of about 1,000 networks. If we imagine - -an average number of 25 hosts per net, this would suggest a maximum - -number of 25,000 hosts. It is quite unclear whether this host estimate - -is high or low, but even if it is off by several factors of two, the - -resulting number is still large enough to suggest that current table - -management strategies are unacceptable. 
Some fresh techniques will be - -required to deal with the internet of the future. - - - 3. Names - - - As the previous section suggests, the internet will eventually have - -a sufficient number of names that a host cannot have a static table - -which provides a translation from every name to its associated address. - -There are several reasons other than sheer size why a host would not - -wish to have such a table. First, with that many names, we can expect - -names to be added and deleted at such a rate that an installer might - -spend all his time just revising the table. Second, most of the names - -will refer to addresses of machines with which nothing will ever be - - 3 - - -exchanged. In fact, there may be whole networks with which a particular - -host will never have any traffic. - - - To cope with this large and somewhat dynamic environment, the - -internet is moving from its current position in which a single name - -table is maintained by the NIC and distributed to all hosts, to a - -distributed approach in which each network (or group of networks) is - -responsible for maintaining its own names and providing a "name server" - -to translate between the names and the addresses in that network. Each - -host is assumed to store not a complete set of name-address - -translations, but only a cache of recently used names. When a name is - -provided by a user for translation to an address, the host will first - -examine its local cache, and if the name is not found there, will - -communicate with an appropriate name server to obtain the information, - -which it may then insert into its cache for future reference. - - - Unfortunately, the name server mechanism is not totally in place in - -the internet yet, so for the moment, it is necessary to continue to use - -the old strategy of maintaining a complete table of all names in every - -host. 
Implementors, however, should structure this table in such a way - -that it is easy to convert later to a name server approach. In - -particular, a reasonable programming strategy would be to make the name - -table accessible only through a subroutine interface, rather than by - -scattering direct references to the table all through the code. In this - -way, it will be possible, at a later date, to replace the subroutine - -with one capable of making calls on remote name servers. - - - A problem which occasionally arises in the ARPANET today is that - - 4 - - -the information in a local host table is out of date, because a host has - -moved, and a revision of the host table has not yet been installed from - -the NIC. In this case, one attempts to connect to a particular host and - -discovers an unexpected machine at the address obtained from the local - -table. If a human is directly observing the connection attempt, the - -error is usually detected immediately. However, for unattended - -operations such as the sending of queued mail, this sort of problem can - -lead to a great deal of confusion. - - - The nameserver scheme will only make this problem worse, if hosts - -cache locally the address associated with names that have been looked - -up, because the host has no way of knowing when the address has changed - -and the cache entry should be removed. To solve this problem, plans are - -currently under way to define a simple facility by which a host can - -query a foreign address to determine what name is actually associated - -with it. SMTP already defines a verification technique based on this - -approach. - - - 4. Addresses - - - The IP layer must know something about addresses. In particular, - -when a datagram is being sent out from a host, the IP layer must decide - -where to send it on the immediately connected network, based on the - -internet address. 
Mechanically, the IP first tests the internet address - -to see whether the network number of the recipient is the same as the - -network number of the sender. If so, the packet can be sent directly to - -the final recipient. If not, the datagram must be sent to a gateway for - -further forwarding. In this latter case, a second decision must be - - 5 - - -made, as there may be more than one gateway available on the immediately - -attached network. - - - When the internet address format was first specified, 8 bits were - -reserved to identify the network. Early implementations thus - -implemented the above algorithm by means of a table with 256 entries, - -one for each possible net, that specified the gateway of choice for that - -net, with a special case entry for those nets to which the host was - -immediately connected. Such tables were sometimes statically filled in, - -which caused confusion and malfunctions when gateways and networks moved - -(or crashed). - - - The current definition of the internet address provides three - -different options for network numbering, with the goal of allowing a - -very large number of networks to be part of the internet. Thus, it is - -no longer possible to imagine having an exhaustive table to select a - -gateway for any foreign net. Again, current implementations must use a - -strategy based on a local cache of routing information for addresses - -currently being used. - - - The recommended strategy for address to route translation is as - -follows. When the IP layer receives an outbound datagram for - -transmission, it extracts the network number from the destination - -address, and queries its local table to determine whether it knows a - -suitable gateway to which to send the datagram. If it does, the job is - -done. (But see RFC 816 on Fault Isolation and Recovery, for - -recommendations on how to deal with the possible failure of the - -gateway.) 
If there is no such entry in the local table, then select any - - 6 - - -accessible gateway at random, insert that as an entry in the table, and - -use it to send the packet. Either the guess will be right or wrong. If - -it is wrong, the gateway to which the packet was sent will return an - -ICMP redirect message to report that there is a better gateway to reach - -the net in question. The arrival of this redirect should cause an - -update of the local table. - - - The number of entries in the local table should be determined by - -the maximum number of active connections which this particular host can - -support at any one time. For a large time sharing system, one might - -imagine a table with 100 or more entries. For a personal computer being - -used to support a single user telnet connection, only one address to - -gateway association need be maintained at once. - - - The above strategy actually does not completely solve the problem, - -but only pushes it down one level, where the problem then arises of how - -a new host, freshly arriving on the internet, finds all of its - -accessible gateways. Intentionally, this problem is not solved within - -the internetwork architecture. The reason is that different networks - -have drastically different strategies for allowing a host to find out - -about other hosts on its immediate network. Some nets permit a - -broadcast mechanism. In this case, a host can send out a message and - -expect an answer back from all of the attached gateways. In other - -cases, where a particular network is richly provided with tools to - -support the internet, there may be a special network mechanism which a - -host can invoke to determine where the gateways are. In other cases, it - -may be necessary for an installer to manually provide the name of at - - 7 - - -least one accessible gateway. 
Once a host has discovered the name of - -one gateway, it can build up a table of all other available gateways, by - -keeping track of every gateway that has been reported back to it in an - -ICMP message. - - - 5. Advanced Topics in Addressing and Routing - - - The preceding discussion describes the mechanism required in a - -minimal implementation, an implementation intended only to provide - -operational service access today to the various networks that make up - -the internet. For any host which will participate in future research, - -as contrasted with service, some additional features are required. - -These features will also be helpful for service hosts if they wish to - -obtain access to some of the more exotic networks which will become part - -of the internet over the next few years. All implementors are urged to - -at least provide a structure into which these features could be later - -integrated. - - - There are several features, either already a part of the - -architecture or now under development, which are used to modify or - -expand the relationships between addresses and routes. The IP source - -route options allow a host to explicitly direct a datagram through a - -series of gateways to its foreign host. An alternative form of the ICMP - -redirect packet has been proposed, which would return information - -specific to a particular destination host, not a destination net. - -Finally, additional IP options have been proposed to identify particular - -routes within the internet that are unacceptable. The difficulty with - -implementing these new features is that the mechanisms do not lie - - 8 - - -entirely within the bounds of IP. All the mechanisms above are designed - -to apply to a particular connection, so that their use must be specified - -at the TCP level. 
Thus, the interface between IP and the layers above - -it must include mechanisms to allow passing this information back and - -forth, and TCP (or any other protocol at this level, such as UDP), must - -be prepared to store this information. The passing of information - -between IP and TCP is made more complicated by the fact that some of the - -information, in particular ICMP packets, may arrive at any time. The - -normal interface envisioned between TCP and IP is one across which - -packets can be sent or received. The existence of asynchronous ICMP - -messages implies that there must be an additional channel between the - -two, unrelated to the actual sending and receiving of data. (In fact, - -there are many other ICMP messages which arrive asynchronously and which - -must be passed from IP up to higher layers. See RFC 816, Fault - -Isolation and Recovery.) - - - Source routes are already in use in the internet, and many - -implementations will wish to be able to take advantage of them. The - -following sorts of usages should be permitted. First, a user, when - -initiating a TCP connection, should be able to hand a source route into - -TCP, which in turn must hand the source route to IP with every outgoing - -datagram. The user might initially obtain the source route by querying - -a different sort of name server, which would return a source route - -instead of an address, or the user may have fabricated the source route - -manually. A TCP which is listening for a connection, rather than - -attempting to open one, must be prepared to receive a datagram which - -contains an IP return route, in which case it must remember this return - -route, and use it as a source route on all returning datagrams. - - 9 - - - 6. Ports and Service Identifiers - - - The IP layer of the architecture contains the address information - -which specifies the destination host to which the datagram is being - -sent.
In fact, datagrams are not intended just for particular hosts, - -but for particular agents within a host, processes or other entities - -that are the actual source and sink of the data. IP performs only a - -very simple dispatching once the datagram has arrived at the target - -host, it dispatches it to a particular protocol. It is the - -responsibility of that protocol handler, for example TCP, to finish - -dispatching the datagram to the particular connection for which it is - -destined. This next layer of dispatching is done using "port - -identifiers", which are a part of the header of the higher level - -protocol, and not the IP layer. - - - This two-layer dispatching architecture has caused a problem for - -certain implementations. In particular, some implementations have - -wished to put the IP layer within the kernel of the operating system, - -and the TCP layer as a user domain application program. Strict - -adherence to this partitioning can lead to grave performance problems, - -for the datagram must first be dispatched from the kernel to a TCP - -process, which then dispatches the datagram to its final destination - -process. The overhead of scheduling this dispatch process can severely - -limit the achievable throughput of the implementation. - - - As is discussed in RFC 817, Modularity and Efficiency in Protocol - -Implementations, this particular separation between kernel and user - -leads to other performance problems, even ignoring the issue of port - - 10 - - -level dispatching. However, there is an acceptable shortcut which can - -be taken to move the higher level dispatching function into the IP - -layer, if this makes the implementation substantially easier. - - - In principle, every higher level protocol could have a different - -dispatching algorithm. The reason for this is discussed below. 
- -However, for the protocols involved in the service offering being - -implemented today, TCP and UDP, the dispatching algorithm is exactly the - -same, and the port field is located in precisely the same place in the - -header. Therefore, unless one is interested in participating in further - -protocol research, there is only one higher level dispatch algorithm. - -This algorithm takes into account the internet level foreign address, - -the protocol number, and the local port and foreign port from the higher - -level protocol header. This algorithm can be implemented as a sort of - -adjunct to the IP layer implementation, as long as no other higher level - -protocols are to be implemented. (Actually, the above statement is only - -partially true, in that the UDP dispatch function is a subset of the TCP - -dispatch function. UDP dispatch depends only on protocol number and local - -port. However, there is an occasion within TCP when this exact same - -subset comes into play, when a process wishes to listen for a connection - -from any foreign host. Thus, the range of mechanisms necessary to - -support TCP dispatch are also sufficient to support precisely the UDP - -requirement.) - - - The decision to remove port level dispatching from IP to the higher - -level protocol has been questioned by some implementors. It has been - -argued that if all of the address structure were part of the IP layer, - - 11 - - -then IP could do all of the packet dispatching function within the host, - -which would lead to a simpler modularity. Three problems were - -identified with this. First, not all protocol implementors could agree - -on the size of the port identifier. TCP selected a fairly short port - -identifier, 16 bits, to reduce header size. Other protocols being - -designed, however, wanted a larger port identifier, perhaps 32 bits, so - -that the port identifier, if properly selected, could be considered - -probabilistically unique.
Thus, constraining the port id to one - -particular IP level mechanism would prevent certain fruitful lines of - -research. Second, ports serve a special function in addition to - -datagram delivery: certain port numbers are reserved to identify - -particular services. Thus, TCP port 23 is the remote login service. If - -ports were implemented at the IP level, then the assignment of well - -known ports could not be done on a protocol basis, but would have to be - -done in a centralized manner for all of the IP architecture. Third, IP - -was designed with a very simple layering role: IP contained exactly - -those functions that the gateways must understand. If the port idea had - -been made a part of the IP layer, it would have suggested that gateways - -needed to know about ports, which is not the case. - - - There are, of course, other ways to avoid these problems. In - -particular, the "well-known port" problem can be solved by devising a - -second mechanism, distinct from port dispatching, to name well-known - -ports. Several protocols have settled on the idea of including, in the - -packet which sets up a connection to a particular service, a more - -general service descriptor, such as a character string field. These - -special packets, which are requesting connection to a particular - - 12 - - -service, are routed on arrival to a special server, sometimes called a - -"rendezvous server", which examines the service request, selects a - -random port which is to be used for this instance of the service, and - -then passes the packet along to the service itself to commence the - -interaction. 
- - - For the internet architecture, this strategy had the serious flaw - -that it presumed all protocols would fit into the same service paradigm: - -an initial setup phase, which might contain a certain overhead such as - -indirect routing through a rendezvous server, followed by the packets of - -the interaction itself, which would flow directly to the process - -providing the service. Unfortunately, not all high level protocols in - -internet were expected to fit this model. The best example of this is - -isolated datagram exchange using UDP. The simplest exchange in UDP is - -one process sending a single datagram to another. Especially on a local - -net, where the net related overhead is very low, this kind of simple - -single datagram interchange can be extremely efficient, with very low - -overhead in the hosts. However, since these individual packets would - -not be part of an established connection, if IP supported a strategy - -based on a rendezvous server and service descriptors, every isolated - -datagram would have to be routed indirectly in the receiving host - -through the rendezvous server, which would substantially increase the - -overhead of processing, and every datagram would have to carry the full - -service request field, which would increase the size of the packet - -header. - - - In general, if a network is intended for "virtual circuit service", - - 13 - - -or things similar to that, then using a special high overhead mechanism - -for circuit setup makes sense. However, current directions in research - -are leading away from this class of protocol, so once again the - -architecture was designed not to preclude alternative protocol - -structures. The only rational position was that the particular - -dispatching strategy used should be part of the higher level protocol - -design, not the IP layer. - - - This same argument about circuit setup mechanisms also applies to - -the design of the IP address structure. 
Many protocols do not transmit - -a full address field as part of every packet, but rather transmit a - -short identifier which is created as part of a circuit setup from source - -to destination. If the full address needs to be carried in only the - -first packet of a long exchange, then the overhead of carrying a very - -long address field can easily be justified. Under these circumstances, - -one can create truly extravagant address fields, which are capable of - -extending to address almost any conceivable entity. However, this - -strategy is useable only in a virtual circuit net, where the packets - -being transmitted are part of an established sequence, otherwise this - -large extravagant address must be transported on every packet. Since - -Internet explicitly rejected this restriction on the architecture, it - -was necessary to come up with an address field that was compact enough - -to be sent in every datagram, but general enough to correctly route the - -datagram through the catanet without a previous setup phase. The IP - -address of 32 bits is the compromise that results. Clearly it requires - -a substantial amount of shoehorning to address all of the interesting - -places in the universe with only 32 bits. On the other hand, had the - - 14 - - -address field become much bigger, IP would have been susceptible to - -another criticism, which is that the header had grown unworkably large. - -Again, the fundamental design decision was that the protocol be designed - -in such a way that it supported research in new and different sorts of - -protocol architectures. - - - There are some limited restrictions imposed by the IP design on the - -port mechanism selected by the higher level process. In particular, - -when a packet goes awry somewhere on the internet, the offending packet - -is returned, along with an error indication, as part of an ICMP packet. - -An ICMP packet returns only the IP layer, and the next 64 bits of the - -original datagram.
Thus, any higher level protocol which wishes to sort - -out from which port a particular offending datagram came must make sure - -that the port information is contained within the first 64 bits of the - -next level header. This also means, in most cases, that it is possible - -to imagine, as part of the IP layer, a port dispatch mechanism which - -works by masking and matching on the first 64 bits of the incoming - -higher level header. - - diff --git a/kernel/picotcp/RFC/rfc0816.txt b/kernel/picotcp/RFC/rfc0816.txt deleted file mode 100644 index 28e01d5..0000000 --- a/kernel/picotcp/RFC/rfc0816.txt +++ /dev/null @@ -1,648 +0,0 @@ - - -RFC: 816 - - - - FAULT ISOLATION AND RECOVERY - - David D. Clark - MIT Laboratory for Computer Science - Computer Systems and Communications Group - July, 1982 - - - 1. Introduction - - - Occasionally, a network or a gateway will go down, and the sequence - -of hops which the packet takes from source to destination must change. - -Fault isolation is that action which hosts and gateways collectively - -take to determine that something is wrong; fault recovery is the - -identification and selection of an alternative route which will serve to - -reconnect the source to the destination. In fact, the gateways perform - -most of the functions of fault isolation and recovery. There are, - -however, a few actions which hosts must take if they wish to provide a - -reasonable level of service. This document describes the portion of - -fault isolation and recovery which is the responsibility of the host. - - - 2. What Gateways Do - - - Gateways collectively implement an algorithm which identifies the - -best route between all pairs of networks. They do this by exchanging - -packets which contain each gateway's latest opinion about the - -operational status of its neighbor networks and gateways. 
Assuming that - -this algorithm is operating properly, one can expect the gateways to go - -through a period of confusion immediately after some network or gateway - - 2 - - -has failed, but one can assume that once a period of negotiation has - -passed, the gateways are equipped with a consistent and correct model of - -the connectivity of the internet. At present this period of negotiation - -may actually take several minutes, and many TCP implementations time out - -within that period, but it is a design goal of the eventual algorithm - -that the gateway should be able to reconstruct the topology quickly - -enough that a TCP connection should be able to survive a failure of the - -route. - - - 3. Host Algorithm for Fault Recovery - - - Since the gateways always attempt to have a consistent and correct - -model of the internetwork topology, the host strategy for fault recovery - -is very simple. Whenever the host feels that something is wrong, it - -asks the gateway for advice, and, assuming the advice is forthcoming, it - -believes the advice completely. The advice will be wrong only during - -the transient period of negotiation, which immediately follows an - -outage, but will otherwise be reliably correct. - - - In fact, it is never necessary for a host to explicitly ask a - -gateway for advice, because the gateway will provide it as appropriate. - -When a host sends a datagram to some distant net, the host should be - -prepared to receive back either of two advisory messages which the - -gateway may send. The ICMP "redirect" message indicates that the - -gateway to which the host sent the datagram is no longer the best - -gateway to reach the net in question. The gateway will have forwarded - -the datagram, but the host should revise its routing table to have a - -different immediate address for this net.
The ICMP "destination - - 3 - - -unreachable" message indicates that as a result of an outage, it is - -currently impossible to reach the addressed net or host in any manner. - -On receipt of this message, a host can either abandon the connection - -immediately without any further retransmission, or resend slowly to see - -if the fault is corrected in reasonable time. - - - If a host could assume that these two ICMP messages would always - -arrive when something was amiss in the network, then no other action on - -the part of the host would be required in order to maintain its tables in - -an optimal condition. Unfortunately, there are two circumstances under - -which the messages will not arrive properly. First, during the - -transient following a failure, error messages may arrive that do not - -correctly represent the state of the world. Thus, hosts must take an - -isolated error message with some scepticism. (This transient period is - -discussed more fully below.) Second, if the host has been sending - -datagrams to a particular gateway, and that gateway itself crashes, then - -all the other gateways in the internet will reconstruct the topology, - -but the gateway in question will still be down, and therefore cannot - -provide any advice back to the host. As long as the host continues to - -direct datagrams at this dead gateway, the datagrams will simply vanish - -off the face of the earth, and nothing will come back in return. Hosts - -must detect this failure. - - - If some gateway many hops away fails, this is not of concern to the - -host, for then the discovery of the failure is the responsibility of the - -immediate neighbor gateways, which will perform this action in a manner - -invisible to the host. The problem only arises if the very first - - 4 - - -gateway, the one to which the host is immediately sending the datagrams, - -fails.
We thus identify one single task which the host must perform as - -its part of fault isolation in the internet: the host must use some - -strategy to detect that a gateway to which it is sending datagrams is - -dead. - - - Let us assume for the moment that the host implements some - -algorithm to detect failed gateways; we will return later to discuss - -what this algorithm might be. First, let us consider what the host - -should do when it has determined that a gateway is down. In fact, with - -the exception of one small problem, the action the host should take is - -extremely simple. The host should select some other gateway, and try - -sending the datagram to it. Assuming that gateway is up, this will - -either produce correct results, or some ICMP advice. Since we assume - -that, ignoring temporary periods immediately following an outage, any - -gateway is capable of giving correct advice, once the host has received - -advice from any gateway, that host is in as good a condition as it can - -hope to be. - - - There is always the unpleasant possibility that when the host tries - -a different gateway, that gateway too will be down. Therefore, whatever - -algorithm the host uses to detect a dead gateway must continuously be - -applied, as the host tries every gateway in turn that it knows about. - - - The only difficult part of this algorithm is to specify the means - -by which the host maintains the table of all of the gateways to which it - -has immediate access. Currently, the specification of the internet - -protocol does not architect any message by which a host can ask to be - - 5 - - -supplied with such a table. The reason is that different networks may - -provide very different mechanisms by which this table can be filled in. - -For example, if the net is a broadcast net, such as an ethernet or a - -ringnet, every gateway may simply broadcast such a table from time to - -time, and the host need do nothing but listen to obtain the required - -information. 
Alternatively, the network may provide the mechanism of - -logical addressing, by which a whole set of machines can be provided - -with a single group address, to which a request can be sent for - -assistance. Failing those two schemes, the host can build up its table - -of neighbor gateways by remembering all the gateways from which it has - -ever received a message. Finally, in certain cases, it may be necessary - -for this table, or at least the initial entries in the table, to be - -constructed manually by a manager or operator at the site. In cases - -where the network in question provides absolutely no support for this - -kind of host query, at least some manual intervention will be required - -to get started, so that the host can find out about at least one - -gateway. - - - 4. Host Algorithms for Fault Isolation - - - We now return to the question raised above. What strategy should - -the host use to detect that it is talking to a dead gateway, so that it - -can know to switch to some other gateway in the list? In fact, there are - -several algorithms which can be used. All are reasonably simple to - -implement, but they have very different implications for the overhead on - -the host, the gateway, and the network. Thus, to a certain extent, the - -algorithm picked must depend on the details of the network and of the - -host. - - 6 - - - -1. NETWORK LEVEL DETECTION - - - Many networks, particularly the Arpanet, perform precisely the - -required function internal to the network. If a host sends a datagram - -to a dead gateway on the Arpanet, the network will return a "host dead" - -message, which is precisely the information the host needs to know in - -order to switch to another gateway. Some early implementations of - -Internet on the Arpanet threw these messages away. That is an - -exceedingly poor idea. - - -2. CONTINUOUS POLLING - - - The ICMP protocol provides an echo mechanism by which a host may - -solicit a response from a gateway.
A host could simply send this - -message at a reasonable rate, to assure itself continuously that the - -gateway was still up. This works, but, since the message must be sent - -fairly often to detect a fault in a reasonable time, it can imply an - -unbearable overhead on the host itself, the network, and the gateway. - -This strategy is prohibited except where a specific analysis has - -indicated that the overhead is tolerable. - - -3. TRIGGERED POLLING - - - If the use of polling could be restricted to only those times when - -something seemed to be wrong, then the overhead would be bearable. - -Provided that one can get the proper advice from one's higher level - -protocols, it is possible to implement such a strategy. For example, - -one could program the TCP level so that whenever it retransmitted a - - 7 - - -segment more than once, it sent a hint down to the IP layer which - -triggered polling. This strategy does not have excessive overhead, but - -does have the problem that the host may be somewhat slow to respond to - -an error, since only after polling has started will the host be able to - -confirm that something has gone wrong, and by then the TCP above may - -have already timed out. - - - Both forms of polling suffer from a minor flaw. Hosts as well as - -gateways respond to ICMP echo messages. Thus, polling cannot be used to - -detect the error that a foreign address thought to be a gateway is - -actually a host. Such a confusion can arise if the physical addresses - -of machines are rearranged. - - -4. TRIGGERED RESELECTION - - - There is a strategy which makes use of a hint from a higher level, - -as did the previous strategy, but which avoids polling altogether. - -Whenever a higher level complains that the service seems to be - -defective, the Internet layer can pick the next gateway from the list of - -available gateways, and switch to it. 
Assuming that this gateway is up, - -no real harm can come of this decision, even if it was wrong, for the - -worst that will happen is a redirect message which instructs the host to - -return to the gateway originally being used. If, on the other hand, the - -original gateway was indeed down, then this immediately provides a new - -route, so the period of time until recovery is shortened. This last - -strategy seems particularly clever, and is probably the most generally - -suitable for those cases where the network itself does not provide fault - -isolation. (Regrettably, I have forgotten who suggested this idea to me. - -It is not my invention.) - - 8 - - - 5. Higher Level Fault Detection - - - The previous discussion has concentrated on fault detection and - -recovery at the IP layer. This section considers what the higher layers - -such as TCP should do. - - - TCP has a single fault recovery action; it repeatedly retransmits a - -segment until either it gets an acknowledgement or its connection timer - -expires. As discussed above, it may use retransmission as an event to - -trigger a request for fault recovery to the IP layer. In the other - -direction, information may flow up from IP, reporting such things as - -ICMP Destination Unreachable or error messages from the attached - -network. The only subtle question about TCP and faults is what TCP - -should do when such an error message arrives or its connection timer - -expires. - - - The TCP specification discusses the timer. In the description of - -the open call, the timeout is described as an optional value that the - -client of TCP may specify; if any segment remains unacknowledged for - -this period, TCP should abort the connection. The default for the - -timeout is 30 seconds. Early TCPs were often implemented with a fixed - -timeout interval, but this did not work well in practice, as the - -following discussion may suggest.
- - - Clients of TCP can be divided into two classes: those running on - -immediate behalf of a human, such as Telnet, and those supporting a - -program, such as a mail sender. Humans require a sophisticated response - -to errors. Depending on exactly what went wrong, they may want to - - 9 - - -abandon the connection at once, or wait for a long time to see if things - -get better. Programs do not have this human impatience, but also lack - -the power to make complex decisions based on details of the exact error - -condition. For them, a simple timeout is reasonable. - - - Based on these considerations, at least two modes of operation are - -needed in TCP. One, for programs, abandons the connection without - -exception if the TCP timer expires. The other mode, suitable for - -people, never abandons the connection on its own initiative, but reports - -to the layer above when the timer expires. Thus, the human user can see - -error messages coming from all the relevant layers, TCP and ICMP, and - -can request TCP to abort as appropriate. This second mode requires that - -TCP be able to send an asynchronous message up to its client to report - -the timeout, and it requires that error messages arriving at lower - -layers similarly flow up through TCP. - - - At levels above TCP, fault detection is also required. Either of - -the following can happen. First, the foreign client of TCP can fail, - -even though TCP is still running, so data is still acknowledged and the - -timer never expires. Alternatively, the communication path can fail, - -without the TCP timer going off, because the local client has no data to - -send. Both of these have caused trouble. - - - Sending mail provides an example of the first case. When sending - -mail using SMTP, there is an SMTP level acknowledgement that is returned - -when a piece of mail is successfully delivered. 
Several early mail - -receiving programs would crash just at the point where they had received - -all of the mail text (so TCP did not detect a timeout due to outstanding - - 10 - - -unacknowledged data) but before the mail was acknowledged at the SMTP - -level. This failure would cause early mail senders to wait forever for - -the SMTP level acknowledgement. The obvious cure was to set a timer at - -the SMTP level, but the first attempt to do this did not work, for there - -was no simple way to select the timer interval. If the interval - -selected was short, it expired in normal operation when sending a - -large file to a slow host. An interval of many minutes was needed to - -prevent false timeouts, but that meant that failures were detected only - -very slowly. The current solution in several mailers is to pick a - -timeout interval proportional to the size of the message. - - - Server telnet provides an example of the other kind of failure. It - -can easily happen that the communications link can fail while there is - -no traffic flowing, perhaps because the user is thinking. Eventually, - -the user will attempt to type something, at which time he will discover - -that the connection is dead and abort it. But the host end of the - -connection, having nothing to send, will not discover anything wrong, - -and will remain waiting forever. In some systems there is no way for a - -user in a different process to destroy or take over such a hanging - -process, so there is no way to recover. - - - One solution to this would be to have the host server telnet query - -the user end now and then, to see if it is still up. (Telnet does not - -have an explicit query feature, but the host could negotiate some - -unimportant option, which should produce either agreement or - -disagreement in return.)
The only problem with this is that a - -reasonable sample interval, if applied to every user on a large system, - - 11 - - -can generate an unacceptable amount of traffic and system overhead. A - -smart server telnet would use this query only when something seems - -wrong, perhaps when there had been no user activity for some time. - - - In both these cases, the general conclusion is that client level - -error detection is needed, and that the details of the mechanism are - -very dependent on the application. Application programmers must be made - -aware of the problem of failures, and must understand that error - -detection at the TCP or lower level cannot solve the whole problem for - -them. - - - 6. Knowing When to Give Up - - - It is not obvious, when error messages such as ICMP Destination - -Unreachable arrive, whether TCP should abandon the connection. The - -reason that error messages are difficult to interpret is that, as - -discussed above, after a failure of a gateway or network, there is a - -transient period during which the gateways may have incorrect - -information, so that irrelevant or incorrect error messages may - -sometimes return. An isolated ICMP Destination Unreachable may arrive - -at a host, for example, if a packet is sent during the period when the - -gateways are trying to find a new route. To abandon a TCP connection - -based on such a message arriving would be to ignore the valuable feature - -of the Internet that for many internal failures it reconstructs its - -function without any disruption of the end points. - - - But if failure messages do not imply a failure, what are they for? - -In fact, error messages serve several important purposes. First, if - - 12 - - -they arrive in response to opening a new connection, they probably are - -caused by opening the connection improperly (e.g., to a non-existent - -address) rather than by a transient network failure. 
Second, they - -provide valuable information, after the TCP timeout has occurred, as to - -the probable cause of the failure. Finally, certain messages, such as - -ICMP Parameter Problem, imply a possible implementation problem. In - -general, error messages give valuable information about what went wrong, - -but are not to be taken as absolutely reliable. A general alerting - -mechanism, such as the TCP timeout discussed above, provides a good - -indication that whatever is wrong is a serious condition, but without - -the advisory messages to augment the timer, there is no way for the - -client to know how to respond to the error. The combination of the - -timer and the advice from the error messages provide a reasonable set of - -facts for the client layer to have. It is important that error messages - -from all layers be passed up to the client module in a useful and - -consistent way. - - -------- diff --git a/kernel/picotcp/RFC/rfc0817.txt b/kernel/picotcp/RFC/rfc0817.txt deleted file mode 100644 index dcdef8a..0000000 --- a/kernel/picotcp/RFC/rfc0817.txt +++ /dev/null @@ -1,1388 +0,0 @@ - -RFC: 817 - - - - MODULARITY AND EFFICIENCY IN PROTOCOL IMPLEMENTATION - - David D. Clark - MIT Laboratory for Computer Science - Computer Systems and Communications Group - July, 1982 - - - 1. Introduction - - - Many protocol implementers have made the unpleasant discovery that - -their packages do not run quite as fast as they had hoped. The blame - -for this widely observed problem has been attributed to a variety of - -causes, ranging from details in the design of the protocol to the - -underlying structure of the host operating system. This RFC will - -discuss some of the commonly encountered reasons why protocol - -implementations seem to run slowly. 
- - - Experience suggests that one of the most important factors in - -determining the performance of an implementation is the manner in which - -that implementation is modularized and integrated into the host - -operating system. For this reason, it is useful to discuss the question - -of how an implementation is structured at the same time that we consider - -how it will perform. In fact, this RFC will argue that modularity is - -one of the chief villains in attempting to obtain good performance, so - -that the designer is faced with a delicate and inevitable tradeoff - -between good structure and good performance. Further, the single factor - -which most strongly determines how well this conflict can be resolved is - -not the protocol but the operating system. - - 2 - - - 2. Efficiency Considerations - - - There are many aspects to efficiency. One aspect is sending data - -at minimum transmission cost, which is a critical aspect of common - -carrier communications, if not in local area network communications. - -Another aspect is sending data at a high rate, which may not be possible - -at all if the net is very slow, but which may be the one central design - -constraint when taking advantage of a local net with high raw bandwidth. - -The final consideration is doing the above with minimum expenditure of - -computer resources. This last may be necessary to achieve high speed, - -but in the case of the slow net may be important only in that the - -resources used up, for example cpu cycles, are costly or otherwise - -needed. It is worth pointing out that these different goals often - -conflict; for example it is often possible to trade off efficient use of - -the computer against efficient use of the network. Thus, there may be - -no such thing as a successful general purpose protocol implementation. - - - The simplest measure of performance is throughput, measured in bits - -per second. 
It is worth doing a few simple computations in order to get - -a feeling for the magnitude of the problems involved. Assume that data - -is being sent from one machine to another in packets of 576 bytes, the - -maximum generally acceptable internet packet size. Allowing for header - -overhead, this packet size permits 4288 bits in each packet. If a - -useful throughput of 10,000 bits per second is desired, then a data - -bearing packet must leave the sending host about every 430 milliseconds, - -a little over two per second. This is clearly not difficult to achieve. - -However, if one wishes to achieve 100 kilobits per second throughput, - - 3 - - -the packet must leave the host every 43 milliseconds, and to achieve one - -megabit per second, which is not at all unreasonable on a high-speed - -local net, the packets must be spaced no more than 4.3 milliseconds. - - - These latter numbers are a slightly more alarming goal for which to - -set one's sights. Many operating systems take a substantial fraction of - -a millisecond just to service an interrupt. If the protocol has been - -structured as a process, it is necessary to go through a process - -scheduling before the protocol code can even begin to run. If any piece - -of a protocol package or its data must be fetched from disk, real time - -delays of between 30 to 100 milliseconds can be expected. If the - -protocol must compete for cpu resources with other processes of the - -system, it may be necessary to wait a scheduling quantum before the - -protocol can run. Many systems have a scheduling quantum of 100 - -milliseconds or more. Considering these sorts of numbers, it becomes - -immediately clear that the protocol must be fitted into the operating - -system in a thorough and effective manner if anything like reasonable - -throughput is to be achieved. - - - There is one obvious conclusion immediately suggested by even this - -simple analysis.
Except in very special circumstances, when many - -packets are being processed at once, the cost of processing a packet is - -dominated by factors, such as cpu scheduling, which are independent of - -the packet size. This suggests two general rules which any - -implementation ought to obey. First, send data in large packets. - -Obviously, if processing time per packet is a constant, then throughput - -will be directly proportional to the packet size. Second, never send an - - 4 - - -unneeded packet. Unneeded packets use up just as many resources as a - -packet full of data, but perform no useful function. RFC 813, "Window - -and Acknowledgement Strategy in TCP", discusses one aspect of reducing - -the number of packets sent per useful data byte. This document will - -mention other attacks on the same problem. - - - The above analysis suggests that there are two main parts to the - -problem of achieving good protocol performance. The first has to do - -with how the protocol implementation is integrated into the host - -operating system. The second has to do with how the protocol package - -itself is organized internally. This document will consider each of - -these topics in turn. - - - 3. The Protocol vs. the Operating System - - - There are normally three reasonable ways in which to add a protocol - -to an operating system. The protocol can be in a process that is - -provided by the operating system, or it can be part of the kernel of the - -operating system itself, or it can be put in a separate communications - -processor or front end machine. This decision is strongly influenced by - -details of hardware architecture and operating system design; each of - -these three approaches has its own advantages and disadvantages. - - - The "process" is the abstraction which most operating systems use - -to provide the execution environment for user programs. 
A very simple - -path for implementing a protocol is to obtain a process from the - -operating system and implement the protocol to run in it. - -Superficially, this approach has a number of advantages. Since - - 5 - - -modifications to the kernel are not required, the job can be done by - -someone who is not an expert in the kernel structure. Since it is often - -impossible to find somebody who is experienced both in the structure of - -the operating system and the structure of the protocol, this path, from - -a management point of view, is often extremely appealing. Unfortunately, - -putting a protocol in a process has a number of disadvantages, related - -to both structure and performance. First, as was discussed above, - -process scheduling can be a significant source of real-time delay. - -There is not only the actual cost of going through the scheduler, but - -the problem that the operating system may not have the right sort of - -priority tools to bring the process into execution quickly whenever - -there is work to be done. - - - Structurally, the difficulty with putting a protocol in a process - -is that the protocol may be providing services, for example support of - -data streams, which are normally obtained by going to special kernel - -entry points. Depending on the generality of the operating system, it - -may be impossible to take a program which is accustomed to reading - -through a kernel entry point, and redirect it so it is reading the data - -from a process. The most extreme example of this problem occurs when - -implementing server telnet. In almost all systems, the device handler - -for the locally attached teletypes is located inside the kernel, and - -programs read and write from their teletype by making kernel calls. 
If - -server telnet is implemented in a process, it is then necessary to take - -the data streams provided by server telnet and somehow get them back - -down inside the kernel so that they mimic the interface provided by - -local teletypes. It is usually the case that special kernel - - 6 - - -modification is necessary to achieve this structure, which somewhat - -defeats the benefit of having removed the protocol from the kernel in - -the first place. - - - Clearly, then, there are advantages to putting the protocol package - -in the kernel. Structurally, it is reasonable to view the network as a - -device, and device drivers are traditionally contained in the kernel. - -Presumably, the problems associated with process scheduling can be - -sidestepped, at least to a certain extent, by placing the code inside the - -kernel. And it is obviously easier to make the server telnet channels - -mimic the local teletype channels if they are both realized in the same - -level in the kernel. - - - However, implementation of protocols in the kernel has its own set - -of pitfalls. First, network protocols have a characteristic which is - -shared by almost no other device: they require rather complex actions - -to be performed as a result of a timeout. The problem with this - -requirement is that the kernel often has no facility by which a program - -can be brought into execution as a result of the timer event. What is - -really needed, of course, is a special sort of process inside the - -kernel. Most systems lack this mechanism. Failing that, the only - -execution mechanism available is to run at interrupt time. - - - There are substantial drawbacks to implementing a protocol to run - -at interrupt time. First, the actions performed may be somewhat complex - -and time consuming, compared to the maximum amount of time that the - -operating system is prepared to spend servicing an interrupt. Problems - -can arise if interrupts are masked for too long.
This is particularly - - 7 - - -bad when running as a result of a clock interrupt, which can imply that - -the clock interrupt is masked. Second, the environment provided by an - -interrupt handler is usually extremely primitive compared to the - -environment of a process. There are usually a variety of system - -facilities which are unavailable while running in an interrupt handler. - -The most important of these is the ability to suspend execution pending - -the arrival of some event or message. It is a cardinal rule of almost - -every known operating system that one must not invoke the scheduler - -while running in an interrupt handler. Thus, the programmer who is - -forced to implement all or part of his protocol package as an interrupt - -handler must be the best sort of expert in the operating system - -involved, and must be prepared for development sessions filled with - -obscure bugs which crash not just the protocol package but the entire - -operating system. - - - A final problem with processing at interrupt time is that the - -system scheduler has no control over the percentage of system time used - -by the protocol handler. If a large number of packets arrive, from a - -foreign host that is either malfunctioning or fast, all of the time may - -be spent in the interrupt handler, effectively killing the system. - - - There are other problems associated with putting protocols into an - -operating system kernel. The simplest problem often encountered is that - -the kernel address space is simply too small to hold the piece of code - -in question. This is a rather artificial sort of problem, but it is a - -severe problem none the less in many machines. It is an appallingly - -unpleasant experience to do an implementation with the knowledge that - - 8 - - -for every byte of new feature put in one must find some other byte of - -old feature to throw out. It is hopeless to expect an effective and - -general implementation under this kind of constraint. 
Another problem - -is that the protocol package, once it is thoroughly entwined in the - -operating system, may need to be redone every time the operating system - -changes. If the protocol and the operating system are not maintained by - -the same group, this makes maintenance of the protocol package a - -perpetual headache. - - - The third option for protocol implementation is to take the - -protocol package and move it outside the machine entirely, on to a - -separate processor dedicated to this kind of task. Such a machine is - -often described as a communications processor or a front-end processor. - -There are several advantages to this approach. First, the operating - -system on the communications processor can be tailored for precisely - -this kind of task. This makes the job of implementation much easier. - -Second, one does not need to redo the task for every machine to which - -the protocol is to be added. It may be possible to reuse the same - -front-end machine on different host computers. Since the task need not - -be done as many times, one might hope that more attention could be paid - -to doing it right. Given a careful implementation in an environment - -which is optimized for this kind of task, the resulting package should - -turn out to be very efficient. Unfortunately, there are also problems - -with this approach. There is, of course, a financial problem associated - -with buying an additional computer. In many cases, this is not a - -problem at all since the cost is negligible compared to what the - -programmer would cost to do the job in the mainframe itself. More - - 9 - - -fundamentally, the communications processor approach does not completely - -sidestep any of the problems raised above. The reason is that the - -communications processor, since it is a separate machine, must be - -attached to the mainframe by some mechanism. Whatever that mechanism, - -code is required in the mainframe to deal with it. 
It can be argued - -that the program to deal with the communications processor is simpler - -than the program to implement the entire protocol package. Even if that - -is so, the communications processor interface package is still a - -protocol in nature, with all of the same structural problems. Thus, all - -of the issues raised above must still be faced. In addition to those - -problems, there are some other, more subtle problems associated with an - -outboard implementation of a protocol. We will return to these problems - -later. - - - There is a way of attaching a communications processor to a - -mainframe host which sidesteps all of the mainframe implementation - -problems, which is to use some preexisting interface on the host machine - -as the port by which a communications processor is attached. This - -strategy is often used as a last stage of desperation when the software - -on the host computer is so intractable that it cannot be changed in any - -way. Unfortunately, it is almost inevitably the case that all of the - -available interfaces are totally unsuitable for this purpose, so the - -result is unsatisfactory at best. The most common way in which this - -form of attachment occurs is when a network connection is being used to - -mimic local teletypes. In this case, the front-end processor can be - -attached to the mainframe by simply providing a number of wires out of - -the front-end processor, each corresponding to a connection, which are - - 10 - - -plugged into teletype ports on the mainframe computer. (Because of the - -appearance of the physical configuration which results from this - -arrangement, Michael Padlipsky has described this as the "milking - -machine" approach to computer networking.) This strategy solves the - -immediate problem of providing remote access to a host, but it is - -extremely inflexible. The channels being provided to the host are - -restricted by the host software to one purpose only, remote login. 
It - -is impossible to use them for any other purpose, such as file transfer - -or sending mail, so the host is integrated into the network environment - -in an extremely limited and inflexible manner. If this is the best that - -can be done, then it should be tolerated. Otherwise, implementors - -should be strongly encouraged to take a more flexible approach. - - - 4. Protocol Layering - - - The previous discussion suggested that there was a decision to be - -made as to where a protocol ought to be implemented. In fact, the - -decision is much more complicated than that, for the goal is not to - -implement a single protocol, but to implement a whole family of protocol - -layers, starting with a device driver or local network driver at the - -bottom, then IP and TCP, and eventually reaching the application - -specific protocol, such as Telnet, FTP and SMTP on the top. Clearly, - -the bottommost of these layers is somewhere within the kernel, since the - -physical device driver for the net is almost inevitably located there. - -Equally clearly, the top layers of this package, which provide the user - -his ability to perform the remote login function or to send mail, are - -not entirely contained within the kernel. Thus, the question is not - - 11 - - -whether the protocol family shall be inside or outside the kernel, but - -how it shall be sliced in two between that part inside and that part - -outside. - - - Since protocols come nicely layered, an obvious proposal is that - -one of the layer interfaces should be the point at which the inside and - -outside components are sliced apart. Most systems have been implemented - -in this way, and many have been made to work quite effectively. One - -obvious place to slice is at the upper interface of TCP. 
Since TCP - -provides a bidirectional byte stream, which is somewhat similar to the - -I/O facility provided by most operating systems, it is possible to make - -the interface to TCP almost mimic the interface to other existing - -devices. Except in the matter of opening a connection, and dealing with - -peculiar failures, the software using TCP need not know that it is a - -network connection, rather than a local I/O stream that is providing the - -communications function. This approach does put TCP inside the kernel, - -which raises all the problems addressed above. It also raises the - -problem that the interface to the IP layer can, if the programmer is not - -careful, become excessively buried inside the kernel. It must be - -remembered that things other than TCP are expected to run on top of IP. - -The IP interface must be made accessible, even if TCP sits on top of it - -inside the kernel. - - - Another obvious place to slice is above Telnet. The advantage of - -slicing above Telnet is that it solves the problem of having remote - -login channels emulate local teletype channels. The disadvantage of - -putting Telnet into the kernel is that the amount of code which has now - - 12 - - -been included there is getting remarkably large. In some early - -implementations, the size of the network package, when one includes - -protocols at the level of Telnet, rivals the size of the rest of the - -supervisor. This leads to vague feelings that all is not right. - - - Any attempt to slice through a lower layer boundary, for example - -between internet and TCP, reveals one fundamental problem. The TCP - -layer, as well as the IP layer, performs a demultiplexing function on - -incoming datagrams. Until the TCP header has been examined, it is not - -possible to know for which user the packet is ultimately destined. 
- -Therefore, if TCP, as a whole, is moved outside the kernel, it is - -necessary to create one separate process called the TCP process, which - -performs the TCP multiplexing function, and probably all of the rest of - -TCP processing as well. This means that incoming data destined for a - -user process involves not just a scheduling of the user process, but - -scheduling the TCP process first. - - - This suggests an alternative structuring strategy which slices - -through the protocols, not along an established layer boundary, but - -along a functional boundary having to do with demultiplexing. In this - -approach, certain parts of IP and certain parts of TCP are placed in the - -kernel. The amount of code placed there is sufficient so that when an - -incoming datagram arrives, it is possible to know for which process that - -datagram is ultimately destined. The datagram is then routed directly - -to the final process, where additional IP and TCP processing is - -performed on it. This removes from the kernel any requirement for timer - -based actions, since they can be done by the process provided by the - - 13 - - -user. This structure has the additional advantage of reducing the - -amount of code required in the kernel, so that it is suitable for - -systems where kernel space is at a premium. The RFC 814, titled "Names, - -Addresses, Ports, and Routes," discusses this rather orthogonal slicing - -strategy in more detail. - - - A related discussion of protocol layering and multiplexing can be - -found in Cohen and Postel [1]. - - - 5. Breaking Down the Barriers - - - In fact, the implementor should be sensitive to the possibility of - -even more peculiar slicing strategies in dividing up the various - -protocol layers between the kernel and the one or more user processes. - -The result of the strategy proposed above was that part of TCP should - -execute in the process of the user. 
In other words, instead of having - -one TCP process for the system, there is one TCP process per connection. - -Given this architecture, it is no longer necessary to imagine that all - -of the TCPs are identical. One TCP could be optimized for high - -throughput applications, such as file transfer. Another TCP could be - -optimized for small low delay applications such as Telnet. In fact, it - -would be possible to produce a TCP which was somewhat integrated with - -the Telnet or FTP on top of it. Such an integration is extremely - -important, for it can lead to a kind of efficiency which more - -traditional structures are incapable of producing. Earlier, this paper - -pointed out that one of the important rules to achieving efficiency was - -to send the minimum number of packets for a given amount of data. The - -idea of protocol layering interacts very strongly (and poorly) with this - - 14 - - -goal, because independent layers have independent ideas about when - -packets should be sent, and unless these layers can somehow be brought - -into cooperation, additional packets will flow. The best example of - -this is the operation of server telnet in a character at a time remote - -echo mode on top of TCP. When a packet containing a character arrives - -at a server host, each layer has a different response to that packet. - -TCP has an obligation to acknowledge the packet. Either server telnet - -or the application layer above has an obligation to echo the character - -received in the packet. If the character is a Telnet control sequence, - -then Telnet has additional actions which it must perform in response to - -the packet. The result of this, in most implementations, is that - -several packets are sent back in response to the one arriving packet. - -Combining all of these return messages into one packet is important for - -several reasons.
First, of course, it reduces the number of packets - -being sent over the net, which directly reduces the charges incurred for - -many common carrier tariff structures. Second, it reduces the number of - -scheduling actions which will occur inside both hosts, which, as was - -discussed above, is extremely important in improving throughput. - - - The way to achieve this goal of packet sharing is to break down the - -barrier between the layers of the protocols, in a very restrained and - -careful manner, so that a limited amount of information can leak across - -the barrier to enable one layer to optimize its behavior with respect to - -the desires of the layers above and below it. For example, it would - -represent an improvement if TCP, when it received a packet, could ask - -the layer above whether or not it would be worth pausing for a few - -milliseconds before sending an acknowledgement in order to see if the - - 15 - - -upper layer would have any outgoing data to send. Dallying before - -sending the acknowledgement produces precisely the right sort of - -optimization if the client of TCP is server Telnet. However, dallying - -before sending an acknowledgement is absolutely unacceptable if TCP is - -being used for file transfer, for in file transfer there is almost never - -data flowing in the reverse direction, and the delay in sending the - -acknowledgement probably translates directly into a delay in obtaining - -the next packets. Thus, TCP must know a little about the layers above - -it to adjust its performance as needed. - - - It would be possible to imagine a general purpose TCP which was - -equipped with all sorts of special mechanisms by which it would query - -the layer above and modify its behavior accordingly. In the structures - -suggested above, in which there is not one but several TCPs, the TCP can - -simply be modified so that it produces the correct behavior as a matter - -of course. 
This structure has the disadvantage that there will be - -several implementations of TCP existing on a single machine, which can - -mean more maintenance headaches if a problem is found where TCP needs to - -be changed. However, it is probably the case that each of the TCPs will - -be substantially simpler than the general purpose TCP which would - -otherwise have been built. There are some experimental projects - -currently under way which suggest that this approach may make designing - -of a TCP, or almost any other layer, substantially easier, so that the - -total effort involved in bringing up a complete package is actually less - -if this approach is followed. This approach is by no means generally - -accepted, but deserves some consideration. - - 16 - - - The general conclusion to be drawn from this sort of consideration - -is that a layer boundary has both a benefit and a penalty. A visible - -layer boundary, with a well specified interface, provides a form of - -isolation between two layers which allows one to be changed with the - -confidence that the other one will not stop working as a result. - -However, a firm layer boundary almost inevitably leads to inefficient - -operation. This can easily be seen by analogy with other aspects of - -operating systems. Consider, for example, file systems. A typical - -operating system provides a file system, which is a highly abstracted - -representation of a disk. The interface is highly formalized, and - -presumed to be highly stable. This makes it very easy for naive users - -to have access to disks without having to write a great deal of - -software. The existence of a file system is clearly beneficial. On the - -other hand, it is clear that the restricted interface to a file system - -almost inevitably leads to inefficiency. If the interface is organized - -as a sequential read and write of bytes, then there will be people who - -wish to do high throughput transfers who cannot achieve their goal. 
If - -the interface is a virtual memory interface, then other users will - -regret the necessity of building a byte stream interface on top of the - -memory mapped file. The most objectionable inefficiency results when a - -highly sophisticated package, such as a data base management package, - -must be built on top of an existing operating system. Almost - -inevitably, the implementors of the database system attempt to reject - -the file system and obtain direct access to the disks. They have - -sacrificed modularity for efficiency. - - - The same conflict appears in networking, in a rather extreme form. - - 17 - - -The concept of a protocol is still unknown and frightening to most naive - -programmers. The idea that they might have to implement a protocol, or - -even part of a protocol, as part of some application package, is a - -dreadful thought. And thus there is great pressure to hide the function - -of the net behind a very hard barrier. On the other hand, the kind of - -inefficiency which results from this is a particularly undesirable sort - -of inefficiency, for it shows up, among other things, in increasing the - -cost of the communications resource used up to achieve the application - -goal. In cases where one must pay for one's communications costs, they - -usually turn out to be the dominant cost within the system. Thus, doing - -an excessively good job of packaging up the protocols in an inflexible - -manner has a direct impact on increasing the cost of the critical - -resource within the system. This is a dilemma which will probably only - -be solved when programmers become somewhat less alarmed about protocols, - -so that they are willing to weave a certain amount of protocol structure - -into their application program, much as application programs today weave - -parts of database management systems into the structure of their - -application program. 
- - - An extreme example of putting the protocol package behind a firm - -layer boundary occurs when the protocol package is relegated to a front- - -end processor. In this case the interface to the protocol is some other - -protocol. It is difficult to imagine how to build close cooperation - -between layers when they are that far separated. Realistically, one of - -the prices which must be associated with an implementation so physically - -modularized is that the performance will suffer as a result. Of course, - -a separate processor for protocols could be very closely integrated into - - 18 - - -the mainframe architecture, with interprocessor co-ordination signals, - -shared memory, and similar features. Such a physical modularity might - -work very well, but there is little documented experience with this - -closely coupled architecture for protocol support. - - - 6. Efficiency of Protocol Processing - - - To this point, this document has considered how a protocol package - -should be broken into modules, and how those modules should be - -distributed between free standing machines, the operating system kernel, - -and one or more user processes. It is now time to consider the other - -half of the efficiency question, which is what can be done to speed the - -execution of those programs that actually implement the protocols. We - -will make some specific observations about TCP and IP, and then conclude - -with a few generalities. - - - IP is a simple protocol, especially with respect to the processing - -of normal packets, so it should be easy to get it to perform - -efficiently. The only area of any complexity related to actual packet - -processing has to do with fragmentation and reassembly. The reader is - -referred to RFC 815, titled "IP Datagram Reassembly Algorithms", for - -specific consideration of this point. - - - Most costs in the IP layer come from table look up functions, as - -opposed to packet processing functions. 
An outgoing packet requires two - -translation functions to be performed. The internet address must be - -translated to a target gateway, and a gateway address must be translated - -to a local network number (if the host is attached to more than one - - 19 - - -network). It is easy to build a simple implementation of these table - -look up functions that in fact performs very poorly. The programmer - -should keep in mind that there may be as many as a thousand network - -numbers in a typical configuration. Linear searching of a thousand - -entry table on every packet is extremely unsuitable. In fact, it may be - -worth asking TCP to cache a hint for each connection, which can be - -handed down to IP each time a packet is sent, to try to avoid the - -overhead of a table look up. - - - TCP is a more complex protocol, and presents many more - -opportunities for getting things wrong. There is one area which is - -generally accepted as causing noticeable and substantial overhead as - -part of TCP processing. This is computation of the checksum. It would - -be nice if this cost could be avoided somehow, but the idea of an end- - -to-end checksum is absolutely central to the functioning of TCP. No - -host implementor should think of omitting the validation of a checksum - -on incoming data. - - - Various clever tricks have been used to try to minimize the cost of - -computing the checksum. If it is possible to add additional microcoded - -instructions to the machine, a checksum instruction is the most obvious - -candidate. Since computing the checksum involves picking up every byte - -of the segment and examining it, it is possible to combine the operation - -of computing the checksum with the operation of copying the segment from - -one location to another. 
Since a number of data copies are probably - -already required as part of the processing structure, this kind of - -sharing might conceivably pay off if it didn't cause too much trouble to - - 20 - - -the modularity of the program. Finally, computation of the checksum - -seems to be one place where careful attention to the details of the - -algorithm used can make a drastic difference in the throughput of the - -program. The Multics system provides one of the best case studies of - -this, since Multics is about as poorly organized to perform this - -function as any machine implementing TCP. Multics is a 36-bit word - -machine, with four 9-bit bytes per word. The eight-bit bytes of a TCP - -segment are laid down packed in memory, ignoring word boundaries. This - -means that when it is necessary to pick up the data as a set of 16-bit - -units for the purpose of adding them to compute checksums, horrible - -masking and shifting is required for each 16-bit value. An early - -version of a program using this strategy required 6 milliseconds to - -checksum a 576-byte segment. Obviously, at this point, checksum - -computation was becoming the central bottleneck to throughput. A more - -careful recoding of this algorithm reduced the checksum processing time - -to less than one millisecond. The strategy used was extremely dirty. - -It involved adding up carefully selected words of the area in which the - -data lay, knowing that for those particular words, the 16-bit values - -were properly aligned inside the words. Only after the addition had - -been done were the various sums shifted, and finally added to produce - -the eventual checksum. This kind of highly specialized programming is - -probably not acceptable if used everywhere within an operating system. - -It is clearly appropriate for one highly localized function which can be - -clearly identified as an extreme performance bottleneck. 
- - - Another area of TCP processing which may cause performance problems - -is the overhead of examining all of the possible flags and options which - - 21 - - -occur in each incoming packet. One paper, by Bunch and Day [2], asserts - -that the overhead of packet header processing is actually an important - -limiting factor in throughput computation. Not all measurement - -experiments have tended to support this result. To whatever extent it - -is true, however, there is an obvious strategy which the implementor - -ought to use in designing his program. He should build his program to - -optimize the expected case. It is easy, especially when first designing - -a program, to pay equal attention to all of the possible outcomes of - -every test. In practice, however, few of these will ever happen. A TCP - -should be built on the assumption that the next packet to arrive will - -have absolutely nothing special about it, and will be the next one - -expected in the sequence space. One or two tests are sufficient to - -determine that the expected set of control flags are on. (The ACK flag - -should be on; the Push flag may or may not be on. No other flags should - -be on.) One test is sufficient to determine that the sequence number of - -the incoming packet is one greater than the last sequence number - -received. In almost every case, that will be the actual result. Again, - -using the Multics system as an example, failure to optimize the case of - -receiving the expected sequence number had a detectable effect on the - -performance of the system. The particular problem arose when a number - -of packets arrived at once. TCP attempted to process all of these - -packets before awaking the user. As a result, by the time the last - -packet arrived, there was a threaded list of packets which had several - -items on it. When a new packet arrived, the list was searched to find - -the location into which the packet should be inserted. 
Obviously, the - -list should be searched from highest sequence number to lowest sequence - - 22 - - -number, because one is expecting to receive a packet which comes after - -those already received. By mistake, the list was searched from front to - -back, starting with the packets with the lowest sequence number. The - -amount of time spent searching this list backwards was easily detectable - -in the metering measurements. - - - Other data structures can be organized to optimize the action which - -is normally taken on them. For example, the retransmission queue is - -very seldom actually used for retransmission, so it should not be - -organized to optimize that action. In fact, it should be organized to - -optimized the discarding of things from it when the acknowledgement - -arrives. In many cases, the easiest way to do this is not to save the - -packet at all, but to reconstruct it only if it needs to be - -retransmitted, starting from the data as it was originally buffered by - -the user. - - - There is another generality, at least as important as optimizing - -the common case, which is to avoid copying data any more times than - -necessary. One more result from the Multics TCP may prove enlightening - -here. Multics takes between two and three milliseconds within the TCP - -layer to process an incoming packet, depending on its size. For a 576- - -byte packet, the three milliseconds is used up approximately as follows. - -One millisecond is used computing the checksum. Six hundred - -microseconds is spent copying the data. (The data is copied twice, at - -.3 milliseconds a copy.) One of those copy operations could correctly - -be included as part of the checksum cost, since it is done to get the - -data on a known word boundary to optimize the checksum algorithm. - - 23 - - -However, the copy also performs another necessary transfer at the same - -time. Header processing and packet resequencing takes .7 milliseconds. 
- -The rest of the time is used in miscellaneous processing, such as - -removing packets from the retransmission queue which are acknowledged by - -this packet. Data copying is the second most expensive single operation - -after data checksuming. Some implementations, often because of an - -excessively layered modularity, end up copying the data around a great - -deal. Other implementations end up copying the data because there is no - -shared memory between processes, and the data must be moved from process - -to process via a kernel operation. Unless the amount of this activity - -is kept strictly under control, it will quickly become the major - -performance bottleneck. - - - 7. Conclusions - - - This document has addressed two aspects of obtaining performance - -from a protocol implementation, the way in which the protocol is layered - -and integrated into the operating system, and the way in which the - -detailed handling of the packet is optimized. It would be nice if one - -or the other of these costs would completely dominate, so that all of - -one's attention could be concentrated there. Regrettably, this is not - -so. Depending on the particular sort of traffic one is getting, for - -example, whether Telnet one-byte packets or file transfer maximum size - -packets at maximum speed, one can expect to see one or the other cost - -being the major bottleneck to throughput. Most implementors who have - -studied their programs in an attempt to find out where the time was - -going have reached the unsatisfactory conclusion that it is going - - 24 - - -equally to all parts of their program. With the possible exception of - -checksum processing, very few people have ever found that their - -performance problems were due to a single, horrible bottleneck which - -they could fix by a single stroke of inventive programming. Rather, the - -performance was something which was improved by painstaking tuning of - -the entire program. 
- - - Most discussions of protocols begin by introducing the concept of - -layering, which tends to suggest that layering is a fundamentally - -wonderful idea which should be a part of every consideration of - -protocols. In fact, layering is a mixed blessing. Clearly, a layer - -interface is necessary whenever more than one client of a particular - -layer is to be allowed to use that same layer. But an interface, - -precisely because it is fixed, inevitably leads to a lack of complete - -understanding as to what one layer wishes to obtain from another. This - -has to lead to inefficiency. Furthermore, layering is a potential snare - -in that one is tempted to think that a layer boundary, which was an - -artifact of the specification procedure, is in fact the proper boundary - -to use in modularizing the implementation. Again, in certain cases, an - -architected layer must correspond to an implemented layer, precisely so - -that several clients can have access to that layer in a reasonably - -straightforward manner. In other cases, cunning rearrangement of the - -implemented module boundaries to match with various functions, such as - -the demultiplexing of incoming packets, or the sending of asynchronous - -outgoing packets, can lead to unexpected performance improvements - -compared to more traditional implementation strategies. Finally, good - -performance is something which is difficult to retrofit onto an existing - - 25 - - -program. Since performance is influenced, not just by the fine detail, - -but by the gross structure, it is sometimes the case that in order to - -obtain a substantial performance improvement, it is necessary to - -completely redo the program from the bottom up. This is a great - -disappointment to programmers, especially those doing a protocol - -implementation for the first time. 
Programmers who are somewhat - -inexperienced and unfamiliar with protocols are sufficiently concerned - -with getting their program logically correct that they do not have the - -capacity to think at the same time about the performance of the - -structure they are building. Only after they have achieved a logically - -correct program do they discover that they have done so in a way which - -has precluded real performance. Clearly, it is more difficult to design - -a program thinking from the start about both logical correctness and - -performance. With time, as implementors as a group learn more about the - -appropriate structures to use for building protocols, it will be - -possible to proceed with an implementation project having more - -confidence that the structure is rational, that the program will work, - -and that the program will work well. Those of us now implementing - -protocols have the privilege of being on the forefront of this learning - -process. It should be no surprise that our programs sometimes suffer - -from the uncertainty we bring to bear on them. - - 26 - - -Citations - - - [1] Cohen and Postel, "On Protocol Multiplexing", Sixth Data - -Communications Symposium, ACM/IEEE, November 1979. - - - [2] Bunch and Day, "Control Structure Overhead in TCP", Trends and - -Applications: Computer Networking, NBS Symposium, May 1980. - - diff --git a/kernel/picotcp/RFC/rfc0826.txt b/kernel/picotcp/RFC/rfc0826.txt deleted file mode 100644 index 8d5aae2..0000000 --- a/kernel/picotcp/RFC/rfc0826.txt +++ /dev/null @@ -1,470 +0,0 @@ -Network Working Group David C. 
Plummer -Request For Comments: 826 (DCP@MIT-MC) - November 1982 - - - An Ethernet Address Resolution Protocol - -- or -- - Converting Network Protocol Addresses - to 48.bit Ethernet Address - for Transmission on - Ethernet Hardware - - - - - - Abstract - -The implementation of protocol P on a sending host S decides, -through protocol P's routing mechanism, that it wants to transmit -to a target host T located some place on a connected piece of -10Mbit Ethernet cable. To actually transmit the Ethernet packet -a 48.bit Ethernet address must be generated. The addresses of -hosts within protocol P are not always compatible with the -corresponding Ethernet address (being different lengths or -values). Presented here is a protocol that allows dynamic -distribution of the information needed to build tables to -translate an address A in protocol P's address space into a -48.bit Ethernet address. - -Generalizations have been made which allow the protocol to be -used for non-10Mbit Ethernet hardware. Some packet radio -networks are examples of such hardware. - --------------------------------------------------------------------- - -The protocol proposed here is the result of a great deal of -discussion with several other people, most notably J. Noel -Chiappa, Yogen Dalal, and James E. Kulp, and helpful comments -from David Moon. - - - - -[The purpose of this RFC is to present a method of Converting -Protocol Addresses (e.g., IP addresses) to Local Network -Addresses (e.g., Ethernet addresses). This is a issue of general -concern in the ARPA Internet community at this time. The -method proposed here is presented for your consideration and -comment. This is not the specification of a Internet Standard.] - -Notes: ------- - -This protocol was originally designed for the DEC/Intel/Xerox -10Mbit Ethernet. It has been generalized to allow it to be used -for other types of networks. Much of the discussion will be -directed toward the 10Mbit Ethernet. 
Generalizations, where -applicable, will follow the Ethernet-specific discussion. - -DOD Internet Protocol will be referred to as Internet. - -Numbers here are in the Ethernet standard, which is high byte -first. This is the opposite of the byte addressing of machines -such as PDP-11s and VAXes. Therefore, special care must be taken -with the opcode field (ar$op) described below. - -An agreed upon authority is needed to manage hardware name space -values (see below). Until an official authority exists, requests -should be submitted to - David C. Plummer - Symbolics, Inc. - 243 Vassar Street - Cambridge, Massachusetts 02139 -Alternatively, network mail can be sent to DCP@MIT-MC. - -The Problem: ------------- - -The world is a jungle in general, and the networking game -contributes many animals. At nearly every layer of a network -architecture there are several potential protocols that could be -used. For example, at a high level, there is TELNET and SUPDUP -for remote login. Somewhere below that there is a reliable byte -stream protocol, which might be CHAOS protocol, DOD TCP, Xerox -BSP or DECnet. Even closer to the hardware is the logical -transport layer, which might be CHAOS, DOD Internet, Xerox PUP, -or DECnet. The 10Mbit Ethernet allows all of these protocols -(and more) to coexist on a single cable by means of a type field -in the Ethernet packet header. However, the 10Mbit Ethernet -requires 48.bit addresses on the physical cable, yet most -protocol addresses are not 48.bits long, nor do they necessarily -have any relationship to the 48.bit Ethernet address of the -hardware. For example, CHAOS addresses are 16.bits, DOD Internet -addresses are 32.bits, and Xerox PUP addresses are 8.bits. A -protocol is needed to dynamically distribute the correspondences -between a <protocol, address> pair and a 48.bit Ethernet address.
- -Motivation: ------------ - -Use of the 10Mbit Ethernet is increasing as more manufacturers -supply interfaces that conform to the specification published by -DEC, Intel and Xerox. With this increasing availability, more -and more software is being written for these interfaces. There -are two alternatives: (1) Every implementor invents his/her own -method to do some form of address resolution, or (2) every -implementor uses a standard so that his/her code can be -distributed to other systems without need for modification. This -proposal attempts to set the standard. - -Definitions: ------------- - -Define the following for referring to the values put in the TYPE -field of the Ethernet packet header: - ether_type$XEROX_PUP, - ether_type$DOD_INTERNET, - ether_type$CHAOS, -and a new one: - ether_type$ADDRESS_RESOLUTION. -Also define the following values (to be discussed later): - ares_op$REQUEST (= 1, high byte transmitted first) and - ares_op$REPLY (= 2), -and - ares_hrd$Ethernet (= 1). - -Packet format: --------------- - -To communicate mappings from <protocol, address> pairs to 48.bit -Ethernet addresses, a packet format that embodies the Address -Resolution protocol is needed. The format of the packet follows. - - Ethernet transmission layer (not necessarily accessible to - the user): - 48.bit: Ethernet address of destination - 48.bit: Ethernet address of sender - 16.bit: Protocol type = ether_type$ADDRESS_RESOLUTION - Ethernet packet data: - 16.bit: (ar$hrd) Hardware address space (e.g., Ethernet, - Packet Radio Net.) - 16.bit: (ar$pro) Protocol address space. For Ethernet - hardware, this is from the set of type - fields ether_typ$<protocol>. - 8.bit: (ar$hln) byte length of each hardware address - 8.bit: (ar$pln) byte length of each protocol address - 16.bit: (ar$op) opcode (ares_op$REQUEST | ares_op$REPLY) - nbytes: (ar$sha) Hardware address of sender of this - packet, n from the ar$hln field. - mbytes: (ar$spa) Protocol address of sender of this - packet, m from the ar$pln field.
- nbytes: (ar$tha) Hardware address of target of this - packet (if known). - mbytes: (ar$tpa) Protocol address of target. - - -Packet Generation: ------------------- - -As a packet is sent down through the network layers, routing -determines the protocol address of the next hop for the packet -and on which piece of hardware it expects to find the station -with the immediate target protocol address. In the case of the -10Mbit Ethernet, address resolution is needed and some lower -layer (probably the hardware driver) must consult the Address -Resolution module (perhaps implemented in the Ethernet support -module) to convert the <protocol type, target protocol address> -pair to a 48.bit Ethernet address. The Address Resolution module -tries to find this pair in a table. If it finds the pair, it -gives the corresponding 48.bit Ethernet address back to the -caller (hardware driver) which then transmits the packet. If it -does not, it probably informs the caller that it is throwing the -packet away (on the assumption the packet will be retransmitted -by a higher network layer), and generates an Ethernet packet with -a type field of ether_type$ADDRESS_RESOLUTION. The Address -Resolution module then sets the ar$hrd field to -ares_hrd$Ethernet, ar$pro to the protocol type that is being -resolved, ar$hln to 6 (the number of bytes in a 48.bit Ethernet -address), ar$pln to the length of an address in that protocol, -ar$op to ares_op$REQUEST, ar$sha with the 48.bit ethernet address -of itself, ar$spa with the protocol address of itself, and ar$tpa -with the protocol address of the machine that is trying to be -accessed. It does not set ar$tha to anything in particular, -because it is this value that it is trying to determine. It -could set ar$tha to the broadcast address for the hardware (all -ones in the case of the 10Mbit Ethernet) if that makes it -convenient for some aspect of the implementation.
It then causes -this packet to be broadcast to all stations on the Ethernet cable -originally determined by the routing mechanism. - - - -Packet Reception: ------------------ - -When an address resolution packet is received, the receiving -Ethernet module gives the packet to the Address Resolution module -which goes through an algorithm similar to the following. -Negative conditionals indicate an end of processing and a -discarding of the packet. - -?Do I have the hardware type in ar$hrd? -Yes: (almost definitely) - [optionally check the hardware length ar$hln] - ?Do I speak the protocol in ar$pro? - Yes: - [optionally check the protocol length ar$pln] - Merge_flag := false - If the pair <protocol type, sender protocol address> is - already in my translation table, update the sender - hardware address field of the entry with the new - information in the packet and set Merge_flag to true. - ?Am I the target protocol address? - Yes: - If Merge_flag is false, add the triplet <protocol type, sender protocol address, sender hardware address> to - the translation table. - ?Is the opcode ares_op$REQUEST? (NOW look at the opcode!!) - Yes: - Swap hardware and protocol fields, putting the local - hardware and protocol addresses in the sender fields. - Set the ar$op field to ares_op$REPLY - Send the packet to the (new) target hardware address on - the same hardware on which the request was received. - -Notice that the <protocol type, sender protocol address, sender hardware address> triplet is merged into the table before the -opcode is looked at. This is on the assumption that communcation -is bidirectional; if A has some reason to talk to B, then B will -probably have some reason to talk to A. Notice also that if an -entry already exists for the <protocol type, sender protocol address> pair, then the new hardware address supersedes the old -one. Related Issues gives some motivation for this. - -Generalization: The ar$hrd and ar$hln fields allow this protocol -and packet format to be used for non-10Mbit Ethernets. For the -10Mbit Ethernet <ar$hrd, ar$hln> takes on the value <1, 6>.
For -other hardware networks, the ar$pro field may no longer -correspond to the Ethernet type field, but it should be -associated with the protocol whose address resolution is being -sought. - - -Why is it done this way?? -------------------------- - -Periodic broadcasting is definitely not desired. Imagine 100 -workstations on a single Ethernet, each broadcasting address -resolution information once per 10 minutes (as one possible set -of parameters). This is one packet every 6 seconds. This is -almost reasonable, but what use is it? The workstations aren't -generally going to be talking to each other (and therefore have -100 useless entries in a table); they will be mainly talking to a -mainframe, file server or bridge, but only to a small number of -other workstations (for interactive conversations, for example). -The protocol described in this paper distributes information as -it is needed, and only once (probably) per boot of a machine. - -This format does not allow for more than one resolution to be -done in the same packet. This is for simplicity. If things were -multiplexed the packet format would be considerably harder to -digest, and much of the information could be gratuitous. Think -of a bridge that talks four protocols telling a workstation all -four protocol addresses, three of which the workstation will -probably never use. - -This format allows the packet buffer to be reused if a reply is -generated; a reply has the same length as a request, and several -of the fields are the same. - -The value of the hardware field (ar$hrd) is taken from a list for -this purpose. Currently the only defined value is for the 10Mbit -Ethernet (ares_hrd$Ethernet = 1). There has been talk of using -this protocol for Packet Radio Networks as well, and this will -require another value as will other future hardware mediums that -wish to use this protocol. - -For the 10Mbit Ethernet, the value in the protocol field (ar$pro) -is taken from the set ether_type$<protocol>.
This is a natural reuse of -the assigned protocol types. Combining this with the opcode -(ar$op) would effectively halve the number of protocols that can -be resolved under this protocol and would make a monitor/debugger -more complex (see Network Monitoring and Debugging below). It is -hoped that we will never see 32768 protocols, but Murphy made -some laws which don't allow us to make this assumption. - -In theory, the length fields (ar$hln and ar$pln) are redundant, -since the length of a protocol address should be determined by -the hardware type (found in ar$hrd) and the protocol type (found -in ar$pro). It is included for optional consistency checking, -and for network monitoring and debugging (see below). - -The opcode is to determine if this is a request (which may cause -a reply) or a reply to a previous request. 16 bits for this is -overkill, but a flag (field) is needed. - -The sender hardware address and sender protocol address are -absolutely necessary. It is these fields that get put in a -translation table. - -The target protocol address is necessary in the request form of -the packet so that a machine can determine whether or not to -enter the sender information in a table or to send a reply. It -is not necessarily needed in the reply form if one assumes a -reply is only provoked by a request. It is included for -completeness, network monitoring, and to simplify the suggested -processing algorithm described above (which does not look at the -opcode until AFTER putting the sender information in a table). - -The target hardware address is included for completeness and -network monitoring. It has no meaning in the request form, since -it is this number that the machine is requesting. Its meaning in -the reply form is the address of the machine making the request. 
-In some implementations (which do not get to look at the 14.byte -ethernet header, for example) this may save some register -shuffling or stack space by sending this field to the hardware -driver as the hardware destination address of the packet. - -There are no padding bytes between addresses. The packet data -should be viewed as a byte stream in which only 3 byte pairs are -defined to be words (ar$hrd, ar$pro and ar$op) which are sent -most significant byte first (Ethernet/PDP-10 byte style). - - -Network monitoring and debugging: ---------------------------------- - -The above Address Resolution protocol allows a machine to gain -knowledge about the higher level protocol activity (e.g., CHAOS, -Internet, PUP, DECnet) on an Ethernet cable. It can determine -which Ethernet protocol type fields are in use (by value) and the -protocol addresses within each protocol type. In fact, it is not -necessary for the monitor to speak any of the higher level -protocols involved. It goes something like this: - -When a monitor receives an Address Resolution packet, it always -enters the <protocol type, sender protocol, sender hardware> in a table. It can determine the length of the -hardware and protocol address from the ar$hln and ar$pln fields -of the packet. If the opcode is a REPLY the monitor can then -throw the packet away. If the opcode is a REQUEST and the target -protocol address matches the protocol address of the monitor, the -monitor sends a REPLY as it normally would. The monitor will -only get one mapping this way, since the REPLY to the REQUEST -will be sent directly to the requesting host. The monitor could -try sending its own REQUEST, but this could get two monitors into -a REQUEST sending loop, and care must be taken. - -Because the protocol and opcode are not combined into one field, -the monitor does not need to know which request opcode is -associated with which reply opcode for the same higher level -protocol.
The length fields should also give enough information -to enable it to "parse" a protocol addresses, although it has no -knowledge of what the protocol addresses mean. - -A working implementation of the Address Resolution protocol can -also be used to debug a non-working implementation. Presumably a -hardware driver will successfully broadcast a packet with Ethernet -type field of ether_type$ADDRESS_RESOLUTION. The format of the -packet may not be totally correct, because initial -implementations may have bugs, and table management may be -slightly tricky. Because requests are broadcast a monitor will -receive the packet and can display it for debugging if desired. - - -An Example: ------------ - -Let there exist machines X and Y that are on the same 10Mbit -Ethernet cable. They have Ethernet address EA(X) and EA(Y) and -DOD Internet addresses IPA(X) and IPA(Y) . Let the Ethernet type -of Internet be ET(IP). Machine X has just been started, and -sooner or later wants to send an Internet packet to machine Y on -the same cable. X knows that it wants to send to IPA(Y) and -tells the hardware driver (here an Ethernet driver) IPA(Y). The -driver consults the Address Resolution module to convert <ET(IP), IPA(Y)> into a 48.bit Ethernet address, but because X was just -started, it does not have this information. It throws the -Internet packet away and instead creates an ADDRESS RESOLUTION -packet with - (ar$hrd) = ares_hrd$Ethernet - (ar$pro) = ET(IP) - (ar$hln) = length(EA(X)) - (ar$pln) = length(IPA(X)) - (ar$op) = ares_op$REQUEST - (ar$sha) = EA(X) - (ar$spa) = IPA(X) - (ar$tha) = don't care - (ar$tpa) = IPA(Y) -and broadcasts this packet to everybody on the cable. - -Machine Y gets this packet, and determines that it understands -the hardware type (Ethernet), that it speaks the indicated -protocol (Internet) and that the packet is for it -((ar$tpa)=IPA(Y)). It enters (probably replacing any existing -entry) the information that <ET(IP), IPA(X)> maps to EA(X).
It -then notices that it is a request, so it swaps fields, putting -EA(Y) in the new sender Ethernet address field (ar$sha), sets the -opcode to reply, and sends the packet directly (not broadcast) to -EA(X). At this point Y knows how to send to X, but X still -doesn't know how to send to Y. - -Machine X gets the reply packet from Y, forms the map from -<ET(IP), IPA(Y)> to EA(Y), notices the packet is a reply and -throws it away. The next time X's Internet module tries to send -a packet to Y on the Ethernet, the translation will succeed, and -the packet will (hopefully) arrive. If Y's Internet module then -wants to talk to X, this will also succeed since Y has remembered -the information from X's request for Address Resolution. - -Related issue: ---------------- - -It may be desirable to have table aging and/or timeouts. The -implementation of these is outside the scope of this protocol. -Here is a more detailed description (thanks to MOON@SCRC@MIT-MC). - -If a host moves, any connections initiated by that host will -work, assuming its own address resolution table is cleared when -it moves. However, connections initiated to it by other hosts -will have no particular reason to know to discard their old -address. However, 48.bit Ethernet addresses are supposed to be -unique and fixed for all time, so they shouldn't change. A host -could "move" if a host name (and address in some other protocol) -were reassigned to a different physical piece of hardware. Also, -as we know from experience, there is always the danger of -incorrect routing information accidentally getting transmitted -through hardware or software error; it should not be allowed to -persist forever. Perhaps failure to initiate a connection should -inform the Address Resolution module to delete the information on -the basis that the host is not reachable, possibly because it is -down or the old translation is no longer valid.
Or perhaps -receiving of a packet from a host should reset a timeout in the -address resolution entry used for transmitting packets to that -host; if no packets are received from a host for a suitable -length of time, the address resolution entry is forgotten. This -may cause extra overhead to scan the table for each incoming -packet. Perhaps a hash or index can make this faster. - -The suggested algorithm for receiving address resolution packets -tries to lessen the time it takes for recovery if a host does -move. Recall that if the <protocol type, sender protocol -address> is already in the translation table, then the sender -hardware address supersedes the existing entry. Therefore, on a -perfect Ethernet where a broadcast REQUEST reaches all stations -on the cable, each station will be get the new hardware address. - -Another alternative is to have a daemon perform the timeouts. -After a suitable time, the daemon considers removing an entry. -It first sends (with a small number of retransmissions if needed) -an address resolution packet with opcode REQUEST directly to the -Ethernet address in the table. If a REPLY is not seen in a short -amount of time, the entry is deleted. The request is sent -directly so as not to bother every station on the Ethernet. Just -forgetting entries will likely cause useful information to be -forgotten, which must be regained. - -Since hosts don't transmit information about anyone other than -themselves, rebooting a host will cause its address mapping table -to be up to date. Bad information can't persist forever by being -passed around from machine to machine; the only bad information -that can exist is in a machine that doesn't know that some other -machine has changed its 48.bit Ethernet address. Perhaps -manually resetting (or clearing) the address mapping table will -suffice. - -This issue clearly needs more thought if it is believed to be -important. It is caused by any address resolution-like protocol.
- diff --git a/kernel/picotcp/RFC/rfc0872.txt b/kernel/picotcp/RFC/rfc0872.txt deleted file mode 100644 index c53bcb2..0000000 --- a/kernel/picotcp/RFC/rfc0872.txt +++ /dev/null @@ -1,549 +0,0 @@ - - - RFC 872 September 1982 - M82-48 - - - - - - - - TCP-ON-A-LAN - - - - - - - - - - - - - - - - - - - - - - M.A. PADLIPSKY - THE MITRE CORPORATION - Bedford, Massachusetts - - - - - - Abstract - - - - - The sometimes-held position that the DoD Standard - Transmission Control Protocol (TCP) and Internet Protocol (IP) - are inappropriate for use "on" a Local Area Network (LAN) is - shown to be fallacious. The paper is a companion piece to - M82-47, M82-49, M82-50, and M82-51. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - i - - - - - "TCP-ON-A-LAN" - - M. A. Padlipsky - - Thesis - - It is the thesis of this paper that fearing "TCP-on-a-LAN" - is a Woozle which needs slaying. To slay the "TCP-on-a-LAN" - Woozle, we need to know three things: What's a Woozle? What's a - LAN? What's a TCP? - - Woozles - - The first is rather straightforward [1]: - - One fine winter's day when Piglet was brushing away the - snow in front of his house, he happened to look up, and - there was Winnie-the-Pooh. Pooh was walking round and round - in a circle, thinking of something else, and when Piglet - called to him, he just went on walking. - "Hallo!" said Piglet, "what are you doing?" - "Hunting," said Pooh. - "Hunting what?" - "Tracking something," said Winnie-the-Pooh very - mysteriously. - "Tracking what?" said Piglet, coming closer. - "That's just what I ask myself. I ask myself, What?" - "What do you think you'll answer?" - "I shall have to wait until I catch up with it," said - Winnie-the-Pooh. "Now look there." He pointed to the - ground in front of him. "What do you see there? - "Tracks," said Piglet, "Paw-marks." he gave a little - squeak of excitement. "Oh, Pooh! Do you think it's a--a--a - Woozle?" 
- - Well, they convince each other that it is a Woozle, keep - "tracking," convince each other that it's a herd of Hostile - Animals, and get duly terrified before Christopher Robin comes - along and points out that they were following their own tracks - all the long. - - In other words, it is our contention that expressed fears - about the consequences of using a particular protocol named "TCP" - in a particular environment called a Local Area Net stem from - misunderstandings of the protocol and the environment, not from - the technical facts of the situation. - - - - - - - 1 - RFC 872 September 1982 - - - LAN's - - The second thing we need to know is somewhat less - straightforward: A LAN is, properly speaking [2], a - communications mechanism (or subnetwork) employing a transmission - technology suitable for relatively short distances (typically a - few kilometers) at relatively high bit-per-second rates - (typically greater than a few hundred kilobits per second) with - relatively low error rates, which exists primarily to enable - suitably attached computer systems (or "Hosts") to exchange bits, - and secondarily, though not necessarily, to allow terminals of - the teletypewriter and CRT classes to exchange bits with Hosts. - The Hosts are, at least in principle, heterogeneous; that is, - they are not merely multiple instances of the same operating - system. The Hosts are assumed to communicate by means of layered - protocols in order to achieve what the ARPANET tradition calls - "resource sharing" and what the newer ISO tradition calls "Open - System Interconnection." Addressing typically can be either - Host-Host (point-to-point) or "broadcast." 
(In some environments, - e.g., Ethernet, interesting advantage can be taken of broadcast - addressing; in other environments, e.g., LAN's which are - constituents of ARPA- or ISO-style "internets", broadcast - addressing is deemed too expensive to implement throughout the - internet as a whole and so may be ignored in the constituent LAN - even if available as part of the Host-LAN interface.) - - Note that no assumptions are made about the particular - transmission medium or the particular topology in play. LAN - media can be twisted-pair wires, CATV or other coaxial-type - cables, optical fibers, or whatever. However, if the medium is a - processor-to-processor bus it is likely that the system in - question is going to turn out to "be" a moderately closely - coupled distributed processor or a somewhat loosely coupled - multiprocessor rather than a LAN, because the processors are - unlikely to be using either ARPANET or ISO-style layered - protocols. (They'll usually -- either be homogeneous processors - interpreting only the protocol necessary to use the transmission - medium, or heterogeneous with one emulating the expectations of - the other.) Systems like "PDSC" or "NMIC" (the evolutionarily - related, bus-oriented, multiple PDP-11 systems in use at the - Pacific Data Services Center and the National Military - Intelligence Center, respectively), then, aren't LANs. - - LAN topologies can be either "bus," "ring," or "star". That - is, a digital PBX can be a LAN, in the sense of furnishing a - transmission medium/communications subnetwork for Hosts to do - resource sharing/Open System Interconnection over, though it - might not present attractive speed or failure mode properties. - (It might, though.) Topologically, it would probably be a - neutron star. - - - - 2 - RFC 872 September 1982 - - - For our purposes, the significant properties of a LAN are - the high bit transmission capacity and the good error properties. 
- Intuitively, a medium with these properties in some sense - "shouldn't require a heavy-duty protocol designed for long-haul - nets," according to some. (We will not address the issue of - "wasted bandwidth" due to header sizes. [2], pp. 1509f, provides - ample refutation of that traditional communications notion.) - However, it must be borne in mind that for our purposes the - assumption of resource-sharing/OSI type protocols between/among - the attached Hosts is also extremely significant. That is, if - all you're doing is letting some terminals access some different - Hosts, but the Hosts don't really have any intercomputer - networking protocols between them, what you have should be viewed - as a Localized Communications Network (LCN), not a LAN in the - sense we're talking about here. - - TCP - - The third thing we have to know can be either - straightforward or subtle, depending largely on how aware we are - of the context estabished by ARPANET-style prococols: For the - visual-minded, Figure 1 and Figure 2 might be all that need be - "said." Their moral is meant to be that in ARPANET-style - layering, layers aren't monoliths. For those who need more - explanation, here goes: TCP [3] (we'll take IP later) is a - Host-Host protocol (roughly equivalent to the functionality - implied by some of ISO Level 5 and all of ISO Level 4). Its most - significant property is that it presents reliable logical - connections to protocols above itself. (This point will be - returned to subsequently.) Its next most significant property is - that it is designed to operate in a "catenet" (also known as the, - or an, "internet"); that is, its addressing discipline is such - that Hosts attached to communications subnets other than the one - a given Host is attached to (the "proximate net") can be - communicated with as well as Hosts on the proximate net. 
Other - significant properties are those common to the breed: Host-Host - protocols (and Transport protocols) "all" offer mechanisms for - flow Control, Out-of-Band Signals, Logical Connection management, - and the like. - - Because TCP has a catenet-oriented addressing mechanism - (that is, it expresses foreign Host addresses as the - "two-dimensional" entity Foreign Net/Foreign Host because it - cannot assume that the Foreign Host is attached to the proximate - net), to be a full Host-Host protocol it needs an adjunct to deal - with the proximate net. This adjunct, the Internet Protocol (IP) - was designed as a separate protocol from TCP, however, in order - to allow it to play the same role it plays for TCP for other - Host-Host protocols too. - - - - - 3 - RFC 872 September 1982 - - - In order to "deal with the proximate net", IP possess the - following significant properties: An IP implementation maps from - a virtualization (or common intermediate representation) of - generic proximate net qualities (such as precedence, grade of - service, security labeling) to the closest equivalent on the - proximate net. It determines whether the "Internet Address" of a - given transmission is on the proximate net or not; if so, it - sends it; if not, it sends it to a "Gateway" (where another IP - module resides). That is, IP handles internet routing, whereas - TCP (or some other Host-Host protocol) handles only internet - addressing. Because some proximate nets will accept smaller - transmissions ("packets") than others, IP, qua protocol, also has - a discipline for allowing packets to be fragmented while in the - catenet and reassembled at their destination. Finally (for our - purposes), IP offers a mechanism to allow the particular protocol - it was called by (for a given packet) to be identified so that - the receiver can demultiplex transmissions based on IP-level - information only. 
(This is in accordance with the Principle of - Layering: you don't want to have to look at the data IP is - conveying to find out what to do with it.) - - Now that all seems rather complex, even though it omits a - number of mechanisms. (For a more complete discussion, see - Reference [4].) But it should be just about enough to slay the - Woozle, especially if just one more protocol's most significant - property can be snuck in. An underpublicized member of the - ARPANET suite of protocols is called UDP--the "User Datagram - Protocol." UDP is designed for speed rather than accuracy. That - is, it's not "reliable." All there is to UDP, basically, is a - mechanism to allow a given packet to be associated with a given - logical connection. Not a TCP logical connection, mind you, but a - UDP logical connection. So if all you want is the ability to - demultiplex data streams from your Host-Host protocol, you use - UDP, not TCP. ("You" is usually supposed to be a Packetized - Speech protocol, but doesn't have to be.) (And we'll worry about - Flow Control some other time.) - - TCP-on-a-LAN - - So whether you're a Host proximate to a LAN or not, and even - whether your TCP/IP is "inboard" or "outboard" of you, if you're - talking to a Host somewhere out there on the catenet, you use IP; - and if you're exercising some process-level/applications protocol - (roughly equivalent to some of some versions of ISO L5 and all of - L6 and L7) that expects TCP/IP as its Host-Host protocol (because - it "wants" reliable, flow controlled, ordered delivery [whoops, - forgot that "ordered" property earlier--but it doesn't matter all - that much for present purposes] over logical connections which - allow it to be - - - - - 4 - RFC 872 September 1982 - - - addressed via a Well-Known Socket), you use TCP "above" IP - regardless of whether the other Host is on your proximate net or - not. 
But if your application doesn't require the properties of - TCP (say for Packetized Speech), don't use it--regardless of - where or what you are. And if you want to make the decision - about whether you're talking to a proximate Host explicitly and - not even go through IP, you can even arrange to do that (though - it might make for messy implementation under some circumstances). - That is, if you want to take advantage of the properties of your - LAN "in the raw" and have or don't need appropriate applications - protocols, the Reference Model to which TCP/IP were designed - won't stop you. See Figure 2 if you're visual. A word of - caution, though: those applications probably will need protocols - of some sort--and they'll probably need some sort of Host-Host - protocol under them, so unless you relish maintaining "parallel" - suites of protocols.... that is, you really would be better off - with TCP most of the time locally anyway, because you've got to - have it to talk to the catenet and it's a nuisance to have - "something else" to talk over the LAN--when, of course, what - you're talking requires a Host-Host protocol. - - We'll touch on "performance" issues in a bit more detail - later. At this level, though, one point really does need to be - made: On the "reliability" front, many (including the author) at - first blush take the TCP checksum to be "overkill" for use on a - LAN, which does, after all, typically present extremely good - error properties. Interestingly enough, however, metering of TCP - implementations on several Host types in the research community - shows that the processing time expended on the TCP checksum is - only around 12% of the per-transmission processing time anyway. 
- So, again, it's not clear that it's worthwhile to bother with an - alternate Host-Host protocol for local use (if, that is, you need - the rest of the properties of TCP other than "reliability"--and, - of course, always assuming you've got a LAN, not an LCN, as - distinguished earlier.) - - Take that, Woozle! - - Other Significant Properties - - Oh, by the way, one or two other properties of TCP/IP really - do bear mention: - - 1. Protocol interpreters for TCP/IP exist for a dozen or - two different operating systems. - - 2. TCP/IP work, and have been working (though in less - refined versions) for several years. - - - - - - 5 - RFC 872 September 1982 - - - 3. IP levies no constraints on the interface protocol - presented by the proximate net (though some protocols - at that level are more wasteful than others). - - 4. IP levies no constraints on its users; in particular, - any proximate net that offers alternate routing can be - taken advantage of (unlike X.25, which appears to - preclude alternate routing). - - 5. IP-bearing Gateways both exist and present and exploit - properties 3 and 4. - - 6. TCP/IP are Department of Defense Standards. - - 7. Process (or application) protocols compatible with - TCP/IP for Virtual Terminal and File Transfer - (including "electronic mail") exist and have been - implemented on numerous operating systems. - - 8. "Vendor-style" specifications of TCP/IP are being - prepared under the aegis of the DoD Protocol Standards - Technical Panel, for those who find the - research-community-provided specs not to their liking. - - 9. The research community has recently reported speeds in - excess of 300 kb/s on an 800 kb/s subnet, 1.2 Mb/s on a - 3 Mb/s subnet, and 9.2 kbs on a 9.6 kb/s phone - line--all using TCP. 
(We don't know of any numbers for - alternative protocol suites, but it's unlikely they'd - be appreciably better if they confer like - functionality--and they may well be worse if they - represent implementations which haven't been around - enough to have been iterated a time or three.) - - With the partial exception of property 8, no other - resource-sharing protocol suite can make those claims. - - Note particularly well that none of the above should be - construed as eliminating the need for extremely careful - measurement of TCP/IP performance in/on a LAN. (You do, after - all, want to know their limitations, to guide you in when to - bother ringing in "local" alternatives--but be very careful: 1. - they're hard to measure commensurately with alternative - protocols; and 2. most conventional Hosts can't take [or give] - as many bits per second as you might imagine.) It merely - dramatically refocuses the motivation for doing such measurement. - (And levies a constraint or two on how you outboard, if you're - outboarding.) - - - - - - 6 - RFC 872 September 1982 - - - Other Contextual Data - - Our case could really rest here, but some amplification of - the aside above about Host capacities is warranted, if only to - suggest that some quantification is available to supplement the a - priori argument: Consider the previously mentioned PDSC. Its - local terminals operate in a screen-at-a-time mode, each - screen-load comprising some 16 kb. How many screens can one of - its Hosts handle in a given second? Well, we're told that each - disk fetch requires 17 ms average latency, and each context - switch costs around 2 ms, so allowing 1 ms for transmission of - the data from the disk and to the "net" (it makes the arithmetic - easy), that would add up to 20 ms "processing" time per screen, - even if no processing were done to the disk image. 
Thus, even if - the Host were doing nothing else, and even if the native disk - I/O software were optimized to do 16 kb reads, it could only - present 50 screens to its communications mechanism - (processor-processor bus) per second. That's 800 kb/s. And - that's well within the range of TCP-achievable rates (cf. Other - Significant Property 9). So in a realistic sample environment, - it would certainly seem that typical Hosts can't necessarily - present so many bits as to overtax the protocols anyway. (The - analysis of how many bits typical Hosts can accept is more - difficult because it depends more heavily on system internals. - However, the point is nearly moot in that even in the intuitively - unlikely event that receiving were appreciably faster in - principle [unlikely because of typical operating system - constraints on address space sizes, the need to do input to a - single address space, and the need to share buffers in the - address space among several processes], you can't accept more - than you can be given.) - - Conclusion - - The sometimes-expressed fear that using TCP on a local net - is a bad idea is unfounded. - - References - - [1] Milne, A. A., "Winnie-the-Pooh", various publishers. - - [2] The LAN description is based on Clark, D. D. et al., "An - Introduction to Local Area Networks," IEEE Proc., V. 66, N. - 11, November 1978, pp. 1497-1517, several year's worth of - conversations with Dr. Clark, and the author's observations - of both the open literature and the Oral Tradition (which - were sufficiently well-thought of to have prompted The MITRE - Corporation/NBS/NSA Local Nets "Brain Picking Panel" to have - - - - - - 7 - RFC 872 September 1982 - - - solicited his testimony during the year he was in FACC's - employ.*) - - [3] The TCP/IP descriptions are based on Postel, J. 
B., - "Internet Protocol Specification," and "Transmission Control - Specification" in DARPA Internet Program Protocol - Specifications, USC Information Sciences Institute, - September, 1981, and on more than 10 years' worth of - conversations with Dr. Postel, Dr. Clark (now the DARPA - "Internet Architect") and Dr. Vinton G. Cerf (co-originator - of TCP), and on numerous discussions with several other - members of the TCP/IP design team, on having edited the - referenced documents for the PSTP, and, for that matter, on - having been one of the developers of the ARPANET "Reference - Model." - - [4] Padlipsky, M. A., "A Perspective on the ARPANET Reference - Model", M82-47, The MITRE Corporation, September 1982; also - available in Proc. INFOCOM '83. - - ________________ - * In all honesty, as far as I know I started the rumor that TCP - might be overkill for a LAN at that meeting. At the next TCP - design meeting, however, they separated IP out from TCP, and - everything's been alright for about three years now--except - for getting the rumor killed. (I'd worry about Woozles - turning into roosting chickens if it weren't for the facts - that: 1. People tend to ignore their local guru; 2. I was - trying to encourage the IP separation; and 3. All I ever - wanted was some empirical data.) - - NOTE: FIGURE 1. ARM in the Abstract, and FIGURE 2. ARMS, - Somewhat Particularized, may be obtained by writing to: Mike - Padlipsky, MITRE Corporation, P.O. Box 208, Bedford, - Massachusetts, 01730, or sending computer mail to - Padlipsky@USC-ISIA. - - - - - - - - - - - - - - - - - - 8 \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc0879.txt b/kernel/picotcp/RFC/rfc0879.txt deleted file mode 100644 index fdde68e..0000000 --- a/kernel/picotcp/RFC/rfc0879.txt +++ /dev/null @@ -1,638 +0,0 @@ - - -Network Working Group J. 
Postel -Request for Comments: 879 ISI - November 1983 - - - - The TCP Maximum Segment Size - and Related Topics - -This memo discusses the TCP Maximum Segment Size Option and related -topics. The purposes is to clarify some aspects of TCP and its -interaction with IP. This memo is a clarification to the TCP -specification, and contains information that may be considered as -"advice to implementers". - -1. Introduction - - This memo discusses the TCP Maximum Segment Size and its relation to - the IP Maximum Datagram Size. TCP is specified in reference [1]. IP - is specified in references [2,3]. - - This discussion is necessary because the current specification of - this TCP option is ambiguous. - - Much of the difficulty with understanding these sizes and their - relationship has been due to the variable size of the IP and TCP - headers. - - There have been some assumptions made about using other than the - default size for datagrams with some unfortunate results. - - HOSTS MUST NOT SEND DATAGRAMS LARGER THAN 576 OCTETS UNLESS THEY - HAVE SPECIFIC KNOWLEDGE THAT THE DESTINATION HOST IS PREPARED TO - ACCEPT LARGER DATAGRAMS. - - This is a long established rule. - - To resolve the ambiguity in the TCP Maximum Segment Size option - definition the following rule is established: - - THE TCP MAXIMUM SEGMENT SIZE IS THE IP MAXIMUM DATAGRAM SIZE MINUS - FORTY. - - The default IP Maximum Datagram Size is 576. - The default TCP Maximum Segment Size is 536. - - - - - - - - - -Postel [Page 1] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - -2. The IP Maximum Datagram Size - - Hosts are not required to reassemble infinitely large IP datagrams. - The maximum size datagram that all hosts are required to accept or - reassemble from fragments is 576 octets. The maximum size reassembly - buffer every host must have is 576 octets. Hosts are allowed to - accept larger datagrams and assemble fragments into larger datagrams, - hosts may have buffers as large as they please. 
- - Hosts must not send datagrams larger than 576 octets unless they have - specific knowledge that the destination host is prepared to accept - larger datagrams. - -3. The TCP Maximum Segment Size Option - - TCP provides an option that may be used at the time a connection is - established (only) to indicate the maximum size TCP segment that can - be accepted on that connection. This Maximum Segment Size (MSS) - announcement (often mistakenly called a negotiation) is sent from the - data receiver to the data sender and says "I can accept TCP segments - up to size X". The size (X) may be larger or smaller than the - default. The MSS can be used completely independently in each - direction of data flow. The result may be quite different maximum - sizes in the two directions. - - The MSS counts only data octets in the segment, it does not count the - TCP header or the IP header. - - A footnote: The MSS value counts only data octets, thus it does not - count the TCP SYN and FIN control bits even though SYN and FIN do - consume TCP sequence numbers. - -4. The Relationship of TCP Segments and IP Datagrams - - TCP segment are transmitted as the data in IP datagrams. The - correspondence between TCP segments and IP datagrams must be one to - one. This is because TCP expects to find exactly one complete TCP - segment in each block of data turned over to it by IP, and IP must - turn over a block of data for each datagram received (or completely - reassembled). - - - - - - - - - - -Postel [Page 2] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - -5. Layering and Modularity - - TCP is an end to end reliable data stream protocol with error - control, flow control, etc. TCP remembers many things about the - state of a connection. - - IP is a one shot datagram protocol. IP has no memory of the - datagrams transmitted. It is not appropriate for IP to keep any - information about the maximum datagram size a particular destination - host might be capable of accepting. 
- - TCP and IP are distinct layers in the protocol architecture, and are - often implemented in distinct program modules. - - Some people seem to think that there must be no communication between - protocol layers or program modules. There must be communication - between layers and modules, but it should be carefully specified and - controlled. One problem in understanding the correct view of - communication between protocol layers or program modules in general, - or between TCP and IP in particular is that the documents on - protocols are not very clear about it. This is often because the - documents are about the protocol exchanges between machines, not the - program architecture within a machine, and the desire to allow many - program architectures with different organization of tasks into - modules. - -6. IP Information Requirements - - There is no general requirement that IP keep information on a per - host basis. - - IP must make a decision about which directly attached network address - to send each datagram to. This is simply mapping an IP address into - a directly attached network address. - - There are two cases to consider: the destination is on the same - network, and the destination is on a different network. - - Same Network - - For some networks the the directly attached network address can - be computed from the IP address for destination hosts on the - directly attached network. - - For other networks the mapping must be done by table look up - (however the table is initialized and maintained, for - example, [4]). - - - -Postel [Page 3] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - - Different Network - - The IP address must be mapped to the directly attached network - address of a gateway. For networks with one gateway to the - rest of the Internet the host need only determine and remember - the gateway address and use it for sending all datagrams to - other networks. 
- - For networks with multiple gateways to the rest of the - Internet, the host must decide which gateway to use for each - datagram sent. It need only check the destination network of - the IP address and keep information on which gateway to use for - each network. - - The IP does, in some cases, keep per host routing information for - other hosts on the directly attached network. The IP does, in some - cases, keep per network routing information. - - A Special Case - - There are two ICMP messages that convey information about - particular hosts. These are subtypes of the Destination - Unreachable and the Redirect ICMP messages. These messages are - expected only in very unusual circumstances. To make effective - use of these messages the receiving host would have to keep - information about the specific hosts reported on. Because these - messages are quite rare it is strongly recommended that this be - done through an exception mechanism rather than having the IP keep - per host tables for all hosts. - -7. The Relationship between IP Datagram and TCP Segment Sizes - - The relationship between the value of the maximum IP datagram size - and the maximum TCP segment size is obscure. The problem is that - both the IP header and the TCP header may vary in length. The TCP - Maximum Segment Size option (MSS) is defined to specify the maximum - number of data octets in a TCP segment exclusive of TCP (or IP) - header. 
- - To notify the data sender of the largest TCP segment it is possible - to receive the calculation of the MSS value to send is: - - MSS = MTU - sizeof(TCPHDR) - sizeof(IPHDR) - - On receipt of the MSS option the calculation of the size of segment - that can be sent is: - - SndMaxSegSiz = MIN((MTU - sizeof(TCPHDR) - sizeof(IPHDR)), MSS) - - -Postel [Page 4] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - - where MSS is the value in the option, and MTU is the Maximum - Transmission Unit (or the maximum packet size) allowed on the - directly attached network. - - This begs the question, though. What value should be used for the - "sizeof(TCPHDR)" and for the "sizeof(IPHDR)"? - - There are three reasonable positions to take: the conservative, the - moderate, and the liberal. - - The conservative or pessimistic position assumes the worst -- that - both the IP header and the TCP header are maximum size, that is, 60 - octets each. - - MSS = MTU - 60 - 60 = MTU - 120 - - If MTU is 576 then MSS = 456 - - The moderate position assumes the that the IP is maximum size (60 - octets) and the TCP header is minimum size (20 octets), because there - are no TCP header options currently defined that would normally be - sent at the same time as data segments. - - MSS = MTU - 60 - 20 = MTU - 80 - - If MTU is 576 then MSS = 496 - - The liberal or optimistic position assumes the best -- that both the - IP header and the TCP header are minimum size, that is, 20 octets - each. - - MSS = MTU - 20 - 20 = MTU - 40 - - If MTU is 576 then MSS = 536 - - If nothing is said about MSS, the data sender may cram as much as - possible into a 576 octet datagram, and if the datagram has - minimum headers (which is most likely), the result will be 536 - data octets in the TCP segment. The rule relating MSS to the - maximum datagram size ought to be consistent with this. - - A practical point is raised in favor of the liberal position too. 
- Since the use of minimum IP and TCP headers is very likely in a - very large percentage of cases, it seems wasteful to limit the TCP - segment data to so much less than could be transmitted at once, - especially since it is less than 512 octets. - - - - -Postel [Page 5] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - - For comparison: 536/576 is 93% data, 496/576 is 86% data, 456/576 - is 79% data. - -8. Maximum Packet Size - - Each network has some maximum packet size, or maximum transmission - unit (MTU). Ultimately there is some limit imposed by the - technology, but often the limit is an engineering choice or even an - administrative choice. Different installations of the same network - product do not have to use the same maximum packet size. Even within - one installation not all hosts must use the same packet size (this way - lies madness, though). - - Some IP implementers have assumed that all hosts on the directly - attached network will be the same or at least run the same - implementation. This is a dangerous assumption. It has often - developed that after a small homogeneous set of hosts have become - operational additional hosts of different types are introduced into - the environment. And it has often developed that it is desired to - use a copy of the implementation in a different inhomogeneous - environment. - - Designers of gateways should be prepared for the fact that successful - gateways will be copied and used in other situations and - installations. Gateways must be prepared to accept datagrams as - large as can be sent in the maximum packets of the directly attached - networks. Gateway implementations should be easily configured for - installation in different circumstances. - - A footnote: The MTUs of some popular networks (note that the actual - limit in some installations may be set lower by administrative - policy): - - ARPANET, MILNET = 1007 - Ethernet (10Mb) = 1500 - Proteon PRONET = 2046 - -9. 
Source Fragmentation - - A source host would not normally create datagram fragments. Under - normal circumstances datagram fragments only arise when a gateway - must send a datagram into a network with a smaller maximum packet - size than the datagram. In this case the gateway must fragment the - datagram (unless it is marked "don't fragment" in which case it is - discarded, with the option of sending an ICMP message to the source - reporting the problem). - - It might be desirable for the source host to send datagram fragments - - -Postel [Page 6] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - - if the maximum segment size (default or negotiated) allowed by the - data receiver were larger than the maximum packet size allowed by the - directly attached network. However, such datagram fragments must not - combine to a size larger than allowed by the destination host. - - For example, if the receiving TCP announced that it would accept - segments up to 5000 octets (in cooperation with the receiving IP) - then the sending TCP could give such a large segment to the - sending IP provided the sending IP would send it in datagram - fragments that fit in the packets of the directly attached - network. - - There are some conditions where source host fragmentation would be - necessary. - - If the host is attached to a network with a small packet size (for - example 256 octets), and it supports an application defined to - send fixed sized messages larger than that packet size (for - example TFTP [5]). - - If the host receives ICMP Echo messages with data it is required - to send an ICMP Echo-Reply message with the same data. If the - amount of data in the Echo were larger than the packet size of the - directly attached network the following steps might be required: - (1) receive the fragments, (2) reassemble the datagram, (3) - interpret the Echo, (4) create an Echo-Reply, (5) fragment it, and - (6) send the fragments. - -10. 
Gateway Fragmentation - - Gateways must be prepared to do fragmentation. It is not an optional - feature for a gateway. - - Gateways have no information about the size of datagrams destination - hosts are prepared to accept. It would be inappropriate for gateways - to attempt to keep such information. - - Gateways must be prepared to accept the largest datagrams that are - allowed on each of the directly attached networks, even if it is - larger than 576 octets. - - Gateways must be prepared to fragment datagrams to fit into the - packets of the next network, even if it is smaller than 576 octets. - - If a source host thought to take advantage of the local network's - ability to carry larger datagrams but doesn't have the slightest idea - if the destination host can accept larger than default datagrams and - expects the gateway to fragment the datagram into default size - - -Postel [Page 7] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - - fragments, then the source host is misguided. If indeed, the - destination host can't accept larger than default datagrams, it - probably can't reassemble them either. If the gateway either passes - on the large datagram whole or fragments into default size fragments - the destination will not accept it. Thus, this mode of behavior by - source hosts must be outlawed. - - A larger than default datagram can only arrive at a gateway because - the source host knows that the destination host can handle such large - datagrams (probably because the destination host announced it to the - source host in a TCP MSS option). Thus, the gateway should pass on - this large datagram in one piece or in the largest fragments that fit - into the next network. - - An interesting footnote is that even though the gateways may know - about the 576 rule, it is irrelevant to them. - -11. Inter-Layer Communication - - The Network Driver (ND) or interface should know the Maximum - Transmission Unit (MTU) of the directly attached network. 
- - The IP should ask the Network Driver for the Maximum Transmission - Unit. - - The TCP should ask the IP for the Maximum Datagram Data Size (MDDS). - This is the MTU minus the IP header length (MDDS = MTU - IPHdrLen). - - When opening a connection TCP can send an MSS option with the value - equal MDDS - TCPHdrLen. - - TCP should determine the Maximum Segment Data Size (MSDS) from either - the default or the received value of the MSS option. - - TCP should determine if source fragmentation is possible (by asking - the IP) and desirable. - - If so TCP may hand to IP segments (including the TCP header) up to - MSDS + TCPHdrLen. - - If not TCP may hand to IP segments (including the TCP header) up - to the lesser of (MSDS + TCPHdrLen) and MDDS. - - IP checks the length of data passed to it by TCP. If the length is - less than or equal MDDS, IP attached the IP header and hands it to - the ND. Otherwise the IP must do source fragmentation. - - - - -Postel [Page 8] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - -12. What is the Default MSS ? - - Another way of asking this question is "What transmitted value for - MSS has exactly the same effect of not transmitting the option at - all?". - - In terms of the previous section: - - The default assumption is that the Maximum Transmission Unit is - 576 octets. - - MTU = 576 - - The Maximum Datagram Data Size (MDDS) is the MTU minus the IP - header length. - - MDDS = MTU - IPHdrLen = 576 - 20 = 556 - - When opening a connection TCP can send an MSS option with the - value equal MDDS - TCPHdrLen. - - MSS = MDDS - TCPHdrLen = 556 - 20 = 536 - - TCP should determine the Maximum Segment Data Size (MSDS) from - either the default or the received value of the MSS option. - - Default MSS = 536, then MSDS = 536 - - TCP should determine if source fragmentation is possible and - desirable. - - If so TCP may hand to IP segments (including the TCP header) up - to MSDS + TCPHdrLen (536 + 20 = 556). 
- - If not TCP may hand to IP segments (including the TCP header) - up to the lesser of (MSDS + TCPHdrLen (536 + 20 = 556)) and - MDDS (556). - - - - - - - - - - - - - -Postel [Page 9] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - -13. The Truth - - The rule relating the maximum IP datagram size and the maximum TCP - segment size is: - - TCP Maximum Segment Size = IP Maximum Datagram Size - 40 - - The rule must match the default case. - - If the TCP Maximum Segment Size option is not transmitted then the - data sender is allowed to send IP datagrams of maximum size (576) - with a minimum IP header (20) and a minimum TCP header (20) and - thereby be able to stuff 536 octets of data into each TCP segment. - - The definition of the MSS option can be stated: - - The maximum number of data octets that may be received by the - sender of this TCP option in TCP segments with no TCP header - options transmitted in IP datagrams with no IP header options. - -14. The Consequences - - When TCP is used in a situation when either the IP or TCP headers are - not minimum and yet the maximum IP datagram that can be received - remains 576 octets then the TCP Maximum Segment Size option must be - used to reduce the limit on data octets allowed in a TCP segment. - - For example, if the IP Security option (11 octets) were in use and - the IP maximum datagram size remained at 576 octets, then the TCP - should send the MSS with a value of 525 (536-11). - - - - - - - - - - - - - - - - - - - - -Postel [Page 10] - - - -RFC 879 November 1983 -TCP Maximum Segment Size - - -15. References - - [1] Postel, J., ed., "Transmission Control Protocol - DARPA Internet - Program Protocol Specification", RFC 793, USC/Information - Sciences Institute, September 1981. - - [2] Postel, J., ed., "Internet Protocol - DARPA Internet Program - Protocol Specification", RFC 791, USC/Information Sciences - Institute, September 1981. 
- - [3] Postel, J., "Internet Control Message Protocol - DARPA Internet - Program Protocol Specification", RFC 792, USC/Information - Sciences Institute, September 1981. - - [4] Plummer, D., "An Ethernet Address Resolution Protocol or - Converting Network Protocol Addresses to 48-bit Ethernet - Addresses for Transmission on Ethernet Hardware", RFC 826, - MIT/LCS, November 1982. - - [5] Sollins, K., "The TFTP Protocol (Revision 2)", RFC 783, MIT/LCS, - June 1981. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Postel [Page 11] - diff --git a/kernel/picotcp/RFC/rfc0896.txt b/kernel/picotcp/RFC/rfc0896.txt deleted file mode 100644 index d8a480a..0000000 --- a/kernel/picotcp/RFC/rfc0896.txt +++ /dev/null @@ -1,512 +0,0 @@ - - -Network Working Group John Nagle -Request For Comments: 896 6 January 1984 - Ford Aerospace and Communications Corporation - - Congestion Control in IP/TCP Internetworks - -This memo discusses some aspects of congestion control in IP/TCP -Internetworks. It is intended to stimulate thought and further -discussion of this topic. While some specific suggestions are -made for improved congestion control implementation, this memo -does not specify any standards. - - Introduction - -Congestion control is a recognized problem in complex networks. -We have discovered that the Department of Defense's Internet Pro- -tocol (IP) , a pure datagram protocol, and Transmission Control -Protocol (TCP), a transport layer protocol, when used together, -are subject to unusual congestion problems caused by interactions -between the transport and datagram layers. In particular, IP -gateways are vulnerable to a phenomenon we call "congestion col- -lapse", especially when such gateways connect networks of widely -different bandwidth. We have developed solutions that prevent -congestion collapse. - -These problems are not generally recognized because these proto- -cols are used most often on networks built on top of ARPANET IMP -technology. 
ARPANET IMP based networks traditionally have uni- -form bandwidth and identical switching nodes, and are sized with -substantial excess capacity. This excess capacity, and the abil- -ity of the IMP system to throttle the transmissions of hosts has -for most IP / TCP hosts and networks been adequate to handle -congestion. With the recent split of the ARPANET into two inter- -connected networks and the growth of other networks with differ- -ing properties connected to the ARPANET, however, reliance on the -benign properties of the IMP system is no longer enough to allow -hosts to communicate rapidly and reliably. Improved handling of -congestion is now mandatory for successful network operation -under load. - -Ford Aerospace and Communications Corporation, and its parent -company, Ford Motor Company, operate the only private IP/TCP -long-haul network in existence today. This network connects four -facilities (one in Michigan, two in California, and one in Eng- -land) some with extensive local networks. This net is cross-tied -to the ARPANET but uses its own long-haul circuits; traffic -between Ford facilities flows over private leased circuits, -including a leased transatlantic satellite connection. All -switching nodes are pure IP datagram switches with no node-to- -node flow control, and all hosts run software either written or -heavily modified by Ford or Ford Aerospace. Bandwidth of links -in this network varies widely, from 1200 to 10,000,000 bits per -second. In general, we have not been able to afford the luxury -of excess long-haul bandwidth that the ARPANET possesses, and our -long-haul links are heavily loaded during peak periods. Transit -times of several seconds are thus common in our network. - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -Because of our pure datagram orientation, heavy loading, and wide -variation in bandwidth, we have had to solve problems that the -ARPANET / MILNET community is just beginning to recognize. 
Our -network is sensitive to suboptimal behavior by host TCP implemen- -tations, both on and off our own net. We have devoted consider- -able effort to examining TCP behavior under various conditions, -and have solved some widely prevalent problems with TCP. We -present here two problems and their solutions. Many TCP imple- -mentations have these problems; if throughput is worse through an -ARPANET / MILNET gateway for a given TCP implementation than -throughput across a single net, there is a high probability that -the TCP implementation has one or both of these problems. - - Congestion collapse - -Before we proceed with a discussion of the two specific problems -and their solutions, a description of what happens when these -problems are not addressed is in order. In heavily loaded pure -datagram networks with end to end retransmission, as switching -nodes become congested, the round trip time through the net -increases and the count of datagrams in transit within the net -also increases. This is normal behavior under load. As long as -there is only one copy of each datagram in transit, congestion is -under control. Once retransmission of datagrams not yet -delivered begins, there is potential for serious trouble. - -Host TCP implementations are expected to retransmit packets -several times at increasing time intervals until some upper limit -on the retransmit interval is reached. Normally, this mechanism -is enough to prevent serious congestion problems. Even with the -better adaptive host retransmission algorithms, though, a sudden -load on the net can cause the round-trip time to rise faster than -the sending hosts' measurements of round-trip time can be updated. -Such a load occurs when a new bulk transfer, such as a file -transfer, begins and starts filling a large window. Should the -round-trip time exceed the maximum retransmission interval for -any host, that host will begin to introduce more and more copies -of the same datagrams into the net. 
The network is now in seri- -ous trouble. Eventually all available buffers in the switching -nodes will be full and packets must be dropped. The round-trip -time for packets that are delivered is now at its maximum. Hosts -are sending each packet several times, and eventually some copy -of each packet arrives at its destination. This is congestion -collapse. - -This condition is stable. Once the saturation point has been -reached, if the algorithm for selecting packets to be dropped is -fair, the network will continue to operate in a degraded condi- -tion. In this condition every packet is being transmitted -several times and throughput is reduced to a small fraction of -normal. We have pushed our network into this condition experi- -mentally and observed its stability. It is possible for round- -trip time to become so large that connections are broken because - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -the hosts involved time out. - -Congestion collapse and pathological congestion are not normally -seen in the ARPANET / MILNET system because these networks have -substantial excess capacity. Where connections do not pass -through IP gateways, the IMP-to host flow control mechanisms usu- -ally prevent congestion collapse, especially since TCP implemen- -tations tend to be well adjusted for the time constants associ- -ated with the pure ARPANET case. However, other than ICMP Source -Quench messages, nothing fundamentally prevents congestion col- -lapse when TCP is run over the ARPANET / MILNET and packets are -being dropped at gateways. Worth noting is that a few badly- -behaved hosts can by themselves congest the gateways and prevent -other hosts from passing traffic. We have observed this problem -repeatedly with certain hosts (with whose administrators we have -communicated privately) on the ARPANET. - -Adding additional memory to the gateways will not solve the prob- -lem. 
The more memory added, the longer round-trip times must -become before packets are dropped. Thus, the onset of congestion -collapse will be delayed but when collapse occurs an even larger -fraction of the packets in the net will be duplicates and -throughput will be even worse. - - The two problems - -Two key problems with the engineering of TCP implementations have -been observed; we call these the small-packet problem and the -source-quench problem. The second is being addressed by several -implementors; the first is generally believed (incorrectly) to be -solved. We have discovered that once the small-packet problem -has been solved, the source-quench problem becomes much more -tractable. We thus present the small-packet problem and our -solution to it first. - - The small-packet problem - -There is a special problem associated with small packets. When -TCP is used for the transmission of single-character messages -originating at a keyboard, the typical result is that 41 byte -packets (one byte of data, 40 bytes of header) are transmitted -for each byte of useful data. This 4000% overhead is annoying -but tolerable on lightly loaded networks. On heavily loaded net- -works, however, the congestion resulting from this overhead can -result in lost datagrams and retransmissions, as well as exces- -sive propagation time caused by congestion in switching nodes and -gateways. In practice, throughput may drop so low that TCP con- -nections are aborted. - -This classic problem is well-known and was first addressed in the -Tymnet network in the late 1960s. The solution used there was to -impose a limit on the count of datagrams generated per unit time. -This limit was enforced by delaying transmission of small packets - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -until a short (200-500ms) time had elapsed, in hope that another -character or two would become available for addition to the same -packet before the timer ran out. 
An additional feature to -enhance user acceptability was to inhibit the time delay when a -control character, such as a carriage return, was received. - -This technique has been used in NCP Telnet, X.25 PADs, and TCP -Telnet. It has the advantage of being well-understood, and is not -too difficult to implement. Its flaw is that it is hard to come -up with a time limit that will satisfy everyone. A time limit -short enough to provide highly responsive service over a 10M bits -per second Ethernet will be too short to prevent congestion col- -lapse over a heavily loaded net with a five second round-trip -time; and conversely, a time limit long enough to handle the -heavily loaded net will produce frustrated users on the Ethernet. - - The solution to the small-packet problem - -Clearly an adaptive approach is desirable. One would expect a -proposal for an adaptive inter-packet time limit based on the -round-trip delay observed by TCP. While such a mechanism could -certainly be implemented, it is unnecessary. A simple and -elegant solution has been discovered. - -The solution is to inhibit the sending of new TCP segments when -new outgoing data arrives from the user if any previously -transmitted data on the connection remains unacknowledged. This -inhibition is to be unconditional; no timers, tests for size of -data received, or other conditions are required. Implementation -typically requires one or two lines inside a TCP program. - -At first glance, this solution seems to imply drastic changes in -the behavior of TCP. This is not so. It all works out right in -the end. Let us see why this is so. - -When a user process writes to a TCP connection, TCP receives some -data. It may hold that data for future sending or may send a -packet immediately. If it refrains from sending now, it will -typically send the data later when an incoming packet arrives and -changes the state of the system. 
The state changes in one of two -ways; the incoming packet acknowledges old data the distant host -has received, or announces the availability of buffer space in -the distant host for new data. (This last is referred to as -"updating the window"). Each time data arrives on a connec- -tion, TCP must reexamine its current state and perhaps send some -packets out. Thus, when we omit sending data on arrival from the -user, we are simply deferring its transmission until the next -message arrives from the distant host. A message must always -arrive soon unless the connection was previously idle or communi- -cations with the other end have been lost. In the first case, -the idle connection, our scheme will result in a packet being -sent whenever the user writes to the TCP connection. Thus we do -not deadlock in the idle condition. In the second case, where - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -the distant host has failed, sending more data is futile anyway. -Note that we have done nothing to inhibit normal TCP retransmis- -sion logic, so lost messages are not a problem. - -Examination of the behavior of this scheme under various condi- -tions demonstrates that the scheme does work in all cases. The -first case to examine is the one we wanted to solve, that of the -character-oriented Telnet connection. Let us suppose that the -user is sending TCP a new character every 200ms, and that the -connection is via an Ethernet with a round-trip time including -software processing of 50ms. Without any mechanism to prevent -small-packet congestion, one packet will be sent for each charac- -ter, and response will be optimal. Overhead will be 4000%, but -this is acceptable on an Ethernet. The classic timer scheme, -with a limit of 2 packets per second, will cause two or three -characters to be sent per packet. Response will thus be degraded -even though on a high-bandwidth Ethernet this is unnecessary. 
-Overhead will drop to 1500%, but on an Ethernet this is a bad -tradeoff. With our scheme, every character the user types will -find TCP with an idle connection, and the character will be sent -at once, just as in the no-control case. The user will see no -visible delay. Thus, our scheme performs as well as the no- -control scheme and provides better responsiveness than the timer -scheme. - -The second case to examine is the same Telnet test but over a -long-haul link with a 5-second round trip time. Without any -mechanism to prevent small-packet congestion, 25 new packets -would be sent in 5 seconds.* Overhead here is 4000%. With the -classic timer scheme, and the same limit of 2 packets per second, -there would still be 10 packets outstanding and contributing to -congestion. Round-trip time will not be improved by sending many -packets, of course; in general it will be worse since the packets -will contend for line time. Overhead now drops to 1500%. With -our scheme, however, the first character from the user would find -an idle TCP connection and would be sent immediately. The next -24 characters, arriving from the user at 200ms intervals, would -be held pending a message from the distant host. When an ACK -arrived for the first packet at the end of 5 seconds, a single -packet with the 24 queued characters would be sent. Our scheme -thus results in an overhead reduction to 320% with no penalty in -response time. Response time will usually be improved with our -scheme because packet overhead is reduced, here by a factor of -4.7 over the classic timer scheme. Congestion will be reduced by -this factor and round-trip delay will decrease sharply. 
For this -________ - * This problem is not seen in the pure ARPANET case because the - IMPs will block the host when the count of packets - outstanding becomes excessive, but in the case where a pure - datagram local net (such as an Ethernet) or a pure datagram - gateway (such as an ARPANET / MILNET gateway) is involved, it - is possible to have large numbers of tiny packets - outstanding. - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -case, our scheme has a striking advantage over either of the -other approaches. - -We use our scheme for all TCP connections, not just Telnet con- -nections. Let us see what happens for a file transfer data con- -nection using our technique. The two extreme cases will again be -considered. - -As before, we first consider the Ethernet case. The user is now -writing data to TCP in 512 byte blocks as fast as TCP will accept -them. The user's first write to TCP will start things going; our -first datagram will be 512+40 bytes or 552 bytes long. The -user's second write to TCP will not cause a send but will cause -the block to be buffered. Assume that the user fills up TCP's -outgoing buffer area before the first ACK comes back. Then when -the ACK comes in, all queued data up to the window size will be -sent. From then on, the window will be kept full, as each ACK -initiates a sending cycle and queued data is sent out. Thus, -after a one round-trip time initial period when only one block is -sent, our scheme settles down into a maximum-throughput condi- -tion. The delay in startup is only 50ms on the Ethernet, so the -startup transient is insignificant. All three schemes provide -equivalent performance for this case. - -Finally, let us look at a file transfer over the 5-second round -trip time connection. Again, only one packet will be sent until -the first ACK comes back; the window will then be filled and kept -full. 
Since the round-trip time is 5 seconds, only 512 bytes of -data are transmitted in the first 5 seconds. Assuming a 2K win- -dow, once the first ACK comes in, 2K of data will be sent and a -steady rate of 2K per 5 seconds will be maintained thereafter. -Only for this case is our scheme inferior to the timer scheme, -and the difference is only in the startup transient; steady-state -throughput is identical. The naive scheme and the timer scheme -would both take 250 seconds to transmit a 100K byte file under -the above conditions and our scheme would take 254 seconds, a -difference of 1.6%. - -Thus, for all cases examined, our scheme provides at least 98% of -the performance of both other schemes, and provides a dramatic -improvement in Telnet performance over paths with long round trip -times. We use our scheme in the Ford Aerospace Software -Engineering Network, and are able to run screen editors over Eth- -ernet and talk to distant TOPS-20 hosts with improved performance -in both cases. - - Congestion control with ICMP - -Having solved the small-packet congestion problem and with it the -problem of excessive small-packet congestion within our own net- -work, we turned our attention to the problem of general conges- -tion control. Since our own network is pure datagram with no -node-to-node flow control, the only mechanism available to us - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -under the IP standard was the ICMP Source Quench message. With -careful handling, we find this adequate to prevent serious -congestion problems. We do find it necessary to be careful about -the behavior of our hosts and switching nodes regarding Source -Quench messages. - - When to send an ICMP Source Quench - -The present ICMP standard* specifies that an ICMP Source Quench -message should be sent whenever a packet is dropped, and addi- -tionally may be sent when a gateway finds itself becoming short -of resources. 
There is some ambiguity here but clearly it is a -violation of the standard to drop a packet without sending an -ICMP message. - -Our basic assumption is that packets ought not to be dropped dur- -ing normal network operation. We therefore want to throttle -senders back before they overload switching nodes and gateways. -All our switching nodes send ICMP Source Quench messages well -before buffer space is exhausted; they do not wait until it is -necessary to drop a message before sending an ICMP Source Quench. -As demonstrated in our analysis of the small-packet problem, -merely providing large amounts of buffering is not a solution. -In general, our experience is that Source Quench should be sent -when about half the buffering space is exhausted; this is not -based on extensive experimentation but appears to be a reasonable -engineering decision. One could argue for an adaptive scheme -that adjusted the quench generation threshold based on recent -experience; we have not found this necessary as yet. - -There exist other gateway implementations that generate Source -Quenches only after more than one packet has been discarded. We -consider this approach undesirable since any system for control- -ling congestion based on the discarding of packets is wasteful of -bandwidth and may be susceptible to congestion collapse under -heavy load. Our understanding is that the decision to generate -Source Quenches with great reluctance stems from a fear that ack- -nowledge traffic will be quenched and that this will result in -connection failure. As will be shown below, appropriate handling -of Source Quench in host implementations eliminates this possi- -bility. - - What to do when an ICMP Source Quench is received - -We inform TCP or any other protocol at that layer when ICMP -receives a Source Quench. The basic action of our TCP implemen- -tations is to reduce the amount of data outstanding on connec- -tions to the host mentioned in the Source Quench. 
This control is -________ - * ARPANET RFC 792 is the present standard. We are advised by - the Defense Communications Agency that the description of - ICMP in MIL-STD-1777 is incomplete and will be deleted from - future revision of that standard. - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -applied by causing the sending TCP to behave as if the distant -host's window size has been reduced. Our first implementation -was simplistic but effective; once a Source Quench has been -received our TCP behaves as if the window size is zero whenever -the window isn't empty. This behavior continues until some -number (at present 10) of ACKs have been received, at that time -TCP returns to normal operation.* David Mills of Linkabit Cor- -poration has since implemented a similar but more elaborate -throttle on the count of outstanding packets in his DCN systems. -The additional sophistication seems to produce a modest gain in -throughput, but we have not made formal tests. Both implementa- -tions effectively prevent congestion collapse in switching nodes. - -Source Quench thus has the effect of limiting the connection to a -limited number (perhaps one) of outstanding messages. Thus, com- -munication can continue but at a reduced rate, that is exactly -the effect desired. - -This scheme has the important property that Source Quench doesn't -inhibit the sending of acknowledges or retransmissions. Imple- -mentations of Source Quench entirely within the IP layer are usu- -ally unsuccessful because IP lacks enough information to throttle -a connection properly. Holding back acknowledges tends to pro- -duce retransmissions and thus unnecessary traffic. Holding back -retransmissions may cause loss of a connection by a retransmis- -sion timeout. Our scheme will keep connections alive under -severe overload but at reduced bandwidth per connection. - -Other protocols at the same layer as TCP should also be respon- -sive to Source Quench. 
In each case we would suggest that new -traffic should be throttled but acknowledges should be treated -normally. The only serious problem comes from the User Datagram -Protocol, not normally a major traffic generator. We have not -implemented any throttling in these protocols as yet; all are -passed Source Quench messages by ICMP but ignore them. - - Self-defense for gateways - -As we have shown, gateways are vulnerable to host mismanagement -of congestion. Host misbehavior by excessive traffic generation -can prevent not only the host's own traffic from getting through, -but can interfere with other unrelated traffic. The problem can -be dealt with at the host level but since one malfunctioning host -can interfere with others, future gateways should be capable of -defending themselves against such behavior by obnoxious or mali- -cious hosts. We offer some basic self-defense techniques. - -On one occasion in late 1983, a TCP bug in an ARPANET host caused -the host to frantically generate retransmissions of the same -datagram as fast as the ARPANET would accept them. The gateway -________ - * This follows the control engineering dictum "Never bother - with proportional control unless bang-bang doesn't work". - - -RFC 896 Congestion Control in IP/TCP Internetworks 1/6/84 - - -that connected our net with the ARPANET was saturated and little -useful traffic could get through, since the gateway had more -bandwidth to the ARPANET than to our net. The gateway busily -sent ICMP Source Quench messages but the malfunctioning host -ignored them. This continued for several hours, until the mal- -functioning host crashed. During this period, our network was -effectively disconnected from the ARPANET. - -When a gateway is forced to discard a packet, the packet is -selected at the discretion of the gateway. Classic techniques -for making this decision are to discard the most recently -received packet, or the packet at the end of the longest outgoing -queue. 
We suggest that a worthwhile practical measure is to dis- -card the latest packet from the host that originated the most -packets currently queued within the gateway. This strategy will -tend to balance throughput amongst the hosts using the gateway. -We have not yet tried this strategy, but it seems a reasonable -starting point for gateway self-protection. - -Another strategy is to discard a newly arrived packet if the -packet duplicates a packet already in the queue. The computa- -tional load for this check is not a problem if hashing techniques -are used. This check will not protect against malicious hosts -but will provide some protection against TCP implementations with -poor retransmission control. Gateways between fast local net- -works and slower long-haul networks may find this check valuable -if the local hosts are tuned to work well with the local network. - -Ideally the gateway should detect malfunctioning hosts and -squelch them; such detection is difficult in a pure datagram sys- -tem. Failure to respond to an ICMP Source Quench message, -though, should be regarded as grounds for action by a gateway to -disconnect a host. Detecting such failure is non-trivial but is -a worthwhile area for further research. - - Conclusion - -The congestion control problems associated with pure datagram -networks are difficult, but effective solutions exist. If IP / -TCP networks are to be operated under heavy load, TCP implementa- -tions must address several key issues in ways at least as effec- -tive as the ones described here. - diff --git a/kernel/picotcp/RFC/rfc0964.txt b/kernel/picotcp/RFC/rfc0964.txt deleted file mode 100644 index ba78650..0000000 --- a/kernel/picotcp/RFC/rfc0964.txt +++ /dev/null @@ -1,570 +0,0 @@ - - -Network Working Group Deepinder P. Sidhu -Request for Comments: 964 Thomas P. 
Blumer - SDC - A Burroughs Company - November 1985 - - SOME PROBLEMS WITH THE SPECIFICATION OF THE - MILITARY STANDARD TRANSMISSION CONTROL PROTOCOL - - -STATUS OF THIS MEMO - - The purpose of this RFC is to provide helpful information on the - Military Standard Transmission Control Protocol (MIL-STD-1778) so - that one can obtain a reliable implementation of this protocol - standard. Distribution of this note is unlimited. - - Reprinted from: Proc. Protocol Specification, Testing and - Verification IV, (ed.) Y. Yemini, et al, North-Holland (1984). - -ABSTRACT - - This note points out three errors with the specification of the - Military Standard Transmission Control Protocol (MIL-STD-1778, dated - August 1983 [MILS83]). These results are based on an initial - investigation of this protocol standard. The first problem is that - data accompanying a SYN can not be accepted because of errors in the - acceptance policy. The second problem is that no retransmission - timer is set for a SYN packet, and therefore the SYN will not be - retransmitted if it is lost. The third problem is that when the - connection has been established, neither entity takes the proper - steps to accept incoming data. This note also proposes solutions to - these problems. - -1. Introduction - - In recent years, much progress has been made in creating an - integrated set of tools for developing reliable communication - protocols. These tools provide assistance in the specification, - verification, implementation and testing of protocols. Several - protocols have been analyzed and developed using such tools. - - In a recent paper, the authors discussed the verification of the - connection management of NBS class 4 transport protocol (TP4). The - verification was carried out with the help of a software tool we - developed [BLUT82] [BLUT83] [SIDD83]. In spite of the very precise - specification of this protocol, our analysis discovered several - errors in the current specification of NBS TP4. 
These errors are - incompleteness errors in the specification, that is, states where - there is no transition for the reception of some input event. Our - analysis did not find deadlocks, livelocks or any other problem in - the connection management of TP4. In that paper, we proposed - - -Sidhu & Blumer [Page 1] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - - solutions for all errors except for errors associated with 2 states - whose satisfactory resolution may require redesigning parts of TP4. - Modifications to TP4 specification are currently underway to solve - the remaining incompleteness problems with 2 states. It is important - to emphasize that we did not find any obvious error in the NBS - specification of TP4. - - The authors are currently working on the verification of connection - management of the Military Standard Transmission Control Protocol - (TCP). This analysis will be based on the published specification - [MILS83] of TCP dated 12 August 1983. - - While studying the MIL standard TCP specification in preparation for - our analysis of the connection management features, we have noticed - several errors in the specification. As a consequence of these - errors, the Transmission Control Protocol (as specified in [MILS83]) - will not permit data to be received by TCP entities in SYN_RECVD and - ESTAB states. - - The proof of this statement follows from the specification of the - three-way handshake mechanism of TCP [MILS83] and from a decision - table associated with ESTAB state. - -2. Transmission Control Protocol - - The Transmission Control Protocol (TCP) is a transport level - connection-oriented protocol in the DoD protocol hierarchy for use in - packet-switched and other networks. Its most important services are - reliable transfer and ordered delivery of data over full-duplex and - flow-controlled virtual connections. 
TCP is designed to operate - successfully over channels that are inherently unreliable, i.e., they - can lose, damage, duplicate, and reorder packets. - - TCP is based, in part, on a protocol discussed by Cerf and Kahn - [CERV74]. Over the years, DARPA has supported specifications of - several versions of this protocol, the last one appeared in [POSJ81]. - Some issues in the connection management of this protocol are - discussed in [SUNC78]. - - A few years ago, DCA decided to standardize TCP for use in DoD - networks and supported formal specification of this protocol - following the design of this protocol discussed in [POSJ81]. A - detailed specification of this protocol given in [MILS83] has been - adopted as the DoD standard for the Transmission Control Protocol, a - reliable connection-oriented transport protocol for DoD networks. - - A TCP connection progresses through three phases: opening (or - - -Sidhu & Blumer [Page 2] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - - synchronization), maintenance, and closing. In this note we consider - data transfer in the opening and maintenance phases of the - connection. - -3. Problems with MIL Standard TCP - - One basic feature of TCP is the three-way handshake which is used to - set up a properly synchronized connection between two remote TCP - entities. This mechanism is incorrectly specified in the current - specification of TCP. One problem is that data associated with the - SYN packet can not be delivered. This results from an incorrect - specification of the interaction between the accept_policy action - procedure and the record_syn action procedure. Neither of the 2 - possible strategies suggested in accept_policy will give the correct - result when called from the record_syn procedure, because the - recv_next variable is updated in record_syn before the accept_policy - procedure is called. 
- - Another problem with the specification of the three-way handshake is - apparent in the actions listed for the Active Open event (with or - without data) when in the CLOSED state. No retransmission timer is - set in these actions, and therefore if the initial SYN is lost, there - will be no timer expiration to trigger retransmission. This will - prevent connection establishment if the initial SYN packet is lost by - the network. - - The third problem with the specification is that the actions for - receiving data in the ESTAB state are incorrect. The accept action - procedure must be called when data is received, so that arriving data - may be queued and possibly passed to the user. - - A general problem with this specification is that the program - language and action table portions of the specification were clearly - not checked by any automatic syntax checking process. Several - variable and procedure names are misspelled, and the syntax of the - action statements is often incorrect. This can be confusing, - especially when a procedure name cannot be found in the alphabetized - list of procedures because of misspelling. - - These are some of the very serious errors that we have discovered - with the MIL standard TCP. - - - - - - - - -Sidhu & Blumer [Page 3] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - -4. Detailed Discussion of the Problem - - Problem 1: Problem with Receiving Data Accompanying SYN - - The following scenario traces the actions of 2 communicating - entities during the establishment of a connection. Only the - simplest case is considered, i.e., the case where the connection - is established by the exchange of 3 segments. 
- - TCP entity A TCP entity B - ------------ ------------ - - state segment segment state - transition recvd or sent recvd or sent transition - by A by B - - CLOSED -> LISTEN - - CLOSED -> SYN_SENT SYN --> - - SYN --> LISTEN -> SYN_RECVD - <-- SYN ACK - - SYN_SENT -> ESTAB <-- SYN ACK - ACK --> - - ACK --> SYN_RECVD -> ESTAB - - As shown in the above diagram, 5 state transitions occur and 3 TCP - segments are exchanged during the simplest case of the three-way - handshake. We now examine in detail the actions of each entity - during this exchange. Special attention is given to the sequence - numbers carried in each packet and recorded in the state variables - of each entity. - - In the diagram below, the actions occurring within a procedure are - shown indented from the procedure call. The resulting values of - sequence number variables are shown in square brackets to the - right of each statement. The sequence number variables are shown - with the entity name (A or B) as prefix so that the two sets of - state variables may be easily distinguished. - - - - - - - - -Sidhu & Blumer [Page 4] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - - Transition 1 (entity B goes from state CLOSED to state LISTEN). - The user associated with entity B issues a Passive Open. - - Actions: (see p. 104) - open; (see p. 144) - new state := LISTEN; - - Transition 2 (entity A goes from state CLOSED to SYN_SENT). The - user associated with entity A issues an Active Open with Data. - - Actions: (see p. 104) - open; (see p. 144) - gen_syn(WITH_DATA); (see p. 
141) - send_isn := gen_isn(); [A.send_isn = 100] - send_next := send_isn + 1; [A.send_next = 101] - send_una := send_isn; [A.send_una = 100] - seg.seq_num := send_isn; [seg.seq_num = 100] - seg.ack_flag := FALSE; [seg.ack_flag = FALSE] - seg.wndw := 0; [seg.wndw = 0] - amount := send_policy() [assume amount > 0] - new state := SYN_SENT; - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Sidhu & Blumer [Page 5] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - - Transition 3 (Entity B goes from state LISTEN to state SYN_RECVD). - Entity B receives the SYN segment accompanying data sent by entity - A. - - Actions: (see p. 106) - (since this segment has no RESET, no ACK, does have SYN, and - we assume reasonable security and precedence parameters, row - 3 of the table applies) - record_syn; (see p. 147) - recv_isn := seg.seq_num; [B.recv_isn = seg_seq_num = 100] - recv_next := recv_isn + 1; [B.recv_next = 101] - if seg.ack_flag then - send_una := seg.ack_num; [no change] - accept_policy; (see p. 131) - Accept in-order data only: - Acceptance Test is - seg.seq_num = recv_next; - Accept any data within the receive window: - Acceptance Test has two parts - recv_next =< seg.seq_num =< recv_next + - recv_wndw - or - recv_next =< seg.seq_num + length =< - recv_next + recv_wndw - ******************************************** - An error occurs here, with either possible - strategy given in accept_policy, because - recv_next > seg.seq_num. Therefore - accept_policy will incorrectly indicate that - the data cannot be accepted. - ******************************************** - gen_syn(WITH_ACK); (see p. 
141) - send_isn := gen_isn(); [B.send_isn = 300] - send_next := send_isn + 1; [B.send_next = 301] - send_una := send_isn; [B.send_una = 300] - seg.seq_num := send_next; [seg.seq_num = 301] - seg.ack_flag := TRUE; [seg.ack_flag = TRUE] - seg.ack_num := recv_isn + 1; [seg.ack_num = 102] - new state := SYN_RECVD; - - - - - - - - - - -Sidhu & Blumer [Page 6] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - - Transition 4 (entity A goes from state SYN_SENT to ESTAB) Entity A - receives the SYN ACK sent by entity B. - - Actions: (see p. 107) - In order to select the applicable row of the table on p. - 107, we first evaluate the decision function - ACK_status_test1. - ACK_status_test1(); - if(seg.ack_flag = FALSE) then - return(NONE); - if(seg.ack_num <= send_una) or - (seg.ack_num > send_next) then - return(INVALID) - else - return(VALID); - - ... and so on. - - The important thing to notice in the above scenario is the error - that occurs in transition 3, where the wrong value for recv_next - leads to the routine record_syn refusing to accept the data. - - Problem 2: Problem with Retransmission of SYN Packet - - The actions listed for Active Open (with or without data; see p. - 103) are calls to the routines open and gen_syn. Neither of these - routines (or routines that they call) explicitly sets a - retransmission timer. Therefore if the initial SYN is lost there - is no timer expiration to trigger retransmission of the SYN. If - this happens, the TCP will fail in its attempt to establish the - desired connection with a remote TCP. - - Note that this differs with the actions specified for transmission - of data from the ESTAB state. In that transition the routine - dispatch (p. 137) is called first which in turn calls the routine - send_new_data (p. 156). One of actions of the last routine is to - start a retransmission timer for the newly sent data. 
- - - - - - - - - - - - -Sidhu & Blumer [Page 7] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - - Problem 3: Problem with Receiving Data in TCP ESTAB State - - When both entities are in the state ESTAB, and one sends data to - the other, an error in the actions of the receiver prohibits the - data from being accepted. The following simple scenario - illustrates the problem. Here the user associated with entity A - issues a Send request, and A sends data to entity B. When B - receives the data it replies with an acknowledgment. - - TCP entity A TCP entity B - ------------ ------------ - - state segment segment state - transition recvd or sent recvd or sent transition - by A by B - - ESTAB -> ESTAB DATA --> - - DATA --> ESTAB -> ESTAB - <-- ACK - - Transition 1 (entity A goes from state ESTAB to ESTAB) Entity A - sends data packet to entity B. - - Actions: (see p. 110) - dispatch; (see p. 137) - - Transition 2 (entity B goes from state ESTAB to ESTAB) Entity B - receives data packet from entity A. - - Actions: (see p. 111) - Assuming the data is in order and valid, we use row 6 of the - table. - update; (see p. 159) - ************************************************************ - An error occurs here, because the routine update does - nothing to accept the incoming data, or to arrange to - pass it on to the user. - ************************************************************ - - - - - - - - - - -Sidhu & Blumer [Page 8] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - -5. Solutions to Problems - - The problem with record_syn and accept_policy can be solved by having - record_syn call accept_policy before the variable recv_next is - updated. - - The problem with gen_syn can be corrected by having gen_syn or open - explicitly request the retransmission timer. - - The problem with the reception of data in the ESTAB state is - apparently caused by the transposition of the action tables on pages - 111 and 112.
These tables should be interchanged. This solution - will also correct a related problem, namely that an entity can never - reach the CLOSE_WAIT state from the ESTAB state. - - Syntax errors in the action statements and tables could be easily - caught by an automatic syntax checker if the document used a more - formal description technique. This would be difficult to do for - [MILS83] since this document is not based on a formalized description - technique [BREM83]. - - The errors pointed out in this note have been submitted to DCA and - will be corrected in the next update of the MIL STD TCP - specification. - -6. Implementation of MIL Standard TCP - - In the discussion above, we pointed out several serious errors in the - specification of the Military Standard Transmission Control Protocol - [MILS83]. These errors imply that a TCP implementation that - faithfully conforms to the Military TCP standard will not be able to - - Receive data sent with a SYN packet. - - Establish a connection if the initial SYN packet is lost. - - Receive data when in the ESTAB state. - - It also follows from our discussion that an implementation of MIL - Standard TCP [MILS83] must include corrections mentioned above to get - a running TCP. - - The problems pointed out in this paper with the current specification - of the MIL Standard TCP [MILS83] are based on an initial - investigation of this protocol standard by the authors. - - - - -Sidhu & Blumer [Page 9] - - - -RFC 964 November 1985 -Some Problems with MIL-STD TCP - - -REFERENCES - - [BLUT83] Blumer, T. P., and Sidhu, D. P., "Mechanical Verification - and Automatic Implementation of Authentication Protocols - for Computer Networks", SDC Burroughs Report (1983), - submitted for publication. - - [BLUT82] Blumer, T. P., and Tenney, R. L., "A Formal Specification - Technique and Implementation Method for Protocols", - Computer Networks, Vol. 6, No. 3, July 1982, pp. 201-217. - - [BREM83] Breslin, M., Pollack, R. and Sidhu D. 
P., "Formalization of - DoD Protocol Specification Technique", SDC - Burroughs - Report 1983. - - [CERV74] Cerf, V., and Kahn, R., "A Protocol for Packet Network - Interconnection", IEEE Trans. Comm., May 1974. - - [MILS83] "Military Standard Transmission Control Protocol", - MIL-STD-1778, 12 August 1983. - - [POSJ81] Postel, J. (ed.), "DoD Standard Transmission Control - Protocol", Defense Advanced Research Projects Agency, - Information Processing Techniques Office, RFC-793, - September 1981. - - [SIDD83] Sidhu, D. P., and Blumer, T. P., "Verification of NBS Class - 4 Transport Protocol", SDC Burroughs Report (1983), - submitted for publication. - - [SUNC78] Sunshine, C., and Dalal, Y., "Connection Management in - Transport Protocols", Computer Networks, Vol. 2, pp.454-473 - (1978). - - - - - - - - - - - - - - - - -Sidhu & Blumer [Page 10] - diff --git a/kernel/picotcp/RFC/rfc1066.txt b/kernel/picotcp/RFC/rfc1066.txt deleted file mode 100644 index 66aae55..0000000 --- a/kernel/picotcp/RFC/rfc1066.txt +++ /dev/null @@ -1,5043 +0,0 @@ - - - - - - -Network Working Group K. McCloghrie -Request For Comments: 1066 M. Rose - TWG - August 1988 - - - Management Information Base for Network Management - of TCP/IP-based internets - - Table of Contents - - 1. Status of this Memo ................................... 1 - 2. IAB POLICY STATEMENT .................................. 2 - 3. Introduction .......................................... 2 - 4. Objects ............................................... 5 - 4.1 Object Groups ........................................ 5 - 4.2 Format of Definitions ................................ 6 - 5. Object Definitions .................................... 7 - 5.1 The System Group ..................................... 8 - 5.2 The Interfaces Group ................................. 10 - 5.2.1 The Interfaces Table ............................... 10 - 5.3 The Address Translation Group ........................ 
22 - 5.4 The IP Group ......................................... 25 - 5.4.1 The IP Address Table ............................... 33 - 5.4.2 The IP Routing Table ............................... 35 - 5.5 The ICMP Group ....................................... 42 - 5.6 The TCP Group ........................................ 52 - 5.7 The UDP Group ........................................ 61 - 5.8 The EGP Group ........................................ 63 - 5.8.1 The EGP Neighbor Table ............................. 64 - 6. Definitions ........................................... 67 - 7. Acknowledgements ...................................... 88 - 8. References ............................................ 89 - -1. Status of this Memo - - This memo provides the initial version of the Management Information - Base (MIB) for use with network management protocols in TCP/IP-based - internets in the short-term. In particular, together with its - companion memos which describe the structure of management - information along with the initial network management protocol, these - documents provide a simple, workable architecture and system for - managing TCP/IP-based internets and in particular the Internet. - - - - - - - - -McCloghrie & Rose [Page 1] - -RFC 1066 MIB August 1988 - - - This memo specifies a draft standard for the Internet community. - TCP/IP implementations in the Internet which are network manageable - are expected to adopt and implement this specification. - - Distribution of this memo is unlimited. - -2. IAB POLICY STATEMENT - - This MIB specification is the first edition of an evolving document - defining variables needed for monitoring and control of various - components of the Internet. Not all groups of defined variables are - mandatory for all Internet components. - - For example, the EGP group is mandatory for gateways using EGP but - not for hosts which should not be running EGP. 
Similarly, the TCP - group is mandatory for hosts running TCP but not for gateways which - aren't running it. What IS mandatory, however, is that all variables - of a group be supported if any element of the group is supported. - - It is expected that additional MIB groups and variables will be - defined over time to accommodate the monitoring and control needs of - new or changing components of the Internet. The MIB working group - will continue to refine this specification and projects a revision - incorporating new requirements in early 1989. - -3. Introduction - - As reported in RFC 1052, IAB Recommendations for the Development of - Internet Network Management Standards [1], the Internet Activities - Board has directed the Internet Engineering Task Force (IETF) to - create two new working groups in the area of network management. One - group is charged with the further specification and definition of - elements to be included in the Management Information Base. The - other is charged with defining the modifications to the Simple - Network Management Protocol (SNMP) to accommodate the short-term - needs of the network vendor and operator communities. The long-term - needs of the Internet community are to be met using the ISO CMIS/CMIP - [2,3] framework as a basis. An existing IETF working group, the - "NETMAN" group, is already engaged in defining the use of CMIS/CMIP - in a TCP/IP network, and will continue with responsibility for - addressing the longer-term requirements. - - The output of the MIB working group is to be provided to both the - SNMP working group and the NETMAN group, so as to ensure - compatibility of monitored items for both network management - frameworks. - - The MIB working group has produced this memo and a companion. The - - - -McCloghrie & Rose [Page 2] - -RFC 1066 MIB August 1988 - - - companion memo [4] defines a Structure for Management Information - (SMI) for use by the managed objects contained in the MIB. 
This memo - defines the list of managed objects. - - The IAB also urged the working groups to be "extremely sensitive to - the need to keep SNMP simple," and recommends that the MIB working - group take as its starting inputs the MIB definitions found in the - High-Level Entity Management Systems (HEMS) RFC 1024 [5], the initial - SNMP specification [6], and the CMIS/CMIP memos [7,8]. - - Thus, the list of managed objects defined here, has been derived by - taking only those elements which are considered essential. Since - such elements are essential, there is no need to allow the - implementation of individual objects, to be optional. Rather, all - compliant implementations will contain all applicable (see below) - objects defined in this memo. - - This approach of taking only the essential objects is NOT - restrictive, since the SMI defined in the companion memo provides - three extensibility mechanisms: one, the addition of new standard - objects through the definitions of new versions of the MIB; two, the - addition of widely-available but non-standard objects through the - multilateral subtree; and three, the addition of private objects - through the enterprises subtree. Such additional objects can not only - be used for vendor-specific elements, but also for experimentation as - required to further the knowledge of which other objects are - essential. - - The primary criterion for being considered essential was for an - object to be contained in all of the above referenced MIB - definitions. A few other objects have been included, but only if the - MIB working group believed they are truly essential. The detailed - list of criteria against which potential inclusions in this (initial) - MIB were considered, was: - - 1) An object needed to be essential for either fault or - configuration management. - - 2) Only weak control objects were permitted (by weak, it - is meant that tampering with them can do only limited - damage). 
This criterion reflects the fact that the - current management protocols are not sufficiently secure - to do more powerful control operations. - - 3) Evidence of current use and utility was required. - - 4) An attempt was made to limit the number of objects to - about 100 to make it easier for vendors to fully - - - -McCloghrie & Rose [Page 3] - -RFC 1066 MIB August 1988 - - - instrument their software. - - 5) To avoid redundant variables, it was required that no - object be included that can be derived from others in the - MIB. - - 6) Implementation specific objects (e.g., for BSD UNIX) - were excluded. - - 7) It was agreed to avoid heavily instrumenting critical - sections of code. The general guideline was one counter - per critical section per layer. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 4] - -RFC 1066 MIB August 1988 - - -4. Objects - - Managed objects are accessed via a virtual information store, termed - the Management Information Base or MIB. Objects in the MIB are - defined using Abstract Syntax Notation One (ASN.1) [9]. - - The mechanisms used for describing these objects are specified in the - companion memo. In particular, each object has a name, a syntax, and - an encoding. The name is an object identifier, an administratively - assigned name, which specifies an object type. The object type - together with an object instance serves to uniquely identify a - specific instantiation of the object. For human convenience, we - often use a textual string, termed the OBJECT DESCRIPTOR, to also - refer to the object type. - - The syntax of an object type defines the abstract data structure - corresponding to that object type. The ASN.1 language is used for - this purpose. However, the companion memo purposely restricts the - ASN.1 constructs which may be used. These restrictions are - explicitly made for simplicity. 
- - The encoding of an object type is simply how that object type is - represented using the object type's syntax. Implicitly tied to the - notion of an object type's syntax and encoding is how the object type - is represented when being transmitted on the network. This memo - specifies the use of the basic encoding rules of ASN.1 [10]. - -4.1. Object Groups - - Since this list of managed objects contains only the essential - elements, there is no need to allow individual objects to be - optional. Rather, the objects are arranged into the following - groups: - - - System - - Interfaces - - Address Translation - - IP - - ICMP - - TCP - - UDP - - EGP - - There are two reasons for defining these groups: one, to provide a - means of assigning object identifiers; two, to provide a method for - implementations of managed agents to know which objects they must - implement. This method is as follows: if the semantics of a group is - applicable to an implementation, then it must implement all objects - - - -McCloghrie & Rose [Page 5] - -RFC 1066 MIB August 1988 - - - in that group. For example, an implementation must implement the EGP - group if and only if it implements the EGP protocol. - -4.2. Format of Definitions - - The next section contains the specification of all object types - contained in the MIB. Following the conventions of the companion - memo, the object types are defined using the following fields: - - OBJECT: - ------- - A textual name, termed the OBJECT DESCRIPTOR, for the - object type, along with its corresponding OBJECT - IDENTIFIER. - - Syntax: - The abstract syntax for the object type, presented using - ASN.1. This must resolve to an instance of the ASN.1 - type ObjectSyntax defined in the SMI. - - Definition: - A textual description of the semantics of the object - type. Implementations should ensure that their - interpretation of the object type fulfills this - definition since this MIB is intended for use in multi- - vendor environments. 
As such it is vital that object - types have consistent meaning across all machines. - - Access: - One of read-only, read-write, write-only, or - not-accessible. - - Status: - One of mandatory, optional, or obsolete. - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 6] - -RFC 1066 MIB August 1988 - - -5. Object Definitions - - RFC1066-MIB { iso org(3) dod(6) internet(1) mgmt(2) 1 } - - DEFINITIONS ::= BEGIN - - IMPORTS - mgmt, OBJECT-TYPE, NetworkAddress, IpAddress, - Counter, Gauge, TimeTicks - FROM RFC1065-SMI; - - mib OBJECT IDENTIFIER ::= { mgmt 1 } - - system OBJECT IDENTIFIER ::= { mib 1 } - interfaces OBJECT IDENTIFIER ::= { mib 2 } - at OBJECT IDENTIFIER ::= { mib 3 } - ip OBJECT IDENTIFIER ::= { mib 4 } - icmp OBJECT IDENTIFIER ::= { mib 5 } - tcp OBJECT IDENTIFIER ::= { mib 6 } - udp OBJECT IDENTIFIER ::= { mib 7 } - egp OBJECT IDENTIFIER ::= { mib 8 } - - END - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 7] - -RFC 1066 MIB August 1988 - - -5.1. The System Group - - Implementation of the System group is mandatory for all - systems. - - OBJECT: - ------- - sysDescr { system 1 } - - Syntax: - OCTET STRING - - Definition: - A textual description of the entity. This value should - include the full name and version identification of the - system's hardware type, software operating-system, and - networking software. It is mandatory that this only - contain printable ASCII characters. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - sysObjectID { system 2 } - - Syntax: - OBJECT IDENTIFIER - - Definition: - The vendor's authoritative identification of the network - management subsystem contained in the entity. This value - is allocated within the SMI enterprises subtree - (1.3.6.1.4.1) and provides an easy and unambiguous means - for determining "what kind of box" is being managed. For - example, if vendor "Flintstones, Inc." 
was assigned the - subtree 1.3.6.1.4.1.42, it could assign the identifier - 1.3.6.1.4.1.42.1.1 to its "Fred Router". - - Access: - read-only. - - Status: - mandatory. - - - -McCloghrie & Rose [Page 8] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - sysUpTime { system 3 } - - Syntax: - TimeTicks - - Definition: - The time (in hundredths of a second) since the network - management portion of the system was last re-initialized. - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 9] - -RFC 1066 MIB August 1988 - - -5.2. The Interfaces Group - - Implementation of the Interfaces group is mandatory for all - systems. - - OBJECT: - ------- - ifNumber { interfaces 1 } - - Syntax: - INTEGER - - Definition: - The number of network interfaces (regardless of their - current state) on which this system can send/receive IP - datagrams. - - Access: - read-only. - - Status: - mandatory. - -5.2.1. The Interfaces Table - - OBJECT: - ------- - ifTable { interfaces 2 } - - Syntax: - SEQUENCE OF IfEntry - - Definition: - A list of interface entries. The number of entries is - given by the value of ifNumber. - - Access: - read-write. - - Status: - mandatory. 
- - OBJECT: - ------- - ifEntry { ifTable 1 } - - Syntax: - IfEntry ::= SEQUENCE { - - - -McCloghrie & Rose [Page 10] - -RFC 1066 MIB August 1988 - - - ifIndex - INTEGER, - ifDescr - OCTET STRING, - ifType - INTEGER, - ifMtu - INTEGER, - ifSpeed - Gauge, - ifPhysAddress - OCTET STRING, - ifAdminStatus - INTEGER, - ifOperStatus - INTEGER, - ifLastChange - TimeTicks, - ifInOctets - Counter, - ifInUcastPkts - Counter, - ifInNUcastPkts - Counter, - ifInDiscards - Counter, - ifInErrors - Counter, - ifInUnknownProtos - Counter, - ifOutOctets - Counter, - ifOutUcastPkts - Counter, - ifOutNUcastPkts - Counter, - ifOutDiscards - Counter, - ifOutErrors - Counter, - ifOutQLen - Gauge - } - - Definition: - An interface entry containing objects at the subnetwork - layer and below for a particular interface. - - - - -McCloghrie & Rose [Page 11] - -RFC 1066 MIB August 1988 - - - Access: - read-write. - - Status: - mandatory. - - - We now consider the individual components of each interface - entry: - - - OBJECT: - ------- - ifIndex { ifEntry 1 } - - Syntax: - INTEGER - - Definition: - A unique value for each interface. Its value ranges - between 1 and the value of ifNumber. The value for each - interface must remain constant at least from one re- - initialization of the entity's network management system - to the next re-initialization. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifDescr { ifEntry 2 } - - Syntax: - OCTET STRING - - Definition: - A text string containing information about the interface. - This string should include the name of the manufacturer, - the product name and the version of the hardware - interface. The string is intended for presentation to a - human; it must not contain anything but printable ASCII - characters. - - - - - -McCloghrie & Rose [Page 12] - -RFC 1066 MIB August 1988 - - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - ifType { ifEntry 3 } - - Syntax: - INTEGER { - other(1), -- none of the following - regular1822(2), - hdh1822(3), - ddn-x25(4), - rfc877-x25(5), - ethernet-csmacd(6), - iso88023-csmacd(7), - iso88024-tokenBus(8), - iso88025-tokenRing(9), - iso88026-man(10), - starLan(11), - proteon-10MBit(12), - proteon-80MBit(13), - hyperchannel(14), - fddi(15), - lapb(16), - sdlc(17), - t1-carrier(18), - cept(19), -- european equivalent of T-1 - basicIsdn(20), - primaryIsdn(21), - -- proprietary serial - propPointToPointSerial(22) - } - - Definition: - The type of interface, distinguished according to the - physical/link/network protocol(s) immediately "below" IP - in the protocol stack. - - Access: - read-only. - - Status: - mandatory. - - - -McCloghrie & Rose [Page 13] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ifMtu { ifEntry 4 } - - Syntax: - INTEGER - - Definition: - The size of the largest IP datagram which can be - sent/received on the interface, specified in octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifSpeed { ifEntry 5 } - - Syntax: - Gauge - - Definition: - An estimate of the interface's current bandwidth in bits - per second. For interfaces which do not vary in - bandwidth or for those where no accurate estimation can - be made, this object should contain the nominal - bandwidth. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifPhysAddress { ifEntry 6 } - - Syntax: - OCTET STRING - - Definition: - The interface's address at the protocol layer immediately - - - -McCloghrie & Rose [Page 14] - -RFC 1066 MIB August 1988 - - - "below" IP in the protocol stack. For interfaces which - do not have such an address (e.g., a serial line), this - object should contain an octet string of zero length. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - ifAdminStatus { ifEntry 7 } - - Syntax: - INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - - Definition: - The desired state of the interface. The testing(3) state - indicates that no operational packets can be passed. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ifOperStatus { ifEntry 8 } - - Syntax: - INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - - Definition: - The current operational state of the interface. The - testing(3) state indicates that no operational packets - can be passed. - - - -McCloghrie & Rose [Page 15] - -RFC 1066 MIB August 1988 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifLastChange { ifEntry 9 } - - Syntax: - TimeTicks - - Definition: - The value of sysUpTime at the time the interface entered - its current operational state. If the current state was - entered prior to the last re-initialization of the local - network management subsystem, then this object contains a - zero value. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInOctets { ifEntry 10 } - - Syntax: - Counter - - Definition: - The total number of octets received on the interface, - including framing characters. - - Access: - read-only. - - Status: - mandatory. - - - - - - - -McCloghrie & Rose [Page 16] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ifInUcastPkts { ifEntry 11 } - - Syntax: - Counter - - Definition: - The number of (subnet) unicast packets delivered to a - higher-layer protocol. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInNUcastPkts { ifEntry 12 } - - Syntax: - Counter - - Definition: - The number of non-unicast (i.e., subnet broadcast or - subnet multicast) packets delivered to a higher-layer - protocol. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - ifInDiscards { ifEntry 13 } - - Syntax: - Counter - - Definition: - The number of inbound packets which were chosen to be - discarded even though no errors had been detected to - prevent their being deliverable to a higher-layer - - - -McCloghrie & Rose [Page 17] - -RFC 1066 MIB August 1988 - - - protocol. One possible reason for discarding such a - packet could be to free up buffer space. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInErrors { ifEntry 14 } - - Syntax: - Counter - - Definition: - The number of inbound packets that contained errors - preventing them from being deliverable to a higher-layer - protocol. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInUnknownProtos { ifEntry 15 } - - Syntax: - Counter - - Definition: - The number of packets received via the interface which - were discarded because of an unknown or unsupported - protocol. - - Access: - read-only. - - Status: - mandatory. - - - - - -McCloghrie & Rose [Page 18] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ifOutOctets { ifEntry 16 } - - Syntax: - Counter - - Definition: - The total number of octets transmitted out of the - interface, including framing characters. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutUcastPkts { ifEntry 17 } - - Syntax: - Counter - - Definition: - The total number of packets that higher-level protocols - requested be transmitted to a subnet-unicast address, - including those that were discarded or not sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutNUcastPkts { ifEntry 18 } - - Syntax: - Counter - - Definition: - The total number of packets that higher-level protocols - requested be transmitted to a non-unicast (i.e., a subnet - broadcast or subnet multicast) address, including those - - - -McCloghrie & Rose [Page 19] - -RFC 1066 MIB August 1988 - - - that were discarded or not sent. 
- - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutDiscards { ifEntry 19 } - - Syntax: - Counter - - Definition: - The number of outbound packets which were chosen to be - discarded even though no errors had been detected to - prevent their being transmitted. One possible reason for - discarding such a packet could be to free up buffer - space. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutErrors { ifEntry 20 } - - Syntax: - Counter - - Definition: - The number of outbound packets that could not be - transmitted because of errors. - - Access: - read-only. - - Status: - mandatory. - - - - - -McCloghrie & Rose [Page 20] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ifOutQLen { ifEntry 21 } - - Syntax: - Gauge - - Definition: - The length of the output packet queue (in packets). - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 21] - -RFC 1066 MIB August 1988 - - -5.3. The Address Translation Group - - Implementation of the Address Translation group is mandatory - for all systems. - - The Address Translation group contains one table which is the - union across all interfaces of the translation tables for - converting a NetworkAddress (e.g., an IP address) into a - subnetwork-specific address. For lack of a better term, this - document refers to such a subnetwork-specific address as a - "physical" address. - - Examples of such translation tables are: for broadcast media - where ARP is in use, the translation table is equivalent to - the ARP cache; or, on an X.25 network where non-algorithmic - translation to X.121 addresses is required, the translation - table contains the NetworkAddress to X.121 address - equivalences. 
- - OBJECT: - ------- - atTable { at 1 } - - Syntax: - SEQUENCE OF AtEntry - - Definition: - The Address Translation tables contain the NetworkAddress - to "physical" address equivalences. Some interfaces do - not use translation tables for determining address - equivalences (e.g., DDN-X.25 has an algorithmic method); - if all interfaces are of this type, then the Address - Translation table is empty, i.e., has zero entries. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - atEntry { atTable 1 } - - Syntax: - AtEntry ::= SEQUENCE { - atIfIndex - - - -McCloghrie & Rose [Page 22] - -RFC 1066 MIB August 1988 - - - INTEGER, - atPhysAddress - OCTET STRING, - atNetAddress - NetworkAddress - } - - Definition: - Each entry contains one NetworkAddress to "physical" - address equivalence. - - Access: - read-write. - - Status: - mandatory. - - We now consider the individual components of each Address - Translation table entry: - - - OBJECT: - ------- - atIfIndex { atEntry 1 } - - Syntax: - INTEGER - - Definition: - The interface on which this entry's equivalence is - effective. The interface identified by a particular - value of this index is the same interface as identified - by the same value of ifIndex. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - atPhysAddress { atEntry 2 } - - Syntax: - OCTET STRING - - - - -McCloghrie & Rose [Page 23] - -RFC 1066 MIB August 1988 - - - Definition: - The media-dependent "physical" address. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - atNetAddress { atEntry 3 } - - Syntax: - NetworkAddress - - Definition: - The NetworkAddress (e.g., the IP address) corresponding to - the media-dependent "physical" address. - - Access: - read-write. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 24] - -RFC 1066 MIB August 1988 - - -5.4. The IP Group - - Implementation of the IP group is mandatory for all systems. 
- - - OBJECT: - ------- - ipForwarding { ip 1 } - - Syntax: - INTEGER { - gateway(1), -- entity forwards datagrams - host(2) -- entity does NOT forward datagrams - } - - Definition: - The indication of whether this entity is acting as an IP - gateway in respect to the forwarding of datagrams - received by, but not addressed to, this entity. IP - gateways forward datagrams; Hosts do not (except those - Source-Routed via the host). - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipDefaultTTL { ip 2 } - - Syntax: - INTEGER - - Definition: - The default value inserted into the Time-To-Live field of - the IP header of datagrams originated at this entity, - whenever a TTL value is not supplied by the transport - layer protocol. - - Access: - read-write. - - Status: - mandatory. - - - - -McCloghrie & Rose [Page 25] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ipInReceives { ip 3 } - - Syntax: - Counter - - Definition: - The total number of input datagrams received from - interfaces, including those received in error. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInHdrErrors { ip 4 } - - Syntax: - Counter - - Definition: - The number of input datagrams discarded due to errors in - their IP headers, including bad checksums, version number - mismatch, other format errors, time-to-live exceeded, - errors discovered in processing their IP options, etc. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInAddrErrors { ip 5 } - - Syntax: - Counter - - Definition: - The number of input datagrams discarded because the IP - address in their IP header's destination field was not a - - - -McCloghrie & Rose [Page 26] - -RFC 1066 MIB August 1988 - - - valid address to be received at this entity. This count - includes invalid addresses (e.g., 0.0.0.0) and addresses - of unsupported Classes (e.g., Class E). 
For entities - which are not IP Gateways and therefore do not forward - datagrams, this counter includes datagrams discarded - because the destination address was not a local address. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipForwDatagrams { ip 6 } - - Syntax: - Counter - - Definition: - The number of input datagrams for which this entity was - not their final IP destination, as a result of which an - attempt was made to find a route to forward them to that - final destination. In entities which do not act as IP - Gateways, this counter will include only those packets - which were Source-Routed via this entity, and the - Source-Route option processing was successful. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInUnknownProtos { ip 7 } - - Syntax: - Counter - - Definition: - The number of locally-addressed datagrams received - successfully but discarded because of an unknown or - unsupported protocol. - - - -McCloghrie & Rose [Page 27] - -RFC 1066 MIB August 1988 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInDiscards { ip 8 } - - Syntax: - Counter - - Definition: - The number of input IP datagrams for which no problems - were encountered to prevent their continued processing, - but which were discarded (e.g. for lack of buffer space). - Note that this counter does not include any datagrams - discarded while awaiting re-assembly. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInDelivers { ip 9 } - - Syntax: - Counter - - Definition: - The total number of input datagrams successfully - delivered to IP user-protocols (including ICMP). - - Access: - read-only. - - Status: - mandatory. 
- - OBJECT: - ------- - ipOutRequests { ip 10 } - - - -McCloghrie & Rose [Page 28] - -RFC 1066 MIB August 1988 - - - Syntax: - Counter - - Definition: - The total number of IP datagrams which local IP user- - protocols (including ICMP) supplied to IP in requests for - transmission. Note that this counter does not include - any datagrams counted in ipForwDatagrams. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipOutDiscards { ip 11 } - - Syntax: - Counter - - Definition: - The number of output IP datagrams for which no problem - was encountered to prevent their transmission to their - destination, but which were discarded (e.g., for lack of - buffer space). Note that this counter would include - datagrams counted in ipForwDatagrams if any such packets - met this (discretionary) discard criterion. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipOutNoRoutes { ip 12 } - - Syntax: - Counter - - - - - - - -McCloghrie & Rose [Page 29] - -RFC 1066 MIB August 1988 - - - Definition: - The number of IP datagrams discarded because no route - could be found to transmit them to their destination. - Note that this counter includes any packets counted in - ipForwDatagrams which meet this "no-route" criterion. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipReasmTimeout { ip 13 } - - Syntax: - INTEGER - - Definition: - The maximum number of seconds which received fragments - are held while they are awaiting reassembly at this - entity. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipReasmReqds { ip 14 } - - Syntax: - Counter - - Definition: - The number of IP fragments received which needed to be - reassembled at this entity. - - Access: - read-only. - - Status: - mandatory. 
- - - -McCloghrie & Rose [Page 30] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ipReasmOKs { ip 15 } - - Syntax: - Counter - - Definition: - The number of IP datagrams successfully re-assembled. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipReasmFails { ip 16 } - - Syntax: - Counter - - Definition: - The number of failures detected by the IP re-assembly - algorithm (for whatever reason: timed out, errors, etc.). - - Note that this is not necessarily a count of discarded IP - fragments since some algorithms (notably RFC 815's) can - lose track of the number of fragments by combining them - as they are received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipFragOKs { ip 17 } - - Syntax: - Counter - - - - - -McCloghrie & Rose [Page 31] - -RFC 1066 MIB August 1988 - - - Definition: - The number of IP datagrams that have been successfully - fragmented at this entity. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipFragFails { ip 18 } - - Syntax: - Counter - - Definition: - The number of IP datagrams that have been discarded - because they needed to be fragmented at this entity but - could not be, e.g., because their "Don't Fragment" flag - was set. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipFragCreates { ip 19 } - - Syntax: - Counter - - Definition: - The number of IP datagram fragments that have been - generated as a result of fragmentation at this entity. - - Access: - read-only. - - Status: - mandatory. - - - - -McCloghrie & Rose [Page 32] - -RFC 1066 MIB August 1988 - - -5.4.1. The IP Address Table - - The IP Address table contains this entity's IP addressing - information. - - - OBJECT: - ------- - ipAddrTable { ip 20 } - - Syntax: - SEQUENCE OF IpAddrEntry - - Definition: - The table of addressing information relevant to this - entity's IP addresses. - - Access: - read-only. - - Status: - mandatory.
- - - OBJECT: - ------- - ipAddrEntry { ipAddrTable 1 } - - Syntax: - IpAddrEntry ::= SEQUENCE { - ipAdEntAddr - IpAddress, - ipAdEntIfIndex - INTEGER, - ipAdEntNetMask - IpAddress, - ipAdEntBcastAddr - INTEGER - } - - Definition: - The addressing information for one of this entity's IP - addresses. - - Access: - read-only. - - - - - -McCloghrie & Rose [Page 33] - -RFC 1066 MIB August 1988 - - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntAddr { ipAddrEntry 1 } - - Syntax: - IpAddress - - Definition: - The IP address to which this entry's addressing - information pertains. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntIfIndex { ipAddrEntry 2 } - - Syntax: - INTEGER - - Definition: - The index value which uniquely identifies the interface - to which this entry is applicable. The interface - identified by a particular value of this index is the - same interface as identified by the same value of - ifIndex. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntNetMask { ipAddrEntry 3 } - - - - - -McCloghrie & Rose [Page 34] - -RFC 1066 MIB August 1988 - - - Syntax: - IpAddress - - Definition: - The subnet mask associated with the IP address of this - entry. The value of the mask is an IP address with all - the network bits set to 1 and all the hosts bits set to - 0. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntBcastAddr { ipAddrEntry 4 } - - Syntax: - INTEGER - - Definition: - The value of the least-significant bit in the IP - broadcast address used for sending datagrams on the - (logical) interface associated with the IP address of - this entry. For example, when the Internet standard - all-ones broadcast address is used, the value will be 1. - - Access: - read-only. - - Status: - mandatory. - -5.4.2. The IP Routing Table - - The IP Routing Table contains an entry for each route - presently known to this entity. 
Note that the action to be - taken in response to a request to read a non-existent entry, - is specific to the network management protocol being used. - - - OBJECT: - ------- - ipRoutingTable { ip 21 } - - - - -McCloghrie & Rose [Page 35] - -RFC 1066 MIB August 1988 - - - Syntax: - SEQUENCE OF IpRouteEntry - - Definition: - This entity's IP Routing table. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteEntry { ipRoutingTable 1 } - - Syntax: - IpRouteEntry ::= SEQUENCE { - ipRouteDest - IpAddress, - ipRouteIfIndex - INTEGER, - ipRouteMetric1 - INTEGER, - ipRouteMetric2 - INTEGER, - ipRouteMetric3 - INTEGER, - ipRouteMetric4 - INTEGER, - ipRouteNextHop - IpAddress, - ipRouteType - INTEGER, - ipRouteProto - INTEGER, - ipRouteAge - INTEGER - } - - Definition: - A route to a particular destination. - - Access: - read-write. - - - - - -McCloghrie & Rose [Page 36] - -RFC 1066 MIB August 1988 - - - Status: - mandatory. - - We now consider the individual components of each route in the - IP Routing Table: - - - OBJECT: - ------- - ipRouteDest { ipRouteEntry 1 } - - Syntax: - IpAddress - - Definition: - The destination IP address of this route. An entry with - a value of 0.0.0.0 is considered a default route. - Multiple such default routes can appear in the table, but - access to such multiple entries is dependent on the - table-access mechanisms defined by the network management - protocol in use. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteIfIndex { ipRouteEntry 2 } - - Syntax: - INTEGER - - Definition: - The index value which uniquely identifies the local - interface through which the next hop of this route should - be reached. The interface identified by a particular - value of this index is the same interface as identified - by the same value of ifIndex. - - Access: - read-write. - - Status: - mandatory. 
- - - -McCloghrie & Rose [Page 37] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ipRouteMetric1 { ipRouteEntry 3 } - - Syntax: - INTEGER - - Definition: - The primary routing metric for this route. The semantics - of this metric are determined by the routing-protocol - specified in the route's ipRouteProto value. If this - metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteMetric2 { ipRouteEntry 4 } - - Syntax: - INTEGER - - Definition: - An alternate routing metric for this route. The - semantics of this metric are determined by the routing- - protocol specified in the route's ipRouteProto value. If - this metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteMetric3 { ipRouteEntry 5 } - - Syntax: - INTEGER - - - - - -McCloghrie & Rose [Page 38] - -RFC 1066 MIB August 1988 - - - Definition: - An alternate routing metric for this route. The - semantics of this metric are determined by the routing- - protocol specified in the route's ipRouteProto value. If - this metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteMetric4 { ipRouteEntry 6 } - - Syntax: - INTEGER - - Definition: - An alternate routing metric for this route. The - semantics of this metric are determined by the routing- - protocol specified in the route's ipRouteProto value. If - this metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteNextHop { ipRouteEntry 7 } - - Syntax: - IpAddress - - Definition: - The IP address of the next hop of this route. - - Access: - read-write. - - Status: - mandatory. 
- - - -McCloghrie & Rose [Page 39] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - ipRouteType { ipRouteEntry 8 } - - Syntax: - INTEGER { - other(1), -- none of the following - - invalid(2), -- an invalidated route - - -- route to directly - direct(3), -- connected (sub-)network - - -- route to a non-local - remote(4) -- host/network/sub-network - } - - Definition: - The type of route. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteProto { ipRouteEntry 9 } - - Syntax: - INTEGER { - other(1), -- none of the following - - -- non-protocol information, - -- e.g., manually configured - local(2), -- entries - - -- set via a network management - netmgmt(3), -- protocol - - -- obtained via ICMP, - icmp(4), -- e.g., Redirect - - -- the remaining values are - -- all gateway routing protocols - egp(5), - - - -McCloghrie & Rose [Page 40] - -RFC 1066 MIB August 1988 - - - ggp(6), - hello(7), - rip(8), - is-is(9), - es-is(10), - ciscoIgrp(11), - bbnSpfIgp(12), - oigp(13) - } - - Definition: - The routing mechanism via which this route was learned. - Inclusion of values for gateway routing protocols is not - intended to imply that hosts should support those - protocols. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteAge { ipRouteEntry 10 } - - Syntax: - INTEGER - - Definition: - The number of seconds since this route was last updated - or otherwise determined to be correct. Note that no - semantics of "too old" can be implied except through - knowledge of the routing protocol by which the route was - learned. - - Access: - read-write. - - Status: - mandatory. - - - - - - - - - -McCloghrie & Rose [Page 41] - -RFC 1066 MIB August 1988 - - -5.5. The ICMP Group - - Implementation of the ICMP group is mandatory for all systems. - - The ICMP group contains the ICMP input and output statistics.
- - Note that individual counters for ICMP message (sub-)codes have been - omitted from this (version of the) MIB for simplicity. - - - OBJECT: - ------- - icmpInMsgs { icmp 1 } - - Syntax: - Counter - - Definition: - The total number of ICMP messages which the entity - received. Note that this counter includes all those - counted by icmpInErrors. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInErrors { icmp 2 } - - Syntax: - Counter - - Definition: - The number of ICMP messages which the entity received but - determined as having errors (bad ICMP checksums, bad - length, etc.). - - Access: - read-only. - - Status: - mandatory. - - - - - -McCloghrie & Rose [Page 42] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - icmpInDestUnreachs { icmp 3 } - - Syntax: - Counter - - Definition: - The number of ICMP Destination Unreachable messages - received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInTimeExcds { icmp 4 } - - Syntax: - Counter - - Definition: - The number of ICMP Time Exceeded messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInParmProbs { icmp 5 } - - Syntax: - Counter - - Definition: - The number of ICMP Parameter Problem messages received. - - Access: - read-only. - - - - -McCloghrie & Rose [Page 43] - -RFC 1066 MIB August 1988 - - - Status: - mandatory. - - - OBJECT: - ------- - icmpInSrcQuenchs { icmp 6 } - - Syntax: - Counter - - Definition: - The number of ICMP Source Quench messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInRedirects { icmp 7 } - - Syntax: - Counter - - Definition: - The number of ICMP Redirect messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInEchos { icmp 8 } - - Syntax: - Counter - - Definition: - The number of ICMP Echo (request) messages received. 
- - - - -McCloghrie & Rose [Page 44] - -RFC 1066 MIB August 1988 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInEchoReps { icmp 9 } - - Syntax: - Counter - - Definition: - The number of ICMP Echo Reply messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInTimestamps { icmp 10 } - - Syntax: - Counter - - Definition: - The number of ICMP Timestamp (request) messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInTimestampReps { icmp 11 } - - Syntax: - Counter - - - - -McCloghrie & Rose [Page 45] - -RFC 1066 MIB August 1988 - - - Definition: - The number of ICMP Timestamp Reply messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInAddrMasks { icmp 12 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Request messages - received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInAddrMaskReps { icmp 13 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Reply messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutMsgs { icmp 14 } - - - -McCloghrie & Rose [Page 46] - -RFC 1066 MIB August 1988 - - - Syntax: - Counter - - Definition: - The total number of ICMP messages which this entity - attempted to send. Note that this counter includes all - those counted by icmpOutErrors. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutErrors { icmp 15 } - - Syntax: - Counter - - Definition: - The number of ICMP messages which this entity did not - send due to problems discovered within ICMP such as a - lack of buffers. This value should not include errors - discovered outside the ICMP layer such as the inability - of IP to route the resultant datagram. In some - implementations there may be no types of error which - contribute to this counter's value. 
- - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutDestUnreachs { icmp 16 } - - Syntax: - Counter - - Definition: - The number of ICMP Destination Unreachable messages sent. - - - - -McCloghrie & Rose [Page 47] - -RFC 1066 MIB August 1988 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutTimeExcds { icmp 17 } - - Syntax: - Counter - - Definition: - The number of ICMP Time Exceeded messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutParmProbs { icmp 18 } - - Syntax: - Counter - - Definition: - The number of ICMP Parameter Problem messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutSrcQuenchs { icmp 19 } - - Syntax: - Counter - - - - -McCloghrie & Rose [Page 48] - -RFC 1066 MIB August 1988 - - - Definition: - The number of ICMP Source Quench messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutRedirects { icmp 20 } - - Syntax: - Counter - - Definition: - The number of ICMP Redirect messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutEchos { icmp 21 } - - Syntax: - Counter - - Definition: - The number of ICMP Echo (request) messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutEchoReps { icmp 22 } - - - - -McCloghrie & Rose [Page 49] - -RFC 1066 MIB August 1988 - - - Syntax: - Counter - - Definition: - The number of ICMP Echo Reply messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutTimestamps { icmp 23 } - - Syntax: - Counter - - Definition: - The number of ICMP Timestamp (request) messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutTimestampReps { icmp 24 } - - Syntax: - Counter - - Definition: - The number of ICMP Timestamp Reply messages sent. - - Access: - read-only. 
- - Status: - mandatory. - - - - - - -McCloghrie & Rose [Page 50] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - icmpOutAddrMasks { icmp 25 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Request messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutAddrMaskReps { icmp 26 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Reply messages sent. - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 51] - -RFC 1066 MIB August 1988 - - -5.6. The TCP Group - - Implementation of the TCP group is mandatory for all systems - that implement the TCP protocol. - - Note that instances of object types that represent information - about a particular TCP connection are transient; they persist - only as long as the connection in question. - - OBJECT: - ------- - tcpRtoAlgorithm { tcp 1 } - - Syntax: - INTEGER { - other(1), -- none of the following - constant(2), -- a constant rto - rsre(3), -- MIL-STD-1778, Appendix B - vanj(4) -- Van Jacobson's algorithm [11] - } - - Definition: - The algorithm used to determine the timeout value used - for retransmitting unacknowledged octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpRtoMin { tcp 2 } - - Syntax: - INTEGER - - Definition: - The minimum value permitted by a TCP implementation - for the retransmission timeout, measured in - milliseconds. More refined semantics for objects - of this type depend upon the algorithm used to - determine the retransmission timeout. In particular, - when the timeout algorithm is rsre(3), an object - of this type has the semantics of the LBOUND - quantity described in RFC 793. - - - -McCloghrie & Rose [Page 52] - -RFC 1066 MIB August 1988 - - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - tcpRtoMax { tcp 3 } - - Syntax: - INTEGER - - Definition: - The maximum value permitted by a TCP implementation - for the retransmission timeout, measured - in milliseconds. More refined semantics for objects - of this type depend upon the algorithm used to - determine the retransmission timeout. In particular, - when the timeout algorithm is rsre(3), an object of - this type has the semantics of the UBOUND quantity - described in RFC 793. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpMaxConn { tcp 4 } - - Syntax: - INTEGER - - Definition: - The limit on the total number of TCP connections the - entity can support. In entities where the maximum - number of connections is dynamic, this object should - contain the value "-1". - - Access: - read-only. - - - - - -McCloghrie & Rose [Page 53] - -RFC 1066 MIB August 1988 - - - Status: - mandatory. - - - OBJECT: - ------- - tcpActiveOpens { tcp 5 } - - Syntax: - Counter - - Definition: - The number of times TCP connections have made a direct - transition to the SYN-SENT state from the CLOSED - state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpPassiveOpens { tcp 6 } - - Syntax: - Counter - - Definition: - The number of times TCP connections have made a direct - transition to the SYN-RCVD state from the LISTEN - state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpAttemptFails { tcp 7 } - - Syntax: - Counter - - - -McCloghrie & Rose [Page 54] - -RFC 1066 MIB August 1988 - - - Definition: - The number of times TCP connections have made a direct - transition to the CLOSED state from either the - SYN-SENT state or the SYN-RCVD state, plus the number - of times TCP connections have made a direct transition - to the LISTEN state from the SYN-RCVD state. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - tcpEstabResets { tcp 8 } - - Syntax: - Counter - - Definition: - The number of times TCP connections have made a direct - transition to the CLOSED state from either the - ESTABLISHED state or the CLOSE-WAIT state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpCurrEstab { tcp 9 } - - Syntax: - Gauge - - Definition: - The number of TCP connections for which the current - state is either ESTABLISHED or CLOSE-WAIT. - - Access: - read-only. - - - - - -McCloghrie & Rose [Page 55] - -RFC 1066 MIB August 1988 - - - Status: - mandatory. - - - OBJECT: - ------- - tcpInSegs { tcp 10 } - - Syntax: - Counter - - Definition: - The total number of segments received, including those - received in error. This count includes segments - received on currently established connections. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpOutSegs { tcp 11 } - - Syntax: - Counter - - Definition: - The total number of segments sent, including those on - current connections but excluding those containing - only retransmitted octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpRetransSegs { tcp 12 } - - Syntax: - Counter - - - -McCloghrie & Rose [Page 56] - -RFC 1066 MIB August 1988 - - - Definition: - The total number of segments retransmitted - that is, - the number of TCP segments transmitted containing one - or more previously transmitted octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnTable { tcp 13 } - - Syntax: - SEQUENCE OF TcpConnEntry - - Definition: - A table containing TCP connection-specific - information. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - tcpConnEntry { tcpConnTable 1 } - - Syntax: - TcpConnEntry ::= SEQUENCE { - tcpConnState - INTEGER, - tcpConnLocalAddress - IpAddress, - tcpConnLocalPort - INTEGER (0..65535), - tcpConnRemAddress - IpAddress, - tcpConnRemPort - INTEGER (0..65535) - } - - - - -McCloghrie & Rose [Page 57] - -RFC 1066 MIB August 1988 - - - Definition: - Information about a particular current TCP connection. - An object of this type is transient, in that it ceases - to exist when (or soon after) the connection makes the - transition to the CLOSED state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnState { tcpConnEntry 1 } - - Syntax: - INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11) - } - - Definition: - The state of this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnLocalAddress { tcpConnEntry 2 } - - Syntax: - IpAddress - - - -McCloghrie & Rose [Page 58] - -RFC 1066 MIB August 1988 - - - Definition: - The local IP address for this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnLocalPort { tcpConnEntry 3 } - - Syntax: - INTEGER (0..65535) - - Definition: - The local port number for this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnRemAddress { tcpConnEntry 4 } - - Syntax: - IpAddress - - Definition: - The remote IP address for this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnRemPort { tcpConnEntry 5 } - - - - -McCloghrie & Rose [Page 59] - -RFC 1066 MIB August 1988 - - - Syntax: - INTEGER (0..65535) - - Definition: - The remote port number for this TCP connection. - - Access: - read-only. - - Status: - mandatory. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 60] - -RFC 1066 MIB August 1988 - - -5.7. The UDP Group - - Implementation of the UDP group is mandatory for all systems - which implement the UDP protocol. - - OBJECT: - ------- - udpInDatagrams { udp 1 } - - Syntax: - Counter - - Definition: - The total number of UDP datagrams delivered to UDP - users. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - udpNoPorts { udp 2 } - - Syntax: - Counter - - Definition: - The total number of received UDP datagrams for which - there was no application at the destination port. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - udpInErrors { udp 3 } - - Syntax: - Counter - - - - -McCloghrie & Rose [Page 61] - -RFC 1066 MIB August 1988 - - - Definition: - The number of received UDP datagrams that could not be - delivered for reasons other than the lack of an - application at the destination port. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - udpOutDatagrams { udp 4 } - - Syntax: - Counter - - Definition: - The total number of UDP datagrams sent from this - entity. - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 62] - -RFC 1066 MIB August 1988 - - -5.8. The EGP Group - - Implementation of the EGP group is mandatory for all systems - which implement the EGP protocol. - - OBJECT: - ------- - egpInMsgs { egp 1 } - - Syntax: - Counter - - Definition: - The number of EGP messages received without error. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - egpInErrors { egp 2 } - - Syntax: - Counter - - Definition: - The number of EGP messages received that proved to be - in error. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - egpOutMsgs { egp 3 } - - Syntax: - Counter - - - - - -McCloghrie & Rose [Page 63] - -RFC 1066 MIB August 1988 - - - Definition: - The total number of locally generated EGP messages. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - egpOutErrors { egp 4 } - - Syntax: - Counter - - Definition: - The number of locally generated EGP messages not sent - due to resource limitations within an EGP entity. - - Access: - read-only. - - Status: - mandatory. - -5.8.1. The EGP Neighbor Table - - The Egp Neighbor table contains information about this entity's EGP - neighbors. - - - OBJECT: - ------- - egpNeighTable { egp 5 } - - Syntax: - SEQUENCE OF EgpNeighEntry - - Definition: - The EGP neighbor table. - - Access: - read-only. - - Status: - mandatory. - - - -McCloghrie & Rose [Page 64] - -RFC 1066 MIB August 1988 - - - OBJECT: - ------- - egpNeighEntry { egpNeighTable 1 } - - Syntax: - EgpNeighEntry ::= SEQUENCE { - egpNeighState - INTEGER, - egpNeighAddr - IpAddress - } - - Definition: - Information about this entity's relationship with a - particular EGP neighbor. - - Access: - read-only. - - Status: - mandatory. - - - We now consider the individual components of each EGP - neighbor entry: - - - OBJECT: - ------- - egpNeighState { egpNeighEntry 1 } - - Syntax: - INTEGER { - idle(1), - acquisition(2), - down(3), - up(4), - cease(5) - } - - Definition: - The EGP state of the local system with respect to this - entry's EGP neighbor. Each EGP state is represented - by a value that is one greater than the numerical - value associated with said state in RFC 904. - - Access: - read-only. - - - -McCloghrie & Rose [Page 65] - -RFC 1066 MIB August 1988 - - - Status: - mandatory. - - - OBJECT: - ------- - egpNeighAddr { egpNeighEntry 2 } - - Syntax: - IpAddress - - Definition: - The IP address of this entry's EGP neighbor. - - Access: - read-only. - - Status: - mandatory. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 66] - -RFC 1066 MIB August 1988 - - -6. Definitions - - RFC1066-MIB { iso org(3) dod(6) internet(1) mgmt(2) 1 } - - DEFINITIONS ::= BEGIN - - IMPORTS - mgmt, OBJECT-TYPE, NetworkAddress, IpAddress, - Counter, Gauge, TimeTicks - FROM RFC1065-SMI; - - mib OBJECT IDENTIFIER ::= { mgmt 1 } - - system OBJECT IDENTIFIER ::= { mib 1 } - interfaces OBJECT IDENTIFIER ::= { mib 2 } - at OBJECT IDENTIFIER ::= { mib 3 } - ip OBJECT IDENTIFIER ::= { mib 4 } - icmp OBJECT IDENTIFIER ::= { mib 5 } - tcp OBJECT IDENTIFIER ::= { mib 6 } - udp OBJECT IDENTIFIER ::= { mib 7 } - egp OBJECT IDENTIFIER ::= { mib 8 } - - -- object types - - -- the System group - - sysDescr OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-only - STATUS mandatory - ::= { system 1 } - - sysObjectID OBJECT-TYPE - SYNTAX OBJECT IDENTIFIER - ACCESS read-only - STATUS mandatory - ::= { system 2 } - - sysUpTime OBJECT-TYPE - SYNTAX TimeTicks - ACCESS read-only - STATUS mandatory - ::= { system 3 } - - -- the Interfaces group - - ifNumber OBJECT-TYPE - SYNTAX INTEGER - - - -McCloghrie & Rose [Page 67] - -RFC 1066 MIB August 1988 - - - ACCESS read-only - STATUS mandatory - ::= { interfaces 1 } - - -- the Interfaces table - - ifTable OBJECT-TYPE - SYNTAX SEQUENCE OF IfEntry - ACCESS read-write - STATUS mandatory - ::= { interfaces 2 } - - ifEntry OBJECT-TYPE - SYNTAX IfEntry - ACCESS read-write - STATUS mandatory - ::= { ifTable 1 } - - IfEntry ::= SEQUENCE { - ifIndex - INTEGER, - ifDescr - OCTET STRING, - ifType - INTEGER, - ifMtu - INTEGER, - ifSpeed - Gauge, - ifPhysAddress - OCTET STRING, - ifAdminStatus - INTEGER, - ifOperStatus - INTEGER, - ifLastChange - TimeTicks, - ifInOctets - Counter, - ifInUcastPkts - Counter, - ifInNUcastPkts - Counter, - ifInDiscards - Counter, - ifInErrors - Counter, - ifInUnknownProtos - - - -McCloghrie & Rose [Page 68] - -RFC 1066 MIB August 1988 - - - Counter, - ifOutOctets - Counter, - 
ifOutUcastPkts - Counter, - ifOutNUcastPkts - Counter, - ifOutDiscards - Counter, - ifOutErrors - Counter, - ifOutQLen - Gauge - } - - ifIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ifEntry 1 } - - ifDescr OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-only - STATUS mandatory - ::= { ifEntry 2 } - - ifType OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - regular1822(2), - hdh1822(3), - ddn-x25(4), - rfc877-x25(5), - ethernet-csmacd(6), - iso88023-csmacd(7), - iso88024-tokenBus(8), - iso88025-tokenRing(9), - iso88026-man(10), - starLan(11), - proteon-10MBit(12), - proteon-80MBit(13), - hyperchannel(14), - fddi(15), - lapb(16), - sdlc(17), - t1-carrier(18), - cept(19), - - - -McCloghrie & Rose [Page 69] - -RFC 1066 MIB August 1988 - - - basicIsdn(20), - primaryIsdn(21), - -- proprietary serial - propPointToPointSerial(22) - } - ACCESS read-only - STATUS mandatory - ::= { ifEntry 3 } - - ifMtu OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ifEntry 4 } - - ifSpeed OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - ::= { ifEntry 5 } - - ifPhysAddress OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-only - STATUS mandatory - ::= { ifEntry 6 } - - ifAdminStatus OBJECT-TYPE - SYNTAX INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - ACCESS read-write - STATUS mandatory - ::= { ifEntry 7 } - - ifOperStatus OBJECT-TYPE - SYNTAX INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - ACCESS read-only - STATUS mandatory - ::= { ifEntry 8 } - - ifLastChange OBJECT-TYPE - - - -McCloghrie & Rose [Page 70] - -RFC 1066 MIB August 1988 - - - SYNTAX TimeTicks - ACCESS read-only - STATUS mandatory - ::= { ifEntry 9 } - - ifInOctets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 10 } - - ifInUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - 
::= { ifEntry 11 } - - ifInNUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 12 } - - ifInDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 13 } - - ifInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 14 } - - ifInUnknownProtos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 15 } - - ifOutOctets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 16 } - - ifOutUcastPkts OBJECT-TYPE - - - -McCloghrie & Rose [Page 71] - -RFC 1066 MIB August 1988 - - - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 17 } - - ifOutNUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 18 } - - ifOutDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 19 } - - ifOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 20 } - - ifOutQLen OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - ::= { ifEntry 21 } - - -- the Address Translation group - - atTable OBJECT-TYPE - SYNTAX SEQUENCE OF AtEntry - ACCESS read-write - STATUS mandatory - ::= { at 1 } - - atEntry OBJECT-TYPE - SYNTAX AtEntry - ACCESS read-write - STATUS mandatory - ::= { atTable 1 } - - AtEntry ::= SEQUENCE { - atIfIndex - INTEGER, - atPhysAddress - OCTET STRING, - - - -McCloghrie & Rose [Page 72] - -RFC 1066 MIB August 1988 - - - atNetAddress - NetworkAddress - } - - atIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { atEntry 1 } - - atPhysAddress OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-write - STATUS mandatory - ::= { atEntry 2 } - - atNetAddress OBJECT-TYPE - SYNTAX NetworkAddress - ACCESS read-write - STATUS mandatory - ::= { atEntry 3 } - - -- the IP group - - ipForwarding OBJECT-TYPE - SYNTAX INTEGER { - gateway(1), -- entity forwards 
datagrams - host(2) -- entity does NOT forward datagrams - } - ACCESS read-only - STATUS mandatory - ::= { ip 1 } - - ipDefaultTTL OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ip 2 } - - ipInReceives OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 3 } - - ipInHdrErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - - - -McCloghrie & Rose [Page 73] - -RFC 1066 MIB August 1988 - - - STATUS mandatory - ::= { ip 4 } - - ipInAddrErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 5 } - - ipForwDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 6 } - - ipInUnknownProtos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 7 } - - ipInDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 8 } - - ipInDelivers OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 9 } - - ipOutRequests OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 10 } - - ipOutDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 11 } - - ipOutNoRoutes OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - - - -McCloghrie & Rose [Page 74] - -RFC 1066 MIB August 1988 - - - STATUS mandatory - ::= { ip 12 } - - ipReasmTimeout OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ip 13 } - - ipReasmReqds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 14 } - - ipReasmOKs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 15 } - - ipReasmFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 16 } - - ipFragOKs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 17 } - - ipFragFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 18 } - - ipFragCreates OBJECT-TYPE - SYNTAX Counter - 
ACCESS read-only - STATUS mandatory - ::= { ip 19 } - - -- the IP Interface table - - ipAddrTable OBJECT-TYPE - - - -McCloghrie & Rose [Page 75] - -RFC 1066 MIB August 1988 - - - SYNTAX SEQUENCE OF IpAddrEntry - ACCESS read-only - STATUS mandatory - ::= { ip 20 } - - ipAddrEntry OBJECT-TYPE - SYNTAX IpAddrEntry - ACCESS read-only - STATUS mandatory - ::= { ipAddrTable 1 } - - IpAddrEntry ::= SEQUENCE { - ipAdEntAddr - IpAddress, - ipAdEntIfIndex - INTEGER, - ipAdEntNetMask - IpAddress, - ipAdEntBcastAddr - INTEGER - } - - ipAdEntAddr OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 1 } - - ipAdEntIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 2 } - - ipAdEntNetMask OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 3 } - - ipAdEntBcastAddr OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 4 } - - -- the IP Routing table - - - - -McCloghrie & Rose [Page 76] - -RFC 1066 MIB August 1988 - - - ipRoutingTable OBJECT-TYPE - SYNTAX SEQUENCE OF IpRouteEntry - ACCESS read-write - STATUS mandatory - ::= { ip 21 } - - ipRouteEntry OBJECT-TYPE - SYNTAX IpRouteEntry - ACCESS read-write - STATUS mandatory - ::= { ipRoutingTable 1 } - - IpRouteEntry ::= SEQUENCE { - ipRouteDest - IpAddress, - ipRouteIfIndex - INTEGER, - ipRouteMetric1 - INTEGER, - ipRouteMetric2 - INTEGER, - ipRouteMetric3 - INTEGER, - ipRouteMetric4 - INTEGER, - ipRouteNextHop - IpAddress, - ipRouteType - INTEGER, - ipRouteProto - INTEGER, - ipRouteAge - INTEGER - } - - ipRouteDest OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 1 } - - ipRouteIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 2 } - - ipRouteMetric1 OBJECT-TYPE - - - -McCloghrie & Rose [Page 77] - -RFC 1066 MIB August 1988 - - - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { 
ipRouteEntry 3 } - - ipRouteMetric2 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 4 } - - ipRouteMetric3 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 5 } - - ipRouteMetric4 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 6 } - - ipRouteNextHop OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 7 } - - ipRouteType OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - - invalid(2), -- an invalidated route - - -- route to directly - direct(3), -- connected (sub-)network - - -- route to a non-local - remote(4), -- host/network/sub-network - } - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 8 } - - ipRouteProto OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - - - -McCloghrie & Rose [Page 78] - -RFC 1066 MIB August 1988 - - - -- non-protocol information - -- e.g., manually - local(2), -- configured entries - - -- set via a network - netmgmt(3), -- management protocol - - -- obtained via ICMP, - icmp(4), -- e.g., Redirect - - -- the following are - -- gateway routing protocols - egp(5), - ggp(6), - hello(7), - rip(8), - is-is(9), - es-is(10), - ciscoIgrp(11), - bbnSpfIgp(12), - oigp(13) - } - ACCESS read-only - STATUS mandatory - ::= { ipRouteEntry 9 } - - ipRouteAge OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 10 } - - -- the ICMP group - - icmpInMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 1 } - - icmpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 2 } - - icmpInDestUnreachs OBJECT-TYPE - SYNTAX Counter - - - -McCloghrie & Rose [Page 79] - -RFC 1066 MIB August 1988 - - - ACCESS read-only - STATUS mandatory - ::= { icmp 3 } - - icmpInTimeExcds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 4 } - - 
icmpInParmProbs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 5 } - - icmpInSrcQuenchs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 6 } - - icmpInRedirects OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 7 } - - icmpInEchos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 8 } - - icmpInEchoReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 9 } - - icmpInTimestamps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 10 } - - icmpInTimestampReps OBJECT-TYPE - SYNTAX Counter - - - -McCloghrie & Rose [Page 80] - -RFC 1066 MIB August 1988 - - - ACCESS read-only - STATUS mandatory - ::= { icmp 11 } - - icmpInAddrMasks OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 12 } - - icmpInAddrMaskReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 13 } - - icmpOutMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 14 } - - icmpOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 15 } - - icmpOutDestUnreachs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 16 } - - icmpOutTimeExcds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 17 } - - icmpOutParmProbs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 18 } - - icmpOutSrcQuenchs OBJECT-TYPE - SYNTAX Counter - - - -McCloghrie & Rose [Page 81] - -RFC 1066 MIB August 1988 - - - ACCESS read-only - STATUS mandatory - ::= { icmp 19 } - - icmpOutRedirects OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 20 } - - icmpOutEchos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 21 } - - icmpOutEchoReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS 
mandatory - ::= { icmp 22 } - - icmpOutTimestamps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 23 } - - icmpOutTimestampReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 24 } - - icmpOutAddrMasks OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 25 } - - icmpOutAddrMaskReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 26 } - - -- the TCP group - - - - -McCloghrie & Rose [Page 82] - -RFC 1066 MIB August 1988 - - - tcpRtoAlgorithm OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - constant(2), -- a constant rto - rsre(3), -- MIL-STD-1778, Appendix B - vanj(4) -- Van Jacobson's algorithm [11] - } - ACCESS read-only - STATUS mandatory - ::= { tcp 1 } - - tcpRtoMin OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { tcp 2 } - - tcpRtoMax OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { tcp 3 } - - tcpMaxConn OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { tcp 4 } - - tcpActiveOpens OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 5 } - - tcpPassiveOpens OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 6 } - - tcpAttemptFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 7 } - - tcpEstabResets OBJECT-TYPE - - - -McCloghrie & Rose [Page 83] - -RFC 1066 MIB August 1988 - - - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 8 } - - tcpCurrEstab OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - ::= { tcp 9 } - - tcpInSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 10 } - - tcpOutSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 11 } - - tcpRetransSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 12 } - - -- the TCP 
connections table - - tcpConnTable OBJECT-TYPE - SYNTAX SEQUENCE OF TcpConnEntry - ACCESS read-only - STATUS mandatory - ::= { tcp 13 } - - tcpConnEntry OBJECT-TYPE - SYNTAX TcpConnEntry - ACCESS read-only - STATUS mandatory - ::= { tcpConnTable 1 } - - TcpConnEntry ::= SEQUENCE { - tcpConnState - INTEGER, - tcpConnLocalAddress - IpAddress, - - - -McCloghrie & Rose [Page 84] - -RFC 1066 MIB August 1988 - - - tcpConnLocalPort - INTEGER (0..65535), - tcpConnRemAddress - IpAddress, - tcpConnRemPort - INTEGER (0..65535) - } - - tcpConnState OBJECT-TYPE - SYNTAX INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11) - } - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 1 } - - tcpConnLocalAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 2 } - - tcpConnLocalPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 3 } - - tcpConnRemAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 4 } - - tcpConnRemPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - STATUS mandatory - - - -McCloghrie & Rose [Page 85] - -RFC 1066 MIB August 1988 - - - ::= { tcpConnEntry 5 } - - -- the UDP group - - udpInDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 1 } - - udpNoPorts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 2 } - - udpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 3 } - - udpOutDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 4 } - - -- the EGP group - - egpInMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 1 } - - egpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 2 } - - 
egpOutMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 3 } - - - - -McCloghrie & Rose [Page 86] - -RFC 1066 MIB August 1988 - - - egpOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 4 } - - -- the EGP Neighbor table - - egpNeighTable OBJECT-TYPE - SYNTAX SEQUENCE OF EgpNeighEntry - ACCESS read-only - STATUS mandatory - ::= { egp 5 } - - egpNeighEntry OBJECT-TYPE - SYNTAX EgpNeighEntry - ACCESS read-only - STATUS mandatory - ::= { egpNeighTable 1 } - - EgpNeighEntry ::= SEQUENCE { - egpNeighState - INTEGER, - egpNeighAddr - IpAddress - } - - egpNeighState OBJECT-TYPE - SYNTAX INTEGER { - idle(1), - acquisition(2), - down(3), - up(4), - cease(5) - } - ACCESS read-only - STATUS mandatory - ::= { egpNeighEntry 1 } - - egpNeighAddr OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { egpNeighEntry 2 } - - END - - - - - -McCloghrie & Rose [Page 87] - -RFC 1066 MIB August 1988 - - -7. Acknowledgements - - The initial draft of this memo was heavily influenced by the the HEMS - [5] and SNMP [6] MIBs. - - Its final form is the result of the suggestions, the dicussions, and - the compromises reached by the members of the IETF MIB working group: - - Karl Auerbach, Epilogue Technology - K. Ramesh Babu, Excelan - Lawrence Besaw, Hewlett-Packard - Jeffrey D. Case, University of Tennessee at Knoxville - James R. Davin, Proteon - Mark S. Fedor, NYSERNet - Robb Foster, BBN - Phill Gross, The MITRE Corporation - Bent Torp Jensen, Convergent Technology - Lee Labarre, The MITRE Corporation - Dan Lynch, Advanced Computing Environments - Keith McCloghrie, The Wollongong Group - Dave Mackie, 3Com/Bridge - Craig Partridge, BBN (chair) - Jim Robertson, 3Com/Bridge - Marshall T. 
Rose, The Wollongong Group - Greg Satz, cisco - Martin Lee Schoffstall, Rensselaer Polytechnic Institute - Lou Steinberg, IBM - Dean Throop, Data General - Unni Warrier, Unisys - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 88] - -RFC 1066 MIB August 1988 - - -8. References - - [1] Cerf, V., "IAB Recommendations for the Development of Internet - Network Management Standards", RFC 1052, IAB, April 1988. - - [2] Information processing systems - Open Systems Interconnection, - "Management Information Services Definition", International - Organization for Standardization, Draft Proposal 9595/2, - December 1987. - - [3] Information processing systems - Open Systems Interconnection, - "Management Information Protocol Specification", International - Organization for Standardization, Draft Proposal 9596/2, - December 1987. - - [4] Rose M., and K. McCloghrie, "Structure and Identification of - Management Information for TCP/IP-based internets", RFC 1065, - TWG, August 1988. - - [5] Partridge C., and G. Trewitt, "The High-Level Entity Management - System (HEMS)", RFCs 1021-1024, BBN and Stanford, October 1987. - - [6] Case, J., M. Fedor, M. Schoffstall, and J. Davin, "A Simple - Network Management Protocol", RFC 1067, University of Tennessee - At Knoxville, NYSERNet, Rensselaer Polytechnic, Proteon, August - 1988. - - [7] LaBarre, L., "Structure and Identification of Management - Information for the Internet", Internet Engineering Task Force - working note, Network Information Center, SRI International, - Menlo Park, California, April 1988. - - [8] LaBarre, L., "Transport Layer Management Information: TCP", - Internet Engineering Task Force working note in preparation. - Network Information Center, SRI International, Menlo Park, - California, (unpublished). 
- - [9] Information processing systems - Open Systems Interconnection, - "Specification of Abstract Syntax Notation One (ASN.1)", - International Organization for Standardization, International - Standard 8824, December 1987. - - [10] Information processing systems - Open Systems Interconnection, - "Specification of Basic Encoding Rules for Abstract Notation One - (ASN.1)", International Organization for Standardization, - International Standard 8825, December 1987. - - [11] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM, 1988, - - - -McCloghrie & Rose [Page 89] - -RFC 1066 MIB August 1988 - - - Stanford, California. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 90] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1071.txt b/kernel/picotcp/RFC/rfc1071.txt deleted file mode 100644 index 3b94108..0000000 --- a/kernel/picotcp/RFC/rfc1071.txt +++ /dev/null @@ -1,1417 +0,0 @@ - - - - -Network Working Group R. Braden -Request for Comments: 1071 ISI - D. Borman - Cray Research - C. Partridge - BBN Laboratories - September 1988 - - - Computing the Internet Checksum - - -Status of This Memo - - This memo summarizes techniques and algorithms for efficiently - computing the Internet checksum. It is not a standard, but a set of - useful implementation techniques. Distribution of this memo is - unlimited. - -1. Introduction - - This memo discusses methods for efficiently computing the Internet - checksum that is used by the standard Internet protocols IP, UDP, and - TCP. - - An efficient checksum implementation is critical to good performance. - As advances in implementation techniques streamline the rest of the - protocol processing, the checksum computation becomes one of the - limiting factors on TCP performance, for example. 
It is usually - appropriate to carefully hand-craft the checksum routine, exploiting - every machine-dependent trick possible; a fraction of a microsecond - per TCP data byte can add up to a significant CPU time savings - overall. - - In outline, the Internet checksum algorithm is very simple: - - (1) Adjacent octets to be checksummed are paired to form 16-bit - integers, and the 1's complement sum of these 16-bit integers is - formed. - - (2) To generate a checksum, the checksum field itself is cleared, - the 16-bit 1's complement sum is computed over the octets - concerned, and the 1's complement of this sum is placed in the - checksum field. - - (3) To check a checksum, the 1's complement sum is computed over the - same set of octets, including the checksum field. If the result - is all 1 bits (-0 in 1's complement arithmetic), the check - succeeds. - - Suppose a checksum is to be computed over the sequence of octets - - - -Braden, Borman, & Partridge [Page 1] - -RFC 1071 Computing the Internet Checksum September 1988 - - - A, B, C, D, ... , Y, Z. Using the notation [a,b] for the 16-bit - integer a*256+b, where a and b are bytes, then the 16-bit 1's - complement sum of these bytes is given by one of the following: - - [A,B] +' [C,D] +' ... +' [Y,Z] [1] - - [A,B] +' [C,D] +' ... +' [Z,0] [2] - - where +' indicates 1's complement addition. These cases - correspond to an even or odd count of bytes, respectively. - - On a 2's complement machine, the 1's complement sum must be - computed by means of an "end around carry", i.e., any overflows - from the most significant bits are added into the least - significant bits. See the examples below. - - Section 2 explores the properties of this checksum that may be - exploited to speed its calculation. Section 3 contains some - numerical examples of the most important implementation - techniques. Finally, Section 4 includes examples of specific - algorithms for a variety of common CPU types. 
We are grateful - to Van Jacobson and Charley Kline for their contribution of - algorithms to this section. - - The properties of the Internet checksum were originally - discussed by Bill Plummer in IEN-45, entitled "Checksum Function - Design". Since IEN-45 has not been widely available, we include - it as an extended appendix to this RFC. - - 2. Calculating the Checksum - - This simple checksum has a number of wonderful mathematical - properties that may be exploited to speed its calculation, as we - will now discuss. - - - (A) Commutative and Associative - - As long as the even/odd assignment of bytes is respected, the - sum can be done in any order, and it can be arbitrarily split - into groups. - - For example, the sum [1] could be split into: - - ( [A,B] +' [C,D] +' ... +' [J,0] ) - - +' ( [0,K] +' ... +' [Y,Z] ) [3] - - - - - - - -Braden, Borman, & Partridge [Page 2] - -RFC 1071 Computing the Internet Checksum September 1988 - - - (B) Byte Order Independence - - The sum of 16-bit integers can be computed in either byte order. - Thus, if we calculate the swapped sum: - - [B,A] +' [D,C] +' ... +' [Z,Y] [4] - - the result is the same as [1], except the bytes are swapped in - the sum! To see why this is so, observe that in both orders the - carries are the same: from bit 15 to bit 0 and from bit 7 to bit - 8. In other words, consistently swapping bytes simply rotates - the bits within the sum, but does not affect their internal - ordering. - - Therefore, the sum may be calculated in exactly the same way - regardless of the byte order ("big-endian" or "little-endian") - of the underlaying hardware. For example, assume a "little- - endian" machine summing data that is stored in memory in network - ("big-endian") order. Fetching each 16-bit word will swap - bytes, resulting in the sum [4]; however, storing the result - back into memory will swap the sum back into network byte order. 
- - Byte swapping may also be used explicitly to handle boundary - alignment problems. For example, the second group in [3] can be - calculated without concern to its odd/even origin, as: - - [K,L] +' ... +' [Z,0] - - if this sum is byte-swapped before it is added to the first - group. See the example below. - - (C) Parallel Summation - - On machines that have word-sizes that are multiples of 16 bits, - it is possible to develop even more efficient implementations. - Because addition is associative, we do not have to sum the - integers in the order they appear in the message. Instead we - can add them in "parallel" by exploiting the larger word size. - - To compute the checksum in parallel, simply do a 1's complement - addition of the message using the native word size of the - machine. For example, on a 32-bit machine we can add 4 bytes at - a time: [A,B,C,D]+'... When the sum has been computed, we "fold" - the long sum into 16 bits by adding the 16-bit segments. Each - 16-bit addition may produce new end-around carries that must be - added. - - Furthermore, again the byte order does not matter; we could - instead sum 32-bit words: [D,C,B,A]+'... or [B,A,D,C]+'... and - then swap the bytes of the final 16-bit sum as necessary. See - the examples below. Any permutation is allowed that collects - - - -Braden, Borman, & Partridge [Page 3] - -RFC 1071 Computing the Internet Checksum September 1988 - - - all the even-numbered data bytes into one sum byte and the odd- - numbered data bytes into the other sum byte. - - - There are further coding techniques that can be exploited to speed up - the checksum calculation. - - (1) Deferred Carries - - Depending upon the machine, it may be more efficient to defer - adding end-around carries until the main summation loop is - finished. - - One approach is to sum 16-bit words in a 32-bit accumulator, so - the overflows build up in the high-order 16 bits. 
This approach - typically avoids a carry-sensing instruction but requires twice - as many additions as would adding 32-bit segments; which is - faster depends upon the detailed hardware architecture. - - (2) Unwinding Loops - - To reduce the loop overhead, it is often useful to "unwind" the - inner sum loop, replicating a series of addition commands within - one loop traversal. This technique often provides significant - savings, although it may complicate the logic of the program - considerably. - - (3) Combine with Data Copying - - Like checksumming, copying data from one memory location to - another involves per-byte overhead. In both cases, the - bottleneck is essentially the memory bus, i.e., how fast the - data can be fetched. On some machines (especially relatively - slow and simple micro-computers), overhead can be significantly - reduced by combining memory-to-memory copy and the checksumming, - fetching the data only once for both. - - (4) Incremental Update - - Finally, one can sometimes avoid recomputing the entire checksum - when one header field is updated. The best-known example is a - gateway changing the TTL field in the IP header, but there are - other examples (for example, when updating a source route). In - these cases it is possible to update the checksum without - scanning the message or datagram. - - To update the checksum, simply add the differences of the - sixteen bit integers that have been changed. To see why this - works, observe that every 16-bit integer has an additive inverse - and that addition is associative. From this it follows that - given the original value m, the new value m', and the old - - - -Braden, Borman, & Partridge [Page 4] - -RFC 1071 Computing the Internet Checksum September 1988 - - - checksum C, the new checksum C' is: - - C' = C + (-m) + m' = C + (m' - m) - - -3. Numerical Examples - - We now present explicit examples of calculating a simple 1's - complement sum on a 2's complement machine. 
The examples show the - same sum calculated byte by bye, by 16-bits words in normal and - swapped order, and 32 bits at a time in 3 different orders. All - numbers are in hex. - - Byte-by-byte "Normal" Swapped - Order Order - - Byte 0/1: 00 01 0001 0100 - Byte 2/3: f2 03 f203 03f2 - Byte 4/5: f4 f5 f4f5 f5f4 - Byte 6/7: f6 f7 f6f7 f7f6 - --- --- ----- ----- - Sum1: 2dc 1f0 2ddf0 1f2dc - - dc f0 ddf0 f2dc - Carrys: 1 2 2 1 - -- -- ---- ---- - Sum2: dd f2 ddf2 f2dd - - Final Swap: dd f2 ddf2 ddf2 - - - Byte 0/1/2/3: 0001f203 010003f2 03f20100 - Byte 4/5/6/7: f4f5f6f7 f5f4f7f6 f7f6f5f4 - -------- -------- -------- - Sum1: 0f4f7e8fa 0f6f4fbe8 0fbe8f6f4 - - Carries: 0 0 0 - - Top half: f4f7 f6f4 fbe8 - Bottom half: e8fa fbe8 f6f4 - ----- ----- ----- - Sum2: 1ddf1 1f2dc 1f2dc - - ddf1 f2dc f2dc - Carrys: 1 1 1 - ---- ---- ---- - Sum3: ddf2 f2dd f2dd - - Final Swap: ddf2 ddf2 ddf2 - - - - - -Braden, Borman, & Partridge [Page 5] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Finally, here an example of breaking the sum into two groups, with - the second group starting on a odd boundary: - - - Byte-by-byte Normal - Order - - Byte 0/1: 00 01 0001 - Byte 2/ : f2 (00) f200 - --- --- ----- - Sum1: f2 01 f201 - - Byte 4/5: 03 f4 03f4 - Byte 6/7: f5 f6 f5f6 - Byte 8/: f7 (00) f700 - --- --- ----- - Sum2: 1f0ea - - Sum2: f0ea - Carry: 1 - ----- - Sum3: f0eb - - Sum1: f201 - Sum3 byte swapped: ebf0 - ----- - Sum4: 1ddf1 - - Sum4: ddf1 - Carry: 1 - ----- - Sum5: ddf2 - - - - - - - - - - - - - - - - - - - - - - -Braden, Borman, & Partridge [Page 6] - -RFC 1071 Computing the Internet Checksum September 1988 - - -4. Implementation Examples - - In this section we show examples of Internet checksum implementation - algorithms that have been found to be efficient on a variety of - CPU's. In each case, we show the core of the algorithm, without - including environmental code (e.g., subroutine linkages) or special- - case code. 
- -4.1 "C" - - The following "C" code algorithm computes the checksum with an inner - loop that sums 16-bits at a time in a 32-bit accumulator. - - in 6 - { - /* Compute Internet Checksum for "count" bytes - * beginning at location "addr". - */ - register long sum = 0; - - while( count > 1 ) { - /* This is the inner loop */ - sum += * (unsigned short) addr++; - count -= 2; - } - - /* Add left-over byte, if any */ - if( count > 0 ) - sum += * (unsigned char *) addr; - - /* Fold 32-bit sum to 16 bits */ - while (sum>>16) - sum = (sum & 0xffff) + (sum >> 16); - - checksum = ~sum; - } - - - - - - - - - - - - - - - - - - -Braden, Borman, & Partridge [Page 7] - -RFC 1071 Computing the Internet Checksum September 1988 - - -4.2 Motorola 68020 - - The following algorithm is given in assembler language for a Motorola - 68020 chip. This algorithm performs the sum 32 bits at a time, and - unrolls the loop with 16 replications. For clarity, we have omitted - the logic to add the last fullword when the length is not a multiple - of 4. The result is left in register d0. - - With a 20MHz clock, this routine was measured at 134 usec/kB summing - random data. This algorithm was developed by Van Jacobson. - - - movl d1,d2 - lsrl #6,d1 | count/64 = # loop traversals - andl #0x3c,d2 | Then find fractions of a chunk - negl d2 - andb #0xf,cc | Clear X (extended carry flag) - - jmp pc@(2$-.-2:b,d2) | Jump into loop - - 1$: | Begin inner loop... - - movl a0@+,d2 | Fetch 32-bit word - addxl d2,d0 | Add word + previous carry - movl a0@+,d2 | Fetch 32-bit word - addxl d2,d0 | Add word + previous carry - - | ... 
14 more replications - 2$: - dbra d1,1$ | (NB- dbra doesn't affect X) - - movl d0,d1 | Fold 32 bit sum to 16 bits - swap d1 | (NB- swap doesn't affect X) - addxw d1,d0 - jcc 3$ - addw #1,d0 - 3$: - andl #0xffff,d0 - - - - - - - - - - - - - - - - -Braden, Borman, & Partridge [Page 8] - -RFC 1071 Computing the Internet Checksum September 1988 - - -4.3 Cray - - The following example, in assembler language for a Cray CPU, was - contributed by Charley Kline. It implements the checksum calculation - as a vector operation, summing up to 512 bytes at a time with a basic - summation unit of 32 bits. This example omits many details having to - do with short blocks, for clarity. - - Register A1 holds the address of a 512-byte block of memory to - checksum. First two copies of the data are loaded into two vector - registers. One is vector-shifted right 32 bits, while the other is - vector-ANDed with a 32 bit mask. Then the two vectors are added - together. Since all these operations chain, it produces one result - per clock cycle. Then it collapses the result vector in a loop that - adds each element to a scalar register. Finally, the end-around - carry is performed and the result is folded to 16-bits. - - EBM - A0 A1 - VL 64 use full vectors - S1 <32 form 32-bit mask from the right. - A2 32 - V1 ,A0,1 load packet into V1 - V2 S1&V1 Form right-hand 32-bits in V2. - V3 V1>A2 Form left-hand 32-bits in V3. - V1 V2+V3 Add the two together. - A2 63 Prepare to collapse into a scalar. - S1 0 - S4 <16 Form 16-bit mask from the right. - A4 16 - CK$LOOP S2 V1,A2 - A2 A2-1 - A0 A2 - S1 S1+S2 - JAN CK$LOOP - S2 S1&S4 Form right-hand 16-bits in S2 - S1 S1>A4 Form left-hand 16-bits in S1 - S1 S1+S2 - S2 S1&S4 Form right-hand 16-bits in S2 - S1 S1>A4 Form left-hand 16-bits in S1 - S1 S1+S2 - S1 #S1 Take one's complement - CMR At this point, S1 contains the checksum. 
- - - - - - - - - - - -Braden, Borman, & Partridge [Page 9] - -RFC 1071 Computing the Internet Checksum September 1988 - - -4.4 IBM 370 - - The following example, in assembler language for an IBM 370 CPU, sums - the data 4 bytes at a time. For clarity, we have omitted the logic - to add the last fullword when the length is not a multiple of 4, and - to reverse the bytes when necessary. The result is left in register - RCARRY. - - This code has been timed on an IBM 3090 CPU at 27 usec/KB when - summing all one bits. This time is reduced to 24.3 usec/KB if the - trouble is taken to word-align the addends (requiring special cases - at both the beginning and the end, and byte-swapping when necessary - to compensate for starting on an odd byte). - - * Registers RADDR and RCOUNT contain the address and length of - * the block to be checksummed. - * - * (RCARRY, RSUM) must be an even/odd register pair. - * (RCOUNT, RMOD) must be an even/odd register pair. - * - CHECKSUM SR RSUM,RSUM Clear working registers. - SR RCARRY,RCARRY - LA RONE,1 Set up constant 1. - * - SRDA RCOUNT,6 Count/64 to RCOUNT. - AR RCOUNT,RONE +1 = # times in loop. - SRL RMOD,26 Size of partial chunk to RMOD. - AR RADDR,R3 Adjust addr to compensate for - S RADDR,=F(64) jumping into the loop. - SRL RMOD,1 (RMOD/4)*2 is halfword index. - LH RMOD,DOPEVEC9(RMOD) Use magic dope-vector for offset, - B LOOP(RMOD) and jump into the loop... - * - * Inner loop: - * - LOOP AL RSUM,0(,RADDR) Add Logical fullword - BC 12,*+6 Branch if no carry - AR RCARRY,RONE Add 1 end-around - AL RSUM,4(,RADDR) Add Logical fullword - BC 12,*+6 Branch if no carry - AR RCARRY,RONE Add 1 end-around - * - * ... 14 more replications ... 
- * - A RADDR,=F'64' Increment address ptr - BCT RCOUNT,LOOP Branch on Count - * - * Add Carries into sum, and fold to 16 bits - * - ALR RCARRY,RSUM Add SUM and CARRY words - BC 12,*+6 and take care of carry - - - -Braden, Borman, & Partridge [Page 10] - -RFC 1071 Computing the Internet Checksum September 1988 - - - AR RCARRY,RONE - SRDL RCARRY,16 Fold 32-bit sum into - SRL RSUM,16 16-bits - ALR RCARRY,RSUM - C RCARRY,=X'0000FFFF' and take care of any - BNH DONE last carry - S RCARRY,=X'0000FFFF' - DONE X RCARRY,=X'0000FFFF' 1's complement - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Braden, Borman, & Partridge [Page 11] - -RFC 1071 Computing the Internet Checksum September 1988 - - - IEN 45 - Section 2.4.4.5 - - - - - - - - - - - - - - - - - TCP Checksum Function Design - - - - William W. Plummer - - - Bolt Beranek and Newman, Inc. - 50 Moulton Street - Cambridge MA 02138 - - - 5 June 1978 - - - - - - - - - - - - - - - - - - - - - - - -Braden, Borman, & Partridge [Page 12] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - 1. Introduction - - Checksums are included in packets in order that errors - encountered during transmission may be detected. For Internet - protocols such as TCP [1,9] this is especially important because - packets may have to cross wireless networks such as the Packet - Radio Network [2] and Atlantic Satellite Network [3] where - packets may be corrupted. Internet protocols (e.g., those for - real time speech transmission) can tolerate a certain level of - transmission errors and forward error correction techniques or - possibly no checksum at all might be better. The focus in this - paper is on checksum functions for protocols such as TCP where - the required reliable delivery is achieved by retransmission. 
- - Even if the checksum appears good on a message which has been - received, the message may still contain an undetected error. The - probability of this is bounded by 2**(-C) where C is the number - of checksum bits. Errors can arise from hardware (and software) - malfunctions as well as transmission errors. Hardware induced - errors are usually manifested in certain well known ways and it - is desirable to account for this in the design of the checksum - function. Ideally no error of the "common hardware failure" type - would go undetected. - - An example of a failure that the current checksum function - handles successfully is picking up a bit in the network interface - (or I/O buss, memory channel, etc.). This will always render the - checksum bad. For an example of how the current function is - inadequate, assume that a control signal stops functioning in the - network interface and the interface stores zeros in place of the - real data. These "all zero" messages appear to have valid - checksums. Noise on the "There's Your Bit" line of the ARPANET - Interface [4] may go undetected because the extra bits input may - cause the checksum to be perturbed (i.e., shifted) in the same - way as the data was. - - Although messages containing undetected errors will occasionally - be passed to higher levels of protocol, it is likely that they - will not make sense at that level. In the case of TCP most such - messages will be ignored, but some could cause a connection to be - aborted. Garbled data could be viewed as a problem for a layer - of protocol above TCP which itself may have a checksuming scheme. - - This paper is the first step in design of a new checksum function - for TCP and some other Internet protocols. Several useful - properties of the current function are identified. 
If possible - - - 1 - - - - -Braden, Borman, & Partridge [Page 13] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - these should be retained in any new function. A number of - plausible checksum schemes are investigated. Of these only the - "product code" seems to be simple enough for consideration. - - 2. The Current TCP Checksum Function - - The current function is oriented towards sixteen-bit machines - such as the PDP-11 but can be computed easily on other machines - (e.g., PDP-10). A packet is thought of as a string of 16-bit - bytes and the checksum function is the one's complement sum (add - with end-around carry) of those bytes. It is the one's - complement of this sum which is stored in the checksum field of - the TCP header. Before computing the checksum value, the sender - places a zero in the checksum field of the packet. If the - checksum value computed by a receiver of the packet is zero, the - packet is assumed to be valid. This is a consequence of the - "negative" number in the checksum field exactly cancelling the - contribution of the rest of the packet. - - Ignoring the difficulty of actually evaluating the checksum - function for a given packet, the way of using the checksum - described above is quite simple, but it assumes some properties - of the checksum operator (one's complement addition, "+" in what - follows): - - (P1) + is commutative. Thus, the order in which - the 16-bit bytes are "added" together is - unimportant. - - (P2) + has at least one identity element (The - current function has two: +0 and -0). This - allows the sender to compute the checksum - function by placing a zero in the packet checksum - field before computing the value. - - (P3) + has an inverse. Thus, the receiver may - evaluate the checksum function and expect a zero. 
- - (P4) + is associative, allowing the checksum field - to be anywhere in the packet and the 16-bit bytes - to be scanned sequentially. - - Mathematically, these properties of the binary operation "+" over - the set of 16-bit numbers forms an Abelian group [5]. Of course, - there are many Abelian groups but not all would be satisfactory - for use as checksum operators. (Another operator readily - - - 2 - - - - -Braden, Borman, & Partridge [Page 14] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - available in the PDP-11 instruction set that has all of these - properties is exclusive-OR, but XOR is unsatisfactory for other - reasons.) - - Albeit imprecise, another property which must be preserved in any - future checksum scheme is: - - (P5) + is fast to compute on a variety of machines - with limited storage requirements. - - The current function is quite good in this respect. On the - PDP-11 the inner loop looks like: - - LOOP: ADD (R1)+,R0 ; Add the next 16-bit byte - ADC R0 ; Make carry be end-around - SOB R2,LOOP ; Loop over entire packet. - - ( 4 memory cycles per 16-bit byte ) - - On the PDP-10 properties P1-4 are exploited further and two - 16-bit bytes per loop are processed: - - LOOP: ILDB THIS,PTR ; Get 2 16-bit bytes - ADD SUM,THIS ; Add into current sum - JUMPGE SUM,CHKSU2 ; Jump if fewer than 8 carries - LDB THIS,[POINT 20,SUM,19] ; Get left 16 and carries - ANDI SUM,177777 ; Save just low 16 here - ADD SUM,THIS ; Fold in carries - CHKSU2: SOJG COUNT,LOOP ; Loop over entire packet - - ( 3.1 memory cycles per 16-bit byte ) - - The "extra" instruction in the loops above are required to - convert the two's complement ADD instruction(s) into a one's - complement add by making the carries be end-around. One's - complement arithmetic is better than two's complement because it - is equally sensitive to errors in all bit positions. 
If two's - complement addition were used, an even number of 1's could be - dropped (or picked up) in the most significant bit channel - without affecting the value of the checksum. It is just this - property that makes some sort of addition preferable to a simple - exclusive-OR which is frequently used but permits an even number - of drops (pick ups) in any bit channel. RIM10B paper tape format - used on PDP-10s [10] uses two's complement add because space for - the loader program is extremely limited. - - - 3 - - - - - -Braden, Borman, & Partridge [Page 15] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - Another property of the current checksum scheme is: - - (P6) Adding the checksum to a packet does not change - the information bytes. Peterson [6] calls this a - "systematic" code. - - This property allows intermediate computers such as gateway - machines to act on fields (i.e., the Internet Destination - Address) without having to first decode the packet. Cyclical - Redundancy Checks used for error correction are not systematic - either. However, most applications of CRCs tend to emphasize - error detection rather than correction and consequently can send - the message unchanged, with the CRC check bits being appended to - the end. The 24-bit CRC used by ARPANET IMPs and Very Distant - Host Interfaces [4] and the ANSI standards for 800 and 6250 bits - per inch magnetic tapes (described in [11]) use this mode. - - Note that the operation of higher level protocols are not (by - design) affected by anything that may be done by a gateway acting - on possibly invalid packets. It is permissible for gateways to - validate the checksum on incoming packets, but in general - gateways will not know how to do this if the checksum is a - protocol-specific feature. 
- - A final property of the current checksum scheme which is actually - a consequence of P1 and P4 is: - - (P7) The checksum may be incrementally modified. - - This property permits an intermediate gateway to add information - to a packet, for instance a timestamp, and "add" an appropriate - change to the checksum field of the packet. Note that the - checksum will still be end-to-end since it was not fully - recomputed. - - 3. Product Codes - - Certain "product codes" are potentially useful for checksuming - purposes. The following is a brief description of product codes - in the context of TCP. More general treatment can be found in - Avizienis [7] and probably other more recent works. - - The basic concept of this coding is that the message (packet) to - be sent is formed by transforming the original source message and - adding some "check" bits. By reading this and applying a - (possibly different) transformation, a receiver can reconstruct - - - 4 - - - - -Braden, Borman, & Partridge [Page 16] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - the original message and determine if it has been corrupted - during transmission. - - Mo Ms Mr - - ----- ----- ----- - | A | code | 7 | decode | A | - | B | ==> | 1 | ==> | B | - | C | | 4 | | C | - ----- |...| ----- - | 2 | check plus "valid" flag - ----- info - - Original Sent Reconstructed - - With product codes the transformation is Ms = K * Mo . That is, - the message sent is simply the product of the original message - Mo and some well known constant K . To decode, the received - Ms is divided by K which will yield Mr as the quotient and - 0 as the remainder if Mr is to be considered the same as Mo . - - The first problem is selecting a "good" value for K, the "check - factor". K must be relatively prime to the base chosen to - express the message. 
(Example: Binary messages with K - incorrectly chosen to be 8. This means that Ms looks exactly - like Mo except that three zeros have been appended. The only - way the message could look bad to a receiver dividing by 8 is if - the error occurred in one of those three bits.) - - For TCP the base R will be chosen to be 2**16. That is, every - 16-bit byte (word on the PDP-11) will be considered as a digit of - a big number and that number is the message. Thus, - - Mo = SIGMA [ Bi * (R**i)] , Bi is i-th byte - i=0 to N - - Ms = K * Mo - - Corrupting a single digit of Ms will yield Ms' = Ms +or- - C*(R**j) for some radix position j . The receiver will compute - Ms'/K = Mo +or- C(R**j)/K. Since R and K are relatively prime, - C*(R**j) cannot be any exact multiple of K. Therefore, the - division will result in a non-zero remainder which indicates that - Ms' is a corrupted version of Ms. As will be seen, a good - choice for K is (R**b - 1), for some b which is the "check - length" which controls the degree of detection to be had for - - - 5 - - - - -Braden, Borman, & Partridge [Page 17] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - burst errors which affect a string of digits (i.e., 16-bit bytes) - in the message. In fact b will be chosen to be 1, so K will - be 2**16 - 1 so that arithmetic operations will be simple. This - means that all bursts of 15 or fewer bits will be detected. - According to [7] this choice for b results in the following - expression for the fraction of undetected weight 2 errors: - - f = 16(k-1)/[32(16k-3) + (6/k)] where k is the message length. - - For large messages f approaches 3.125 per cent as k goes to - infinity. - - Multiple precision multiplication and division are normally quite - complex operations, especially on small machines which typically - lack even single precision multiply and divide operations. 
The - exception to this is exactly the case being dealt with here -- - the factor is 2**16 - 1 on machines with a word length of 16 - bits. The reason for this is due to the following identity: - - Q*(R**j) = Q, mod (R-1) 0 <= Q < R - - That is, any digit Q in the selected radix (0, 1, ... R-1) - multiplied by any power of the radix will have a remainder of Q - when divided by the radix minus 1. - - Example: In decimal R = 10. Pick Q = 6. - - 6 = 0 * 9 + 6 = 6, mod 9 - 60 = 6 * 9 + 6 = 6, mod 9 - 600 = 66 * 9 + 6 = 6, mod 9 etc. - - More to the point, rem(31415/9) = rem((30000+1000+400+10+5)/9) - = (3 mod 9) + (1 mod 9) + (4 mod 9) + (1 mod 9) + (5 mod 9) - = (3+1+4+1+5) mod 9 - = 14 mod 9 - = 5 - - So, the remainder of a number divided by the radix minus one can - be found by simply summing the digits of the number. Since the - radix in the TCP case has been chosen to be 2**16 and the check - factor is 2**16 - 1, a message can quickly be checked by summing - all of the 16-bit words (on a PDP-11), with carries being - end-around. If zero is the result, the message can be considered - valid. Thus, checking a product coded message is exactly the - same complexity as with the current TCP checksum! - - - 6 - - - - - -Braden, Borman, & Partridge [Page 18] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - In order to form Ms, the sender must multiply the multiple - precision "number" Mo by 2**16 - 1. Or, Ms = (2**16)Mo - Mo. - This is performed by shifting Mo one whole word's worth of - precision and subtracting Mo. Since carries must propagate - between digits, but it is only the current digit which is of - interest, one's complement arithmetic is used. - - (2**16)Mo = Mo0 + Mo1 + Mo2 + ... + MoX + 0 - - Mo = - ( Mo0 + Mo1 + ......... + MoX) - --------- ---------------------------------- - Ms = Ms0 + Ms1 + ... 
- MoX - - A loop which implements this function on a PDP-11 might look - like: - LOOP: MOV -2(R2),R0 ; Next byte of (2**16)Mo - SBC R0 ; Propagate carries from last SUB - SUB (R2)+,R0 ; Subtract byte of Mo - MOV R0,(R3)+ ; Store in Ms - SOB R1,LOOP ; Loop over entire message - ; 8 memory cycles per 16-bit byte - - Note that the coding procedure is not done in-place since it is - not systematic. In general the original copy, Mo, will have to - be retained by the sender for retransmission purposes and - therefore must remain readable. Thus the MOV R0,(R3)+ is - required which accounts for 2 of the 8 memory cycles per loop. - - The coding procedure will add exactly one 16-bit word to the - message since Ms < (2**16)Mo . This additional 16 bits will be - at the tail of the message, but may be moved into the defined - location in the TCP header immediately before transmission. The - receiver will have to undo this to put Ms back into standard - format before decoding the message. - - The code in the receiver for fully decoding the message may be - inferred by observing that any word in Ms contains the - difference between two successive words of Mo minus the carries - from the previous word, and the low order word contains minus the - low word of Mo. So the low order (i.e., rightmost) word of Mr is - just the negative of the low order byte of Ms. The next word of - Mr is the next word of Ms plus the just computed word of Mr - plus the carry from that previous computation. - - A slight refinement of the procedure is required in order to - protect against an all-zero message passing to the destination. - This will appear to have a valid checksum because Ms'/K = 0/K - - - 7 - - - - -Braden, Borman, & Partridge [Page 19] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - = 0 with 0 remainder. 
The refinement is to make the coding be - Ms = K*Mo + C where C is some arbitrary, well-known constant. - Adding this constant requires a second pass over the message, but - this will typically be very short since it can stop as soon as - carries stop propagating. Choosing C = 1 is sufficient in most - cases. - - The product code checksum must be evaluated in terms of the - desired properties P1 - P7. It has been shown that a factor of - two more machine cycles are consumed in computing or verifying a - product code checksum (P5 satisfied?). - - Although the code is not systematic, the checksum can be verified - quickly without decoding the message. If the Internet - Destination Address is located at the least significant end of - the packet (where the product code computation begins) then it is - possible for a gateway to decode only enough of the message to - see this field without having to decode the entire message. - Thus, P6 is at least partially satisfied. The algebraic - properties P1 through P4 are not satisfied, but only a small - amount of computation is needed to account for this -- the - message needs to be reformatted as previously mentioned. - - P7 is satisfied since the product code checksum can be - incrementally updated to account for an added word, although the - procedure is somewhat involved. Imagine that the original - message has two halves, H1 and H2. Thus, Mo = H1*(R**j) + H2. - The timestamp word is to be inserted between these halves to form - a modified Mo' = H1*(R**(j+1)) + T*(R**j) + H2. Since K has - been chosen to be R-1, the transmitted message Ms' = Mo'(R-1). - Then, - - Ms' = Ms*R + T(R-1)(R**j) + P2((R-1)**2) - - = Ms*R + T*(R**(j+1)) + T*(R**j) + P2*(R**2) - 2*P2*R - P2 - - Recalling that R is 2**16, the word size on the PDP-11, - multiplying by R means copying down one word in memory. So, - the first term of Ms' is simply the unmodified message copied - down one word.
The next term is the new data T added into the - Ms' being formed beginning at the (j+1)th word. The addition is - fairly easy here since after adding in T all that is left is - propagating the carry, and that can stop as soon as no carry is - produced. The other terms can be handled similarly. - - - 8 - - - - - - -Braden, Borman, & Partridge [Page 20] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - 4. More Complicated Codes - - There exists a wealth of theory on error detecting and correcting - codes. Peterson [6] is an excellent reference. Most of these - "CRC" schemes are designed to be implemented using a shift - register with a feedback network composed of exclusive-ORs. - Simulating such a logic circuit with a program would be too slow - to be useful unless some programming trick is discovered. - - One such trick has been proposed by Kirstein [8]. Basically, a - few bits (four or eight) of the current shift register state are - combined with bits from the input stream (from Mo) and the result - is used as an index to a table which yields the new shift - register state and, if the code is not systematic, bits for the - output stream (Ms). A trial coding of an especially "good" CRC - function using four-bit bytes showed this technique to be - about four times as slow as the current checksum function. This - was true for both the PDP-10 and PDP-11 machines. Of the - desirable properties listed above, CRC schemes satisfy only P3 - (It has an inverse.), and P6 (It is systematic.). Placement of - the checksum field in the packet is critical and the CRC cannot - be incrementally modified. - - Although the bulk of coding theory deals with binary codes, most - of the theory works if the alphabet contains q symbols, where - q is a power of a prime number.
For instance q taken as 2**16 - should make a great deal of the theory useful on a word-by-word - basis. - - 5. Outboard Processing - - When a function such as computing an involved checksum requires - extensive processing, one solution is to put that processing into - an outboard processor. In this way "encode message" and "decode - message" become single instructions which do not tax the main - host processor. The Digital Equipment Corporation VAX/780 - computer is equipped with special hardware for generating and - checking CRCs [13]. In general this is not a very good solution - since such a processor must be constructed for every different - host machine which uses TCP messages. - - It is conceivable that the gateway functions for a large host may - be performed entirely in an "Internet Frontend Machine". This - machine would be responsible for forwarding packets received - - - 9 - - - - - - -Braden, Borman, & Partridge [Page 21] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - either from the network(s) or from the Internet protocol modules - in the connected host, and for reassembling Internet fragments - into segments and passing these to the host. Another capability - of this machine would be to check the checksum so that the - segments given to the host are known to be valid at the time they - leave the frontend. Since computer cycles are assumed to be both - inexpensive and available in the frontend, this seems reasonable. - - The problem with attempting to validate checksums in the frontend - is that it destroys the end-to-end character of the checksum. If - anything, this is the most powerful feature of the TCP checksum! - There is a way to make the host-to-frontend link be covered by - the end-to-end checksum. A separate, small protocol must be - developed to cover this link. 
After having validated an incoming - packet from the network, the frontend would pass it to the host - saying "here is an Internet segment for you. Call it #123". The - host would save this segment, and send a copy back to the - frontend saying, "Here is what you gave me as #123. Is it OK?". - The frontend would then do a word-by-word comparison with the - first transmission, and tell the host either "Here is #123 - again", or "You did indeed receive #123 properly. Release it to - the appropriate module for further processing." - - The headers on the messages crossing the host-frontend link would - most likely be covered by a fairly strong checksum so that - information like which function is being performed and the - message reference numbers are reliable. These headers would be - quite short, maybe only sixteen bits, so the checksum could be - quite strong. The bulk of the message would not be checksumed of - course. - The reason this scheme reduces the computing burden on the host - is that all that is required in order to validate the message - using the end-to-end checksum is to send it back to the frontend - machine. In the case of the PDP-10, this requires only 0.5 - memory cycles per 16-bit byte of Internet message, and only a few - processor cycles to setup the required transfers. - - 6. Conclusions - - There is an ordering of checksum functions: first and simplest is - none at all which provides no error detection or correction. - Second, is sending a constant which is checked by the receiver. - This also is extremely weak. Third, the exclusive-OR of the data - may be sent. XOR takes the minimal amount of computer time to - generate and check, but is not a good checksum. A two's - complement sum of the data is somewhat better and takes no more - - - 10 - - - - -Braden, Borman, & Partridge [Page 22] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. 
Plummer - - computer time to compute. Fifth, is the one's complement sum - which is what is currently used by TCP. It is slightly more - expensive in terms of computer time. The next step is a product - code. The product code is strongly related to one's complement - sum, takes still more computer time to use, provides a bit more - protection against common hardware failures, but has some - objectionable properties. Next is a genuine CRC polynomial code, - used for checking purposes only. This is very expensive for a - program to implement. Finally, a full CRC error correcting and - detecting scheme may be used. - - For TCP and Internet applications the product code scheme is - viable. It suffers mainly in that messages must be (at least - partially) decoded by intermediate gateways in order that they - can be forwarded. Should product codes not be chosen as an - improved checksum, some slight modification to the existing - scheme might be possible. For instance the "add and rotate" - function used for paper tape by the PDP-6/10 group at the - Artificial Intelligence Laboratory at M.I.T. Project MAC [12] - could be useful if it can be proved that it is better than the - current scheme and that it can be computed efficiently on a - variety of machines. - - - - - - - - - - - - - - - - - - - - - - - 11 - - - - - - - - -Braden, Borman, & Partridge [Page 23] - -RFC 1071 Computing the Internet Checksum September 1988 - - - Internet Experiment Note 45 5 June 1978 - TCP Checksum Function Design William W. Plummer - - References - - [1] Cerf, V.G. and Kahn, Robert E., "A Protocol for Packet Network - Communications," IEEE Transactions on Communications, vol. - COM-22, No. 5, May 1974. - - [2] Kahn, Robert E., "The Organization of Computer Resources into - a Packet Radio Network", IEEE Transactions on Communications, - vol. COM-25, no. 1, pp. 169-178, January 1977. 
- - [3] Jacobs, Irwin, et al., "CPODA - A Demand Assignment Protocol - for SatNet", Fifth Data Communications Symposium, September - 27-9, 1977, Snowbird, Utah - - [4] Bolt Beranek and Newman, Inc. "Specifications for the - Interconnection of a Host and an IMP", Report 1822, January - 1976 edition. - - [5] Dean, Richard A., "Elements of Abstract Algebra", John Wyley - and Sons, Inc., 1966 - - [6] Peterson, W. Wesley, "Error Correcting Codes", M.I.T. Press - Cambridge MA, 4th edition, 1968. - - [7] Avizienis, Algirdas, "A Study of the Effectiveness of Fault- - Detecting Codes for Binary Arithmetic", Jet Propulsion - Laboratory Technical Report No. 32-711, September 1, 1965. - - [8] Kirstein, Peter, private communication - - [9] Cerf, V. G. and Postel, Jonathan B., "Specification of - Internetwork Transmission Control Program Version 3", - University of Southern California Information Sciences - Institute, January 1978. - - [10] Digital Equipment Corporation, "PDP-10 Reference Handbook", - 1970, pp. 114-5. - - [11] Swanson, Robert, "Understanding Cyclic Redundancy Codes", - Computer Design, November, 1975, pp. 93-99. - - [12] Clements, Robert C., private communication. - - [13] Conklin, Peter F., and Rodgers, David P., "Advanced - Minicomputer Designed by Team Evaluation of Hardware/Software - Tradeoffs", Computer Design, April 1978, pp. 136-7. - - - 12 - - - - -Braden, Borman, & Partridge [Page 24] - diff --git a/kernel/picotcp/RFC/rfc1072.txt b/kernel/picotcp/RFC/rfc1072.txt deleted file mode 100644 index 6ed8d5b..0000000 --- a/kernel/picotcp/RFC/rfc1072.txt +++ /dev/null @@ -1,893 +0,0 @@ -Network Working Group V. Jacobson -Request for Comments: 1072 LBL - R. Braden - ISI - October 1988 - - - TCP Extensions for Long-Delay Paths - - -Status of This Memo - - This memo proposes a set of extensions to the TCP protocol to provide - efficient operation over a path with a high bandwidth*delay product. 
- These extensions are not proposed as an Internet standard at this - time. Instead, they are intended as a basis for further - experimentation and research on transport protocol performance. - Distribution of this memo is unlimited. - -1. INTRODUCTION - - Recent work on TCP performance has shown that TCP can work well over - a variety of Internet paths, ranging from 800 Mbit/sec I/O channels - to 300 bit/sec dial-up modems [Jacobson88]. However, there is still - a fundamental TCP performance bottleneck for one transmission regime: - paths with high bandwidth and long round-trip delays. The - significant parameter is the product of bandwidth (bits per second) - and round-trip delay (RTT in seconds); this product is the number of - bits it takes to "fill the pipe", i.e., the amount of unacknowledged - data that TCP must handle in order to keep the pipeline full. TCP - performance problems arise when this product is large, e.g., - significantly exceeds 10**5 bits. We will refer to an Internet path - operating in this region as a "long, fat pipe", and a network - containing this path as an "LFN" (pronounced "elephan(t)"). - - High-capacity packet satellite channels (e.g., DARPA's Wideband Net) - are LFN's. For example, a T1-speed satellite channel has a - bandwidth*delay product of 10**6 bits or more; this corresponds to - 100 outstanding TCP segments of 1200 bytes each! Proposed future - terrestrial fiber-optical paths will also fall into the LFN class; - for example, a cross-country delay of 30 ms at a DS3 bandwidth - (45Mbps) also exceeds 10**6 bits. - - Clever algorithms alone will not give us good TCP performance over - LFN's; it will be necessary to actually extend the protocol. This - RFC proposes a set of TCP extensions for this purpose. 
- - There are three fundamental problems with the current TCP over LFN - - - -Jacobson & Braden [Page 1] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - paths: - - - (1) Window Size Limitation - - The TCP header uses a 16 bit field to report the receive window - size to the sender. Therefore, the largest window that can be - used is 2**16 = 65K bytes. (In practice, some TCP - implementations will "break" for windows exceeding 2**15, - because of their failure to do unsigned arithmetic). - - To circumvent this problem, we propose a new TCP option to allow - windows larger than 2**16. This option will define an implicit - scale factor, to be used to multiply the window size value found - in a TCP header to obtain the true window size. - - - (2) Cumulative Acknowledgments - - Any packet losses in an LFN can have a catastrophic effect on - throughput. This effect is exaggerated by the simple cumulative - acknowledgment of TCP. Whenever a segment is lost, the - transmitting TCP will (eventually) time out and retransmit the - missing segment. However, the sending TCP has no information - about segments that may have reached the receiver and been - queued because they were not at the left window edge, so it may - be forced to retransmit these segments unnecessarily. - - We propose a TCP extension to implement selective - acknowledgements. By sending selective acknowledgments, the - receiver of data can inform the sender about all segments that - have arrived successfully, so the sender need retransmit only - the segments that have actually been lost. - - Selective acknowledgments have been included in a number of - experimental Internet protocols -- VMTP [Cheriton88], NETBLT - [Clark87], and RDP [Velten84]. 
There is some empirical evidence - in favor of selective acknowledgments -- simple experiments with - RDP have shown that disabling the selective acknowledgment - facility greatly increases the number of retransmitted segments - over a lossy, high-delay Internet path [Partridge87]. A - simulation study of a simple form of selective acknowledgments - added to the ISO transport protocol TP4 also showed promise of - performance improvement [NBS85]. - - - - - - - -Jacobson & Braden [Page 2] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - (3) Round Trip Timing - - TCP implements reliable data delivery by measuring the RTT, - i.e., the time interval between sending a segment and receiving - an acknowledgment for it, and retransmitting any segments that - are not acknowledged within some small multiple of the average - RTT. Experience has shown that accurate, current RTT estimates - are necessary to adapt to changing traffic conditions and, - without them, a busy network is subject to an instability known - as "congestion collapse" [Nagle84]. - - In part because TCP segments may be repacketized upon - retransmission, and in part because of complications due to the - cumulative TCP acknowledgement, measuring a segment's RTT may - involve a non-trivial amount of computation in some - implementations. To minimize this computation, some - implementations time only one segment per window. While this - yields an adequate approximation to the RTT for small windows - (e.g., a 4 to 8 segment Arpanet window), for an LFN (e.g., 100 - segment Wideband Network windows) it results in an unacceptably - poor RTT estimate. - - In the presence of errors, the problem becomes worse. Zhang - [Zhang86], Jain [Jain86] and Karn [Karn87] have shown that it is - not possible to accumulate reliable RTT estimates if - retransmitted segments are included in the estimate.
Since a - full window of data will have been transmitted prior to a - retransmission, all of the segments in that window will have to - be ACKed before the next RTT sample can be taken. This means at - least an additional window's worth of time between RTT - measurements and, as the error rate approaches one per window of - data (e.g., 10**-6 errors per bit for the Wideband Net), it - becomes effectively impossible to obtain an RTT measurement. - - We propose a TCP "echo" option that allows each segment to carry - its own timestamp. This will allow every segment, including - retransmissions, to be timed at negligible computational cost. - - - In designing new TCP options, we must pay careful attention to - interoperability with existing implementations. The only TCP option - defined to date is an "initial option", i.e., it may appear only on a - SYN segment. It is likely that most implementations will properly - ignore any options in the SYN segment that they do not understand, so - new initial options should not cause a problem. On the other hand, - we fear that receiving unexpected non-initial options may cause some - TCP's to crash. - - - - -Jacobson & Braden [Page 3] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - Therefore, in each of the extensions we propose, non-initial options - may be sent only if an exchange of initial options has indicated that - both sides understand the extension. This approach will also allow a - TCP to determine when the connection opens how big a TCP header it - will be sending. - -2. TCP WINDOW SCALE OPTION - - The obvious way to implement a window scale factor would be to define - a new TCP option that could be included in any segment specifying a - window. The receiver would include it in every acknowledgment - segment, and the sender would interpret it. Unfortunately, this - simple approach would not work. 
The sender must reliably know the - receiver's current scale factor, but a TCP option in an - acknowledgement segment will not be delivered reliably (unless the - ACK happens to be piggy-backed on data). - - However, SYN segments are always sent reliably, suggesting that each - side may communicate its window scale factor in an initial TCP - option. This approach has a disadvantage: the scale must be - established when the connection is opened, and cannot be changed - thereafter. However, other alternatives would be much more - complicated, and we therefore propose a new initial option called - Window Scale. - -2.1 Window Scale Option - - This three-byte option may be sent in a SYN segment by a TCP (1) - to indicate that it is prepared to do both send and receive window - scaling, and (2) to communicate a scale factor to be applied to - its receive window. The scale factor is encoded logarithmically, - as a power of 2 (presumably to be implemented by binary shifts). - - Note: the window in the SYN segment itself is never scaled. - - TCP Window Scale Option: - - Kind: 3 - - +---------+---------+---------+ - | Kind=3 |Length=3 |shift.cnt| - +---------+---------+---------+ - - Here shift.cnt is the number of bits by which the receiver right- - shifts the true receive-window value, to scale it into a 16-bit - value to be sent in TCP header (this scaling is explained below). - The value shift.cnt may be zero (offering to scale, while applying - a scale factor of 1 to the receive window). - - - -Jacobson & Braden [Page 4] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - This option is an offer, not a promise; both sides must send - Window Scale options in their SYN segments to enable window - scaling in either direction. 
- -2.2 Using the Window Scale Option - - A model implementation of window scaling is as follows, using the - notation of RFC-793 [Postel81]: - - * The send-window (SND.WND) and receive-window (RCV.WND) sizes - in the connection state block and in all sequence space - calculations are expanded from 16 to 32 bits. - - * Two window shift counts are added to the connection state: - snd.scale and rcv.scale. These are shift counts to be - applied to the incoming and outgoing windows, respectively. - The precise algorithm is shown below. - - * All outgoing SYN segments are sent with the Window Scale - option, containing a value shift.cnt = R that the TCP would - like to use for its receive window. - - * Snd.scale and rcv.scale are initialized to zero, and are - changed only during processing of a received SYN segment. If - the SYN segment contains a Window Scale option with shift.cnt - = S, set snd.scale to S and set rcv.scale to R; otherwise, - both snd.scale and rcv.scale are left at zero. - - * The window field (SEG.WND) in the header of every incoming - segment, with the exception of SYN segments, will be left- - shifted by snd.scale bits before updating SND.WND: - - SND.WND = SEG.WND << snd.scale - - (assuming the other conditions of RFC793 are met, and using - the "C" notation "<<" for left-shift). - - * The window field (SEG.WND) of every outgoing segment, with - the exception of SYN segments, will have been right-shifted - by rcv.scale bits: - - SEG.WND = RCV.WND >> rcv.scale. - - - TCP determines if a data segment is "old" or "new" by testing if - its sequence number is within 2**31 bytes of the left edge of the - window. If not, the data is "old" and discarded. To insure that - new data is never mistakenly considered old and vice-versa, the - - - -Jacobson & Braden [Page 5] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - left edge of the sender's window has to be at least 2**31 away - from the right edge of the receiver's window. 
Similarly with the - sender's right edge and receiver's left edge. Since the right and - left edges of either the sender's or receiver's window differ by - the window size, and since the sender and receiver windows can be - out of phase by at most the window size, the above constraints - imply that 2 * the max window size must be less than 2**31, or - - max window < 2**30 - - Since the max window is 2**S (where S is the scaling shift count) - times at most 2**16 - 1 (the maximum unscaled window), the maximum - window is guaranteed to be < 2**30 if S <= 14. Thus, the shift - count must be limited to 14. (This allows windows of 2**30 = 1 - Gbyte.) If a Window Scale option is received with a shift.cnt - value exceeding 14, the TCP should log the error but use 14 - instead of the specified value. - - -3. TCP SELECTIVE ACKNOWLEDGMENT OPTIONS - - To minimize the impact on the TCP protocol, the selective - acknowledgment extension uses the form of two new TCP options. The - first is an enabling option, "SACK-permitted", that may be sent in a - SYN segment to indicate that the SACK option may be used once the - connection is established. The other is the SACK option itself, - which may be sent over an established connection once permission has - been given by SACK-permitted. - - The SACK option is to be included in a segment sent from a TCP that - is receiving data to the TCP that is sending that data; we will refer - to these TCP's as the data receiver and the data sender, - respectively. We will consider a particular simplex data flow; any - data flowing in the reverse direction over the same connection can be - treated independently. - -3.1 SACK-Permitted Option - - This two-byte option may be sent in a SYN by a TCP that has been - extended to receive (and presumably process) the SACK option once - the connection has opened.
- - - - - - - - - - -Jacobson & Braden [Page 6] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - TCP Sack-Permitted Option: - - Kind: 4 - - +---------+---------+ - | Kind=4 | Length=2| - +---------+---------+ - -3.2 SACK Option - - The SACK option is to be used to convey extended acknowledgment - information over an established connection. Specifically, it is - to be sent by a data receiver to inform the data transmitter of - non-contiguous blocks of data that have been received and queued. - The data receiver is awaiting the receipt of data in later - retransmissions to fill the gaps in sequence space between these - blocks. At that time, the data receiver will acknowledge the data - normally by advancing the left window edge in the Acknowledgment - Number field of the TCP header. - - It is important to understand that the SACK option will not change - the meaning of the Acknowledgment Number field, whose value will - still specify the left window edge, i.e., one byte beyond the last - sequence number of fully-received data. The SACK option is - advisory; if it is ignored, TCP acknowledgments will continue to - function as specified in the protocol. - - However, SACK will provide additional information that the data - transmitter can use to optimize retransmissions. The TCP data - receiver may include the SACK option in an acknowledgment segment - whenever it has data that is queued and unacknowledged. Of - course, the SACK option may be sent only when the TCP has received - the SACK-permitted option in the SYN segment for that connection. 
- - TCP SACK Option: - - Kind: 5 - - Length: Variable - - - +--------+--------+--------+--------+--------+--------+...---+ - | Kind=5 | Length | Relative Origin | Block Size | | - +--------+--------+--------+--------+--------+--------+...---+ - - - This option contains a list of the blocks of contiguous sequence - space occupied by data that has been received and queued within - - - -Jacobson & Braden [Page 7] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - the window. Each block is contiguous and isolated; that is, the - octets just below the block, - - Acknowledgment Number + Relative Origin -1, - - and just above the block, - - Acknowledgment Number + Relative Origin + Block Size, - - have not been received. - - Each contiguous block of data queued at the receiver is defined in - the SACK option by two 16-bit integers: - - - * Relative Origin - - This is the first sequence number of this block, relative to - the Acknowledgment Number field in the TCP header (i.e., - relative to the data receiver's left window edge). - - - * Block Size - - This is the size in octets of this block of contiguous data. - - - A SACK option that specifies n blocks will have a length of 4*n+2 - octets, so the 44 bytes available for TCP options can specify a - maximum of 10 blocks. Of course, if other TCP options are - introduced, they will compete for the 44 bytes, and the limit of - 10 may be reduced in particular segments. - - There is no requirement on the order in which blocks can appear in - a single SACK option. - - Note: requiring that the blocks be ordered would allow a - slightly more efficient algorithm in the transmitter; however, - this does not seem to be an important optimization. - -3.3 SACK with Window Scaling - - If window scaling is in effect, then 16 bits may not be sufficient - for the SACK option fields that define the origin and length of a - block. 
There are two possible ways to handle this: - - (1) Expand the SACK origin and length fields to 24 or 32 bits. - - - - -Jacobson & Braden [Page 8] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - (2) Scale the SACK fields by the same factor as the window. - - - The first alternative would significantly reduce the number of - blocks possible in a SACK option; therefore, we have chosen the - second alternative, scaling the SACK information as well as the - window. - - Scaling the SACK information introduces some loss of precision, - since a SACK option must report queued data blocks whose origins - and lengths are multiples of the window scale factor rcv.scale. - These reported blocks must be equal to or smaller than the actual - blocks of queued data. - - Specifically, suppose that the receiver has a contiguous block of - queued data that occupies sequence numbers L, L+1, ... L+N-1, and - that the window scale factor is S = rcv.scale. Then the - corresponding block that will be reported in a SACK option will - be: - - Relative Origin = int((L+S-1)/S) - - Block Size = int((L+N)/S) - (Relative Origin) - - where the function int(x) returns the greatest integer contained - in x. - - The resulting loss of precision is not a serious problem for the - sender. If the data-sending TCP keeps track of the boundaries of - all segments in its retransmission queue, it will generally be - able to infer from the imprecise SACK data which full segments - don't need to be retransmitted. This will fail only if S is - larger than the maximum segment size, in which case some segments - may be retransmitted unnecessarily. If the sending TCP does not - keep track of transmitted segment boundaries, the imprecision of - the scaled SACK quantities will only result in retransmitting a - small amount of unneeded sequence space. 
On the average, the data - sender will unnecessarily retransmit J*S bytes of the sequence - space for each SACK received; here J is the number of blocks - reported in the SACK, and S = snd.scale. - -3.4 SACK Option Examples - - Assume the left window edge is 5000 and that the data transmitter - sends a burst of 8 segments, each containing 500 data bytes. - Unless specified otherwise, we assume that the scale factor S = 1. - - - - - -Jacobson & Braden [Page 9] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - Case 1: The first 4 segments are received but the last 4 are - dropped. - - The data receiver will return a normal TCP ACK segment - acknowledging sequence number 7000, with no SACK option. - - - Case 2: The first segment is dropped but the remaining 7 are - received. - - The data receiver will return a TCP ACK segment that - acknowledges sequence number 5000 and contains a SACK option - specifying one block of queued data: - - Relative Origin = 500; Block Size = 3500 - - - Case 3: The 2nd, 4th, 6th, and 8th (last) segments are - dropped. - - The data receiver will return a TCP ACK segment that - acknowledges sequence number 5500 and contains a SACK option - specifying the 3 blocks: - - Relative Origin = 500; Block Size = 500 - Relative Origin = 1500; Block Size = 500 - Relative Origin = 2500; Block Size = 500 - - - Case 4: Same as Case 3, except Scale Factor S = 16. - - The SACK option would specify the 3 scaled blocks: - - Relative Origin = 32; Block Size = 30 - Relative Origin = 94; Block Size = 31 - Relative Origin = 157; Block Size = 30 - - These three reported blocks have sequence numbers 512 through - 991, 1504 through 1999, and 2512 through 2992, respectively. - - -3.5 Generating the SACK Option - - Let us assume that the data receiver maintains a queue of valid - segments that it has neither passed to the user nor acknowledged - because of earlier missing data, and that this queue is ordered by - starting sequence number. 
Computation of the SACK option can be - done with one pass down this queue. Segments that occupy - - - -Jacobson & Braden [Page 10] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - contiguous sequence space are aggregated into a single SACK block, - and each gap in the sequence space (except a gap that is - terminated by the right window edge) triggers the start of a new - SACK block. If this algorithm defines more than 10 blocks, only - the first 10 can be included in the option. - -3.6 Interpreting the SACK Option - - The data transmitter is assumed to have a retransmission queue - that contains the segments that have been transmitted but not yet - acknowledged, in sequence-number order. If the data transmitter - performs re-packetization before retransmission, the block - boundaries in a SACK option that it receives may not fall on - boundaries of segments in the retransmission queue; however, this - does not pose a serious difficulty for the transmitter. - - Let us suppose that for each segment in the retransmission queue - there is a (new) flag bit "ACK'd", to be used to indicate that - this particular segment has been entirely acknowledged. When a - segment is first transmitted, it will be entered into the - retransmission queue with its ACK'd bit off. If the ACK'd bit is - subsequently turned on (as the result of processing a received - SACK option), the data transmitter will skip this segment during - any later retransmission. However, the segment will not be - dequeued and its buffer freed until the left window edge is - advanced over it. - - When an acknowledgment segment arrives containing a SACK option, - the data transmitter will turn on the ACK'd bits for segments that - have been selectively acknowledged. More specifically, for each - block in the SACK option, the data transmitter will turn on the - ACK'd flags for all segments in the retransmission queue that are - wholly contained within that block.
This requires straightforward - sequence number comparisons. - - -4. TCP ECHO OPTIONS - - A simple method for measuring the RTT of a segment would be: the - sender places a timestamp in the segment and the receiver returns - that timestamp in the corresponding ACK segment. When the ACK segment - arrives at the sender, the difference between the current time and - the timestamp is the RTT. To implement this timing method, the - receiver must simply reflect or echo selected data (the timestamp) - from the sender's segments. This idea is the basis of the "TCP Echo" - and "TCP Echo Reply" options. - - - - - -Jacobson & Braden [Page 11] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - -4.1 TCP Echo and TCP Echo Reply Options - - TCP Echo Option: - - Kind: 6 - - Length: 6 - - +--------+--------+--------+--------+--------+--------+ - | Kind=6 | Length | 4 bytes of info to be echoed | - +--------+--------+--------+--------+--------+--------+ - - This option carries four bytes of information that the receiving TCP - may send back in a subsequent TCP Echo Reply option (see below). A - TCP may send the TCP Echo option in any segment, but only if a TCP - Echo option was received in a SYN segment for the connection. - - When the TCP echo option is used for RTT measurement, it will be - included in data segments, and the four information bytes will define - the time at which the data segment was transmitted in any format - convenient to the sender. - - TCP Echo Reply Option: - - Kind: 7 - - Length: 6 - - +--------+--------+--------+--------+--------+--------+ - | Kind=7 | Length | 4 bytes of echoed info | - +--------+--------+--------+--------+--------+--------+ - - - A TCP that receives a TCP Echo option containing four information - bytes will return these same bytes in a TCP Echo Reply option. - - This TCP Echo Reply option must be returned in the next segment - (e.g., an ACK segment) that is sent. 
If more than one Echo option is - received before a reply segment is sent, the TCP must choose only one - of the options to echo, ignoring the others; specifically, it must - choose the newest segment with the oldest sequence number (see next - section.) - - To use the TCP Echo and Echo Reply options, a TCP must send a TCP - Echo option in its own SYN segment and receive a TCP Echo option in a - SYN segment from the other TCP. A TCP that does not implement the - TCP Echo or Echo Reply options must simply ignore any TCP Echo - options it receives. However, a TCP should not receive one of these - - - -Jacobson & Braden [Page 12] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - options in a non-SYN segment unless it included a TCP Echo option in - its own SYN segment. - -4.2 Using the Echo Options - - If we wish to use the Echo/Echo Reply options for RTT measurement, we - have to define what the receiver does when there is not a one-to-one - correspondence between data and ACK segments. Assuming that we want - to minimize the state kept in the receiver (i.e., the number of - unprocessed Echo options), we can plan on a receiver remembering the - information value from at most one Echo between ACKs. There are - three situations to consider: - - (A) Delayed ACKs. - - Many TCP's acknowledge only every Kth segment out of a group of - segments arriving within a short time interval; this policy is - known generally as "delayed ACK's". The data-sender TCP must - measure the effective RTT, including the additional time due to - delayed ACK's, or else it will retransmit unnecessarily. Thus, - when delayed ACK's are in use, the receiver should reply with - the Echo option information from the earliest unacknowledged - segment. - - (B) A hole in the sequence space (segment(s) have been lost). 
- - The sender will continue sending until the window is filled, and - we may be generating ACKs as these out-of-order segments arrive - (e.g., for the SACK information or to aid "fast retransmit"). - An Echo Reply option will tell the sender the RTT of some - recently sent segment (since the ACK can only contain the - sequence number of the hole, the sender may not be able to - determine which segment, but that doesn't matter). If the loss - was due to congestion, these RTTs may be particularly valuable - to the sender since they reflect the network characteristics - immediately after the congestion. - - (C) A filled hole in the sequence space. - - The segment that fills the hole represents the most recent - measurement of the network characteristics. On the other hand, - an RTT computed from an earlier segment would probably include - the sender's retransmit time-out, badly biasing the sender's - average RTT estimate. - - - Case (A) suggests the receiver should remember and return the Echo - option information from the oldest unacknowledged segment. Cases (B) - - - -Jacobson & Braden [Page 13] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - and (C) suggest that the option should come from the most recent - unacknowledged segment. An algorithm that covers all three cases is - for the receiver to return the Echo option information from the - newest segment with the oldest sequence number, as specified earlier. - - A model implementation of these options is as follows. - - - (1) Receiver Implementation - - A 32-bit slot for Echo option data, rcv.echodata, is added to - the receiver connection state, together with a flag, - rcv.echopresent, that indicates whether there is anything in the - slot. When the receiver generates a segment, it checks - rcv.echopresent and, if it is set, adds an echo-reply option - containing rcv.echodata to the outgoing segment then clears - rcv.echopresent. 
- - If an incoming segment is in the window and contains an echo - option, the receiver checks rcv.echopresent. If it isn't set, - the value of the echo option is copied to rcv.echodata and - rcv.echopresent is set. If rcv.echopresent is already set, the - receiver checks whether the segment is at the left edge of the - window. If so, the segment's echo option value is copied to - rcv.echodata (this is situation (C) above). Otherwise, the - segment's echo option is ignored. - - - (2) Sender Implementation - - The sender's connection state has a single flag bit, - snd.echoallowed, added. If snd.echoallowed is set or if the - segment contains a SYN, the sender is free to add a TCP Echo - option (presumably containing the current time in some units - convenient to the sender) to every outgoing segment. - - Snd.echoallowed should be set if a SYN is received with a TCP - Echo option (presumably, a host that implements the option will - attempt to use it to time the SYN segment). - - -5. CONCLUSIONS AND ACKNOWLEDGMENTS - -We have proposed five new TCP options for scaled windows, selective -acknowledgments, and round-trip timing, in order to provide efficient -operation over large-bandwidth*delay-product paths. These extensions -are designed to provide compatible interworking with TCP's that do not -implement the extensions. - - - -Jacobson & Braden [Page 14] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - -The Window Scale option was originally suggested by Mike St. Johns of -USAF/DCA. The present form of the option was suggested by Mike Karels -of UC Berkeley in response to a more cumbersome scheme proposed by Van -Jacobson. Gerd Beling of FGAN (West Germany) contributed the initial -definition of the SACK option. - -All three options have evolved through discussion with the End-to-End -Task Force, and the authors are grateful to the other members of the -Task Force for their advice and encouragement. - -6. 
REFERENCES - - [Cheriton88] Cheriton, D., "VMTP: Versatile Message Transaction - Protocol", RFC 1045, Stanford University, February 1988. - - [Jain86] Jain, R., "Divergence of Timeout Algorithms for Packet - Retransmissions", Proc. Fifth Phoenix Conf. on Comp. and Comm., - Scottsdale, Arizona, March 1986. - - [Karn87] Karn, P. and C. Partridge, "Estimating Round-Trip Times - in Reliable Transport Protocols", Proc. SIGCOMM '87, Stowe, VT, - August 1987. - - [Clark87] Clark, D., Lambert, M., and L. Zhang, "NETBLT: A Bulk - Data Transfer Protocol", RFC 998, MIT, March 1987. - - [Nagle84] Nagle, J., "Congestion Control in IP/TCP - Internetworks", RFC 896, FACC, January 1984. - - [NBS85] Colella, R., Aronoff, R., and K. Mills, "Performance - Improvements for ISO Transport", Ninth Data Comm Symposium, - published in ACM SIGCOMM Comp Comm Review, vol. 15, no. 5, - September 1985. - - [Partridge87] Partridge, C., "Private Communication", February - 1987. - - [Postel81] Postel, J., "Transmission Control Protocol - DARPA - Internet Program Protocol Specification", RFC 793, DARPA, - September 1981. - - [Velten84] Velten, D., Hinden, R., and J. Sax, "Reliable Data - Protocol", RFC 908, BBN, July 1984. - - [Jacobson88] Jacobson, V., "Congestion Avoidance and Control", to - be presented at SIGCOMM '88, Stanford, CA., August 1988. - - [Zhang86] Zhang, L., "Why TCP Timers Don't Work Well", Proc. - - - -Jacobson & Braden [Page 15] - -RFC 1072 TCP Extensions for Long-Delay Paths October 1988 - - - SIGCOMM '86, Stowe, Vt., August 1986. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Jacobson & Braden [Page 16] - diff --git a/kernel/picotcp/RFC/rfc1106.txt b/kernel/picotcp/RFC/rfc1106.txt deleted file mode 100644 index 95fb0f5..0000000 --- a/kernel/picotcp/RFC/rfc1106.txt +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - -Network Working Group R. 
Fox -Request for Comments: 1106 Tandem - June 1989 - - - TCP Big Window and Nak Options - -Status of this Memo - - This memo discusses two extensions to the TCP protocol to provide a - more efficient operation over a network with a high bandwidth*delay - product. The extensions described in this document have been - implemented and shown to work using resources at NASA. This memo - describes an Experimental Protocol, these extensions are not proposed - as an Internet standard, but as a starting point for further - research. Distribution of this memo is unlimited. - -Abstract - - Two extensions to the TCP protocol are described in this RFC in order - to provide a more efficient operation over a network with a high - bandwidth*delay product. The main issue that still needs to be - solved is congestion versus noise. This issue is touched on in this - memo, but further research is still needed on the applicability of - the extensions in the Internet as a whole infrastructure and not just - high bandwidth*delay product networks. Even with this outstanding - issue, this document does describe the use of these options in the - isolated satellite network environment to help facilitate more - efficient use of this special medium to help off load bulk data - transfers from links needed for interactive use. - -1. Introduction - - Recent work on TCP has shown great performance gains over a variety - of network paths [1]. However, these changes still do not work well - over network paths that have a large round trip delay (satellite with - a 600 ms round trip delay) or a very large bandwidth - (transcontinental DS3 line). These two networks exhibit a higher - bandwidth*delay product, over 10**6 bits, than the 10**5 bits that - TCP is currently limited to. This high bandwidth*delay product - refers to the amount of data that may be unacknowledged so that all - of the networks bandwidth is being utilized by TCP. 
This may also be - referred to as "filling the pipe" [2] so that the sender of data can - always put data onto the network and the receiver will always have - something to read, and neither end of the connection will be forced - to wait for the other end. - - After the last batch of algorithm improvements to TCP, performance - - - -Fox [Page 1] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - over high bandwidth*delay networks is still very poor. It appears - that no algorithm changes alone will make any significant - improvements over high bandwidth*delay networks, but will require an - extension to the protocol itself. This RFC discusses two possible - options to TCP for this purpose. - - The two options implemented and discussed in this RFC are: - - 1. NAKs - - This extension allows the receiver of data to inform the sender - that a packet of data was not received and needs to be resent. - This option proves to be useful over any network path (both high - and low bandwidth*delay type networks) that experiences periodic - errors such as lost packets, noisy links, or dropped packets due - to congestion. The information conveyed by this option is - advisory and if ignored, does not have any effect on TCP what so - ever. - - 2. Big Windows - - This option will give a method of expanding the current 16 bit (64 - Kbytes) TCP window to 32 bits of which 30 bits (over 1 gigabytes) - are allowed for the receive window. (The maximum window size - allowed in TCP due to the requirement of TCP to detect old data - versus new data. For a good explanation please see [2].) No - changes are required to the standard TCP header [6]. The 16 bit - field in the TCP header that is used to convey the receive window - will remain unchanged. The 32 bit receive window is achieved - through the use of an option that contains the upper half of the - window. It is this option that is necessary to fill large data - pipes such as a satellite link. 
- - This RFC is broken up into the following sections: section 2 will - discuss the operation of the NAK option in greater detail, section 3 - will discuss the big window option in greater detail. Section 4 will - discuss other effects of the big windows and nak feature when used - together. Included in this section will be a brief discussion on the - effects of congestion versus noise to TCP and possible options for - satellite networks. Section 5 will be a conclusion with some hints - as to what future development may be done at NASA, and then an - appendix containing some test results is included. - -2. NAK Option - - Any packet loss in a high bandwidth*delay network will have a - catastrophic effect on throughput because of the simple - acknowledgement of TCP. TCP always acks the stream of data that has - - - -Fox [Page 2] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - successfully been received and tells the sender the next byte of data - of the stream that is expected. If a packet is lost and succeeding - packets arrive the current protocol has no way of telling the sender - that it missed one packet but received following packets. TCP - currently resends all of the data over again, after a timeout or the - sender suspects a lost packet due to a duplicate ack algorithm [1], - until the receiver receives the lost packet and can then ack the lost - packet as well as succeeding packets received. On a normal low - bandwidth*delay network this effect is minimal if the timeout period - is set short enough. However, on a long delay network such as a T1 - satellite channel this is catastrophic because by the time the lost - packet can be sent and the ack returned the TCP window would have - been exhausted and both the sender and receiver would be temporarily - stalled waiting for the packet and ack to fully travel the data pipe. - This causes the pipe to become empty and requires the sender to - refill the pipe after the ack is received. 
This will cause a minimum - of 3*X bandwidth loss, where X is the one way delay of the medium and - may be much higher depending on the size of the timeout period and - bandwidth*delay product. Its 1X for the packet to be resent, 1X for - the ack to be received and 1X for the next packet being sent to reach - the destination. This calculation assumes that the window size is - much smaller than the pipe size (window = 1/2 data pipe or 1X), which - is the typical case with the current TCP window limitation over long - delay networks such as a T1 satellite link. - - An attempt to reduce this wasted bandwidth from 3*X was introduced in - [1] by having the sender resend a packet after it notices that a - number of consecutively received acks completely acknowledges already - acknowledged data. On a typical network this will reduce the lost - bandwidth to almost nil, since the packet will be resent before the - TCP window is exhausted and with the data pipe being much smaller - than the TCP window, the data pipe will not become empty and no - bandwidth will be lost. On a high delay network the reduction of - lost bandwidth is minimal such that lost bandwidth is still - significant. On a very noisy satellite, for instance, the lost - bandwidth is very high (see appendix for some performance figures) - and performance is very poor. - - There are two methods of informing the sender of lost data. - Selective acknowledgements and NAKS. Selective acknowledgements have - been the object of research in a number of experimental protocols - including VMTP [3], NETBLT [4], and SatFTP [5]. The idea behind - selective acks is that the receiver tells the sender which pieces it - received so that the sender can resend the data not acked but already - sent once. NAKs on the other hand, tell the sender that a particular - packet of data needs to be resent. - - There are a couple of disadvantages of selective acks. 
Namely, in - - - -Fox [Page 3] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - some of the protocols mentioned above, the receiver waits a certain - time before sending the selective ack so that acks may be bundled up. - This delay can cause some wasted bandwidth and requires more complex - state information than the simple nak. Even if the receiver doesn't - bundle up the selective acks but sends them as it notices that - packets have been lost, more complex state information is needed to - determine which packets have been acked and which packets need to be - resent. With naks, only the immediate data needed to move the left - edge of the window is naked, thus almost completely eliminating all - state information. - - The selective ack has one advantage over naks. If the link is very - noisy and packets are being lost close together, then the sender will - find out about all of the missing data at once and can send all of - the missing data out immediately in an attempt to move the left - window edge in the acknowledge number of the TCP header, thus keeping - the data pipe flowing. Whereas with naks, the sender will be - notified of lost packets one at a time and this will cause the sender - to process extra packets compared to selective acks. However, - empirical studies have shown that most lost packets occur far enough - apart that the advantage of selective acks over naks is rarely seen. - Also, if naks are sent out as soon as a packet has been determined - lost, then the advantage of selective acks becomes no more than - possibly a more aesthetic algorithm for handling lost data, but - offers no gains over naks as described in this paper. It is for this - reason that the simplicity of naks was chosen over selective acks for - the current implementation.
- -2.1 Implementation details - - When the receiver of data notices a gap between the expected sequence - number and the actual sequence number of the packet received, the - receiver can assume that the data between the two sequence numbers is - either going to arrive late or is lost forever. Since the receiver - can not distinguish between the two events a nak should be sent in - the TCP option field. Naking a packet still destined to arrive has - the effect of causing the sender to resend the packet, wasting one - packets worth of bandwidth. Since this event is fairly rare, the - lost bandwidth is insignificant as compared to that of not sending a - nak when the packet is not going to arrive. The option will take the - form as follows: - - +========+=========+=========================+================+ - +option= + length= + sequence number of + number of + - + A + 7 + first byte being naked + segments naked + - +========+=========+=========================+================+ - - This option contains the first sequence number not received and a - - - -Fox [Page 4] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - count of how many segments of bytes needed to be resent, where - segments is the size of the current TCP MSS being used for the - connection. Since a nak is an advisory piece of information, the - sending of a nak is unreliable and no means for retransmitting a nak - is provided at this time. - - When the sender of data receives the option it may either choose to - do nothing or it will resend the missing data immediately and then - continue sending data where it left off before receiving the nak. - The receiver will keep track of the last nak sent so that it will not - repeat the same nak. If it were to repeat the same nak the protocol - could get into the mode where on every reception of data the receiver - would nak the first missing data frame. 
Since the data pipe may be - very large by the time the first nak is read and responded to by the - sender, many naks would have been sent by the receiver. Since the - sender does not know that the naks are repetitious it will resend the - data each time, thus wasting the network bandwidth with useless - retransmissions of the same piece of data. Having an unreliable nak - may result in a nak being damaged and not being received by the - sender, and in this case, we will let the tcp recover by its normal - means. Empirical data has shown that the likelihood of the nak being - lost is quite small and thus, this advisory nak option works quite - well. - -3. Big Window Option - - Currently TCP has a 16 bit window limitation built into the protocol. - This limits the amount of outstanding unacknowledged data to 64 - Kbytes. We have already seen that some networks have a pipe larger - than 64 Kbytes. A T1 satellite channel and a cross country DS3 - network with a 30ms delay have data pipes much larger than 64 Kbytes. - Thus, even on a perfectly conditioned link with no bandwidth wasted - due to errors, the data pipe will not be filled and bandwidth will be - wasted. What is needed is the ability to send more unacknowledged - data. This is achieved by having bigger windows, bigger than the - current limitation of 16 bits. This option expands the window - size to 30 bits or over 1 gigabyte by literally expanding the window - size mechanism currently used by TCP. The added option contains the - upper 15 bits of the window while the lower 16 bits will continue to - go where they normally go [6] in the TCP header. - - A TCP session will use the big window options only if both sides - agree to use them, otherwise the option is not used and the normal 16 - bit windows will be used. Once the 2 sides agree to use the big - windows then every packet thereafter will be expected to contain the - window option with the current upper 15 bits of the window.
The - negotiation to decide whether or not to use the bigger windows takes - place during the SYN and SYN ACK segments of the TCP connection - - - -Fox [Page 5] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - startup process. The originator of the connection will include in - the SYN segment the following option: - - 1 byte 1 byte 4 bytes - +=========+==========+===============+ - +option=B + length=6 + 30 bit window + - +=========+==========+===============+ - - - If the other end of the connection wants to use big windows it will - include the same option back in the SYN ACK segment that it must - send. At this point, both sides have agreed to use big windows and - the specified windows will be used. It should be noted that the SYN - and SYN ACK segments will use the small windows, and once the big - window option has been negotiated then the bigger windows will be - used. - - Once both sides have agreed to use 32 bit windows the protocol will - function just as it did before with no difference in operation, even - in the event of lost packets. This claim holds true since the - rcv_wnd and snd_wnd variables of tcp contain the 16 bit windows until - the big window option is negotiated and then they are replaced with - the appropriate 32 bit values. Thus, the use of big windows becomes - part of the state information kept by TCP. - - Other methods of expanding the windows have been presented, including - a window multiple [2] or streaming [5], but this solution is more - elegant in the sense that it is a true extension of the window that - one day may easily become part of the protocol and not just be an - option to the protocol. 
- -3.1 How does it work - - Once a connection has decided to use big windows every succeeding - packet must contain the following option: - - +=========+==========+==========================+ - +option=C + length=4 + upper 15 bits of rcv_wnd + - +=========+==========+==========================+ - - With all segments sent, the sender supplies the size of its receive - window. If the connection is only using 16 bits then this option is - not supplied, otherwise the lower 16 bits of the receive window go - into the tcp header where it currently resides [6] and the upper 15 - bits of the window is put into the data portion of the option C. - When the receiver processes the packet it must first reform the - window and then process the packet as it would in the absence of the - option. - - - -Fox [Page 6] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - -3.2 Impact of changes - - In implementing the first version of the big window option there was - very little change required to the source. State information must be - added to the protocol to determine if the big window option is to be - used and all 16 bit variables that dealt with window information must - now become 32 bit quantities. A future document will describe in - more detail the changes required to the 4.3 bsd tcp source code. - Test results of the window change only are presented in the appendix. - When expanding 16 bit quantities to 32 bit quantities in the TCP - control block in the source (4.3 bsd source) may cause the structure - to become larger than the mbuf used to hold the structure. Care must - be taken to insure this doesn't occur with your system or - undetermined events may take place. - -4. Effects of Big Windows and Naks when used together - - With big windows alone, transfer times over a satellite were quite - impressive with the absence of any introduced errors. 
However, when - an error simulator was used to create random errors during transfers, - performance went down extremely fast. When the nak option was added - to the big window option performance in the face of errors went up - some but not to the level that was expected. This section will - discuss some issues that were overcome to produce the results given - in the appendix. - -4.1 Window Size and Nak benefits - - Without errors, the window size required to keep the data pipe full - is equal to the round trip delay * throughput desired, or the data - pipe bandwidth (called Z from now on). This and other calculations - assume that processing time of the hosts is negligible. In the event - of an error (without NAKs), the window size needs to become larger - than Z in order to keep the data pipe full while the sender is - waiting for the ack of the resent packet. If the window size is - equal to Z and we assume that the retransmission timer is equal - to Z, then when a packet is lost, the retransmission timer will go - off as the last piece of data in the window is sent. In this case, - the lost piece of data can be resent with no delay. The data pipe - will empty out because it will take 1/2Z worth of data to get the ack - back to the sender, and an additional 1/2Z worth of data to get the data - pipe refilled with new data. This causes the required window to be - 2Z, 1Z to keep the data pipe full during normal operations and 1Z to - keep the data pipe full while waiting for a lost packet to be resent - and acked. - - If the same scenario in the last paragraph is used with the addition - of NAKs, the required window size still needs to be 2Z to avoid - - - -Fox [Page 7] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - wasting any bandwidth in the event of a dropped packet. This appears - to mean that the nak option does not provide any benefits at all.
- Testing showed that the retransmission timer was larger than the data - pipe and in the event of errors became much bigger than the data - pipe, because of the retransmission backoff. Thus, the nak option - bounds the required window to 2Z such that in the event of an error - there is no lost bandwidth, even with the retransmission timer - fluctuations. The results in the appendix shows that by using naks, - bandwidth waste associated with the retransmission timer facility is - eliminated. - -4.2 Congestions vs Noise - - An issue that must be looked at when implementing both the NAKs and - big window scheme together is in the area of congestion versus lost - packets due to the medium, or noise. In the recent algorithm - enhancements [1], slow start was introduced so that whenever a data - transfer is being started on a connection or right after a dropped - packet, the effective send window would be set to a very small size - (typically would equal the MSS being used). This is done so that a - new connection would not cause congestion by immediately overloading - the network, and so that an existing connection would back off the - network if a packet was dropped due to congestion and allow the - network to clear up. If a connection using big windows loses a - packet due to the medium (a packet corrupted by an error) the last - thing that should be done is to close the send window so that the - connection can only send 1 packet and must use the slow start - algorithm to slowly work itself back up to sending full windows worth - of data. This algorithm would quickly limit the usefulness of the - big window and nak options over lossy links. - - On the other hand, if a packet was dropped due to congestion and the - sender assumes the packet was dropped because of noise the sender - will continue sending large amounts of data. This action will cause - the congestion to continue, more packets will be dropped, and that - part of the network will collapse. 
In this instance, the sender - would want to back off from sending at the current window limit. - Using the current slow start mechanism over a satellite builds up the - window too slowly [1]. Possibly a better solution would be for the - window to be opened 2*Rlog2(W) instead of R*log2(W) [1] (open window - by 2 packets instead of 1 for each acked packet). This will reduce - the wasted bandwidth by opening the window much quicker while giving - the network a chance to clear up. More experimentation is necessary - to find the optimal rate of opening the window, especially when large - windows are being used. - - The current recommendation for TCP is to use the slow start mechanism - in the event of any lost packet. If an application knows that it - - - -Fox [Page 8] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - will be using a satellite with a high error rate, it doesn't make - sense to force it to use the slow start mechanism for every dropped - packet. Instead, the application should be able to choose what - action should happen in the event of a lost packet. In the BSD - environment, a setsockopt call should be provided so that the - application may inform TCP to handle lost packets in a special way - for this particular connection. If the known error rate of a link is - known to be small, then by using slow start with modified rate from - above, will cause the amount of bandwidth loss to be very small in - respect to the amount of bandwidth actually utilized. In this case, - the setsockopt call should not be used. What is really needed is a - way for a host to determine if a packet or packets are being dropped - due to congestion or noise. Then, the host can choose to do the - right thing. This will require a mechanism like source quench to be - used. For this to happen more experimentation is necessary to - determine a solid definition on the use of this mechanism. 
Now it is - believed by some that using source quench to avoid congestion only - adds to the problem, not help suppress it. - - The TCP used to gather the results in the appendix for the big window - with nak experiment, assumed that lost packets were the result of - noise and not congestion. This assumption was used to show how to - make the current TCP work in such an environment. The actual - satellite used in the experiment (when the satellite simulator was - not used) only experienced an error rate around 10e-10. With this - error rate it is suggested that in practice when big windows are used - over the link, TCP should use the slow start mechanism for all lost - packets with the 2*Rlog2(W) rate discussed above. Under most - situations when long delay networks are being used (transcontinental - DS3 networks using fiber with very low error rates, or satellite - links with low error rates) big windows and naks should be used with - the assumption that lost packets are the result of congestion until a - better algorithm is devised [7]. - - Another problem noticed, while testing the effects of slow start over - a satellite link, was at times, the retransmission timer was set so - restrictively that milliseconds before a nak'ed packet's ack is - received the retransmission timer would go off due to a timed packet - within the send window. The timer was set at the round trip delay of - the network allowing no time for packet processing. If this timer - went off due to congestion then backing off is the right thing to do, - otherwise to avoid the scenario discovered by experimentation, the - transmit timer should be set a little longer so that the - retransmission timer does not go off too early. Care must be taken - to make sure the right thing is done in the implementation in - question so that a packet isn't retransmitted too soon, and blamed on - congestion when in fact, the ack is on its way.
- - - - -Fox [Page 9] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - -4.3 Duplicate Acks - - Another problem found with the 4.3bsd implementation is in the area - of duplicate acks. When the sender of data receives a certain number - of acks (3 in the current Berkeley release) that acknowledge - previously acked data, it then assumes that a packet has been - lost and will resend the one packet assumed lost, and close its send - window as if the network is congested and the slow start algorithm - mentioned above will be used to open the send window. This facility is - no longer needed since the sender can use the reception of a nak as - its indicator that a particular packet was dropped. If the nak - packet is lost then the retransmit timer will go off and the packet - will be retransmitted by normal means. If a sender's algorithm - continues to count duplicate acks the sender will find itself - possibly receiving many duplicate acks after it has already resent - the packet due to a nak being received because of the large size of - the data pipe. By receiving all of these duplicate acks the sender - may find itself doing nothing but resending the same packet of data - unnecessarily while keeping the send window closed for absolutely no - reason. By removing this feature of the implementation a user can - expect to find a satellite connection working much better in the face - of errors and other connections should not see any performance loss, - but a slight improvement in performance if anything at all. - -5. Conclusion - - This paper has described two new options that if used will make TCP a - more efficient protocol in the face of errors and a more efficient - protocol over networks that have a high bandwidth*delay product - without decreasing performance over more common networks. If a - system that implements the options talks with one that does not, the - two systems should still be able to communicate with no problems.
- This assumes that the system doesn't use the option numbers defined - in this paper in some other way or doesn't panic when faced with an - option that the machine does not implement. Currently at NASA, there - are many machines that do not implement either option and communicate - just fine with the systems that do implement them. - - The drive for implementing big windows has been the direct result of - trying to make TCP more efficient over large delay networks [2,3,4,5] - such as a T1 satellite. However, another practical use of large - windows is becoming more apparent as the local area networks being - developed are becoming faster and supporting much larger MTU's. - Hyperchannel, for instance, has been stated to be able to support 1 - Mega bit MTU's in their new line of products. With the current - implementation of TCP, hyperchannel is not utilized as efficiently - as it should be because the physical medium's MTU is larger than the - maximum window of the protocol being used. By increasing the TCP - - - -Fox [Page 10] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - window size, better utilization of networks like hyperchannel will be - gained instantly because the sender can send 64 Kbyte packets (IP - limitation) but not have to operate in a stop and wait fashion. - Future work is being started to increase the IP maximum datagram size - so that even better utilization of fast local area networks will be - seen by having the TCP/IP protocols being able to send large packets - over mediums with very large MTUs. This will, hopefully, eliminate - the network protocol as the bottleneck in data transfers while - workstations and workstation file system technology advances even - more so, than it already has. - - An area of concern when using the big window mechanism is the use of - machine resources.
When running over a satellite and a packet is - dropped such that 2Z (where Z is the round trip delay) worth of data - is unacknowledged, both ends of the connection need to be able to - buffer the data using machine mbufs (or whatever mechanism the - machine uses), usually a valuable and scarce commodity. If the - window size is not chosen properly, some machines will crash when the - memory is all used up, or it will keep other parts of the system from - running. Thus, setting the window to some fairly large arbitrary - number is not a good idea, especially on a general purpose machine - where many users log on at any time. What is currently being - engineered at NASA is the ability for certain programs to use the - setsockopt feature of 4.3bsd asking to use big windows such that the - average user may not have access to the large windows, thus limiting - the use of big windows to applications that absolutely need them and - to protect a valuable system resource. - -6. References - - [1] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM 88, - Stanford, Ca., August 1988. - - [2] Jacobson, V., and R. Braden, "TCP Extensions for Long-Delay - Paths", LBL, USC/Information Sciences Institute, RFC 1072, - October 1988. - - [3] Cheriton, D., "VMTP: Versatile Message Transaction Protocol", RFC - 1045, Stanford University, February 1988. - - [4] Clark, D., M. Lambert, and L. Zhang, "NETBLT: A Bulk Data - Transfer Protocol", RFC 998, MIT, March 1987. - - [5] Fox, R., "Draft of Proposed Solution for High Delay Circuit File - Transfer", GE/NAS Internal Document, March 1988. - - [6] Postel, J., "Transmission Control Protocol - DARPA Internet - Program Protocol Specification", RFC 793, DARPA, September 1981. - - - -Fox [Page 11] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - [7] Leiner, B., "Critical Issues in High Bandwidth Networking", RFC - 1077, DARPA, November 1988. - -7. Appendix - - Both options have been implemented and tested.
Contained in this - section is some performance gathered to support the use of these two - options. The satellite channel used was a 1.544 Mbit link with a - 580ms round trip delay. All values are given as units of bytes. - - - TCP with Big Windows, No Naks: - - - |---------------transfer rates----------------------| - Window Size | no error | 10e-7 error rate | 10e-6 error rate | - ----------------------------------------------------------------- - 64K | 94K | 53K | 14K | - ----------------------------------------------------------------- - 72K | 106K | 51K | 15K | - ----------------------------------------------------------------- - 80K | 115K | 42K | 14K | - ----------------------------------------------------------------- - 92K | 115K | 43K | 14K | - ----------------------------------------------------------------- - 100K | 135K | 66K | 15K | - ----------------------------------------------------------------- - 112K | 126K | 53K | 17K | - ----------------------------------------------------------------- - 124K | 154K | 45K | 14K | - ----------------------------------------------------------------- - 136K | 160K | 66K | 15K | - ----------------------------------------------------------------- - 156K | 167K | 45K | 14K | - ----------------------------------------------------------------- - Figure 1. 
- - - - - - - - - - - - - - - -Fox [Page 12] - -RFC 1106 TCP Big Window and Nak Options June 1989 - - - TCP with Big Windows, and Naks: - - - |---------------transfer rates----------------------| - Window Size | no error | 10e-7 error rate | 10e-6 error rate | - ----------------------------------------------------------------- - 64K | 95K | 83K | 43K | - ----------------------------------------------------------------- - 72K | 104K | 87K | 49K | - ----------------------------------------------------------------- - 80K | 117K | 96K | 62K | - ----------------------------------------------------------------- - 92K | 124K | 119K | 39K | - ----------------------------------------------------------------- - 100K | 140K | 124K | 35K | - ----------------------------------------------------------------- - 112K | 151K | 126K | 53K | - ----------------------------------------------------------------- - 124K | 160K | 140K | 36K | - ----------------------------------------------------------------- - 136K | 167K | 148K | 38K | - ----------------------------------------------------------------- - 156K | 167K | 160K | 38K | - ----------------------------------------------------------------- - Figure 2. - - With a 10e-6 error rate, many naks as well as data packets were - dropped, causing the wild swing in transfer times. Also, please note - that the machines used are SGI Iris 2500 Turbos with the 3.6 OS with - the new TCP enhancements. The performance associated with the Irises - are slower than a Sun 3/260, but due to some source code restrictions - the Iris was used. Initial results on the Sun showed slightly higher - performance and less variance. 
- -Author's Address - - Richard Fox - 950 Linden #208 - Sunnyvale, Cal, 94086 - - EMail: rfox@tandem.com - - - - - - - - - - -Fox [Page 13] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1110.txt b/kernel/picotcp/RFC/rfc1110.txt deleted file mode 100644 index 29a33e3..0000000 --- a/kernel/picotcp/RFC/rfc1110.txt +++ /dev/null @@ -1,171 +0,0 @@ - - - - - - -Network Working Group A. McKenzie -Request for Comments: 1110 BBN STC - August 1989 - - - A Problem with the TCP Big Window Option - -Status of this Memo - - This memo comments on the TCP Big Window option described in RFC - 1106. Distribution of this memo is unlimited. - -Abstract - - The TCP Big Window option discussed in RFC 1106 will not work - properly in an Internet environment which has both a high bandwidth * - delay product and the possibility of disordering and duplicating - packets. In such networks, the window size must not be increased - without a similar increase in the sequence number space. Therefore, - a different approach to big windows should be taken in the Internet. - -Discussion - - TCP was designed to work in a packet store-and-forward environment - characterized by the possibility of packet loss, packet disordering, - and packet duplication. Packet loss can occur, for example, by a - congested network element discarding a packet. Packet disordering - can occur, for example, by packets of a TCP connection being - arbitrarily transmitted partially over a low bandwidth terrestrial - path and partially over a high bandwidth satellite path. Packet - duplication can occur, for example, when two directly-connected - network elements use a reliable link protocol and the link goes down - after the receiver correctly receives a packet but before the - transmitter receives an acknowledgement for the packet; the - transmitter and receiver now each take responsibility for attempting - to deliver the same packet to its ultimate destination. 
- - TCP has the task of recreating at the destination an exact copy of - the data stream generated at the source, in the same order and with - no gaps or duplicates. The mechanism used to accomplish this task is - to assign a "unique" sequence number to each byte of data at its - source, and to sort the bytes at the destination according to the - sequence number. The sorting operation corrects any disordering. An - acknowledgement, timeout, and retransmission scheme corrects for data - loss. The uniqueness of the sequence number corrects for data - duplication. - - As a practical matter, however, the sequence number is not unique; it - - - -McKenzie [Page 1] - -RFC 1110 Comments on TCP Big Window Option August 1989 - - - is contained in a 32-bit field and therefore "wraps around" after the - transmission of 2**32 bytes of data. Two additional mechanisms are - used to insure the effective uniqueness of sequence numbers; these - are the TCP transmission window and bounds on packet lifetime within - the Internet, including the IP Time-to-Live (TTL). The transmission - window specifies the maximum number of bytes which may be sent by the - source in one source-destination roundtrip time. Since the TCP - transmission window is specified by 16 bits, which is 1/65536 of the - sequence number space, a sequence number will not be reused (used to - number another byte) for 65,536 roundtrip times. So long as the - combination of gateway action on the IP TTL and holding times within - the individual networks which interconnect the gateways do not allow - a packet's lifetime to exceed 65,536 roundtrip times, each sequence - number is effectively unique. It was believed by the TCP designers - that the networks and gateways forming the internet would meet this - constraint, and such has been the case. - - The proposed TCP Big Window option, as described in RFC 1106, expands - the size of the window specification to 30 bits, while leaving the - sequence number space unchanged. 
Thus, a sequence number can be - reused after 4 roundtrip times. Further, the Nak option allows a - packet to be retransmitted (i.e., potentially duplicated) by the - source after only one roundtrip time. Thus, if a packet becomes - "lost" in the Internet for only about 5 roundtrip times it may be - delivered when its sequence number again lies within the window, - albeit a later cycle of the window. In this case, TCP will not - necessarily recreate at the destination an exact copy of the data - stream generated at the source; it may replace some data with earlier - data. - - Of course, the problem described above results from the storage of - the "lost" packet within the net, and its subsequent out-of-order - delivery. RFC 1106 seems to describe use of the proposed options in - an isolated satellite network. We may hypothesize that this network - is memoryless, and thus cannot deliver packets out of order; it - either delivers a packet in order or loses it. If this is the case, - then there is no problem with the proposed options. The Internet, - however, can deliver packets out of order, and this will likely - continue to be true even if gigabit links become part of the - Internet. Therefore, the approach described in RFC 1106 cannot be - adopted for general Internet use. - - - - - - - - - - -McKenzie [Page 2] - -RFC 1110 Comments on TCP Big Window Option August 1989 - - -Author's Address - - Alex McKenzie - Bolt Beranek and Newman Inc. - 10 Moulton Street - Cambridge, MA 02238 - - Phone: (617) 873-2962 - - EMail: MCKENZIE@BBN.COM - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McKenzie [Page 3] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1122.txt b/kernel/picotcp/RFC/rfc1122.txt deleted file mode 100644 index c14f2e5..0000000 --- a/kernel/picotcp/RFC/rfc1122.txt +++ /dev/null @@ -1,6844 +0,0 @@ - - - - - - -Network Working Group Internet Engineering Task Force -Request for Comments: 1122 R. 
Braden, Editor - October 1989 - - - Requirements for Internet Hosts -- Communication Layers - - -Status of This Memo - - This RFC is an official specification for the Internet community. It - incorporates by reference, amends, corrects, and supplements the - primary protocol standards documents relating to hosts. Distribution - of this document is unlimited. - -Summary - - This is one RFC of a pair that defines and discusses the requirements - for Internet host software. This RFC covers the communications - protocol layers: link layer, IP layer, and transport layer; its - companion RFC-1123 covers the application and support protocols. - - - - Table of Contents - - - - - 1. INTRODUCTION ............................................... 5 - 1.1 The Internet Architecture .............................. 6 - 1.1.1 Internet Hosts .................................... 6 - 1.1.2 Architectural Assumptions ......................... 7 - 1.1.3 Internet Protocol Suite ........................... 8 - 1.1.4 Embedded Gateway Code ............................. 10 - 1.2 General Considerations ................................. 12 - 1.2.1 Continuing Internet Evolution ..................... 12 - 1.2.2 Robustness Principle .............................. 12 - 1.2.3 Error Logging ..................................... 13 - 1.2.4 Configuration ..................................... 14 - 1.3 Reading this Document .................................. 15 - 1.3.1 Organization ...................................... 15 - 1.3.2 Requirements ...................................... 16 - 1.3.3 Terminology ....................................... 17 - 1.4 Acknowledgments ........................................ 20 - - 2. LINK LAYER .................................................. 21 - 2.1 INTRODUCTION ........................................... 21 - - - -Internet Engineering Task Force [Page 1] - - - - -RFC1122 INTRODUCTION October 1989 - - - 2.2 PROTOCOL WALK-THROUGH .................................. 
21 - 2.3 SPECIFIC ISSUES ........................................ 21 - 2.3.1 Trailer Protocol Negotiation ...................... 21 - 2.3.2 Address Resolution Protocol -- ARP ................ 22 - 2.3.2.1 ARP Cache Validation ......................... 22 - 2.3.2.2 ARP Packet Queue ............................. 24 - 2.3.3 Ethernet and IEEE 802 Encapsulation ............... 24 - 2.4 LINK/INTERNET LAYER INTERFACE .......................... 25 - 2.5 LINK LAYER REQUIREMENTS SUMMARY ........................ 26 - - 3. INTERNET LAYER PROTOCOLS .................................... 27 - 3.1 INTRODUCTION ............................................ 27 - 3.2 PROTOCOL WALK-THROUGH .................................. 29 - 3.2.1 Internet Protocol -- IP ............................ 29 - 3.2.1.1 Version Number ............................... 29 - 3.2.1.2 Checksum ..................................... 29 - 3.2.1.3 Addressing ................................... 29 - 3.2.1.4 Fragmentation and Reassembly ................. 32 - 3.2.1.5 Identification ............................... 32 - 3.2.1.6 Type-of-Service .............................. 33 - 3.2.1.7 Time-to-Live ................................. 34 - 3.2.1.8 Options ...................................... 35 - 3.2.2 Internet Control Message Protocol -- ICMP .......... 38 - 3.2.2.1 Destination Unreachable ...................... 39 - 3.2.2.2 Redirect ..................................... 40 - 3.2.2.3 Source Quench ................................ 41 - 3.2.2.4 Time Exceeded ................................ 41 - 3.2.2.5 Parameter Problem ............................ 42 - 3.2.2.6 Echo Request/Reply ........................... 42 - 3.2.2.7 Information Request/Reply .................... 43 - 3.2.2.8 Timestamp and Timestamp Reply ................ 43 - 3.2.2.9 Address Mask Request/Reply ................... 45 - 3.2.3 Internet Group Management Protocol IGMP ........... 47 - 3.3 SPECIFIC ISSUES ........................................ 
47 - 3.3.1 Routing Outbound Datagrams ........................ 47 - 3.3.1.1 Local/Remote Decision ........................ 47 - 3.3.1.2 Gateway Selection ............................ 48 - 3.3.1.3 Route Cache .................................. 49 - 3.3.1.4 Dead Gateway Detection ....................... 51 - 3.3.1.5 New Gateway Selection ........................ 55 - 3.3.1.6 Initialization ............................... 56 - 3.3.2 Reassembly ........................................ 56 - 3.3.3 Fragmentation ..................................... 58 - 3.3.4 Local Multihoming ................................. 60 - 3.3.4.1 Introduction ................................. 60 - 3.3.4.2 Multihoming Requirements ..................... 61 - 3.3.4.3 Choosing a Source Address .................... 64 - 3.3.5 Source Route Forwarding ........................... 65 - - - -Internet Engineering Task Force [Page 2] - - - - -RFC1122 INTRODUCTION October 1989 - - - 3.3.6 Broadcasts ........................................ 66 - 3.3.7 IP Multicasting ................................... 67 - 3.3.8 Error Reporting ................................... 69 - 3.4 INTERNET/TRANSPORT LAYER INTERFACE ..................... 69 - 3.5 INTERNET LAYER REQUIREMENTS SUMMARY .................... 72 - - 4. TRANSPORT PROTOCOLS ......................................... 77 - 4.1 USER DATAGRAM PROTOCOL -- UDP .......................... 77 - 4.1.1 INTRODUCTION ...................................... 77 - 4.1.2 PROTOCOL WALK-THROUGH ............................. 77 - 4.1.3 SPECIFIC ISSUES ................................... 77 - 4.1.3.1 Ports ........................................ 77 - 4.1.3.2 IP Options ................................... 77 - 4.1.3.3 ICMP Messages ................................ 78 - 4.1.3.4 UDP Checksums ................................ 78 - 4.1.3.5 UDP Multihoming .............................. 79 - 4.1.3.6 Invalid Addresses ............................ 
79 - 4.1.4 UDP/APPLICATION LAYER INTERFACE ................... 79 - 4.1.5 UDP REQUIREMENTS SUMMARY .......................... 80 - 4.2 TRANSMISSION CONTROL PROTOCOL -- TCP ................... 82 - 4.2.1 INTRODUCTION ...................................... 82 - 4.2.2 PROTOCOL WALK-THROUGH ............................. 82 - 4.2.2.1 Well-Known Ports ............................. 82 - 4.2.2.2 Use of Push .................................. 82 - 4.2.2.3 Window Size .................................. 83 - 4.2.2.4 Urgent Pointer ............................... 84 - 4.2.2.5 TCP Options .................................. 85 - 4.2.2.6 Maximum Segment Size Option .................. 85 - 4.2.2.7 TCP Checksum ................................. 86 - 4.2.2.8 TCP Connection State Diagram ................. 86 - 4.2.2.9 Initial Sequence Number Selection ............ 87 - 4.2.2.10 Simultaneous Open Attempts .................. 87 - 4.2.2.11 Recovery from Old Duplicate SYN ............. 87 - 4.2.2.12 RST Segment ................................. 87 - 4.2.2.13 Closing a Connection ........................ 87 - 4.2.2.14 Data Communication .......................... 89 - 4.2.2.15 Retransmission Timeout ...................... 90 - 4.2.2.16 Managing the Window ......................... 91 - 4.2.2.17 Probing Zero Windows ........................ 92 - 4.2.2.18 Passive OPEN Calls .......................... 92 - 4.2.2.19 Time to Live ................................ 93 - 4.2.2.20 Event Processing ............................ 93 - 4.2.2.21 Acknowledging Queued Segments ............... 94 - 4.2.3 SPECIFIC ISSUES ................................... 95 - 4.2.3.1 Retransmission Timeout Calculation ........... 95 - 4.2.3.2 When to Send an ACK Segment .................. 96 - 4.2.3.3 When to Send a Window Update ................. 97 - 4.2.3.4 When to Send Data ............................ 
98 - - - -Internet Engineering Task Force [Page 3] - - - - -RFC1122 INTRODUCTION October 1989 - - - 4.2.3.5 TCP Connection Failures ...................... 100 - 4.2.3.6 TCP Keep-Alives .............................. 101 - 4.2.3.7 TCP Multihoming .............................. 103 - 4.2.3.8 IP Options ................................... 103 - 4.2.3.9 ICMP Messages ................................ 103 - 4.2.3.10 Remote Address Validation ................... 104 - 4.2.3.11 TCP Traffic Patterns ........................ 104 - 4.2.3.12 Efficiency .................................. 105 - 4.2.4 TCP/APPLICATION LAYER INTERFACE ................... 106 - 4.2.4.1 Asynchronous Reports ......................... 106 - 4.2.4.2 Type-of-Service .............................. 107 - 4.2.4.3 Flush Call ................................... 107 - 4.2.4.4 Multihoming .................................. 108 - 4.2.5 TCP REQUIREMENT SUMMARY ........................... 108 - - 5. REFERENCES ................................................. 112 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 4] - - - - -RFC1122 INTRODUCTION October 1989 - - -1. INTRODUCTION - - This document is one of a pair that defines and discusses the - requirements for host system implementations of the Internet protocol - suite. This RFC covers the communication protocol layers: link - layer, IP layer, and transport layer. Its companion RFC, - "Requirements for Internet Hosts -- Application and Support" - [INTRO:1], covers the application layer protocols. This document - should also be read in conjunction with "Requirements for Internet - Gateways" [INTRO:2]. - - These documents are intended to provide guidance for vendors, - implementors, and users of Internet communication software. They - represent the consensus of a large body of technical experience and - wisdom, contributed by the members of the Internet research and - vendor communities. 
- - This RFC enumerates standard protocols that a host connected to the - Internet must use, and it incorporates by reference the RFCs and - other documents describing the current specifications for these - protocols. It corrects errors in the referenced documents and adds - additional discussion and guidance for an implementor. - - For each protocol, this document also contains an explicit set of - requirements, recommendations, and options. The reader must - understand that the list of requirements in this document is - incomplete by itself; the complete set of requirements for an - Internet host is primarily defined in the standard protocol - specification documents, with the corrections, amendments, and - supplements contained in this RFC. - - A good-faith implementation of the protocols that was produced after - careful reading of the RFC's and with some interaction with the - Internet technical community, and that followed good communications - software engineering practices, should differ from the requirements - of this document in only minor ways. Thus, in many cases, the - "requirements" in this RFC are already stated or implied in the - standard protocol documents, so that their inclusion here is, in a - sense, redundant. However, they were included because some past - implementation has made the wrong choice, causing problems of - interoperability, performance, and/or robustness. - - This document includes discussion and explanation of many of the - requirements and recommendations. A simple list of requirements - would be dangerous, because: - - o Some required features are more important than others, and some - features are optional. - - - -Internet Engineering Task Force [Page 5] - - - - -RFC1122 INTRODUCTION October 1989 - - - o There may be valid reasons why particular vendor products that - are designed for restricted contexts might choose to use - different specifications. 
- - However, the specifications of this document must be followed to meet - the general goal of arbitrary host interoperation across the - diversity and complexity of the Internet system. Although most - current implementations fail to meet these requirements in various - ways, some minor and some major, this specification is the ideal - towards which we need to move. - - These requirements are based on the current level of Internet - architecture. This document will be updated as required to provide - additional clarifications or to include additional information in - those areas in which specifications are still evolving. - - This introductory section begins with a brief overview of the - Internet architecture as it relates to hosts, and then gives some - general advice to host software vendors. Finally, there is some - guidance on reading the rest of the document and some terminology. - - 1.1 The Internet Architecture - - General background and discussion on the Internet architecture and - supporting protocol suite can be found in the DDN Protocol - Handbook [INTRO:3]; for background see for example [INTRO:9], - [INTRO:10], and [INTRO:11]. Reference [INTRO:5] describes the - procedure for obtaining Internet protocol documents, while - [INTRO:6] contains a list of the numbers assigned within Internet - protocols. - - 1.1.1 Internet Hosts - - A host computer, or simply "host," is the ultimate consumer of - communication services. A host generally executes application - programs on behalf of user(s), employing network and/or - Internet communication services in support of this function. - An Internet host corresponds to the concept of an "End-System" - used in the OSI protocol suite [INTRO:13]. - - An Internet communication system consists of interconnected - packet networks supporting communication among host computers - using the Internet protocols. 
The networks are interconnected - using packet-switching computers called "gateways" or "IP - routers" by the Internet community, and "Intermediate Systems" - by the OSI world [INTRO:13]. The RFC "Requirements for - Internet Gateways" [INTRO:2] contains the official - specifications for Internet gateways. That RFC together with - - - -Internet Engineering Task Force [Page 6] - - - - -RFC1122 INTRODUCTION October 1989 - - - the present document and its companion [INTRO:1] define the - rules for the current realization of the Internet architecture. - - Internet hosts span a wide range of size, speed, and function. - They range in size from small microprocessors through - workstations to mainframes and supercomputers. In function, - they range from single-purpose hosts (such as terminal servers) - to full-service hosts that support a variety of online network - services, typically including remote login, file transfer, and - electronic mail. - - A host is generally said to be multihomed if it has more than - one interface to the same or to different networks. See - Section 1.1.3 on "Terminology". - - 1.1.2 Architectural Assumptions - - The current Internet architecture is based on a set of - assumptions about the communication system. The assumptions - most relevant to hosts are as follows: - - (a) The Internet is a network of networks. - - Each host is directly connected to some particular - network(s); its connection to the Internet is only - conceptual. Two hosts on the same network communicate - with each other using the same set of protocols that they - would use to communicate with hosts on distant networks. - - (b) Gateways don't keep connection state information. - - To improve robustness of the communication system, - gateways are designed to be stateless, forwarding each IP - datagram independently of other datagrams. As a result, - redundant paths can be exploited to provide robust service - in spite of failures of intervening gateways and networks. 
- - All state information required for end-to-end flow control - and reliability is implemented in the hosts, in the - transport layer or in application programs. All - connection control information is thus co-located with the - end points of the communication, so it will be lost only - if an end point fails. - - (c) Routing complexity should be in the gateways. - - Routing is a complex and difficult problem, and ought to - be performed by the gateways, not the hosts. An important - - - -Internet Engineering Task Force [Page 7] - - - - -RFC1122 INTRODUCTION October 1989 - - - objective is to insulate host software from changes caused - by the inevitable evolution of the Internet routing - architecture. - - (d) The System must tolerate wide network variation. - - A basic objective of the Internet design is to tolerate a - wide range of network characteristics -- e.g., bandwidth, - delay, packet loss, packet reordering, and maximum packet - size. Another objective is robustness against failure of - individual networks, gateways, and hosts, using whatever - bandwidth is still available. Finally, the goal is full - "open system interconnection": an Internet host must be - able to interoperate robustly and effectively with any - other Internet host, across diverse Internet paths. - - Sometimes host implementors have designed for less - ambitious goals. For example, the LAN environment is - typically much more benign than the Internet as a whole; - LANs have low packet loss and delay and do not reorder - packets. Some vendors have fielded host implementations - that are adequate for a simple LAN environment, but work - badly for general interoperation. The vendor justifies - such a product as being economical within the restricted - LAN market. However, isolated LANs seldom stay isolated - for long; they are soon gatewayed to each other, to - organization-wide internets, and eventually to the global - Internet system. 
In the end, neither the customer nor the - vendor is served by incomplete or substandard Internet - host software. - - The requirements spelled out in this document are designed - for a full-function Internet host, capable of full - interoperation over an arbitrary Internet path. - - - 1.1.3 Internet Protocol Suite - - To communicate using the Internet system, a host must implement - the layered set of protocols comprising the Internet protocol - suite. A host typically must implement at least one protocol - from each layer. - - The protocol layers used in the Internet architecture are as - follows [INTRO:4]: - - - o Application Layer - - - -Internet Engineering Task Force [Page 8] - - - - -RFC1122 INTRODUCTION October 1989 - - - The application layer is the top layer of the Internet - protocol suite. The Internet suite does not further - subdivide the application layer, although some of the - Internet application layer protocols do contain some - internal sub-layering. The application layer of the - Internet suite essentially combines the functions of the - top two layers -- Presentation and Application -- of the - OSI reference model. - - We distinguish two categories of application layer - protocols: user protocols that provide service directly - to users, and support protocols that provide common system - functions. Requirements for user and support protocols - will be found in the companion RFC [INTRO:1]. - - The most common Internet user protocols are: - - o Telnet (remote login) - o FTP (file transfer) - o SMTP (electronic mail delivery) - - There are a number of other standardized user protocols - [INTRO:4] and many private user protocols. - - Support protocols, used for host name mapping, booting, - and management, include SNMP, BOOTP, RARP, and the Domain - Name System (DNS) protocols. - - - o Transport Layer - - The transport layer provides end-to-end communication - services for applications. 
There are two primary - transport layer protocols at present: - - o Transmission Control Protocol (TCP) - o User Datagram Protocol (UDP) - - TCP is a reliable connection-oriented transport service - that provides end-to-end reliability, resequencing, and - flow control. UDP is a connectionless ("datagram") - transport service. - - Other transport protocols have been developed by the - research community, and the set of official Internet - transport protocols may be expanded in the future. - - Transport layer protocols are discussed in Chapter 4. - - - -Internet Engineering Task Force [Page 9] - - - - -RFC1122 INTRODUCTION October 1989 - - - o Internet Layer - - All Internet transport protocols use the Internet Protocol - (IP) to carry data from source host to destination host. - IP is a connectionless or datagram internetwork service, - providing no end-to-end delivery guarantees. Thus, IP - datagrams may arrive at the destination host damaged, - duplicated, out of order, or not at all. The layers above - IP are responsible for reliable delivery service when it - is required. The IP protocol includes provision for - addressing, type-of-service specification, fragmentation - and reassembly, and security information. - - The datagram or connectionless nature of the IP protocol - is a fundamental and characteristic feature of the - Internet architecture. Internet IP was the model for the - OSI Connectionless Network Protocol [INTRO:12]. - - ICMP is a control protocol that is considered to be an - integral part of IP, although it is architecturally - layered upon IP, i.e., it uses IP to carry its data end- - to-end just as a transport protocol like TCP or UDP does. - ICMP provides error reporting, congestion reporting, and - first-hop gateway redirection. - - IGMP is an Internet layer protocol used for establishing - dynamic host groups for IP multicasting. - - The Internet layer protocols IP, ICMP, and IGMP are - discussed in Chapter 3. 
- - - o Link Layer - - To communicate on its directly-connected network, a host - must implement the communication protocol used to - interface to that network. We call this a link layer or - media-access layer protocol. - - There is a wide variety of link layer protocols, - corresponding to the many different types of networks. - See Chapter 2. - - - 1.1.4 Embedded Gateway Code - - Some Internet host software includes embedded gateway - functionality, so that these hosts can forward packets as a - - - -Internet Engineering Task Force [Page 10] - - - - -RFC1122 INTRODUCTION October 1989 - - - gateway would, while still performing the application layer - functions of a host. - - Such dual-purpose systems must follow the Gateway Requirements - RFC [INTRO:2] with respect to their gateway functions, and - must follow the present document with respect to their host - functions. In all overlapping cases, the two specifications - should be in agreement. - - There are varying opinions in the Internet community about - embedded gateway functionality. The main arguments are as - follows: - - o Pro: in a local network environment where networking is - informal, or in isolated internets, it may be convenient - and economical to use existing host systems as gateways. - - There is also an architectural argument for embedded - gateway functionality: multihoming is much more common - than originally foreseen, and multihoming forces a host to - make routing decisions as if it were a gateway. If the - multihomed host contains an embedded gateway, it will - have full routing knowledge and as a result will be able - to make more optimal routing decisions. - - o Con: Gateway algorithms and protocols are still changing, - and they will continue to change as the Internet system - grows larger. Attempting to include a general gateway - function within the host IP layer will force host system - maintainers to track these (more frequent) changes. 
Also, - a larger pool of gateway implementations will make - coordinating the changes more difficult. Finally, the - complexity of a gateway IP layer is somewhat greater than - that of a host, making the implementation and operation - tasks more complex. - - In addition, the style of operation of some hosts is not - appropriate for providing stable and robust gateway - service. - - There is considerable merit in both of these viewpoints. One - conclusion can be drawn: a host administrator must have - conscious control over whether or not a given host acts as a - gateway. See Section 3.1 for the detailed requirements. - - - - - - - -Internet Engineering Task Force [Page 11] - - - - -RFC1122 INTRODUCTION October 1989 - - - 1.2 General Considerations - - There are two important lessons that vendors of Internet host - software have learned and which a new vendor should consider - seriously. - - 1.2.1 Continuing Internet Evolution - - The enormous growth of the Internet has revealed problems of - management and scaling in a large datagram-based packet - communication system. These problems are being addressed, and - as a result there will be continuing evolution of the - specifications described in this document. These changes will - be carefully planned and controlled, since there is extensive - participation in this planning by the vendors and by the - organizations responsible for operations of the networks. - - Development, evolution, and revision are characteristic of - computer network protocols today, and this situation will - persist for some years. A vendor who develops computer - communication software for the Internet protocol suite (or any - other protocol suite!) and then fails to maintain and update - that software for changing specifications is going to leave a - trail of unhappy customers. The Internet is a large - communication network, and the users are in constant contact - through it.
Experience has shown that knowledge of - deficiencies in vendor software propagates quickly through the - Internet technical community. - - 1.2.2 Robustness Principle - - At every layer of the protocols, there is a general rule whose - application can lead to enormous benefits in robustness and - interoperability [IP:1]: - - "Be liberal in what you accept, and - conservative in what you send" - - Software should be written to deal with every conceivable - error, no matter how unlikely; sooner or later a packet will - come in with that particular combination of errors and - attributes, and unless the software is prepared, chaos can - ensue. In general, it is best to assume that the network is - filled with malevolent entities that will send in packets - designed to have the worst possible effect. This assumption - will lead to suitable protective design, although the most - serious problems in the Internet have been caused by - unenvisaged mechanisms triggered by low-probability events; - - - -Internet Engineering Task Force [Page 12] - - - - -RFC1122 INTRODUCTION October 1989 - - - mere human malice would never have taken so devious a course! - - Adaptability to change must be designed into all levels of - Internet host software. As a simple example, consider a - protocol specification that contains an enumeration of values - for a particular header field -- e.g., a type field, a port - number, or an error code; this enumeration must be assumed to - be incomplete. Thus, if a protocol specification defines four - possible error codes, the software must not break when a fifth - code shows up. An undefined code might be logged (see below), - but it must not cause a failure. - - The second part of the principle is almost as important: - software on other hosts may contain deficiencies that make it - unwise to exploit legal but obscure protocol features. It is - unwise to stray far from the obvious and simple, lest untoward - effects result elsewhere. 
A corollary of this is "watch out - for misbehaving hosts"; host software should be prepared, not - just to survive other misbehaving hosts, but also to cooperate - to limit the amount of disruption such hosts can cause to the - shared communication facility. - - 1.2.3 Error Logging - - The Internet includes a great variety of host and gateway - systems, each implementing many protocols and protocol layers, - and some of these contain bugs and mis-features in their - Internet protocol software. As a result of complexity, - diversity, and distribution of function, the diagnosis of - Internet problems is often very difficult. - - Problem diagnosis will be aided if host implementations include - a carefully designed facility for logging erroneous or - "strange" protocol events. It is important to include as much - diagnostic information as possible when an error is logged. In - particular, it is often useful to record the header(s) of a - packet that caused an error. However, care must be taken to - ensure that error logging does not consume prohibitive amounts - of resources or otherwise interfere with the operation of the - host. - - There is a tendency for abnormal but harmless protocol events - to overflow error logging files; this can be avoided by using a - "circular" log, or by enabling logging only while diagnosing a - known failure. It may be useful to filter and count duplicate - successive messages. One strategy that seems to work well is: - (1) always count abnormalities and make such counts accessible - through the management protocol (see [INTRO:1]); and (2) allow - - - -Internet Engineering Task Force [Page 13] - - - - -RFC1122 INTRODUCTION October 1989 - - - the logging of a great variety of events to be selectively - enabled. For example, it might be useful to be able to "log - everything" or to "log everything for host X".
- - Note that different managements may have differing policies - about the amount of error logging that they want normally - enabled in a host. Some will say, "if it doesn't hurt me, I - don't want to know about it", while others will want to take a - more watchful and aggressive attitude about detecting and - removing protocol abnormalities. - - 1.2.4 Configuration - - It would be ideal if a host implementation of the Internet - protocol suite could be entirely self-configuring. This would - allow the whole suite to be implemented in ROM or cast into - silicon, it would simplify diskless workstations, and it would - be an immense boon to harried LAN administrators as well as - system vendors. We have not reached this ideal; in fact, we - are not even close. - - At many points in this document, you will find a requirement - that a parameter be a configurable option. There are several - different reasons behind such requirements. In a few cases, - there is current uncertainty or disagreement about the best - value, and it may be necessary to update the recommended value - in the future. In other cases, the value really depends on - external factors -- e.g., the size of the host and the - distribution of its communication load, or the speeds and - topology of nearby networks -- and self-tuning algorithms are - unavailable and may be insufficient. In some cases, - configurability is needed because of administrative - requirements. - - Finally, some configuration options are required to communicate - with obsolete or incorrect implementations of the protocols, - distributed without sources, that unfortunately persist in many - parts of the Internet. To make correct systems coexist with - these faulty systems, administrators often have to "mis- - configure" the correct systems. This problem will correct - itself gradually as the faulty systems are retired, but it - cannot be ignored by vendors. 
- - When we say that a parameter must be configurable, we do not - intend to require that its value be explicitly read from a - configuration file at every boot time. We recommend that - implementors set up a default for each parameter, so a - configuration file is only necessary to override those defaults - - - -Internet Engineering Task Force [Page 14] - - - - -RFC1122 INTRODUCTION October 1989 - - - that are inappropriate in a particular installation. Thus, the - configurability requirement is an assurance that it will be - POSSIBLE to override the default when necessary, even in a - binary-only or ROM-based product. - - This document requires a particular value for such defaults in - some cases. The choice of default is a sensitive issue when - the configuration item controls the accommodation to existing - faulty systems. If the Internet is to converge successfully to - complete interoperability, the default values built into - implementations must implement the official protocol, not - "mis-configurations" to accommodate faulty implementations. - Although marketing considerations have led some vendors to - choose mis-configuration defaults, we urge vendors to choose - defaults that will conform to the standard. - - Finally, we note that a vendor needs to provide adequate - documentation on all configuration parameters, their limits and - effects. - - - 1.3 Reading this Document - - 1.3.1 Organization - - Protocol layering, which is generally used as an organizing - principle in implementing network software, has also been used - to organize this document. In describing the rules, we assume - that an implementation does strictly mirror the layering of the - protocols. Thus, the following three major sections specify - the requirements for the link layer, the internet layer, and - the transport layer, respectively. A companion RFC [INTRO:1] - covers application level software. This layerist organization - was chosen for simplicity and clarity. 
- - However, strict layering is an imperfect model, both for the - protocol suite and for recommended implementation approaches. - Protocols in different layers interact in complex and sometimes - subtle ways, and particular functions often involve multiple - layers. There are many design choices in an implementation, - many of which involve creative "breaking" of strict layering. - Every implementor is urged to read references [INTRO:7] and - [INTRO:8]. - - This document describes the conceptual service interface - between layers using a functional ("procedure call") notation, - like that used in the TCP specification [TCP:1]. A host - implementation must support the logical information flow - - - -Internet Engineering Task Force [Page 15] - - - - -RFC1122 INTRODUCTION October 1989 - - - implied by these calls, but need not literally implement the - calls themselves. For example, many implementations reflect - the coupling between the transport layer and the IP layer by - giving them shared access to common data structures. These - data structures, rather than explicit procedure calls, are then - the agency for passing much of the information that is - required. - - In general, each major section of this document is organized - into the following subsections: - - (1) Introduction - - (2) Protocol Walk-Through -- considers the protocol - specification documents section-by-section, correcting - errors, stating requirements that may be ambiguous or - ill-defined, and providing further clarification or - explanation. - - (3) Specific Issues -- discusses protocol design and - implementation issues that were not included in the walk- - through. - - (4) Interfaces -- discusses the service interface to the next - higher layer. - - (5) Summary -- contains a summary of the requirements of the - section. - - - Under many of the individual topics in this document, there is - parenthetical material labeled "DISCUSSION" or - "IMPLEMENTATION". 
This material is intended to give - clarification and explanation of the preceding requirements - text. It also includes some suggestions on possible future - directions or developments. The implementation material - contains suggested approaches that an implementor may want to - consider. - - The summary sections are intended to be guides and indexes to - the text, but are necessarily cryptic and incomplete. The - summaries should never be used or referenced separately from - the complete RFC. - - 1.3.2 Requirements - - In this document, the words that are used to define the - significance of each particular requirement are capitalized. - - - -Internet Engineering Task Force [Page 16] - - - - -RFC1122 INTRODUCTION October 1989 - - - These words are: - - * "MUST" - - This word or the adjective "REQUIRED" means that the item - is an absolute requirement of the specification. - - * "SHOULD" - - This word or the adjective "RECOMMENDED" means that there - may exist valid reasons in particular circumstances to - ignore this item, but the full implications should be - understood and the case carefully weighed before choosing - a different course. - - * "MAY" - - This word or the adjective "OPTIONAL" means that this item - is truly optional. One vendor may choose to include the - item because a particular marketplace requires it or - because it enhances the product, for example; another - vendor may omit the same item. - - - An implementation is not compliant if it fails to satisfy one - or more of the MUST requirements for the protocols it - implements. An implementation that satisfies all the MUST and - all the SHOULD requirements for its protocols is said to be - "unconditionally compliant"; one that satisfies all the MUST - requirements but not all the SHOULD requirements for its - protocols is said to be "conditionally compliant". 
- - 1.3.3 Terminology - - This document uses the following technical terms: - - Segment - A segment is the unit of end-to-end transmission in the - TCP protocol. A segment consists of a TCP header followed - by application data. A segment is transmitted by - encapsulation inside an IP datagram. - - Message - In this description of the lower-layer protocols, a - message is the unit of transmission in a transport layer - protocol. In particular, a TCP segment is a message. A - message consists of a transport protocol header followed - by application protocol data. To be transmitted end-to- - - - -Internet Engineering Task Force [Page 17] - - - - -RFC1122 INTRODUCTION October 1989 - - - end through the Internet, a message must be encapsulated - inside a datagram. - - IP Datagram - An IP datagram is the unit of end-to-end transmission in - the IP protocol. An IP datagram consists of an IP header - followed by transport layer data, i.e., of an IP header - followed by a message. - - In the description of the internet layer (Section 3), the - unqualified term "datagram" should be understood to refer - to an IP datagram. - - Packet - A packet is the unit of data passed across the interface - between the internet layer and the link layer. It - includes an IP header and data. A packet may be a - complete IP datagram or a fragment of an IP datagram. - - Frame - A frame is the unit of transmission in a link layer - protocol, and consists of a link-layer header followed by - a packet. - - Connected Network - A network to which a host is interfaced is often known as - the "local network" or the "subnetwork" relative to that - host. However, these terms can cause confusion, and - therefore we use the term "connected network" in this - document. - - Multihomed - A host is said to be multihomed if it has multiple IP - addresses. For a discussion of multihoming, see Section - 3.3.4 below. 
- - Physical network interface - This is a physical interface to a connected network and - has a (possibly unique) link-layer address. Multiple - physical network interfaces on a single host may share the - same link-layer address, but the address must be unique - for different hosts on the same physical network. - - Logical [network] interface - We define a logical [network] interface to be a logical - path, distinguished by a unique IP address, to a connected - network. See Section 3.3.4. - - - - -Internet Engineering Task Force [Page 18] - - - - -RFC1122 INTRODUCTION October 1989 - - - Specific-destination address - This is the effective destination address of a datagram, - even if it is broadcast or multicast; see Section 3.2.1.3. - - Path - At a given moment, all the IP datagrams from a particular - source host to a particular destination host will - typically traverse the same sequence of gateways. We use - the term "path" for this sequence. Note that a path is - uni-directional; it is not unusual to have different paths - in the two directions between a given host pair. - - MTU - The maximum transmission unit, i.e., the size of the - largest packet that can be transmitted. - - - The terms frame, packet, datagram, message, and segment are - illustrated by the following schematic diagrams: - - A. Transmission on connected network: - _______________________________________________ - | LL hdr | IP hdr | (data) | - |________|________|_____________________________| - - <---------- Frame -----------------------------> - <----------Packet --------------------> - - - B. 
Before IP fragmentation or after IP reassembly: - ______________________________________ - | IP hdr | transport| Application Data | - |________|____hdr___|__________________| - - <-------- Datagram ------------------> - <-------- Message -----------> - or, for TCP: - ______________________________________ - | IP hdr | TCP hdr | Application Data | - |________|__________|__________________| - - <-------- Datagram ------------------> - <-------- Segment -----------> - - - - - - - - -Internet Engineering Task Force [Page 19] - - - - -RFC1122 INTRODUCTION October 1989 - - - 1.4 Acknowledgments - - This document incorporates contributions and comments from a large - group of Internet protocol experts, including representatives of - university and research labs, vendors, and government agencies. - It was assembled primarily by the Host Requirements Working Group - of the Internet Engineering Task Force (IETF). - - The Editor would especially like to acknowledge the tireless - dedication of the following people, who attended many long - meetings and generated 3 million bytes of electronic mail over the - past 18 months in pursuit of this document: Philip Almquist, Dave - Borman (Cray Research), Noel Chiappa, Dave Crocker (DEC), Steve - Deering (Stanford), Mike Karels (Berkeley), Phil Karn (Bellcore), - John Lekashman (NASA), Charles Lynn (BBN), Keith McCloghrie (TWG), - Paul Mockapetris (ISI), Thomas Narten (Purdue), Craig Partridge - (BBN), Drew Perkins (CMU), and James Van Bokkelen (FTP Software). 
- - In addition, the following people made major contributions to the - effort: Bill Barns (Mitre), Steve Bellovin (AT&T), Mike Brescia - (BBN), Ed Cain (DCA), Annette DeSchon (ISI), Martin Gross (DCA), - Phill Gross (NRI), Charles Hedrick (Rutgers), Van Jacobson (LBL), - John Klensin (MIT), Mark Lottor (SRI), Milo Medin (NASA), Bill - Melohn (Sun Microsystems), Greg Minshall (Kinetics), Jeff Mogul - (DEC), John Mullen (CMC), Jon Postel (ISI), John Romkey (Epilogue - Technology), and Mike StJohns (DCA). The following also made - significant contributions to particular areas: Eric Allman - (Berkeley), Rob Austein (MIT), Art Berggreen (ACC), Keith Bostic - (Berkeley), Vint Cerf (NRI), Wayne Hathaway (NASA), Matt Korn - (IBM), Erik Naggum (Naggum Software, Norway), Robert Ullmann - (Prime Computer), David Waitzman (BBN), Frank Wancho (USA), Arun - Welch (Ohio State), Bill Westfield (Cisco), and Rayan Zachariassen - (Toronto). - - We are grateful to all, including any contributors who may have - been inadvertently omitted from this list. - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 20] - - - - -RFC1122 LINK LAYER October 1989 - - -2. LINK LAYER - - 2.1 INTRODUCTION - - All Internet systems, both hosts and gateways, have the same - requirements for link layer protocols. These requirements are - given in Chapter 3 of "Requirements for Internet Gateways" - [INTRO:2], augmented with the material in this section. - - 2.2 PROTOCOL WALK-THROUGH - - None. - - 2.3 SPECIFIC ISSUES - - 2.3.1 Trailer Protocol Negotiation - - The trailer protocol [LINK:1] for link-layer encapsulation MAY - be used, but only when it has been verified that both systems - (host or gateway) involved in the link-layer communication - implement trailers. If the system does not dynamically - negotiate use of the trailer protocol on a per-destination - basis, the default configuration MUST disable the protocol. 
- - DISCUSSION: - The trailer protocol is a link-layer encapsulation - technique that rearranges the data contents of packets - sent on the physical network. In some cases, trailers - improve the throughput of higher layer protocols by - reducing the amount of data copying within the operating - system. Higher layer protocols are unaware of trailer - use, but both the sending and receiving host MUST - understand the protocol if it is used. - - Improper use of trailers can result in very confusing - symptoms. Only packets with specific size attributes are - encapsulated using trailers, and typically only a small - fraction of the packets being exchanged have these - attributes. Thus, if a system using trailers exchanges - packets with a system that does not, some packets - disappear into a black hole while others are delivered - successfully. - - IMPLEMENTATION: - On an Ethernet, packets encapsulated with trailers use a - distinct Ethernet type [LINK:1], and trailer negotiation - is performed at the time that ARP is used to discover the - link-layer address of a destination system. - - - -Internet Engineering Task Force [Page 21] - - - - -RFC1122 LINK LAYER October 1989 - - - Specifically, the ARP exchange is completed in the usual - manner using the normal IP protocol type, but a host that - wants to speak trailers will send an additional "trailer - ARP reply" packet, i.e., an ARP reply that specifies the - trailer encapsulation protocol type but otherwise has the - format of a normal ARP reply. If a host configured to use - trailers receives a trailer ARP reply message from a - remote machine, it can add that machine to the list of - machines that understand trailers, e.g., by marking the - corresponding entry in the ARP cache. - - Hosts wishing to receive trailer encapsulations send - trailer ARP replies whenever they complete exchanges of - normal ARP messages for IP. 
Thus, a host that received an - ARP request for its IP protocol address would send a - trailer ARP reply in addition to the normal IP ARP reply; - a host that sent the IP ARP request would send a trailer - ARP reply when it received the corresponding IP ARP reply. - In this way, either the requesting or responding host in - an IP ARP exchange may request that it receive trailer - encapsulations. - - This scheme, using extra trailer ARP reply packets rather - than sending an ARP request for the trailer protocol type, - was designed to avoid a continuous exchange of ARP packets - with a misbehaving host that, contrary to any - specification or common sense, responded to an ARP reply - for trailers with another ARP reply for IP. This problem - is avoided by sending a trailer ARP reply in response to - an IP ARP reply only when the IP ARP reply answers an - outstanding request; this is true when the hardware - address for the host is still unknown when the IP ARP - reply is received. A trailer ARP reply may always be sent - along with an IP ARP reply responding to an IP ARP - request. - - 2.3.2 Address Resolution Protocol -- ARP - - 2.3.2.1 ARP Cache Validation - - An implementation of the Address Resolution Protocol (ARP) - [LINK:2] MUST provide a mechanism to flush out-of-date cache - entries. If this mechanism involves a timeout, it SHOULD be - possible to configure the timeout value. - - A mechanism to prevent ARP flooding (repeatedly sending an - ARP Request for the same IP address, at a high rate) MUST be - included. The recommended maximum rate is 1 per second per - - - -Internet Engineering Task Force [Page 22] - - - - -RFC1122 LINK LAYER October 1989 - - - destination. - - DISCUSSION: - The ARP specification [LINK:2] suggests but does not - require a timeout mechanism to invalidate cache entries - when hosts change their Ethernet addresses. 
The - prevalence of proxy ARP (see Section 2.4 of [INTRO:2]) - has significantly increased the likelihood that cache - entries in hosts will become invalid, and therefore - some ARP-cache invalidation mechanism is now required - for hosts. Even in the absence of proxy ARP, a long- - period cache timeout is useful in order to - automatically correct any bad ARP data that might have - been cached. - - IMPLEMENTATION: - Four mechanisms have been used, sometimes in - combination, to flush out-of-date cache entries. - - (1) Timeout -- Periodically time out cache entries, - even if they are in use. Note that this timeout - should be restarted when the cache entry is - "refreshed" (by observing the source fields, - regardless of target address, of an ARP broadcast - from the system in question). For proxy ARP - situations, the timeout needs to be on the order - of a minute. - - (2) Unicast Poll -- Actively poll the remote host by - periodically sending a point-to-point ARP Request - to it, and delete the entry if no ARP Reply is - received from N successive polls. Again, the - timeout should be on the order of a minute, and - typically N is 2. - - (3) Link-Layer Advice -- If the link-layer driver - detects a delivery problem, flush the - corresponding ARP cache entry. - - (4) Higher-layer Advice -- Provide a call from the - Internet layer to the link layer to indicate a - delivery problem. The effect of this call would - be to invalidate the corresponding cache entry. - This call would be analogous to the - "ADVISE_DELIVPROB()" call from the transport layer - to the Internet layer (see Section 3.4), and in - fact the ADVISE_DELIVPROB routine might in turn - call the link-layer advice routine to invalidate - - - -Internet Engineering Task Force [Page 23] - - - - -RFC1122 LINK LAYER October 1989 - - - the ARP cache entry. - - Approaches (1) and (2) involve ARP cache timeouts on - the order of a minute or less. 
In the absence of proxy - ARP, a timeout this short could create noticeable - overhead traffic on a very large Ethernet. Therefore, - it may be necessary to configure a host to lengthen the - ARP cache timeout. - - 2.3.2.2 ARP Packet Queue - - The link layer SHOULD save (rather than discard) at least - one (the latest) packet of each set of packets destined to - the same unresolved IP address, and transmit the saved - packet when the address has been resolved. - - DISCUSSION: - Failure to follow this recommendation causes the first - packet of every exchange to be lost. Although higher- - layer protocols can generally cope with packet loss by - retransmission, packet loss does impact performance. - For example, loss of a TCP open request causes the - initial round-trip time estimate to be inflated. UDP- - based applications such as the Domain Name System are - more seriously affected. - - 2.3.3 Ethernet and IEEE 802 Encapsulation - - The IP encapsulation for Ethernets is described in RFC-894 - [LINK:3], while RFC-1042 [LINK:4] describes the IP - encapsulation for IEEE 802 networks. RFC-1042 elaborates and - replaces the discussion in Section 3.4 of [INTRO:2]. - - Every Internet host connected to a 10Mbps Ethernet cable: - - o MUST be able to send and receive packets using RFC-894 - encapsulation; - - o SHOULD be able to receive RFC-1042 packets, intermixed - with RFC-894 packets; and - - o MAY be able to send packets using RFC-1042 encapsulation. - - - An Internet host that implements sending both the RFC-894 and - the RFC-1042 encapsulations MUST provide a configuration switch - to select which is sent, and this switch MUST default to RFC- - 894. 
- - - -Internet Engineering Task Force [Page 24] - - - - -RFC1122 LINK LAYER October 1989 - - - Note that the standard IP encapsulation in RFC-1042 does not - use the protocol id value (K1=6) that IEEE reserved for IP; - instead, it uses a value (K1=170) that implies an extension - (the "SNAP") which can be used to hold the Ether-Type field. - An Internet system MUST NOT send 802 packets using K1=6. - - Address translation from Internet addresses to link-layer - addresses on Ethernet and IEEE 802 networks MUST be managed by - the Address Resolution Protocol (ARP). - - The MTU for an Ethernet is 1500 and for 802.3 is 1492. - - DISCUSSION: - The IEEE 802.3 specification provides for operation over a - 10Mbps Ethernet cable, in which case Ethernet and IEEE - 802.3 frames can be physically intermixed. A receiver can - distinguish Ethernet and 802.3 frames by the value of the - 802.3 Length field; this two-octet field coincides in the - header with the Ether-Type field of an Ethernet frame. In - particular, the 802.3 Length field must be less than or - equal to 1500, while all valid Ether-Type values are - greater than 1500. - - Another compatibility problem arises with link-layer - broadcasts. A broadcast sent with one framing will not be - seen by hosts that can receive only the other framing. - - The provisions of this section were designed to provide - direct interoperation between 894-capable and 1042-capable - systems on the same cable, to the maximum extent possible. - It is intended to support the present situation where - 894-only systems predominate, while providing an easy - transition to a possible future in which 1042-capable - systems become common. - - Note that 894-only systems cannot interoperate directly - with 1042-only systems. If the two system types are set - up as two different logical networks on the same cable, - they can communicate only through an IP gateway. 
- Furthermore, it is not useful or even possible for a - dual-format host to discover automatically which format to - send, because of the problem of link-layer broadcasts. - - 2.4 LINK/INTERNET LAYER INTERFACE - - The packet receive interface between the IP layer and the link - layer MUST include a flag to indicate whether the incoming packet - was addressed to a link-layer broadcast address. - - - -Internet Engineering Task Force [Page 25] - - - - -RFC1122 LINK LAYER October 1989 - - - DISCUSSION - Although the IP layer does not generally know link layer - addresses (since every different network medium typically has - a different address format), the broadcast address on a - broadcast-capable medium is an important special case. See - Section 3.2.2, especially the DISCUSSION concerning broadcast - storms. - - The packet send interface between the IP and link layers MUST - include the 5-bit TOS field (see Section 3.2.1.6). - - The link layer MUST NOT report a Destination Unreachable error to - IP solely because there is no ARP cache entry for a destination. - - 2.5 LINK LAYER REQUIREMENTS SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION| | | |T|T|e ---------------------------------------------------|-------|-|-|-|-|-|-- - | | | | | | | -Trailer encapsulation |2.3.1 | | |x| | | -Send Trailers by default without negotiation |2.3.1 | | | | |x| -ARP |2.3.2 | | | | | | - Flush out-of-date ARP cache entries |2.3.2.1|x| | | | | - Prevent ARP floods |2.3.2.1|x| | | | | - Cache timeout configurable |2.3.2.1| |x| | | | - Save at least one (latest) unresolved pkt |2.3.2.2| |x| | | | -Ethernet and IEEE 802 Encapsulation |2.3.3 | | | | | | - Host able to: |2.3.3 | | | | | | - Send & receive RFC-894 encapsulation |2.3.3 |x| | | | | - Receive RFC-1042 encapsulation |2.3.3 | |x| | | | - Send RFC-1042 encapsulation |2.3.3 | | |x| | | - Then config. 
sw. to select, RFC-894 dflt |2.3.3 |x| | | | | - Send K1=6 encapsulation |2.3.3 | | | | |x| - Use ARP on Ethernet and IEEE 802 nets |2.3.3 |x| | | | | -Link layer report b'casts to IP layer |2.4 |x| | | | | -IP layer pass TOS to link layer |2.4 |x| | | | | -No ARP cache entry treated as Dest. Unreach. |2.4 | | | | |x| - - - - - -Internet Engineering Task Force [Page 26] - - - - -RFC1122 INTERNET LAYER October 1989 - - -3. INTERNET LAYER PROTOCOLS - - 3.1 INTRODUCTION - - The Robustness Principle: "Be liberal in what you accept, and - conservative in what you send" is particularly important in the - Internet layer, where one misbehaving host can deny Internet - service to many other hosts. - - The protocol standards used in the Internet layer are: - - o RFC-791 [IP:1] defines the IP protocol and gives an - introduction to the architecture of the Internet. - - o RFC-792 [IP:2] defines ICMP, which provides routing, - diagnostic and error functionality for IP. Although ICMP - messages are encapsulated within IP datagrams, ICMP - processing is considered to be (and is typically implemented - as) part of the IP layer. See Section 3.2.2. - - o RFC-950 [IP:3] defines the mandatory subnet extension to the - addressing architecture. - - o RFC-1112 [IP:4] defines the Internet Group Management - Protocol IGMP, as part of a recommended extension to hosts - and to the host-gateway interface to support Internet-wide - multicasting at the IP level. See Section 3.2.3. - - The target of an IP multicast may be an arbitrary group of - Internet hosts. IP multicasting is designed as a natural - extension of the link-layer multicasting facilities of some - networks, and it provides a standard means for local access - to such link-layer multicasting facilities. - - Other important references are listed in Section 5 of this - document. - - The Internet layer of host software MUST implement both IP and - ICMP. See Section 3.3.7 for the requirements on support of IGMP. 
- - The host IP layer has two basic functions: (1) choose the "next - hop" gateway or host for outgoing IP datagrams and (2) reassemble - incoming IP datagrams. The IP layer may also (3) implement - intentional fragmentation of outgoing datagrams. Finally, the IP - layer must (4) provide diagnostic and error functionality. We - expect that IP layer functions may increase somewhat in the - future, as further Internet control and management facilities are - developed. - - - -Internet Engineering Task Force [Page 27] - - - - -RFC1122 INTERNET LAYER October 1989 - - - For normal datagrams, the processing is straightforward. For - incoming datagrams, the IP layer: - - (1) verifies that the datagram is correctly formatted; - - (2) verifies that it is destined to the local host; - - (3) processes options; - - (4) reassembles the datagram if necessary; and - - (5) passes the encapsulated message to the appropriate - transport-layer protocol module. - - For outgoing datagrams, the IP layer: - - (1) sets any fields not set by the transport layer; - - (2) selects the correct first hop on the connected network (a - process called "routing"); - - (3) fragments the datagram if necessary and if intentional - fragmentation is implemented (see Section 3.3.3); and - - (4) passes the packet(s) to the appropriate link-layer driver. - - - A host is said to be multihomed if it has multiple IP addresses. - Multihoming introduces considerable confusion and complexity into - the protocol suite, and it is an area in which the Internet - architecture falls seriously short of solving all problems. There - are two distinct problem areas in multihoming: - - (1) Local multihoming -- the host itself is multihomed; or - - (2) Remote multihoming -- the local host needs to communicate - with a remote multihomed host. - - At present, remote multihoming MUST be handled at the application - layer, as discussed in the companion RFC [INTRO:1]. 
A host MAY - support local multihoming, which is discussed in this document, - and in particular in Section 3.3.4. - - Any host that forwards datagrams generated by another host is - acting as a gateway and MUST also meet the specifications laid out - in the gateway requirements RFC [INTRO:2]. An Internet host that - includes embedded gateway code MUST have a configuration switch to - disable the gateway function, and this switch MUST default to the - - - -Internet Engineering Task Force [Page 28] - - - - -RFC1122 INTERNET LAYER October 1989 - - - non-gateway mode. In this mode, a datagram arriving through one - interface will not be forwarded to another host or gateway (unless - it is source-routed), regardless of whether the host is single- - homed or multihomed. The host software MUST NOT automatically - move into gateway mode if the host has more than one interface, as - the operator of the machine may neither want to provide that - service nor be competent to do so. - - In the following, the action specified in certain cases is to - "silently discard" a received datagram. This means that the - datagram will be discarded without further processing and that the - host will not send any ICMP error message (see Section 3.2.2) as a - result. However, for diagnosis of problems a host SHOULD provide - the capability of logging the error (see Section 1.2.3), including - the contents of the silently-discarded datagram, and SHOULD record - the event in a statistics counter. - - DISCUSSION: - Silent discard of erroneous datagrams is generally intended - to prevent "broadcast storms". - - 3.2 PROTOCOL WALK-THROUGH - - 3.2.1 Internet Protocol -- IP - - 3.2.1.1 Version Number: RFC-791 Section 3.1 - - A datagram whose version number is not 4 MUST be silently - discarded. - - 3.2.1.2 Checksum: RFC-791 Section 3.1 - - A host MUST verify the IP header checksum on every received - datagram and silently discard every datagram that has a bad - checksum. 
- - 3.2.1.3 Addressing: RFC-791 Section 3.2 - - There are now five classes of IP addresses: Class A through - Class E. Class D addresses are used for IP multicasting - [IP:4], while Class E addresses are reserved for - experimental use. - - A multicast (Class D) address is a 28-bit logical address - that stands for a group of hosts, and may be either - permanent or transient. Permanent multicast addresses are - allocated by the Internet Assigned Number Authority - [INTRO:6], while transient addresses may be allocated - - - -Internet Engineering Task Force [Page 29] - - - - -RFC1122 INTERNET LAYER October 1989 - - - dynamically to transient groups. Group membership is - determined dynamically using IGMP [IP:4]. - - We now summarize the important special cases for Class A, B, - and C IP addresses, using the following notation for an IP - address: - - { <Network-number>, <Host-number> } - - or - { <Network-number>, <Subnet-number>, <Host-number> } - - and the notation "-1" for a field that contains all 1 bits. - This notation is not intended to imply that the 1-bits in an - address mask need be contiguous. - - (a) { 0, 0 } - - This host on this network. MUST NOT be sent, except as - a source address as part of an initialization procedure - by which the host learns its own IP address. - - See also Section 3.3.6 for a non-standard use of {0,0}. - - (b) { 0, <Host-number> } - - Specified host on this network. It MUST NOT be sent, - except as a source address as part of an initialization - procedure by which the host learns its full IP address. - - (c) { -1, -1 } - - Limited broadcast. It MUST NOT be used as a source - address. - - A datagram with this destination address will be - received by every host on the connected physical - network but will not be forwarded outside that network. - - (d) { <Network-number>, -1 } - - Directed broadcast to the specified network. It MUST - NOT be used as a source address. - - (e) { <Network-number>, <Subnet-number>, -1 } - - Directed broadcast to the specified subnet. It MUST - NOT be used as a source address.
- - - -Internet Engineering Task Force [Page 30] - - - - -RFC1122 INTERNET LAYER October 1989 - - - (f) { <Network-number>, -1, -1 } - - Directed broadcast to all subnets of the specified - subnetted network. It MUST NOT be used as a source - address. - - (g) { 127, <any> } - - Internal host loopback address. Addresses of this form - MUST NOT appear outside a host. - - The <Network-number> is administratively assigned so that - its value will be unique in the entire world. - - IP addresses are not permitted to have the value 0 or -1 for - any of the <Host-number>, <Network-number>, or <Subnet- - number> fields (except in the special cases listed above). - This implies that each of these fields will be at least two - bits long. - - For further discussion of broadcast addresses, see Section - 3.3.6. - - A host MUST support the subnet extensions to IP [IP:3]. As - a result, there will be an address mask of the form: - {-1, -1, 0} associated with each of the host's local IP - addresses; see Sections 3.2.2.9 and 3.3.1.1. - - When a host sends any datagram, the IP source address MUST - be one of its own IP addresses (but not a broadcast or - multicast address). - - A host MUST silently discard an incoming datagram that is - not destined for the host. An incoming datagram is destined - for the host if the datagram's destination address field is: - - (1) (one of) the host's IP address(es); or - - (2) an IP broadcast address valid for the connected - network; or - - (3) the address for a multicast group of which the host is - a member on the incoming physical interface. - - For most purposes, a datagram addressed to a broadcast or - multicast destination is processed as if it had been - addressed to one of the host's IP addresses; we use the term - "specific-destination address" for the equivalent local IP - - - -Internet Engineering Task Force [Page 31] - - - - -RFC1122 INTERNET LAYER October 1989 - - - address of the host.
The specific-destination address is - defined to be the destination address in the IP header - unless the header contains a broadcast or multicast address, - in which case the specific-destination is an IP address - assigned to the physical interface on which the datagram - arrived. - - A host MUST silently discard an incoming datagram containing - an IP source address that is invalid by the rules of this - section. This validation could be done in either the IP - layer or by each protocol in the transport layer. - - DISCUSSION: - A mis-addressed datagram might be caused by a link- - layer broadcast of a unicast datagram or by a gateway - or host that is confused or mis-configured. - - An architectural goal for Internet hosts was to allow - IP addresses to be featureless 32-bit numbers, avoiding - algorithms that required a knowledge of the IP address - format. Otherwise, any future change in the format or - interpretation of IP addresses will require host - software changes. However, validation of broadcast and - multicast addresses violates this goal; a few other - violations are described elsewhere in this document. - - Implementers should be aware that applications - depending upon the all-subnets directed broadcast - address (f) may be unusable on some networks. All- - subnets broadcast is not widely implemented in vendor - gateways at present, and even when it is implemented, a - particular network administration may disable it in the - gateway configuration. - - 3.2.1.4 Fragmentation and Reassembly: RFC-791 Section 3.2 - - The Internet model requires that every host support - reassembly. See Sections 3.3.2 and 3.3.3 for the - requirements on fragmentation and reassembly. - - 3.2.1.5 Identification: RFC-791 Section 3.2 - - When sending an identical copy of an earlier datagram, a - host MAY optionally retain the same Identification field in - the copy. 
- - - - - - -Internet Engineering Task Force [Page 32] - - - - -RFC1122 INTERNET LAYER October 1989 - - - DISCUSSION: - Some Internet protocol experts have maintained that - when a host sends an identical copy of an earlier - datagram, the new copy should contain the same - Identification value as the original. There are two - suggested advantages: (1) if the datagrams are - fragmented and some of the fragments are lost, the - receiver may be able to reconstruct a complete datagram - from fragments of the original and the copies; (2) a - congested gateway might use the IP Identification field - (and Fragment Offset) to discard duplicate datagrams - from the queue. - - However, the observed patterns of datagram loss in the - Internet do not favor the probability of retransmitted - fragments filling reassembly gaps, while other - mechanisms (e.g., TCP repacketizing upon - retransmission) tend to prevent retransmission of an - identical datagram [IP:9]. Therefore, we believe that - retransmitting the same Identification field is not - useful. Also, a connectionless transport protocol like - UDP would require the cooperation of the application - programs to retain the same Identification value in - identical datagrams. - - 3.2.1.6 Type-of-Service: RFC-791 Section 3.2 - - The "Type-of-Service" byte in the IP header is divided into - two sections: the Precedence field (high-order 3 bits), and - a field that is customarily called "Type-of-Service" or - "TOS" (low-order 5 bits). In this document, all references - to "TOS" or the "TOS field" refer to the low-order 5 bits - only. - - The Precedence field is intended for Department of Defense - applications of the Internet protocols. The use of non-zero - values in this field is outside the scope of this document - and the IP standard specification. Vendors should consult - the Defense Communication Agency (DCA) for guidance on the - IP Precedence field and its implications for other protocol - layers. 
However, vendors should note that the use of - precedence will most likely require that its value be passed - between protocol layers in just the same way as the TOS - field is passed. - - The IP layer MUST provide a means for the transport layer to - set the TOS field of every datagram that is sent; the - default is all zero bits. The IP layer SHOULD pass received - - - -Internet Engineering Task Force [Page 33] - - - - -RFC1122 INTERNET LAYER October 1989 - - - TOS values up to the transport layer. - - The particular link-layer mappings of TOS contained in RFC- - 795 SHOULD NOT be implemented. - - DISCUSSION: - While the TOS field has been little used in the past, - it is expected to play an increasing role in the near - future. The TOS field is expected to be used to - control two aspects of gateway operations: routing and - queueing algorithms. See Section 2 of [INTRO:1] for - the requirements on application programs to specify TOS - values. - - The TOS field may also be mapped into link-layer - service selectors. This has been applied to provide - effective sharing of serial lines by different classes - of TCP traffic, for example. However, the mappings - suggested in RFC-795 for networks that were included in - the Internet as of 1981 are now obsolete. - - 3.2.1.7 Time-to-Live: RFC-791 Section 3.2 - - A host MUST NOT send a datagram with a Time-to-Live (TTL) - value of zero. - - A host MUST NOT discard a datagram just because it was - received with TTL less than 2. - - The IP layer MUST provide a means for the transport layer to - set the TTL field of every datagram that is sent. When a - fixed TTL value is used, it MUST be configurable. The - current suggested value will be published in the "Assigned - Numbers" RFC. - - DISCUSSION: - The TTL field has two functions: limit the lifetime of - TCP segments (see RFC-793 [TCP:1], p. 28), and - terminate Internet routing loops. 
Although TTL is a - time in seconds, it also has some attributes of a hop- - count, since each gateway is required to reduce the TTL - field by at least one. - - The intent is that TTL expiration will cause a datagram - to be discarded by a gateway but not by the destination - host; however, hosts that act as gateways by forwarding - datagrams must follow the gateway rules for TTL. - - - - -Internet Engineering Task Force [Page 34] - - - - -RFC1122 INTERNET LAYER October 1989 - - - A higher-layer protocol may want to set the TTL in - order to implement an "expanding scope" search for some - Internet resource. This is used by some diagnostic - tools, and is expected to be useful for locating the - "nearest" server of a given class using IP - multicasting, for example. A particular transport - protocol may also want to specify its own TTL bound on - maximum datagram lifetime. - - A fixed value must be at least big enough for the - Internet "diameter," i.e., the longest possible path. - A reasonable value is about twice the diameter, to - allow for continued Internet growth. - - 3.2.1.8 Options: RFC-791 Section 3.2 - - There MUST be a means for the transport layer to specify IP - options to be included in transmitted IP datagrams (see - Section 3.4). - - All IP options (except NOP or END-OF-LIST) received in - datagrams MUST be passed to the transport layer (or to ICMP - processing when the datagram is an ICMP message). The IP - and transport layer MUST each interpret those IP options - that they understand and silently ignore the others. - - Later sections of this document discuss specific IP option - support required by each of ICMP, TCP, and UDP. - - DISCUSSION: - Passing all received IP options to the transport layer - is a deliberate "violation of strict layering" that is - designed to ease the introduction of new transport- - relevant IP options in the future. Each layer must - pick out any options that are relevant to its own - processing and ignore the rest. 
For this purpose, - every IP option except NOP and END-OF-LIST will include - a specification of its own length. - - This document does not define the order in which a - receiver must process multiple options in the same IP - header. Hosts sending multiple options must be aware - that this introduces an ambiguity in the meaning of - certain options when combined with a source-route - option. - - IMPLEMENTATION: - The IP layer must not crash as the result of an option - - - -Internet Engineering Task Force [Page 35] - - - - -RFC1122 INTERNET LAYER October 1989 - - - length that is outside the possible range. For - example, erroneous option lengths have been observed to - put some IP implementations into infinite loops. - - Here are the requirements for specific IP options: - - - (a) Security Option - - Some environments require the Security option in every - datagram; such a requirement is outside the scope of - this document and the IP standard specification. Note, - however, that the security options described in RFC-791 - and RFC-1038 are obsolete. For DoD applications, - vendors should consult [IP:8] for guidance. - - - (b) Stream Identifier Option - - This option is obsolete; it SHOULD NOT be sent, and it - MUST be silently ignored if received. - - - (c) Source Route Options - - A host MUST support originating a source route and MUST - be able to act as the final destination of a source - route. - - If host receives a datagram containing a completed - source route (i.e., the pointer points beyond the last - field), the datagram has reached its final destination; - the option as received (the recorded route) MUST be - passed up to the transport layer (or to ICMP message - processing). This recorded route will be reversed and - used to form a return source route for reply datagrams - (see discussion of IP Options in Section 4). 
When a - return source route is built, it MUST be correctly - formed even if the recorded route included the source - host (see case (B) in the discussion below). - - An IP header containing more than one Source Route - option MUST NOT be sent; the effect on routing of - multiple Source Route options is implementation- - specific. - - Section 3.3.5 presents the rules for a host acting as - an intermediate hop in a source route, i.e., forwarding - - - -Internet Engineering Task Force [Page 36] - - - - -RFC1122 INTERNET LAYER October 1989 - - - a source-routed datagram. - - DISCUSSION: - If a source-routed datagram is fragmented, each - fragment will contain a copy of the source route. - Since the processing of IP options (including a - source route) must precede reassembly, the - original datagram will not be reassembled until - the final destination is reached. - - Suppose a source routed datagram is to be routed - from host S to host D via gateways G1, G2, ... Gn. - There was an ambiguity in the specification over - whether the source route option in a datagram sent - out by S should be (A) or (B): - - (A): {>>G2, G3, ... Gn, D} <--- CORRECT - - (B): {S, >>G2, G3, ... Gn, D} <---- WRONG - - (where >> represents the pointer). If (A) is - sent, the datagram received at D will contain the - option: {G1, G2, ... Gn >>}, with S and D as the - IP source and destination addresses. If (B) were - sent, the datagram received at D would again - contain S and D as the same IP source and - destination addresses, but the option would be: - {S, G1, ...Gn >>}; i.e., the originating host - would be the first hop in the route. - - - (d) Record Route Option - - Implementation of originating and processing the Record - Route option is OPTIONAL. - - - (e) Timestamp Option - - Implementation of originating and processing the - Timestamp option is OPTIONAL. 
If it is implemented, - the following rules apply: - - o The originating host MUST record a timestamp in a - Timestamp option whose Internet address fields are - not pre-specified or whose first pre-specified - address is the host's interface address. - - - - -Internet Engineering Task Force [Page 37] - - - - -RFC1122 INTERNET LAYER October 1989 - - - o The destination host MUST (if possible) add the - current timestamp to a Timestamp option before - passing the option to the transport layer or to - ICMP for processing. - - o A timestamp value MUST follow the rules given in - Section 3.2.2.8 for the ICMP Timestamp message. - - - 3.2.2 Internet Control Message Protocol -- ICMP - - ICMP messages are grouped into two classes. - - * - ICMP error messages: - - Destination Unreachable (see Section 3.2.2.1) - Redirect (see Section 3.2.2.2) - Source Quench (see Section 3.2.2.3) - Time Exceeded (see Section 3.2.2.4) - Parameter Problem (see Section 3.2.2.5) - - - * - ICMP query messages: - - Echo (see Section 3.2.2.6) - Information (see Section 3.2.2.7) - Timestamp (see Section 3.2.2.8) - Address Mask (see Section 3.2.2.9) - - - If an ICMP message of unknown type is received, it MUST be - silently discarded. - - Every ICMP error message includes the Internet header and at - least the first 8 data octets of the datagram that triggered - the error; more than 8 octets MAY be sent; this header and data - MUST be unchanged from the received datagram. - - In those cases where the Internet layer is required to pass an - ICMP error message to the transport layer, the IP protocol - number MUST be extracted from the original header and used to - select the appropriate transport protocol entity to handle the - error. - - An ICMP error message SHOULD be sent with normal (i.e., zero) - TOS bits. 
- - - -Internet Engineering Task Force [Page 38] - - - - -RFC1122 INTERNET LAYER October 1989 - - - An ICMP error message MUST NOT be sent as the result of - receiving: - - * an ICMP error message, or - - * a datagram destined to an IP broadcast or IP multicast - address, or - - * a datagram sent as a link-layer broadcast, or - - * a non-initial fragment, or - - * a datagram whose source address does not define a single - host -- e.g., a zero address, a loopback address, a - broadcast address, a multicast address, or a Class E - address. - - NOTE: THESE RESTRICTIONS TAKE PRECEDENCE OVER ANY REQUIREMENT - ELSEWHERE IN THIS DOCUMENT FOR SENDING ICMP ERROR MESSAGES. - - DISCUSSION: - These rules will prevent the "broadcast storms" that have - resulted from hosts returning ICMP error messages in - response to broadcast datagrams. For example, a broadcast - UDP segment to a non-existent port could trigger a flood - of ICMP Destination Unreachable datagrams from all - machines that do not have a client for that destination - port. On a large Ethernet, the resulting collisions can - render the network useless for a second or more. - - Every datagram that is broadcast on the connected network - should have a valid IP broadcast address as its IP - destination (see Section 3.3.6). However, some hosts - violate this rule. To be certain to detect broadcast - datagrams, therefore, hosts are required to check for a - link-layer broadcast as well as an IP-layer broadcast - address. - - IMPLEMENTATION: - This requires that the link layer inform the IP layer when - a link-layer broadcast datagram has been received; see - Section 2.4. 
- - 3.2.2.1 Destination Unreachable: RFC-792 - - The following additional codes are hereby defined: - - 6 = destination network unknown - - - -Internet Engineering Task Force [Page 39] - - - - -RFC1122 INTERNET LAYER October 1989 - - - 7 = destination host unknown - - 8 = source host isolated - - 9 = communication with destination network - administratively prohibited - - 10 = communication with destination host - administratively prohibited - - 11 = network unreachable for type of service - - 12 = host unreachable for type of service - - A host SHOULD generate Destination Unreachable messages with - code: - - 2 (Protocol Unreachable), when the designated transport - protocol is not supported; or - - 3 (Port Unreachable), when the designated transport - protocol (e.g., UDP) is unable to demultiplex the - datagram but has no protocol mechanism to inform the - sender. - - A Destination Unreachable message that is received MUST be - reported to the transport layer. The transport layer SHOULD - use the information appropriately; for example, see Sections - 4.1.3.3, 4.2.3.9, and 4.2.4 below. A transport protocol - that has its own mechanism for notifying the sender that a - port is unreachable (e.g., TCP, which sends RST segments) - MUST nevertheless accept an ICMP Port Unreachable for the - same purpose. - - A Destination Unreachable message that is received with code - 0 (Net), 1 (Host), or 5 (Bad Source Route) may result from a - routing transient and MUST therefore be interpreted as only - a hint, not proof, that the specified destination is - unreachable [IP:11]. For example, it MUST NOT be used as - proof of a dead gateway (see Section 3.3.1). - - 3.2.2.2 Redirect: RFC-792 - - A host SHOULD NOT send an ICMP Redirect message; Redirects - are to be sent only by gateways. - - A host receiving a Redirect message MUST update its routing - information accordingly. 
Every host MUST be prepared to - - - -Internet Engineering Task Force [Page 40] - - - - -RFC1122 INTERNET LAYER October 1989 - - - accept both Host and Network Redirects and to process them - as described in Section 3.3.1.2 below. - - A Redirect message SHOULD be silently discarded if the new - gateway address it specifies is not on the same connected - (sub-) net through which the Redirect arrived [INTRO:2, - Appendix A], or if the source of the Redirect is not the - current first-hop gateway for the specified destination (see - Section 3.3.1). - - 3.2.2.3 Source Quench: RFC-792 - - A host MAY send a Source Quench message if it is - approaching, or has reached, the point at which it is forced - to discard incoming datagrams due to a shortage of - reassembly buffers or other resources. See Section 2.2.3 of - [INTRO:2] for suggestions on when to send Source Quench. - - If a Source Quench message is received, the IP layer MUST - report it to the transport layer (or ICMP processing). In - general, the transport or application layer SHOULD implement - a mechanism to respond to Source Quench for any protocol - that can send a sequence of datagrams to the same - destination and which can reasonably be expected to maintain - enough state information to make this feasible. See Section - 4 for the handling of Source Quench by TCP and UDP. - - DISCUSSION: - A Source Quench may be generated by the target host or - by some gateway in the path of a datagram. The host - receiving a Source Quench should throttle itself back - for a period of time, then gradually increase the - transmission rate again. The mechanism to respond to - Source Quench may be in the transport layer (for - connection-oriented protocols like TCP) or in the - application layer (for protocols that are built on top - of UDP). 
- - A mechanism has been proposed [IP:14] to make the IP - layer respond directly to Source Quench by controlling - the rate at which datagrams are sent, however, this - proposal is currently experimental and not currently - recommended. - - 3.2.2.4 Time Exceeded: RFC-792 - - An incoming Time Exceeded message MUST be passed to the - transport layer. - - - -Internet Engineering Task Force [Page 41] - - - - -RFC1122 INTERNET LAYER October 1989 - - - DISCUSSION: - A gateway will send a Time Exceeded Code 0 (In Transit) - message when it discards a datagram due to an expired - TTL field. This indicates either a gateway routing - loop or too small an initial TTL value. - - A host may receive a Time Exceeded Code 1 (Reassembly - Timeout) message from a destination host that has timed - out and discarded an incomplete datagram; see Section - 3.3.2 below. In the future, receipt of this message - might be part of some "MTU discovery" procedure, to - discover the maximum datagram size that can be sent on - the path without fragmentation. - - 3.2.2.5 Parameter Problem: RFC-792 - - A host SHOULD generate Parameter Problem messages. An - incoming Parameter Problem message MUST be passed to the - transport layer, and it MAY be reported to the user. - - DISCUSSION: - The ICMP Parameter Problem message is sent to the - source host for any problem not specifically covered by - another ICMP message. Receipt of a Parameter Problem - message generally indicates some local or remote - implementation error. - - A new variant on the Parameter Problem message is hereby - defined: - Code 1 = required option is missing. - - DISCUSSION: - This variant is currently in use in the military - community for a missing security option. - - 3.2.2.6 Echo Request/Reply: RFC-792 - - Every host MUST implement an ICMP Echo server function that - receives Echo Requests and sends corresponding Echo Replies. 
- A host SHOULD also implement an application-layer interface - for sending an Echo Request and receiving an Echo Reply, for - diagnostic purposes. - - An ICMP Echo Request destined to an IP broadcast or IP - multicast address MAY be silently discarded. - - - - - - -Internet Engineering Task Force [Page 42] - - - - -RFC1122 INTERNET LAYER October 1989 - - - DISCUSSION: - This neutral provision results from a passionate debate - between those who feel that ICMP Echo to a broadcast - address provides a valuable diagnostic capability and - those who feel that misuse of this feature can too - easily create packet storms. - - The IP source address in an ICMP Echo Reply MUST be the same - as the specific-destination address (defined in Section - 3.2.1.3) of the corresponding ICMP Echo Request message. - - Data received in an ICMP Echo Request MUST be entirely - included in the resulting Echo Reply. However, if sending - the Echo Reply requires intentional fragmentation that is - not implemented, the datagram MUST be truncated to maximum - transmission size (see Section 3.3.3) and sent. - - Echo Reply messages MUST be passed to the ICMP user - interface, unless the corresponding Echo Request originated - in the IP layer. - - If a Record Route and/or Time Stamp option is received in an - ICMP Echo Request, this option (these options) SHOULD be - updated to include the current host and included in the IP - header of the Echo Reply message, without "truncation". - Thus, the recorded route will be for the entire round trip. - - If a Source Route option is received in an ICMP Echo - Request, the return route MUST be reversed and used as a - Source Route option for the Echo Reply message. - - 3.2.2.7 Information Request/Reply: RFC-792 - - A host SHOULD NOT implement these messages. 
- - DISCUSSION: - The Information Request/Reply pair was intended to - support self-configuring systems such as diskless - workstations, to allow them to discover their IP - network numbers at boot time. However, the RARP and - BOOTP protocols provide better mechanisms for a host to - discover its own IP address. - - 3.2.2.8 Timestamp and Timestamp Reply: RFC-792 - - A host MAY implement Timestamp and Timestamp Reply. If they - are implemented, the following rules MUST be followed. - - - - -Internet Engineering Task Force [Page 43] - - - - -RFC1122 INTERNET LAYER October 1989 - - - o The ICMP Timestamp server function returns a Timestamp - Reply to every Timestamp message that is received. If - this function is implemented, it SHOULD be designed for - minimum variability in delay (e.g., implemented in the - kernel to avoid delay in scheduling a user process). - - The following cases for Timestamp are to be handled - according to the corresponding rules for ICMP Echo: - - o An ICMP Timestamp Request message to an IP broadcast or - IP multicast address MAY be silently discarded. - - o The IP source address in an ICMP Timestamp Reply MUST - be the same as the specific-destination address of the - corresponding Timestamp Request message. - - o If a Source-route option is received in an ICMP Timestamp - Request, the return route MUST be reversed and used as - a Source Route option for the Timestamp Reply message. - - o If a Record Route and/or Timestamp option is received - in a Timestamp Request, this (these) option(s) SHOULD - be updated to include the current host and included in - the IP header of the Timestamp Reply message. - - o Incoming Timestamp Reply messages MUST be passed up to - the ICMP user interface. - - The preferred form for a timestamp value (the "standard - value") is in units of milliseconds since midnight Universal - Time. However, it may be difficult to provide this value - with millisecond resolution.
For example, many systems use - clocks that update only at line frequency, 50 or 60 times - per second. Therefore, some latitude is allowed in a - "standard value": - - (a) A "standard value" MUST be updated at least 15 times - per second (i.e., at most the six low-order bits of the - value may be undefined). - - (b) The accuracy of a "standard value" MUST approximate - that of operator-set CPU clocks, i.e., correct within a - few minutes. - - - - - - - - -Internet Engineering Task Force [Page 44] - - - - -RFC1122 INTERNET LAYER October 1989 - - - 3.2.2.9 Address Mask Request/Reply: RFC-950 - - A host MUST support the first, and MAY implement all three, - of the following methods for determining the address mask(s) - corresponding to its IP address(es): - - (1) static configuration information; - - (2) obtaining the address mask(s) dynamically as a side- - effect of the system initialization process (see - [INTRO:1]); and - - (3) sending ICMP Address Mask Request(s) and receiving ICMP - Address Mask Reply(s). - - The choice of method to be used in a particular host MUST be - configurable. - - When method (3), the use of Address Mask messages, is - enabled, then: - - (a) When it initializes, the host MUST broadcast an Address - Mask Request message on the connected network - corresponding to the IP address. It MUST retransmit - this message a small number of times if it does not - receive an immediate Address Mask Reply. - - (b) Until it has received an Address Mask Reply, the host - SHOULD assume a mask appropriate for the address class - of the IP address, i.e., assume that the connected - network is not subnetted. - - (c) The first Address Mask Reply message received MUST be - used to set the address mask corresponding to the - particular local IP address. This is true even if the - first Address Mask Reply message is "unsolicited", in - which case it will have been broadcast and may arrive - after the host has ceased to retransmit Address Mask - Requests. 
Once the mask has been set by an Address - Mask Reply, later Address Mask Reply messages MUST be - (silently) ignored. - - Conversely, if Address Mask messages are disabled, then no - ICMP Address Mask Requests will be sent, and any ICMP - Address Mask Replies received for that local IP address MUST - be (silently) ignored. - - A host SHOULD make some reasonableness check on any address - - - -Internet Engineering Task Force [Page 45] - - - - -RFC1122 INTERNET LAYER October 1989 - - - mask it installs; see IMPLEMENTATION section below. - - A system MUST NOT send an Address Mask Reply unless it is an - authoritative agent for address masks. An authoritative - agent may be a host or a gateway, but it MUST be explicitly - configured as an address mask agent. Receiving an address - mask via an Address Mask Reply does not give the receiver - authority and MUST NOT be used as the basis for issuing - Address Mask Replies. - - With a statically configured address mask, there SHOULD be - an additional configuration flag that determines whether the - host is to act as an authoritative agent for this mask, - i.e., whether it will answer Address Mask Request messages - using this mask. - - If it is configured as an agent, the host MUST broadcast an - Address Mask Reply for the mask on the appropriate interface - when it initializes. - - See "System Initialization" in [INTRO:1] for more - information about the use of Address Mask Request/Reply - messages. - - DISCUSSION - Hosts that casually send Address Mask Replies with - invalid address masks have often been a serious - nuisance. To prevent this, Address Mask Replies ought - to be sent only by authoritative agents that have been - selected by explicit administrative action. - - When an authoritative agent receives an Address Mask - Request message, it will send a unicast Address Mask - Reply to the source IP address. If the network part of - this address is zero (see (a) and (b) in 3.2.1.3), the - Reply will be broadcast.
- - Getting no reply to its Address Mask Request messages, - a host will assume there is no agent and use an - unsubnetted mask, but the agent may be only temporarily - unreachable. An agent will broadcast an unsolicited - Address Mask Reply whenever it initializes, in order to - update the masks of all hosts that have initialized in - the meantime. - - IMPLEMENTATION: - The following reasonableness check on an address mask - is suggested: the mask is not all 1 bits, and it is - - - -Internet Engineering Task Force [Page 46] - - - - -RFC1122 INTERNET LAYER October 1989 - - - either zero or else the 8 highest-order bits are on. - - 3.2.3 Internet Group Management Protocol IGMP - - IGMP [IP:4] is a protocol used between hosts and gateways on a - single network to establish hosts' membership in particular - multicast groups. The gateways use this information, in - conjunction with a multicast routing protocol, to support IP - multicasting across the Internet. - - At this time, implementation of IGMP is OPTIONAL; see Section - 3.3.7 for more information. Without IGMP, a host can still - participate in multicasting local to its connected networks. - - 3.3 SPECIFIC ISSUES - - 3.3.1 Routing Outbound Datagrams - - The IP layer chooses the correct next hop for each datagram it - sends. If the destination is on a connected network, the - datagram is sent directly to the destination host; otherwise, - it has to be routed to a gateway on a connected network. - - 3.3.1.1 Local/Remote Decision - - To decide if the destination is on a connected network, the - following algorithm MUST be used [see IP:3]: - - (a) The address mask (particular to a local IP address for - a multihomed host) is a 32-bit mask that selects the - network number and subnet number fields of the - corresponding IP address. 
- - (b) If the IP destination address bits extracted by the - address mask match the IP source address bits extracted - by the same mask, then the destination is on the - corresponding connected network, and the datagram is to - be transmitted directly to the destination host. - - (c) If not, then the destination is accessible only through - a gateway. Selection of a gateway is described below - (3.3.1.2). - - A special-case destination address is handled as follows: - - * For a limited broadcast or a multicast address, simply - pass the datagram to the link layer for the appropriate - interface. - - - -Internet Engineering Task Force [Page 47] - - - - -RFC1122 INTERNET LAYER October 1989 - - - * For a (network or subnet) directed broadcast, the - datagram can use the standard routing algorithms. - - The host IP layer MUST operate correctly in a minimal - network environment, and in particular, when there are no - gateways. For example, if the IP layer of a host insists on - finding at least one gateway to initialize, the host will be - unable to operate on a single isolated broadcast net. - - 3.3.1.2 Gateway Selection - - To efficiently route a series of datagrams to the same - destination, the source host MUST keep a "route cache" of - mappings to next-hop gateways. A host uses the following - basic algorithm on this cache to route a datagram; this - algorithm is designed to put the primary routing burden on - the gateways [IP:11]. - - (a) If the route cache contains no information for a - particular destination, the host chooses a "default" - gateway and sends the datagram to it. It also builds a - corresponding Route Cache entry. - - (b) If that gateway is not the best next hop to the - destination, the gateway will forward the datagram to - the best next-hop gateway and return an ICMP Redirect - message to the source host. 
- - (c) When it receives a Redirect, the host updates the - next-hop gateway in the appropriate route cache entry, - so later datagrams to the same destination will go - directly to the best gateway. - - Since the subnet mask appropriate to the destination address - is generally not known, a Network Redirect message SHOULD be - treated identically to a Host Redirect message; i.e., the - cache entry for the destination host (only) would be updated - (or created, if an entry for that host did not exist) for - the new gateway. - - DISCUSSION: - This recommendation is to protect against gateways that - erroneously send Network Redirects for a subnetted - network, in violation of the gateway requirements - [INTRO:2]. - - When there is no route cache entry for the destination host - address (and the destination is not on the connected - - - -Internet Engineering Task Force [Page 48] - - - - -RFC1122 INTERNET LAYER October 1989 - - - network), the IP layer MUST pick a gateway from its list of - "default" gateways. The IP layer MUST support multiple - default gateways. - - As an extra feature, a host IP layer MAY implement a table - of "static routes". Each such static route MAY include a - flag specifying whether it may be overridden by ICMP - Redirects. - - DISCUSSION: - A host generally needs to know at least one default - gateway to get started. This information can be - obtained from a configuration file or else from the - host startup sequence, e.g., the BOOTP protocol (see - [INTRO:1]). - - It has been suggested that a host can augment its list - of default gateways by recording any new gateways it - learns about. For example, it can record every gateway - to which it is ever redirected. Such a feature, while - possibly useful in some circumstances, may cause - problems in other cases (e.g., gateways are not all - equal), and it is not recommended. 
- - A static route is typically a particular preset mapping - from destination host or network into a particular - next-hop gateway; it might also depend on the Type-of- - Service (see next section). Static routes would be set - up by system administrators to override the normal - automatic routing mechanism, to handle exceptional - situations. However, any static routing information is - a potential source of failure as configurations change - or equipment fails. - - 3.3.1.3 Route Cache - - Each route cache entry needs to include the following - fields: - - (1) Local IP address (for a multihomed host) - - (2) Destination IP address - - (3) Type(s)-of-Service - - (4) Next-hop gateway IP address - - Field (2) MAY be the full IP address of the destination - - - -Internet Engineering Task Force [Page 49] - - - - -RFC1122 INTERNET LAYER October 1989 - - - host, or only the destination network number. Field (3), - the TOS, SHOULD be included. - - See Section 3.3.4.2 for a discussion of the implications of - multihoming for the lookup procedure in this cache. - - DISCUSSION: - Including the Type-of-Service field in the route cache - and considering it in the host route algorithm will - provide the necessary mechanism for the future when - Type-of-Service routing is commonly used in the - Internet. See Section 3.2.1.6. - - Each route cache entry defines the endpoints of an - Internet path. Although the connecting path may change - dynamically in an arbitrary way, the transmission - characteristics of the path tend to remain - approximately constant over a time period longer than a - single typical host-host transport connection. - Therefore, a route cache entry is a natural place to - cache data on the properties of the path. Examples of - such properties might be the maximum unfragmented - datagram size (see Section 3.3.3), or the average - round-trip delay measured by a transport protocol. 
- This data will generally be both gathered and used by a - higher layer protocol, e.g., by TCP, or by an - application using UDP. Experiments are currently in - progress on caching path properties in this manner. - - There is no consensus on whether the route cache should - be keyed on destination host addresses alone, or allow - both host and network addresses. Those who favor the - use of only host addresses argue that: - - (1) As required in Section 3.3.1.2, Redirect messages - will generally result in entries keyed on - destination host addresses; the simplest and most - general scheme would be to use host addresses - always. - - (2) The IP layer may not always know the address mask - for a network address in a complex subnetted - environment. - - (3) The use of only host addresses allows the - destination address to be used as a pure 32-bit - number, which may allow the Internet architecture - to be more easily extended in the future without - - - -Internet Engineering Task Force [Page 50] - - - - -RFC1122 INTERNET LAYER October 1989 - - - any change to the hosts. - - The opposing view is that allowing a mixture of - destination hosts and networks in the route cache: - - (1) Saves memory space. - - (2) Leads to a simpler data structure, easily - combining the cache with the tables of default and - static routes (see below). - - (3) Provides a more useful place to cache path - properties, as discussed earlier. - - - IMPLEMENTATION: - The cache needs to be large enough to include entries - for the maximum number of destination hosts that may be - in use at one time. - - A route cache entry may also include control - information used to choose an entry for replacement. - This might take the form of a "recently used" bit, a - use count, or a last-used timestamp, for example. It - is recommended that it include the time of last - modification of the entry, for diagnostic purposes. 
- - An implementation may wish to reduce the overhead of - scanning the route cache for every datagram to be - transmitted. This may be accomplished with a hash - table to speed the lookup, or by giving a connection- - oriented transport protocol a "hint" or temporary - handle on the appropriate cache entry, to be passed to - the IP layer with each subsequent datagram. - - Although we have described the route cache, the lists - of default gateways, and a table of static routes as - conceptually distinct, in practice they may be combined - into a single "routing table" data structure. - - 3.3.1.4 Dead Gateway Detection - - The IP layer MUST be able to detect the failure of a "next- - hop" gateway that is listed in its route cache and to choose - an alternate gateway (see Section 3.3.1.5). - - Dead gateway detection is covered in some detail in RFC-816 - [IP:11]. Experience to date has not produced a complete - - - -Internet Engineering Task Force [Page 51] - - - - -RFC1122 INTERNET LAYER October 1989 - - - algorithm which is totally satisfactory, though it has - identified several forbidden paths and promising techniques. - - * A particular gateway SHOULD NOT be used indefinitely in - the absence of positive indications that it is - functioning. - - * Active probes such as "pinging" (i.e., using an ICMP - Echo Request/Reply exchange) are expensive and scale - poorly. In particular, hosts MUST NOT actively check - the status of a first-hop gateway by simply pinging the - gateway continuously. - - * Even when it is the only effective way to verify a - gateway's status, pinging MUST be used only when - traffic is being sent to the gateway and when there is - no other positive indication to suggest that the - gateway is functioning. - - * To avoid pinging, the layers above and/or below the - Internet layer SHOULD be able to give "advice" on the - status of route cache entries when either positive - (gateway OK) or negative (gateway dead) information is - available. 
- - - DISCUSSION: - If an implementation does not include an adequate - mechanism for detecting a dead gateway and re-routing, - a gateway failure may cause datagrams to apparently - vanish into a "black hole". This failure can be - extremely confusing for users and difficult for network - personnel to debug. - - The dead-gateway detection mechanism must not cause - unacceptable load on the host, on connected networks, - or on first-hop gateway(s). The exact constraints on - the timeliness of dead gateway detection and on - acceptable load may vary somewhat depending on the - nature of the host's mission, but a host generally - needs to detect a failed first-hop gateway quickly - enough that transport-layer connections will not break - before an alternate gateway can be selected. - - Passing advice from other layers of the protocol stack - complicates the interfaces between the layers, but it - is the preferred approach to dead gateway detection. - Advice can come from almost any part of the IP/TCP - - - -Internet Engineering Task Force [Page 52] - - - - -RFC1122 INTERNET LAYER October 1989 - - - architecture, but it is expected to come primarily from - the transport and link layers. Here are some possible - sources for gateway advice: - - o TCP or any connection-oriented transport protocol - should be able to give negative advice, e.g., - triggered by excessive retransmissions. - - o TCP may give positive advice when (new) data is - acknowledged. Even though the route may be - asymmetric, an ACK for new data proves that the - acknowledged data must have been transmitted - successfully. - - o An ICMP Redirect message from a particular gateway - should be used as positive advice about that - gateway. - - o Link-layer information that reliably detects and - reports host failures (e.g., ARPANET Destination - Dead messages) should be used as negative advice.
- - o Failure to ARP or to re-validate ARP mappings may - be used as negative advice for the corresponding - IP address. - - o Packets arriving from a particular link-layer - address are evidence that the system at this - address is alive. However, turning this - information into advice about gateways requires - mapping the link-layer address into an IP address, - and then checking that IP address against the - gateways pointed to by the route cache. This is - probably prohibitively inefficient. - - Note that positive advice that is given for every - datagram received may cause unacceptable overhead in - the implementation. - - While advice might be passed using required arguments - in all interfaces to the IP layer, some transport and - application layer protocols cannot deduce the correct - advice. These interfaces must therefore allow a - neutral value for advice, since either always-positive - or always-negative advice leads to incorrect behavior. - - There is another technique for dead gateway detection - that has been commonly used but is not recommended. - - - -Internet Engineering Task Force [Page 53] - - - - -RFC1122 INTERNET LAYER October 1989 - - - This technique depends upon the host passively - receiving ("wiretapping") the Interior Gateway Protocol - (IGP) datagrams that the gateways are broadcasting to - each other. This approach has the drawback that a host - needs to recognize all the interior gateway protocols - that gateways may use (see [INTRO:2]). In addition, it - only works on a broadcast network. - - At present, pinging (i.e., using ICMP Echo messages) is - the mechanism for gateway probing when absolutely - required. A successful ping guarantees that the - addressed interface and its associated machine are up, - but it does not guarantee that the machine is a gateway - as opposed to a host. 
The normal inference is that if - a Redirect or other evidence indicates that a machine - was a gateway, successful pings will indicate that the - machine is still up and hence still a gateway. - However, since a host silently discards packets that a - gateway would forward or redirect, this assumption - could sometimes fail. To avoid this problem, a new - ICMP message under development will ask "are you a - gateway?" - - IMPLEMENTATION: - The following specific algorithm has been suggested: - - o Associate a "reroute timer" with each gateway - pointed to by the route cache. Initialize the - timer to a value Tr, which must be small enough to - allow detection of a dead gateway before transport - connections time out. - - o Positive advice would reset the reroute timer to - Tr. Negative advice would reduce or zero the - reroute timer. - - o Whenever the IP layer used a particular gateway to - route a datagram, it would check the corresponding - reroute timer. If the timer had expired (reached - zero), the IP layer would send a ping to the - gateway, followed immediately by the datagram. - - o The ping (ICMP Echo) would be sent again if - necessary, up to N times. If no ping reply was - received in N tries, the gateway would be assumed - to have failed, and a new first-hop gateway would - be chosen for all cache entries pointing to the - failed gateway. - - - -Internet Engineering Task Force [Page 54] - - - - -RFC1122 INTERNET LAYER October 1989 - - - Note that the size of Tr is inversely related to the - amount of advice available. 
Tr should be large enough - to insure that: - - * Any pinging will be at a low level (e.g., <10%) of - all packets sent to a gateway from the host, AND - - * pinging is infrequent (e.g., every 3 minutes) - - Since the recommended algorithm is concerned with the - gateways pointed to by route cache entries, rather than - the cache entries themselves, a two level data - structure (perhaps coordinated with ARP or similar - caches) may be desirable for implementing a route - cache. - - 3.3.1.5 New Gateway Selection - - If the failed gateway is not the current default, the IP - layer can immediately switch to a default gateway. If it is - the current default that failed, the IP layer MUST select a - different default gateway (assuming more than one default is - known) for the failed route and for establishing new routes. - - DISCUSSION: - When a gateway does fail, the other gateways on the - connected network will learn of the failure through - some inter-gateway routing protocol. However, this - will not happen instantaneously, since gateway routing - protocols typically have a settling time of 30-60 - seconds. If the host switches to an alternative - gateway before the gateways have agreed on the failure, - the new target gateway will probably forward the - datagram to the failed gateway and send a Redirect back - to the host pointing to the failed gateway (!). The - result is likely to be a rapid oscillation in the - contents of the host's route cache during the gateway - settling period. It has been proposed that the dead- - gateway logic should include some hysteresis mechanism - to prevent such oscillations. However, experience has - not shown any harm from such oscillations, since - service cannot be restored to the host until the - gateways' routing information does settle down. - - IMPLEMENTATION: - One implementation technique for choosing a new default - gateway is to simply round-robin among the default - gateways in the host's list. 
Another is to rank the - - - -Internet Engineering Task Force [Page 55] - - - - -RFC1122 INTERNET LAYER October 1989 - - - gateways in priority order, and when the current - default gateway is not the highest priority one, to - "ping" the higher-priority gateways slowly to detect - when they return to service. This pinging can be at a - very low rate, e.g., 0.005 per second. - - 3.3.1.6 Initialization - - The following information MUST be configurable: - - (1) IP address(es). - - (2) Address mask(s). - - (3) A list of default gateways, with a preference level. - - A manual method of entering this configuration data MUST be - provided. In addition, a variety of methods can be used to - determine this information dynamically; see the section on - "Host Initialization" in [INTRO:1]. - - DISCUSSION: - Some host implementations use "wiretapping" of gateway - protocols on a broadcast network to learn what gateways - exist. A standard method for default gateway discovery - is under development. - - 3.3.2 Reassembly - - The IP layer MUST implement reassembly of IP datagrams. - - We designate the largest datagram size that can be reassembled - by EMTU_R ("Effective MTU to receive"); this is sometimes - called the "reassembly buffer size". EMTU_R MUST be greater - than or equal to 576, SHOULD be either configurable or - indefinite, and SHOULD be greater than or equal to the MTU of - the connected network(s). - - DISCUSSION: - A fixed EMTU_R limit should not be built into the code - because some application layer protocols require EMTU_R - values larger than 576. - - IMPLEMENTATION: - An implementation may use a contiguous reassembly buffer - for each datagram, or it may use a more complex data - structure that places no definite limit on the reassembled - datagram size; in the latter case, EMTU_R is said to be - - - -Internet Engineering Task Force [Page 56] - - - - -RFC1122 INTERNET LAYER October 1989 - - - "indefinite". 
- - Logically, reassembly is performed by simply copying each - fragment into the packet buffer at the proper offset. - Note that fragments may overlap if successive - retransmissions use different packetizing but the same - reassembly Id. - - The tricky part of reassembly is the bookkeeping to - determine when all bytes of the datagram have been - reassembled. We recommend Clark's algorithm [IP:10] that - requires no additional data space for the bookkeeping. - However, note that, contrary to [IP:10], the first - fragment header needs to be saved for inclusion in a - possible ICMP Time Exceeded (Reassembly Timeout) message. - - There MUST be a mechanism by which the transport layer can - learn MMS_R, the maximum message size that can be received and - reassembled in an IP datagram (see GET_MAXSIZES calls in - Section 3.4). If EMTU_R is not indefinite, then the value of - MMS_R is given by: - - MMS_R = EMTU_R - 20 - - since 20 is the minimum size of an IP header. - - There MUST be a reassembly timeout. The reassembly timeout - value SHOULD be a fixed value, not set from the remaining TTL. - It is recommended that the value lie between 60 seconds and 120 - seconds. If this timeout expires, the partially-reassembled - datagram MUST be discarded and an ICMP Time Exceeded message - sent to the source host (if fragment zero has been received). - - DISCUSSION: - The IP specification says that the reassembly timeout - should be the remaining TTL from the IP header, but this - does not work well because gateways generally treat TTL as - a simple hop count rather than an elapsed time. If the - reassembly timeout is too small, datagrams will be - discarded unnecessarily, and communication may fail. The - timeout needs to be at least as large as the typical - maximum delay across the Internet. A realistic minimum - reassembly timeout would be 60 seconds. 
- - It has been suggested that a cache might be kept of - round-trip times measured by transport protocols for - various destinations, and that these values might be used - to dynamically determine a reasonable reassembly timeout - - - -Internet Engineering Task Force [Page 57] - - - - -RFC1122 INTERNET LAYER October 1989 - - - value. Further investigation of this approach is - required. - - If the reassembly timeout is set too high, buffer - resources in the receiving host will be tied up too long, - and the MSL (Maximum Segment Lifetime) [TCP:1] will be - larger than necessary. The MSL controls the maximum rate - at which fragmented datagrams can be sent using distinct - values of the 16-bit Ident field; a larger MSL lowers the - maximum rate. The TCP specification [TCP:1] arbitrarily - assumes a value of 2 minutes for MSL. This sets an upper - limit on a reasonable reassembly timeout value. - - 3.3.3 Fragmentation - - Optionally, the IP layer MAY implement a mechanism to fragment - outgoing datagrams intentionally. - - We designate by EMTU_S ("Effective MTU for sending") the - maximum IP datagram size that may be sent, for a particular - combination of IP source and destination addresses and perhaps - TOS. - - A host MUST implement a mechanism to allow the transport layer - to learn MMS_S, the maximum transport-layer message size that - may be sent for a given {source, destination, TOS} triplet (see - GET_MAXSIZES call in Section 3.4). If no local fragmentation - is performed, the value of MMS_S will be: - - MMS_S = EMTU_S - <IP header size> - - and EMTU_S must be less than or equal to the MTU of the network - interface corresponding to the source address of the datagram. - Note that <IP header size> in this equation will be 20, unless - the IP reserves space to insert IP options for its own purposes - in addition to any options inserted by the transport layer.
- - A host that does not implement local fragmentation MUST ensure - that the transport layer (for TCP) or the application layer - (for UDP) obtains MMS_S from the IP layer and does not send a - datagram exceeding MMS_S in size. - - It is generally desirable to avoid local fragmentation and to - choose EMTU_S low enough to avoid fragmentation in any gateway - along the path. In the absence of actual knowledge of the - minimum MTU along the path, the IP layer SHOULD use - EMTU_S <= 576 whenever the destination address is not on a - connected network, and otherwise use the connected network's - - - -Internet Engineering Task Force [Page 58] - - - - -RFC1122 INTERNET LAYER October 1989 - - - MTU. - - The MTU of each physical interface MUST be configurable. - - A host IP layer implementation MAY have a configuration flag - "All-Subnets-MTU", indicating that the MTU of the connected - network is to be used for destinations on different subnets - within the same network, but not for other networks. Thus, - this flag causes the network class mask, rather than the subnet - address mask, to be used to choose an EMTU_S. For a multihomed - host, an "All-Subnets-MTU" flag is needed for each network - interface. - - DISCUSSION: - Picking the correct datagram size to use when sending data - is a complex topic [IP:9]. - - (a) In general, no host is required to accept an IP - datagram larger than 576 bytes (including header and - data), so a host must not send a larger datagram - without explicit knowledge or prior arrangement with - the destination host. Thus, MMS_S is only an upper - bound on the datagram size that a transport protocol - may send; even when MMS_S exceeds 556, the transport - layer must limit its messages to 556 bytes in the - absence of other knowledge about the destination - host. - - (b) Some transport protocols (e.g., TCP) provide a way to - explicitly inform the sender about the largest - datagram the other end can receive and reassemble - [IP:7]. 
There is no corresponding mechanism in the - IP layer. - - A transport protocol that assumes an EMTU_R larger - than 576 (see Section 3.3.2), can send a datagram of - this larger size to another host that implements the - same protocol. - - (c) Hosts should ideally limit their EMTU_S for a given - destination to the minimum MTU of all the networks - along the path, to avoid any fragmentation. IP - fragmentation, while formally correct, can create a - serious transport protocol performance problem, - because loss of a single fragment means all the - fragments in the segment must be retransmitted - [IP:9]. - - - - -Internet Engineering Task Force [Page 59] - - - - -RFC1122 INTERNET LAYER October 1989 - - - Since nearly all networks in the Internet currently - support an MTU of 576 or greater, we strongly recommend - the use of 576 for datagrams sent to non-local networks. - - It has been suggested that a host could determine the MTU - over a given path by sending a zero-offset datagram - fragment and waiting for the receiver to time out the - reassembly (which cannot complete!) and return an ICMP - Time Exceeded message. This message would include the - largest remaining fragment header in its body. More - direct mechanisms are being experimented with, but have - not yet been adopted (see e.g., RFC-1063). - - 3.3.4 Local Multihoming - - 3.3.4.1 Introduction - - A multihomed host has multiple IP addresses, which we may - think of as "logical interfaces". These logical interfaces - may be associated with one or more physical interfaces, and - these physical interfaces may be connected to the same or - different networks. - - Here are some important cases of multihoming: - - (a) Multiple Logical Networks - - The Internet architects envisioned that each physical - network would have a single unique IP network (or - subnet) number. 
However, LAN administrators have - sometimes found it useful to violate this assumption, - operating a LAN with multiple logical networks per - physical connected network. - - If a host connected to such a physical network is - configured to handle traffic for each of N different - logical networks, then the host will have N logical - interfaces. These could share a single physical - interface, or might use N physical interfaces to the - same network. - - (b) Multiple Logical Hosts - - When a host has multiple IP addresses that all have the - same <Network-number> part (and the same <Subnet-number> part, if any), the logical interfaces are known - as "logical hosts". These logical interfaces might - share a single physical interface or might use separate - - - -Internet Engineering Task Force [Page 60] - - - - -RFC1122 INTERNET LAYER October 1989 - - - physical interfaces to the same physical network. - - (c) Simple Multihoming - - In this case, each logical interface is mapped into a - separate physical interface and each physical interface - is connected to a different physical network. The term - "multihoming" was originally applied only to this case, - but it is now applied more generally. - - A host with embedded gateway functionality will - typically fall into the simple multihoming case. Note, - however, that a host may be simply multihomed without - containing an embedded gateway, i.e., without - forwarding datagrams from one connected network to - another. - - This case presents the most difficult routing problems. - The choice of interface (i.e., the choice of first-hop - network) may significantly affect performance or even - reachability of remote parts of the Internet. - - - Finally, we note another possibility that is NOT - multihoming: one logical interface may be bound to multiple - physical interfaces, in order to increase the reliability or - throughput between directly connected machines by providing - alternative physical paths between them.
For instance, two - systems might be connected by multiple point-to-point links. - We call this "link-layer multiplexing". With link-layer - multiplexing, the protocols above the link layer are unaware - that multiple physical interfaces are present; the link- - layer device driver is responsible for multiplexing and - routing packets across the physical interfaces. - - In the Internet protocol architecture, a transport protocol - instance ("entity") has no address of its own, but instead - uses a single Internet Protocol (IP) address. This has - implications for the IP, transport, and application layers, - and for the interfaces between them. In particular, the - application software may have to be aware of the multiple IP - addresses of a multihomed host; in other cases, the choice - can be made within the network software. - - 3.3.4.2 Multihoming Requirements - - The following general rules apply to the selection of an IP - source address for sending a datagram from a multihomed - - - -Internet Engineering Task Force [Page 61] - - - - -RFC1122 INTERNET LAYER October 1989 - - - host. - - (1) If the datagram is sent in response to a received - datagram, the source address for the response SHOULD be - the specific-destination address of the request. See - Sections 4.1.3.5 and 4.2.3.7 and the "General Issues" - section of [INTRO:1] for more specific requirements on - higher layers. - - Otherwise, a source address must be selected. - - (2) An application MUST be able to explicitly specify the - source address for initiating a connection or a - request. - - (3) In the absence of such a specification, the networking - software MUST choose a source address. Rules for this - choice are described below. - - - There are two key requirement issues related to multihoming: - - (A) A host MAY silently discard an incoming datagram whose - destination address does not correspond to the physical - interface through which it is received. 
- - (B) A host MAY restrict itself to sending (non-source- - routed) IP datagrams only through the physical - interface that corresponds to the IP source address of - the datagrams. - - - DISCUSSION: - Internet host implementors have used two different - conceptual models for multihoming, briefly summarized - in the following discussion. This document takes no - stand on which model is preferred; each seems to have a - place. This ambivalence is reflected in the issues (A) - and (B) being optional. - - o Strong ES Model - - The Strong ES (End System, i.e., host) model - emphasizes the host/gateway (ES/IS) distinction, - and would therefore substitute MUST for MAY in - issues (A) and (B) above. It tends to model a - multihomed host as a set of logical hosts within - the same physical host. - - - -Internet Engineering Task Force [Page 62] - - - - -RFC1122 INTERNET LAYER October 1989 - - - With respect to (A), proponents of the Strong ES - model note that automatic Internet routing - mechanisms could not route a datagram to a - physical interface that did not correspond to the - destination address. - - Under the Strong ES model, the route computation - for an outgoing datagram is the mapping: - - route(src IP addr, dest IP addr, TOS) - -> gateway - - Here the source address is included as a parameter - in order to select a gateway that is directly - reachable on the corresponding physical interface. - Note that this model logically requires that in - general there be at least one default gateway, and - preferably multiple defaults, for each IP source - address. - - o Weak ES Model - - This view de-emphasizes the ES/IS distinction, and - would therefore substitute MUST NOT for MAY in - issues (A) and (B). This model may be the more - natural one for hosts that wiretap gateway routing - protocols, and is necessary for hosts that have - embedded gateway functionality. - - The Weak ES Model may cause the Redirect mechanism - to fail. 
If a datagram is sent out a physical - interface that does not correspond to the - destination address, the first-hop gateway will - not realize when it needs to send a Redirect. On - the other hand, if the host has embedded gateway - functionality, then it has routing information - without listening to Redirects. - - In the Weak ES model, the route computation for an - outgoing datagram is the mapping: - - route(dest IP addr, TOS) -> gateway, interface - - - - - - - - - -Internet Engineering Task Force [Page 63] - - - - -RFC1122 INTERNET LAYER October 1989 - - - 3.3.4.3 Choosing a Source Address - - DISCUSSION: - When it sends an initial connection request (e.g., a - TCP "SYN" segment) or a datagram service request (e.g., - a UDP-based query), the transport layer on a multihomed - host needs to know which source address to use. If the - application does not specify it, the transport layer - must ask the IP layer to perform the conceptual - mapping: - - GET_SRCADDR(remote IP addr, TOS) - -> local IP address - - Here TOS is the Type-of-Service value (see Section - 3.2.1.6), and the result is the desired source address. - The following rules are suggested for implementing this - mapping: - - (a) If the remote Internet address lies on one of the - (sub-) nets to which the host is directly - connected, a corresponding source address may be - chosen, unless the corresponding interface is - known to be down. - - (b) The route cache may be consulted, to see if there - is an active route to the specified destination - network through any network interface; if so, a - local IP address corresponding to that interface - may be chosen. - - (c) The table of static routes, if any (see Section - 3.3.1.2) may be similarly consulted. - - (d) The default gateways may be consulted. If these - gateways are assigned to different interfaces, the - interface corresponding to the gateway with the - highest preference may be chosen. 
- - In the future, there may be a defined way for a - multihomed host to ask the gateways on all connected - networks for advice about the best network to use for a - given destination. - - IMPLEMENTATION: - It will be noted that this process is essentially the - same as datagram routing (see Section 3.3.1), and - therefore hosts may be able to combine the - - - -Internet Engineering Task Force [Page 64] - - - - -RFC1122 INTERNET LAYER October 1989 - - - implementation of the two functions. - - 3.3.5 Source Route Forwarding - - Subject to restrictions given below, a host MAY be able to act - as an intermediate hop in a source route, forwarding a source- - routed datagram to the next specified hop. - - However, in performing this gateway-like function, the host - MUST obey all the relevant rules for a gateway forwarding - source-routed datagrams [INTRO:2]. This includes the following - specific provisions, which override the corresponding host - provisions given earlier in this document: - - (A) TTL (ref. Section 3.2.1.7) - - The TTL field MUST be decremented and the datagram perhaps - discarded as specified for a gateway in [INTRO:2]. - - (B) ICMP Destination Unreachable (ref. Section 3.2.2.1) - - A host MUST be able to generate Destination Unreachable - messages with the following codes: - - 4 (Fragmentation Required but DF Set) when a source- - routed datagram cannot be fragmented to fit into the - target network; - - 5 (Source Route Failed) when a source-routed datagram - cannot be forwarded, e.g., because of a routing - problem or because the next hop of a strict source - route is not on a connected network. - - (C) IP Source Address (ref. Section 3.2.1.3) - - A source-routed datagram being forwarded MAY (and normally - will) have a source address that is not one of the IP - addresses of the forwarding host. - - (D) Record Route Option (ref. 
Section 3.2.1.8d) - - A host that is forwarding a source-routed datagram - containing a Record Route option MUST update that option, - if it has room. - - (E) Timestamp Option (ref. Section 3.2.1.8e) - - A host that is forwarding a source-routed datagram - - - -Internet Engineering Task Force [Page 65] - - - - -RFC1122 INTERNET LAYER October 1989 - - - containing a Timestamp Option MUST add the current - timestamp to that option, according to the rules for this - option. - - To define the rules restricting host forwarding of source- - routed datagrams, we use the term "local source-routing" if the - next hop will be through the same physical interface through - which the datagram arrived; otherwise, it is "non-local - source-routing". - - o A host is permitted to perform local source-routing - without restriction. - - o A host that supports non-local source-routing MUST have a - configurable switch to disable forwarding, and this switch - MUST default to disabled. - - o The host MUST satisfy all gateway requirements for - configurable policy filters [INTRO:2] restricting non- - local forwarding. - - If a host receives a datagram with an incomplete source route - but does not forward it for some reason, the host SHOULD return - an ICMP Destination Unreachable (code 5, Source Route Failed) - message, unless the datagram was itself an ICMP error message. - - 3.3.6 Broadcasts - - Section 3.2.1.3 defined the four standard IP broadcast address - forms: - - Limited Broadcast: {-1, -1} - - Directed Broadcast: {<Network-number>,-1} - - Subnet Directed Broadcast: - {<Network-number>,<Subnet-number>,-1} - - All-Subnets Directed Broadcast: {<Network-number>,-1,-1} - - A host MUST recognize any of these forms in the destination - address of an incoming datagram. - - There is a class of hosts* that use non-standard broadcast - address forms, substituting 0 for -1. All hosts SHOULD -_________________________ -*4.2BSD Unix and its derivatives, but not 4.3BSD.
- - - - -Internet Engineering Task Force [Page 66] - - - - -RFC1122 INTERNET LAYER October 1989 - - - recognize and accept any of these non-standard broadcast - addresses as the destination address of an incoming datagram. - A host MAY optionally have a configuration option to choose the - 0 or the -1 form of broadcast address, for each physical - interface, but this option SHOULD default to the standard (-1) - form. - - When a host sends a datagram to a link-layer broadcast address, - the IP destination address MUST be a legal IP broadcast or IP - multicast address. - - A host SHOULD silently discard a datagram that is received via - a link-layer broadcast (see Section 2.4) but does not specify - an IP multicast or broadcast destination address. - - Hosts SHOULD use the Limited Broadcast address to broadcast to - a connected network. - - - DISCUSSION: - Using the Limited Broadcast address instead of a Directed - Broadcast address may improve system robustness. Problems - are often caused by machines that do not understand the - plethora of broadcast addresses (see Section 3.2.1.3), or - that may have different ideas about which broadcast - addresses are in use. The prime example of the latter is - machines that do not understand subnetting but are - attached to a subnetted net. Sending a Subnet Broadcast - for the connected network will confuse those machines, - which will see it as a message to some other host. - - There has been discussion on whether a datagram addressed - to the Limited Broadcast address ought to be sent from all - the interfaces of a multihomed host. This specification - takes no stand on the issue. - - 3.3.7 IP Multicasting - - A host SHOULD support local IP multicasting on all connected - networks for which a mapping from Class D IP addresses to - link-layer addresses has been specified (see below). 
Support - for local IP multicasting includes sending multicast datagrams, - joining multicast groups and receiving multicast datagrams, and - leaving multicast groups. This implies support for all of - [IP:4] except the IGMP protocol itself, which is OPTIONAL. - - - - - - -Internet Engineering Task Force [Page 67] - - - - -RFC1122 INTERNET LAYER October 1989 - - - DISCUSSION: - IGMP provides gateways that are capable of multicast - routing with the information required to support IP - multicasting across multiple networks. At this time, - multicast-routing gateways are in the experimental stage - and are not widely available. For hosts that are not - connected to networks with multicast-routing gateways or - that do not need to receive multicast datagrams - originating on other networks, IGMP serves no purpose and - is therefore optional for now. However, the rest of - [IP:4] is currently recommended for the purpose of - providing IP-layer access to local network multicast - addressing, as a preferable alternative to local broadcast - addressing. It is expected that IGMP will become - recommended at some future date, when multicast-routing - gateways have become more widely available. - - If IGMP is not implemented, a host SHOULD still join the "all- - hosts" group (224.0.0.1) when the IP layer is initialized and - remain a member for as long as the IP layer is active. - - DISCUSSION: - Joining the "all-hosts" group will support strictly local - uses of multicasting, e.g., a gateway discovery protocol, - even if IGMP is not implemented. - - The mapping of IP Class D addresses to local addresses is - currently specified for the following types of networks: - - o Ethernet/IEEE 802.3, as defined in [IP:4]. - - o Any network that supports broadcast but not multicast, - addressing: all IP Class D addresses map to the local - broadcast address. - - o Any type of point-to-point link (e.g., SLIP or HDLC - links): no mapping required. 
All IP multicast datagrams - are sent as-is, inside the local framing. - - Mappings for other types of networks will be specified in the - future. - - A host SHOULD provide a way for higher-layer protocols or - applications to determine which of the host's connected - network(s) support IP multicast addressing. - - - - - - -Internet Engineering Task Force [Page 68] - - - - -RFC1122 INTERNET LAYER October 1989 - - - 3.3.8 Error Reporting - - Wherever practical, hosts MUST return ICMP error datagrams on - detection of an error, except in those cases where returning an - ICMP error message is specifically prohibited. - - DISCUSSION: - A common phenomenon in datagram networks is the "black - hole disease": datagrams are sent out, but nothing comes - back. Without any error datagrams, it is difficult for - the user to figure out what the problem is. - - 3.4 INTERNET/TRANSPORT LAYER INTERFACE - - The interface between the IP layer and the transport layer MUST - provide full access to all the mechanisms of the IP layer, - including options, Type-of-Service, and Time-to-Live. The - transport layer MUST either have mechanisms to set these interface - parameters, or provide a path to pass them through from an - application, or both. - - DISCUSSION: - Applications are urged to make use of these mechanisms where - applicable, even when the mechanisms are not currently - effective in the Internet (e.g., TOS). This will allow these - mechanisms to be immediately useful when they do become - effective, without a large amount of retrofitting of host - software. - - We now describe a conceptual interface between the transport layer - and the IP layer, as a set of procedure calls. This is an - extension of the information in Section 3.3 of RFC-791 [IP:1]. - - - * Send Datagram - - SEND(src, dst, prot, TOS, TTL, BufPTR, len, Id, DF, opt - => result ) - - where the parameters are defined in RFC-791. Passing an Id - parameter is optional; see Section 3.2.1.5. 
- - - * Receive Datagram - - RECV(BufPTR, prot - => result, src, dst, SpecDest, TOS, len, opt) - - - - -Internet Engineering Task Force [Page 69] - - - - -RFC1122 INTERNET LAYER October 1989 - - - All the parameters are defined in RFC-791, except for: - - SpecDest = specific-destination address of datagram - (defined in Section 3.2.1.3) - - The result parameter dst contains the datagram's destination - address. Since this may be a broadcast or multicast address, - the SpecDest parameter (not shown in RFC-791) MUST be passed. - The parameter opt contains all the IP options received in the - datagram; these MUST also be passed to the transport layer. - - - * Select Source Address - - GET_SRCADDR(remote, TOS) -> local - - remote = remote IP address - TOS = Type-of-Service - local = local IP address - - See Section 3.3.4.3. - - - * Find Maximum Datagram Sizes - - GET_MAXSIZES(local, remote, TOS) -> MMS_R, MMS_S - - MMS_R = maximum receive transport-message size. - MMS_S = maximum send transport-message size. - (local, remote, TOS defined above) - - See Sections 3.3.2 and 3.3.3. - - - * Advice on Delivery Success - - ADVISE_DELIVPROB(sense, local, remote, TOS) - - Here the parameter sense is a 1-bit flag indicating whether - positive or negative advice is being given; see the - discussion in Section 3.3.1.4. The other parameters were - defined earlier. - - - * Send ICMP Message - - SEND_ICMP(src, dst, TOS, TTL, BufPTR, len, Id, DF, opt) - -> result - - - -Internet Engineering Task Force [Page 70] - - - - -RFC1122 INTERNET LAYER October 1989 - - - (Parameters defined in RFC-791). - - Passing an Id parameter is optional; see Section 3.2.1.5. - The transport layer MUST be able to send certain ICMP - messages: Port Unreachable or any of the query-type - messages. This function could be considered to be a special - case of the SEND() call, of course; we describe it separately - for clarity. 
- - - * Receive ICMP Message - - RECV_ICMP(BufPTR ) -> result, src, dst, len, opt - - (Parameters defined in RFC-791). - - The IP layer MUST pass certain ICMP messages up to the - appropriate transport-layer routine. This function could be - considered to be a special case of the RECV() call, of - course; we describe it separately for clarity. - - For an ICMP error message, the data that is passed up MUST - include the original Internet header plus all the octets of - the original message that are included in the ICMP message. - This data will be used by the transport layer to locate the - connection state information, if any. - - In particular, the following ICMP messages are to be passed - up: - - o Destination Unreachable - - o Source Quench - - o Echo Reply (to ICMP user interface, unless the Echo - Request originated in the IP layer) - - o Timestamp Reply (to ICMP user interface) - - o Time Exceeded - - - DISCUSSION: - In the future, there may be additions to this interface to - pass path data (see Section 3.3.1.3) between the IP and - transport layers. 
- - - - - -Internet Engineering Task Force [Page 71] - - - - -RFC1122 INTERNET LAYER October 1989 - - - 3.5 INTERNET LAYER REQUIREMENTS SUMMARY - - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e --------------------------------------------------|--------|-|-|-|-|-|-- - | | | | | | | -Implement IP and ICMP |3.1 |x| | | | | -Handle remote multihoming in application layer |3.1 |x| | | | | -Support local multihoming |3.1 | | |x| | | -Meet gateway specs if forward datagrams |3.1 |x| | | | | -Configuration switch for embedded gateway |3.1 |x| | | | |1 - Config switch default to non-gateway |3.1 |x| | | | |1 - Auto-config based on number of interfaces |3.1 | | | | |x|1 -Able to log discarded datagrams |3.1 | |x| | | | - Record in counter |3.1 | |x| | | | - | | | | | | | -Silently discard Version != 4 |3.2.1.1 |x| | | | | -Verify IP checksum, silently discard bad dgram |3.2.1.2 |x| | | | | -Addressing: | | | | | | | - Subnet addressing (RFC-950) |3.2.1.3 |x| | | | | - Src address must be host's own IP address |3.2.1.3 |x| | | | | - Silently discard datagram with bad dest addr |3.2.1.3 |x| | | | | - Silently discard datagram with bad src addr |3.2.1.3 |x| | | | | -Support reassembly |3.2.1.4 |x| | | | | -Retain same Id field in identical datagram |3.2.1.5 | | |x| | | - | | | | | | | -TOS: | | | | | | | - Allow transport layer to set TOS |3.2.1.6 |x| | | | | - Pass received TOS up to transport layer |3.2.1.6 | |x| | | | - Use RFC-795 link-layer mappings for TOS |3.2.1.6 | | | |x| | -TTL: | | | | | | | - Send packet with TTL of 0 |3.2.1.7 | | | | |x| - Discard received packets with TTL < 2 |3.2.1.7 | | | | |x| - Allow transport layer to set TTL |3.2.1.7 |x| | | | | - Fixed TTL is configurable |3.2.1.7 |x| | | | | - | | | | | | | -IP Options: | | | | | | | - Allow transport layer to send IP options |3.2.1.8 |x| | | | | - Pass all IP options 
rcvd to higher layer |3.2.1.8 |x| | | | | - - - -Internet Engineering Task Force [Page 72] - - - - -RFC1122 INTERNET LAYER October 1989 - - - IP layer silently ignore unknown options |3.2.1.8 |x| | | | | - Security option |3.2.1.8a| | |x| | | - Send Stream Identifier option |3.2.1.8b| | | |x| | - Silently ignore Stream Identifier option |3.2.1.8b|x| | | | | - Record Route option |3.2.1.8d| | |x| | | - Timestamp option |3.2.1.8e| | |x| | | -Source Route Option: | | | | | | | - Originate & terminate Source Route options |3.2.1.8c|x| | | | | - Datagram with completed SR passed up to TL |3.2.1.8c|x| | | | | - Build correct (non-redundant) return route |3.2.1.8c|x| | | | | - Send multiple SR options in one header |3.2.1.8c| | | | |x| - | | | | | | | -ICMP: | | | | | | | - Silently discard ICMP msg with unknown type |3.2.2 |x| | | | | - Include more than 8 octets of orig datagram |3.2.2 | | |x| | | - Included octets same as received |3.2.2 |x| | | | | - Demux ICMP Error to transport protocol |3.2.2 |x| | | | | - Send ICMP error message with TOS=0 |3.2.2 | |x| | | | - Send ICMP error message for: | | | | | | | - - ICMP error msg |3.2.2 | | | | |x| - - IP b'cast or IP m'cast |3.2.2 | | | | |x| - - Link-layer b'cast |3.2.2 | | | | |x| - - Non-initial fragment |3.2.2 | | | | |x| - - Datagram with non-unique src address |3.2.2 | | | | |x| - Return ICMP error msgs (when not prohibited) |3.3.8 |x| | | | | - | | | | | | | - Dest Unreachable: | | | | | | | - Generate Dest Unreachable (code 2/3) |3.2.2.1 | |x| | | | - Pass ICMP Dest Unreachable to higher layer |3.2.2.1 |x| | | | | - Higher layer act on Dest Unreach |3.2.2.1 | |x| | | | - Interpret Dest Unreach as only hint |3.2.2.1 |x| | | | | - Redirect: | | | | | | | - Host send Redirect |3.2.2.2 | | | |x| | - Update route cache when recv Redirect |3.2.2.2 |x| | | | | - Handle both Host and Net Redirects |3.2.2.2 |x| | | | | - Discard illegal Redirect |3.2.2.2 | |x| | | | - Source Quench: | | | | | | | - Send Source Quench if
buffering exceeded |3.2.2.3 | | |x| | | - Pass Source Quench to higher layer |3.2.2.3 |x| | | | | - Higher layer act on Source Quench |3.2.2.3 | |x| | | | - Time Exceeded: pass to higher layer |3.2.2.4 |x| | | | | - Parameter Problem: | | | | | | | - Send Parameter Problem messages |3.2.2.5 | |x| | | | - Pass Parameter Problem to higher layer |3.2.2.5 |x| | | | | - Report Parameter Problem to user |3.2.2.5 | | |x| | | - | | | | | | | - ICMP Echo Request or Reply: | | | | | | | - Echo server and Echo client |3.2.2.6 |x| | | | | - - - -Internet Engineering Task Force [Page 73] - - - - -RFC1122 INTERNET LAYER October 1989 - - - Echo client |3.2.2.6 | |x| | | | - Discard Echo Request to broadcast address |3.2.2.6 | | |x| | | - Discard Echo Request to multicast address |3.2.2.6 | | |x| | | - Use specific-dest addr as Echo Reply src |3.2.2.6 |x| | | | | - Send same data in Echo Reply |3.2.2.6 |x| | | | | - Pass Echo Reply to higher layer |3.2.2.6 |x| | | | | - Reflect Record Route, Time Stamp options |3.2.2.6 | |x| | | | - Reverse and reflect Source Route option |3.2.2.6 |x| | | | | - | | | | | | | - ICMP Information Request or Reply: |3.2.2.7 | | | |x| | - ICMP Timestamp and Timestamp Reply: |3.2.2.8 | | |x| | | - Minimize delay variability |3.2.2.8 | |x| | | |1 - Silently discard b'cast Timestamp |3.2.2.8 | | |x| | |1 - Silently discard m'cast Timestamp |3.2.2.8 | | |x| | |1 - Use specific-dest addr as TS Reply src |3.2.2.8 |x| | | | |1 - Reflect Record Route, Time Stamp options |3.2.2.6 | |x| | | |1 - Reverse and reflect Source Route option |3.2.2.8 |x| | | | |1 - Pass Timestamp Reply to higher layer |3.2.2.8 |x| | | | |1 - Obey rules for "standard value" |3.2.2.8 |x| | | | |1 - | | | | | | | - ICMP Address Mask Request and Reply: | | | | | | | - Addr Mask source configurable |3.2.2.9 |x| | | | | - Support static configuration of addr mask |3.2.2.9 |x| | | | | - Get addr mask dynamically during booting |3.2.2.9 | | |x| | | - Get addr via ICMP Addr Mask Request/Reply 
|3.2.2.9 | | |x| | | - Retransmit Addr Mask Req if no Reply |3.2.2.9 |x| | | | |3 - Assume default mask if no Reply |3.2.2.9 | |x| | | |3 - Update address mask from first Reply only |3.2.2.9 |x| | | | |3 - Reasonableness check on Addr Mask |3.2.2.9 | |x| | | | - Send unauthorized Addr Mask Reply msgs |3.2.2.9 | | | | |x| - Explicitly configured to be agent |3.2.2.9 |x| | | | | - Static config=> Addr-Mask-Authoritative flag |3.2.2.9 | |x| | | | - Broadcast Addr Mask Reply when init. |3.2.2.9 |x| | | | |3 - | | | | | | | -ROUTING OUTBOUND DATAGRAMS: | | | | | | | - Use address mask in local/remote decision |3.3.1.1 |x| | | | | - Operate with no gateways on conn network |3.3.1.1 |x| | | | | - Maintain "route cache" of next-hop gateways |3.3.1.2 |x| | | | | - Treat Host and Net Redirect the same |3.3.1.2 | |x| | | | - If no cache entry, use default gateway |3.3.1.2 |x| | | | | - Support multiple default gateways |3.3.1.2 |x| | | | | - Provide table of static routes |3.3.1.2 | | |x| | | - Flag: route overridable by Redirects |3.3.1.2 | | |x| | | - Key route cache on host, not net address |3.3.1.3 | | |x| | | - Include TOS in route cache |3.3.1.3 | |x| | | | - | | | | | | | - Able to detect failure of next-hop gateway |3.3.1.4 |x| | | | | - Assume route is good forever |3.3.1.4 | | | |x| | - - - -Internet Engineering Task Force [Page 74] - - - - -RFC1122 INTERNET LAYER October 1989 - - - Ping gateways continuously |3.3.1.4 | | | | |x| - Ping only when traffic being sent |3.3.1.4 |x| | | | | - Ping only when no positive indication |3.3.1.4 |x| | | | | - Higher and lower layers give advice |3.3.1.4 | |x| | | | - Switch from failed default g'way to another |3.3.1.5 |x| | | | | - Manual method of entering config info |3.3.1.6 |x| | | | | - | | | | | | | -REASSEMBLY and FRAGMENTATION: | | | | | | | - Able to reassemble incoming datagrams |3.3.2 |x| | | | | - At least 576 byte datagrams |3.3.2 |x| | | | | - EMTU_R configurable or indefinite |3.3.2 | |x| | | | - Transport layer 
able to learn MMS_R |3.3.2 |x| | | | | - Send ICMP Time Exceeded on reassembly timeout |3.3.2 |x| | | | | - Fixed reassembly timeout value |3.3.2 | |x| | | | - | | | | | | | - Pass MMS_S to higher layers |3.3.3 |x| | | | | - Local fragmentation of outgoing packets |3.3.3 | | |x| | | - Else don't send bigger than MMS_S |3.3.3 |x| | | | | - Send max 576 to off-net destination |3.3.3 | |x| | | | - All-Subnets-MTU configuration flag |3.3.3 | | |x| | | - | | | | | | | -MULTIHOMING: | | | | | | | - Reply with same addr as spec-dest addr |3.3.4.2 | |x| | | | - Allow application to choose local IP addr |3.3.4.2 |x| | | | | - Silently discard d'gram in "wrong" interface |3.3.4.2 | | |x| | | - Only send d'gram through "right" interface |3.3.4.2 | | |x| | |4 - | | | | | | | -SOURCE-ROUTE FORWARDING: | | | | | | | - Forward datagram with Source Route option |3.3.5 | | |x| | |1 - Obey corresponding gateway rules |3.3.5 |x| | | | |1 - Update TTL by gateway rules |3.3.5 |x| | | | |1 - Able to generate ICMP err code 4, 5 |3.3.5 |x| | | | |1 - IP src addr not local host |3.3.5 | | |x| | |1 - Update Timestamp, Record Route options |3.3.5 |x| | | | |1 - Configurable switch for non-local SRing |3.3.5 |x| | | | |1 - Defaults to OFF |3.3.5 |x| | | | |1 - Satisfy gwy access rules for non-local SRing |3.3.5 |x| | | | |1 - If not forward, send Dest Unreach (cd 5) |3.3.5 | |x| | | |2 - | | | | | | | -BROADCAST: | | | | | | | - Broadcast addr as IP source addr |3.2.1.3 | | | | |x| - Receive 0 or -1 broadcast formats OK |3.3.6 | |x| | | | - Config'ble option to send 0 or -1 b'cast |3.3.6 | | |x| | | - Default to -1 broadcast |3.3.6 | |x| | | | - Recognize all broadcast address formats |3.3.6 |x| | | | | - Use IP b'cast/m'cast addr in link-layer b'cast |3.3.6 |x| | | | | - Silently discard link-layer-only b'cast dg's |3.3.6 | |x| | | | - Use Limited Broadcast addr for connected net |3.3.6 | |x| | | | - - - -Internet Engineering Task Force [Page 75] - - - - -RFC1122 INTERNET LAYER October 1989 
- - - | | | | | | | -MULTICAST: | | | | | | | - Support local IP multicasting (RFC-1112) |3.3.7 | |x| | | | - Support IGMP (RFC-1112) |3.3.7 | | |x| | | - Join all-hosts group at startup |3.3.7 | |x| | | | - Higher layers learn i'face m'cast capability |3.3.7 | |x| | | | - | | | | | | | -INTERFACE: | | | | | | | - Allow transport layer to use all IP mechanisms |3.4 |x| | | | | - Pass interface ident up to transport layer |3.4 |x| | | | | - Pass all IP options up to transport layer |3.4 |x| | | | | - Transport layer can send certain ICMP messages |3.4 |x| | | | | - Pass spec'd ICMP messages up to transp. layer |3.4 |x| | | | | - Include IP hdr+8 octets or more from orig. |3.4 |x| | | | | - Able to leap tall buildings at a single bound |3.5 | |x| | | | - -Footnotes: - -(1) Only if feature is implemented. - -(2) This requirement is overruled if datagram is an ICMP error message. - -(3) Only if feature is implemented and is configured "on". - -(4) Unless has embedded gateway functionality or is source routed. - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 76] - - - - -RFC1122 TRANSPORT LAYER -- UDP October 1989 - - -4. TRANSPORT PROTOCOLS - - 4.1 USER DATAGRAM PROTOCOL -- UDP - - 4.1.1 INTRODUCTION - - The User Datagram Protocol UDP [UDP:1] offers only a minimal - transport service -- non-guaranteed datagram delivery -- and - gives applications direct access to the datagram service of the - IP layer. UDP is used by applications that do not require the - level of service of TCP or that wish to use communications - services (e.g., multicast or broadcast delivery) not available - from TCP. - - UDP is almost a null protocol; the only services it provides - over IP are checksumming of data and multiplexing by port - number. 
Therefore, an application program running over UDP - must deal directly with end-to-end communication problems that - a connection-oriented protocol would have handled -- e.g., - retransmission for reliable delivery, packetization and - reassembly, flow control, congestion avoidance, etc., when - these are required. The fairly complex coupling between IP and - TCP will be mirrored in the coupling between UDP and many - applications using UDP. - - 4.1.2 PROTOCOL WALK-THROUGH - - There are no known errors in the specification of UDP. - - 4.1.3 SPECIFIC ISSUES - - 4.1.3.1 Ports - - UDP well-known ports follow the same rules as TCP well-known - ports; see Section 4.2.2.1 below. - - If a datagram arrives addressed to a UDP port for which - there is no pending LISTEN call, UDP SHOULD send an ICMP - Port Unreachable message. - - 4.1.3.2 IP Options - - UDP MUST pass any IP option that it receives from the IP - layer transparently to the application layer. - - An application MUST be able to specify IP options to be sent - in its UDP datagrams, and UDP MUST pass these options to the - IP layer. - - - -Internet Engineering Task Force [Page 77] - - - - -RFC1122 TRANSPORT LAYER -- UDP October 1989 - - - DISCUSSION: - At present, the only options that need be passed - through UDP are Source Route, Record Route, and Time - Stamp. However, new options may be defined in the - future, and UDP need not and should not make any - assumptions about the format or content of options it - passes to or from the application; an exception to this - might be an IP-layer security option. - - An application based on UDP will need to obtain a - source route from a request datagram and supply a - reversed route for sending the corresponding reply. - - 4.1.3.3 ICMP Messages - - UDP MUST pass to the application layer all ICMP error - messages that it receives from the IP layer. Conceptually - at least, this may be accomplished with an upcall to the - ERROR_REPORT routine (see Section 4.2.4.1). 
- - DISCUSSION: - Note that ICMP error messages resulting from sending a - UDP datagram are received asynchronously. A UDP-based - application that wants to receive ICMP error messages - is responsible for maintaining the state necessary to - demultiplex these messages when they arrive; for - example, the application may keep a pending receive - operation for this purpose. The application is also - responsible to avoid confusion from a delayed ICMP - error message resulting from an earlier use of the same - port(s). - - 4.1.3.4 UDP Checksums - - A host MUST implement the facility to generate and validate - UDP checksums. An application MAY optionally be able to - control whether a UDP checksum will be generated, but it - MUST default to checksumming on. - - If a UDP datagram is received with a checksum that is non- - zero and invalid, UDP MUST silently discard the datagram. - An application MAY optionally be able to control whether UDP - datagrams without checksums should be discarded or passed to - the application. - - DISCUSSION: - Some applications that normally run only across local - area networks have chosen to turn off UDP checksums for - - - -Internet Engineering Task Force [Page 78] - - - - -RFC1122 TRANSPORT LAYER -- UDP October 1989 - - - efficiency. As a result, numerous cases of undetected - errors have been reported. The advisability of ever - turning off UDP checksumming is very controversial. - - IMPLEMENTATION: - There is a common implementation error in UDP - checksums. Unlike the TCP checksum, the UDP checksum - is optional; the value zero is transmitted in the - checksum field of a UDP header to indicate the absence - of a checksum. If the transmitter really calculates a - UDP checksum of zero, it must transmit the checksum as - all 1's (65535). No special action is required at the - receiver, since zero and 65535 are equivalent in 1's - complement arithmetic. 
- - 4.1.3.5 UDP Multihoming - - When a UDP datagram is received, its specific-destination - address MUST be passed up to the application layer. - - An application program MUST be able to specify the IP source - address to be used for sending a UDP datagram or to leave it - unspecified (in which case the networking software will - choose an appropriate source address). There SHOULD be a - way to communicate the chosen source address up to the - application layer (e.g., so that the application can later - receive a reply datagram only from the corresponding - interface). - - DISCUSSION: - A request/response application that uses UDP should use - a source address for the response that is the same as - the specific destination address of the request. See - the "General Issues" section of [INTRO:1]. - - 4.1.3.6 Invalid Addresses - - A UDP datagram received with an invalid IP source address - (e.g., a broadcast or multicast address) must be discarded - by UDP or by the IP layer (see Section 3.2.1.3). - - When a host sends a UDP datagram, the source address MUST be - (one of) the IP address(es) of the host. - - 4.1.4 UDP/APPLICATION LAYER INTERFACE - - The application interface to UDP MUST provide the full services - of the IP/transport interface described in Section 3.4 of this - - - -Internet Engineering Task Force [Page 79] - - - - -RFC1122 TRANSPORT LAYER -- UDP October 1989 - - - document. Thus, an application using UDP needs the functions - of the GET_SRCADDR(), GET_MAXSIZES(), ADVISE_DELIVPROB(), and - RECV_ICMP() calls described in Section 3.4. For example, - GET_MAXSIZES() can be used to learn the effective maximum UDP - datagram size for a particular {interface,remote - host,TOS} triplet. - - An application-layer program MUST be able to set the TTL and - TOS values as well as IP options for sending a UDP datagram, - and these values must be passed transparently to the IP layer. - UDP MAY pass the received TOS up to the application layer.
- - 4.1.5 UDP REQUIREMENTS SUMMARY - - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e --------------------------------------------------|--------|-|-|-|-|-|-- - | | | | | | | - UDP | | | | | | | --------------------------------------------------|--------|-|-|-|-|-|-- - | | | | | | | -UDP send Port Unreachable |4.1.3.1 | |x| | | | - | | | | | | | -IP Options in UDP | | | | | | | - - Pass rcv'd IP options to applic layer |4.1.3.2 |x| | | | | - - Applic layer can specify IP options in Send |4.1.3.2 |x| | | | | - - UDP passes IP options down to IP layer |4.1.3.2 |x| | | | | - | | | | | | | -Pass ICMP msgs up to applic layer |4.1.3.3 |x| | | | | - | | | | | | | -UDP checksums: | | | | | | | - - Able to generate/check checksum |4.1.3.4 |x| | | | | - - Silently discard bad checksum |4.1.3.4 |x| | | | | - - Sender Option to not generate checksum |4.1.3.4 | | |x| | | - - Default is to checksum |4.1.3.4 |x| | | | | - - Receiver Option to require checksum |4.1.3.4 | | |x| | | - | | | | | | | -UDP Multihoming | | | | | | | - - Pass spec-dest addr to application |4.1.3.5 |x| | | | | - - - -Internet Engineering Task Force [Page 80] - - - - -RFC1122 TRANSPORT LAYER -- UDP October 1989 - - - - Applic layer can specify Local IP addr |4.1.3.5 |x| | | | | - - Applic layer specify wild Local IP addr |4.1.3.5 |x| | | | | - - Applic layer notified of Local IP addr used |4.1.3.5 | |x| | | | - | | | | | | | -Bad IP src addr silently discarded by UDP/IP |4.1.3.6 |x| | | | | -Only send valid IP source address |4.1.3.6 |x| | | | | -UDP Application Interface Services | | | | | | | -Full IP interface of 3.4 for application |4.1.4 |x| | | | | - - Able to spec TTL, TOS, IP opts when send dg |4.1.4 |x| | | | | - - Pass received TOS up to applic layer |4.1.4 | | |x| | | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet 
Engineering Task Force [Page 81] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - 4.2 TRANSMISSION CONTROL PROTOCOL -- TCP - - 4.2.1 INTRODUCTION - - The Transmission Control Protocol TCP [TCP:1] is the primary - virtual-circuit transport protocol for the Internet suite. TCP - provides reliable, in-sequence delivery of a full-duplex stream - of octets (8-bit bytes). TCP is used by those applications - needing reliable, connection-oriented transport service, e.g., - mail (SMTP), file transfer (FTP), and virtual terminal service - (Telnet); requirements for these application-layer protocols - are described in [INTRO:1]. - - 4.2.2 PROTOCOL WALK-THROUGH - - 4.2.2.1 Well-Known Ports: RFC-793 Section 2.7 - - DISCUSSION: - TCP reserves port numbers in the range 0-255 for - "well-known" ports, used to access services that are - standardized across the Internet. The remainder of the - port space can be freely allocated to application - processes. Current well-known port definitions are - listed in the RFC entitled "Assigned Numbers" - [INTRO:6]. A prerequisite for defining a new well- - known port is an RFC documenting the proposed service - in enough detail to allow new implementations. - - Some systems extend this notion by adding a third - subdivision of the TCP port space: reserved ports, - which are generally used for operating-system-specific - services. For example, reserved ports might fall - between 256 and some system-dependent upper limit. - Some systems further choose to protect well-known and - reserved ports by permitting only privileged users to - open TCP connections with those port values. This is - perfectly reasonable as long as the host does not - assume that all hosts protect their low-numbered ports - in this manner. - - 4.2.2.2 Use of Push: RFC-793 Section 2.8 - - When an application issues a series of SEND calls without - setting the PUSH flag, the TCP MAY aggregate the data - internally without sending it. 
Similarly, when a series of - segments is received without the PSH bit, a TCP MAY queue - the data internally without passing it to the receiving - application. - - - -Internet Engineering Task Force [Page 82] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - The PSH bit is not a record marker and is independent of - segment boundaries. The transmitter SHOULD collapse - successive PSH bits when it packetizes data, to send the - largest possible segment. - - A TCP MAY implement PUSH flags on SEND calls. If PUSH flags - are not implemented, then the sending TCP: (1) must not - buffer data indefinitely, and (2) MUST set the PSH bit in - the last buffered segment (i.e., when there is no more - queued data to be sent). - - The discussion in RFC-793 on pages 48, 50, and 74 - erroneously implies that a received PSH flag must be passed - to the application layer. Passing a received PSH flag to - the application layer is now OPTIONAL. - - An application program is logically required to set the PUSH - flag in a SEND call whenever it needs to force delivery of - the data to avoid a communication deadlock. However, a TCP - SHOULD send a maximum-sized segment whenever possible, to - improve performance (see Section 4.2.3.4). - - DISCUSSION: - When the PUSH flag is not implemented on SEND calls, - i.e., when the application/TCP interface uses a pure - streaming model, responsibility for aggregating any - tiny data fragments to form reasonable sized segments - is partially borne by the application layer. - - Generally, an interactive application protocol must set - the PUSH flag at least in the last SEND call in each - command or response sequence. A bulk transfer protocol - like FTP should set the PUSH flag on the last segment - of a file or when necessary to prevent buffer deadlock. - - At the receiver, the PSH bit forces buffered data to be - delivered to the application (even if less than a full - buffer has been received). 
Conversely, the lack of a - PSH bit can be used to avoid unnecessary wakeup calls - to the application process; this can be an important - performance optimization for large timesharing hosts. - Passing the PSH bit to the receiving application allows - an analogous optimization within the application. - - 4.2.2.3 Window Size: RFC-793 Section 3.1 - - The window size MUST be treated as an unsigned number, or - else large window sizes will appear like negative windows - - - -Internet Engineering Task Force [Page 83] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - and TCP will not work. It is RECOMMENDED that - implementations reserve 32-bit fields for the send and - receive window sizes in the connection record and do all - window computations with 32 bits. - - DISCUSSION: - It is known that the window field in the TCP header is - too small for high-speed, long-delay paths. - Experimental TCP options have been defined to extend - the window size; see for example [TCP:11]. In - anticipation of the adoption of such an extension, TCP - implementors should treat windows as 32 bits. - - 4.2.2.4 Urgent Pointer: RFC-793 Section 3.1 - - The second sentence is in error: the urgent pointer points - to the sequence number of the LAST octet (not LAST+1) in a - sequence of urgent data. The description on page 56 (last - sentence) is correct. - - A TCP MUST support a sequence of urgent data of any length. - - A TCP MUST inform the application layer asynchronously - whenever it receives an Urgent pointer and there was - previously no pending urgent data, or whenever the Urgent - pointer advances in the data stream. There MUST be a way - for the application to learn how much urgent data remains to - be read from the connection, or at least to determine - whether or not more urgent data remains to be read. 
- - DISCUSSION: - Although the Urgent mechanism may be used for any - application, it is normally used to send "interrupt"- - type commands to a Telnet program (see "Using Telnet - Synch Sequence" section in [INTRO:1]). - - The asynchronous or "out-of-band" notification will - allow the application to go into "urgent mode", reading - data from the TCP connection. This allows control - commands to be sent to an application whose normal - input buffers are full of unprocessed data. - - IMPLEMENTATION: - The generic ERROR-REPORT() upcall described in Section - 4.2.4.1 is a possible mechanism for informing the - application of the arrival of urgent data. - - - - - -Internet Engineering Task Force [Page 84] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - 4.2.2.5 TCP Options: RFC-793 Section 3.1 - - A TCP MUST be able to receive a TCP option in any segment. - A TCP MUST ignore without error any TCP option it does not - implement, assuming that the option has a length field (all - TCP options defined in the future will have length fields). - TCP MUST be prepared to handle an illegal option length - (e.g., zero) without crashing; a suggested procedure is to - reset the connection and log the reason. - - 4.2.2.6 Maximum Segment Size Option: RFC-793 Section 3.1 - - TCP MUST implement both sending and receiving the Maximum - Segment Size option [TCP:4]. - - TCP SHOULD send an MSS (Maximum Segment Size) option in - every SYN segment when its receive MSS differs from the - default 536, and MAY send it always. - - If an MSS option is not received at connection setup, TCP - MUST assume a default send MSS of 536 (576-40) [TCP:4]. 
- - The maximum size of a segment that TCP really sends, the - "effective send MSS," MUST be the smaller of the send MSS - (which reflects the available reassembly buffer size at the - remote host) and the largest size permitted by the IP layer: - - Eff.snd.MSS = - - min(SendMSS+20, MMS_S) - TCPhdrsize - IPoptionsize - - where: - - * SendMSS is the MSS value received from the remote host, - or the default 536 if no MSS option is received. - - * MMS_S is the maximum size for a transport-layer message - that TCP may send. - - * TCPhdrsize is the size of the TCP header; this is - normally 20, but may be larger if TCP options are to be - sent. - - * IPoptionsize is the size of any IP options that TCP - will pass to the IP layer with the current message. - - - The MSS value to be sent in an MSS option must be less than - - - -Internet Engineering Task Force [Page 85] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - or equal to: - - MMS_R - 20 - - where MMS_R is the maximum size for a transport-layer - message that can be received (and reassembled). TCP obtains - MMS_R and MMS_S from the IP layer; see the generic call - GET_MAXSIZES in Section 3.4. - - DISCUSSION: - The choice of TCP segment size has a strong effect on - performance. Larger segments increase throughput by - amortizing header size and per-datagram processing - overhead over more data bytes; however, if the packet - is so large that it causes IP fragmentation, efficiency - drops sharply if any fragments are lost [IP:9]. - - Some TCP implementations send an MSS option only if the - destination host is on a non-connected network. - However, in general the TCP layer may not have the - appropriate information to make this decision, so it is - preferable to leave to the IP layer the task of - determining a suitable MTU for the Internet path. We - therefore recommend that TCP always send the option (if - not 536) and that the IP layer determine MMS_R as - specified in 3.3.3 and 3.4. 
A proposed IP-layer - mechanism to measure the MTU would then modify the IP - layer without changing TCP. - - 4.2.2.7 TCP Checksum: RFC-793 Section 3.1 - - Unlike the UDP checksum (see Section 4.1.3.4), the TCP - checksum is never optional. The sender MUST generate it and - the receiver MUST check it. - - 4.2.2.8 TCP Connection State Diagram: RFC-793 Section 3.2, - page 23 - - There are several problems with this diagram: - - (a) The arrow from SYN-SENT to SYN-RCVD should be labeled - with "snd SYN,ACK", to agree with the text on page 68 - and with Figure 8. - - (b) There could be an arrow from SYN-RCVD state to LISTEN - state, conditioned on receiving a RST after a passive - open (see text page 70). - - - - -Internet Engineering Task Force [Page 86] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - (c) It is possible to go directly from FIN-WAIT-1 to the - TIME-WAIT state (see page 75 of the spec). - - - 4.2.2.9 Initial Sequence Number Selection: RFC-793 Section - 3.3, page 27 - - A TCP MUST use the specified clock-driven selection of - initial sequence numbers. - - 4.2.2.10 Simultaneous Open Attempts: RFC-793 Section 3.4, page - 32 - - There is an error in Figure 8: the packet on line 7 should - be identical to the packet on line 5. - - A TCP MUST support simultaneous open attempts. - - DISCUSSION: - It sometimes surprises implementors that if two - applications attempt to simultaneously connect to each - other, only one connection is generated instead of two. - This was an intentional design decision; don't try to - "fix" it. - - 4.2.2.11 Recovery from Old Duplicate SYN: RFC-793 Section 3.4, - page 33 - - Note that a TCP implementation MUST keep track of whether a - connection has reached SYN_RCVD state as the result of a - passive OPEN or an active OPEN. - - 4.2.2.12 RST Segment: RFC-793 Section 3.4 - - A TCP SHOULD allow a received RST segment to include data. 
- - DISCUSSION: - It has been suggested that a RST segment could contain - ASCII text that encoded and explained the cause of the - RST. No standard has yet been established for such - data. - - 4.2.2.13 Closing a Connection: RFC-793 Section 3.5 - - A TCP connection may terminate in two ways: (1) the normal - TCP close sequence using a FIN handshake, and (2) an "abort" - in which one or more RST segments are sent and the - connection state is immediately discarded. If a TCP - - - -Internet Engineering Task Force [Page 87] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - connection is closed by the remote site, the local - application MUST be informed whether it closed normally or - was aborted. - - The normal TCP close sequence delivers buffered data - reliably in both directions. Since the two directions of a - TCP connection are closed independently, it is possible for - a connection to be "half closed," i.e., closed in only one - direction, and a host is permitted to continue sending data - in the open direction on a half-closed connection. - - A host MAY implement a "half-duplex" TCP close sequence, so - that an application that has called CLOSE cannot continue to - read data from the connection. If such a host issues a - CLOSE call while received data is still pending in TCP, or - if new data is received after CLOSE is called, its TCP - SHOULD send a RST to show that data was lost. - - When a connection is closed actively, it MUST linger in - TIME-WAIT state for a time 2xMSL (Maximum Segment Lifetime). - However, it MAY accept a new SYN from the remote TCP to - reopen the connection directly from TIME-WAIT state, if it: - - (1) assigns its initial sequence number for the new - connection to be larger than the largest sequence - number it used on the previous connection incarnation, - and - - (2) returns to TIME-WAIT state if the SYN turns out to be - an old duplicate.
- - - DISCUSSION: - TCP's full-duplex data-preserving close is a feature - that is not included in the analogous ISO transport - protocol TP4. - - Some systems have not implemented half-closed - connections, presumably because they do not fit into - the I/O model of their particular operating system. On - these systems, once an application has called CLOSE, it - can no longer read input data from the connection; this - is referred to as a "half-duplex" TCP close sequence. - - The graceful close algorithm of TCP requires that the - connection state remain defined on (at least) one end - of the connection, for a timeout period of 2xMSL, i.e., - 4 minutes. During this period, the (remote socket, - - - -Internet Engineering Task Force [Page 88] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - local socket) pair that defines the connection is busy - and cannot be reused. To shorten the time that a given - port pair is tied up, some TCPs allow a new SYN to be - accepted in TIME-WAIT state. - - 4.2.2.14 Data Communication: RFC-793 Section 3.7, page 40 - - Since RFC-793 was written, there has been extensive work on - TCP algorithms to achieve efficient data communication. - Later sections of the present document describe required and - recommended TCP algorithms to determine when to send data - (Section 4.2.3.4), when to send an acknowledgment (Section - 4.2.3.2), and when to update the window (Section 4.2.3.3). - - DISCUSSION: - One important performance issue is "Silly Window - Syndrome" or "SWS" [TCP:5], a stable pattern of small - incremental window movements resulting in extremely - poor TCP performance. Algorithms to avoid SWS are - described below for both the sending side (Section - 4.2.3.4) and the receiving side (Section 4.2.3.3). 
- - In brief, SWS is caused by the receiver advancing the - right window edge whenever it has any new buffer space - available to receive data and by the sender using any - incremental window, no matter how small, to send more - data [TCP:5]. The result can be a stable pattern of - sending tiny data segments, even though both sender and - receiver have a large total buffer space for the - connection. SWS can only occur during the transmission - of a large amount of data; if the connection goes - quiescent, the problem will disappear. It is caused by - typical straightforward implementation of window - management, but the sender and receiver algorithms - given below will avoid it. - - Another important TCP performance issue is that some - applications, especially remote login to character-at- - a-time hosts, tend to send streams of one-octet data - segments. To avoid deadlocks, every TCP SEND call from - such applications must be "pushed", either explicitly - by the application or else implicitly by TCP. The - result may be a stream of TCP segments that contain one - data octet each, which makes very inefficient use of - the Internet and contributes to Internet congestion. - The Nagle Algorithm described in Section 4.2.3.4 - provides a simple and effective solution to this - problem. It does have the effect of clumping - - - -Internet Engineering Task Force [Page 89] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - characters over Telnet connections; this may initially - surprise users accustomed to single-character echo, but - user acceptance has not been a problem. - - Note that the Nagle algorithm and the send SWS - avoidance algorithm play complementary roles in - improving performance. The Nagle algorithm discourages - sending tiny segments when the data to be sent - increases in small increments, while the SWS avoidance - algorithm discourages small segments resulting from the - right window edge advancing in small increments. 
- - A careless implementation can send two or more - acknowledgment segments per data segment received. For - example, suppose the receiver acknowledges every data - segment immediately. When the application program - subsequently consumes the data and increases the - available receive buffer space again, the receiver may - send a second acknowledgment segment to update the - window at the sender. The extreme case occurs with - single-character segments on TCP connections using the - Telnet protocol for remote login service. Some - implementations have been observed in which each - incoming 1-character segment generates three return - segments: (1) the acknowledgment, (2) a one byte - increase in the window, and (3) the echoed character, - respectively. - - 4.2.2.15 Retransmission Timeout: RFC-793 Section 3.7, page 41 - - The algorithm suggested in RFC-793 for calculating the - retransmission timeout is now known to be inadequate; see - Section 4.2.3.1 below. - - Recent work by Jacobson [TCP:7] on Internet congestion and - TCP retransmission stability has produced a transmission - algorithm combining "slow start" with "congestion - avoidance". A TCP MUST implement this algorithm. - - If a retransmitted packet is identical to the original - packet (which implies not only that the data boundaries have - not changed, but also that the window and acknowledgment - fields of the header have not changed), then the same IP - Identification field MAY be used (see Section 3.2.1.5). - - IMPLEMENTATION: - Some TCP implementors have chosen to "packetize" the - data stream, i.e., to pick segment boundaries when - - - -Internet Engineering Task Force [Page 90] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - segments are originally sent and to queue these - segments in a "retransmission queue" until they are - acknowledged. 
Another design (which may be simpler) is - to defer packetizing until each time data is - transmitted or retransmitted, so there will be no - segment retransmission queue. - - In an implementation with a segment retransmission - queue, TCP performance may be enhanced by repacketizing - the segments awaiting acknowledgment when the first - retransmission timeout occurs. That is, the - outstanding segments that fitted would be combined into - one maximum-sized segment, with a new IP Identification - value. The TCP would then retain this combined segment - in the retransmit queue until it was acknowledged. - However, if the first two segments in the - retransmission queue totalled more than one maximum- - sized segment, the TCP would retransmit only the first - segment using the original IP Identification field. - - 4.2.2.16 Managing the Window: RFC-793 Section 3.7, page 41 - - A TCP receiver SHOULD NOT shrink the window, i.e., move the - right window edge to the left. However, a sending TCP MUST - be robust against window shrinking, which may cause the - "useable window" (see Section 4.2.3.4) to become negative. - - If this happens, the sender SHOULD NOT send new data, but - SHOULD retransmit normally the old unacknowledged data - between SND.UNA and SND.UNA+SND.WND. The sender MAY also - retransmit old data beyond SND.UNA+SND.WND, but SHOULD NOT - time out the connection if data beyond the right window edge - is not acknowledged. If the window shrinks to zero, the TCP - MUST probe it in the standard way (see next Section). - - DISCUSSION: - Many TCP implementations become confused if the window - shrinks from the right after data has been sent into a - larger window. Note that TCP has a heuristic to select - the latest window update despite possible datagram - reordering; as a result, it may ignore a window update - with a smaller window than previously offered if - neither the sequence number nor the acknowledgment - number is increased. 
- - - - - - - -Internet Engineering Task Force [Page 91] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - 4.2.2.17 Probing Zero Windows: RFC-793 Section 3.7, page 42 - - Probing of zero (offered) windows MUST be supported. - - A TCP MAY keep its offered receive window closed - indefinitely. As long as the receiving TCP continues to - send acknowledgments in response to the probe segments, the - sending TCP MUST allow the connection to stay open. - - DISCUSSION: - It is extremely important to remember that ACK - (acknowledgment) segments that contain no data are not - reliably transmitted by TCP. If zero window probing is - not supported, a connection may hang forever when an - ACK segment that re-opens the window is lost. - - The delay in opening a zero window generally occurs - when the receiving application stops taking data from - its TCP. For example, consider a printer daemon - application, stopped because the printer ran out of - paper. - - The transmitting host SHOULD send the first zero-window - probe when a zero window has existed for the retransmission - timeout period (see Section 4.2.2.15), and SHOULD increase - exponentially the interval between successive probes. - - DISCUSSION: - This procedure minimizes delay if the zero-window - condition is due to a lost ACK segment containing a - window-opening update. Exponential backoff is - recommended, possibly with some maximum interval not - specified here. This procedure is similar to that of - the retransmission algorithm, and it may be possible to - combine the two procedures in the implementation. - - 4.2.2.18 Passive OPEN Calls: RFC-793 Section 3.8 - - Every passive OPEN call either creates a new connection - record in LISTEN state, or it returns an error; it MUST NOT - affect any previously created connection record. 
- - A TCP that supports multiple concurrent users MUST provide - an OPEN call that will functionally allow an application to - LISTEN on a port while a connection block with the same - local port is in SYN-SENT or SYN-RECEIVED state. - - DISCUSSION: - - - -Internet Engineering Task Force [Page 92] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - Some applications (e.g., SMTP servers) may need to - handle multiple connection attempts at about the same - time. The probability of a connection attempt failing - is reduced by giving the application some means of - listening for a new connection at the same time that an - earlier connection attempt is going through the three- - way handshake. - - IMPLEMENTATION: - Acceptable implementations of concurrent opens may - permit multiple passive OPEN calls, or they may allow - "cloning" of LISTEN-state connections from a single - passive OPEN call. - - 4.2.2.19 Time to Live: RFC-793 Section 3.9, page 52 - - RFC-793 specified that TCP was to request the IP layer to - send TCP segments with TTL = 60. This is obsolete; the TTL - value used to send TCP segments MUST be configurable. See - Section 3.2.1.7 for discussion. - - 4.2.2.20 Event Processing: RFC-793 Section 3.9 - - While it is not strictly required, a TCP SHOULD be capable - of queueing out-of-order TCP segments. Change the "may" in - the last sentence of the first paragraph on page 70 to - "should". - - DISCUSSION: - Some small-host implementations have omitted segment - queueing because of limited buffer space. This - omission may be expected to adversely affect TCP - throughput, since loss of a single segment causes all - later segments to appear to be "out of sequence". - - In general, the processing of received segments MUST be - implemented to aggregate ACK segments whenever possible. - For example, if the TCP is processing a series of queued - segments, it MUST process them all before sending any ACK - segments. 
- - Here are some detailed error corrections and notes on the - Event Processing section of RFC-793. - - (a) CLOSE Call, CLOSE-WAIT state, p. 61: enter LAST-ACK - state, not CLOSING. - - (b) LISTEN state, check for SYN (pp. 65, 66): With a SYN - - - -Internet Engineering Task Force [Page 93] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - bit, if the security/compartment or the precedence is - wrong for the segment, a reset is sent. The wrong form - of reset is shown in the text; it should be: - - <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK> - - - (c) SYN-SENT state, Check for SYN, p. 68: When the - connection enters ESTABLISHED state, the following - variables must be set: - SND.WND <- SEG.WND - SND.WL1 <- SEG.SEQ - SND.WL2 <- SEG.ACK - - - (d) Check security and precedence, p. 71: The first heading - "ESTABLISHED STATE" should really be a list of all - states other than SYN-RECEIVED: ESTABLISHED, FIN-WAIT- - 1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, and - TIME-WAIT. - - (e) Check SYN bit, p. 71: "In SYN-RECEIVED state and if - the connection was initiated with a passive OPEN, then - return this connection to the LISTEN state and return. - Otherwise...". - - (f) Check ACK field, SYN-RECEIVED state, p. 72: When the - connection enters ESTABLISHED state, the variables - listed in (c) must be set. - - (g) Check ACK field, ESTABLISHED state, p. 72: The ACK is a - duplicate if SEG.ACK =< SND.UNA (the = was omitted). - Similarly, the window should be updated if: SND.UNA =< - SEG.ACK =< SND.NXT. - - (h) USER TIMEOUT, p. 77: - - It would be better to notify the application of the - timeout rather than letting TCP force the connection - closed. However, see also Section 4.2.3.5. - - - 4.2.2.21 Acknowledging Queued Segments: RFC-793 Section 3.9 - - A TCP MAY send an ACK segment acknowledging RCV.NXT when a - valid segment arrives that is in the window but not at the - left window edge.
- - - - -Internet Engineering Task Force [Page 94] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - DISCUSSION: - RFC-793 (see page 74) was ambiguous about whether or - not an ACK segment should be sent when an out-of-order - segment was received, i.e., when SEG.SEQ was unequal to - RCV.NXT. - - One reason for ACKing out-of-order segments might be to - support an experimental algorithm known as "fast - retransmit". With this algorithm, the sender uses the - "redundant" ACK's to deduce that a segment has been - lost before the retransmission timer has expired. It - counts the number of times an ACK has been received - with the same value of SEG.ACK and with the same right - window edge. If more than a threshold number of such - ACK's is received, then the segment containing the - octets starting at SEG.ACK is assumed to have been lost - and is retransmitted, without awaiting a timeout. The - threshold is chosen to compensate for the maximum - likely segment reordering in the Internet. There is - not yet enough experience with the fast retransmit - algorithm to determine how useful it is. - - 4.2.3 SPECIFIC ISSUES - - 4.2.3.1 Retransmission Timeout Calculation - - A host TCP MUST implement Karn's algorithm and Jacobson's - algorithm for computing the retransmission timeout ("RTO"). - - o Jacobson's algorithm for computing the smoothed round- - trip ("RTT") time incorporates a simple measure of the - variance [TCP:7]. - - o Karn's algorithm for selecting RTT measurements ensures - that ambiguous round-trip times will not corrupt the - calculation of the smoothed round-trip time [TCP:6]. - - This implementation also MUST include "exponential backoff" - for successive RTO values for the same segment. - Retransmission of SYN segments SHOULD use the same algorithm - as data segments. - - DISCUSSION: - There were two known problems with the RTO calculations - specified in RFC-793. 
First, the accurate measurement - of RTTs is difficult when there are retransmissions. - Second, the algorithm to compute the smoothed round- - trip time is inadequate [TCP:7], because it incorrectly - - - -Internet Engineering Task Force [Page 95] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - assumed that the variance in RTT values would be small - and constant. These problems were solved by Karn's and - Jacobson's algorithm, respectively. - - The performance increase resulting from the use of - these improvements varies from noticeable to dramatic. - Jacobson's algorithm for incorporating the measured RTT - variance is especially important on a low-speed link, - where the natural variation of packet sizes causes a - large variation in RTT. One vendor found link - utilization on a 9.6kb line went from 10% to 90% as a - result of implementing Jacobson's variance algorithm in - TCP. - - The following values SHOULD be used to initialize the - estimation parameters for a new connection: - - (a) RTT = 0 seconds. - - (b) RTO = 3 seconds. (The smoothed variance is to be - initialized to the value that will result in this RTO). - - The recommended upper and lower bounds on the RTO are known - to be inadequate on large internets. The lower bound SHOULD - be measured in fractions of a second (to accommodate high - speed LANs) and the upper bound should be 2*MSL, i.e., 240 - seconds. - - DISCUSSION: - Experience has shown that these initialization values - are reasonable, and that in any case the Karn and - Jacobson algorithms make TCP behavior reasonably - insensitive to the initial parameter choices. - - 4.2.3.2 When to Send an ACK Segment - - A host that is receiving a stream of TCP data segments can - increase efficiency in both the Internet and the hosts by - sending fewer than one ACK (acknowledgment) segment per data - segment received; this is known as a "delayed ACK" [TCP:5]. 
- - A TCP SHOULD implement a delayed ACK, but an ACK should not - be excessively delayed; in particular, the delay MUST be - less than 0.5 seconds, and in a stream of full-sized - segments there SHOULD be an ACK for at least every second - segment. - - DISCUSSION: - - - -Internet Engineering Task Force [Page 96] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - A delayed ACK gives the application an opportunity to - update the window and perhaps to send an immediate - response. In particular, in the case of character-mode - remote login, a delayed ACK can reduce the number of - segments sent by the server by a factor of 3 (ACK, - window update, and echo character all combined in one - segment). - - In addition, on some large multi-user hosts, a delayed - ACK can substantially reduce protocol processing - overhead by reducing the total number of packets to be - processed [TCP:5]. However, excessive delays on ACK's - can disturb the round-trip timing and packet "clocking" - algorithms [TCP:7]. - - 4.2.3.3 When to Send a Window Update - - A TCP MUST include a SWS avoidance algorithm in the receiver - [TCP:5]. - - IMPLEMENTATION: - The receiver's SWS avoidance algorithm determines when - the right window edge may be advanced; this is - customarily known as "updating the window". This - algorithm combines with the delayed ACK algorithm (see - Section 4.2.3.2) to determine when an ACK segment - containing the current window will really be sent to - the receiver. We use the notation of RFC-793; see - Figures 4 and 5 in that document. - - The solution to receiver SWS is to avoid advancing the - right window edge RCV.NXT+RCV.WND in small increments, - even if data is received from the network in small - segments. - - Suppose the total receive buffer space is RCV.BUFF. At - any given moment, RCV.USER octets of this total may be - tied up with data that has been received and - acknowledged but which the user process has not yet - consumed. 
When the connection is quiescent, RCV.WND = - RCV.BUFF and RCV.USER = 0. - - Keeping the right window edge fixed as data arrives and - is acknowledged requires that the receiver offer less - than its full buffer space, i.e., the receiver must - specify a RCV.WND that keeps RCV.NXT+RCV.WND constant - as RCV.NXT increases. Thus, the total buffer space - RCV.BUFF is generally divided into three parts: - - - -Internet Engineering Task Force [Page 97] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - - |<------- RCV.BUFF ---------------->| - 1 2 3 - ----|---------|------------------|------|---- - RCV.NXT ^ - (Fixed) - - 1 - RCV.USER = data received but not yet consumed; - 2 - RCV.WND = space advertised to sender; - 3 - Reduction = space available but not yet - advertised. - - - The suggested SWS avoidance algorithm for the receiver - is to keep RCV.NXT+RCV.WND fixed until the reduction - satisfies: - - RCV.BUFF - RCV.USER - RCV.WND >= - - min( Fr * RCV.BUFF, Eff.snd.MSS ) - - where Fr is a fraction whose recommended value is 1/2, - and Eff.snd.MSS is the effective send MSS for the - connection (see Section 4.2.2.6). When the inequality - is satisfied, RCV.WND is set to RCV.BUFF-RCV.USER. - - Note that the general effect of this algorithm is to - advance RCV.WND in increments of Eff.snd.MSS (for - realistic receive buffers: Eff.snd.MSS < RCV.BUFF/2). - Note also that the receiver must use its own - Eff.snd.MSS, assuming it is the same as the sender's. - - 4.2.3.4 When to Send Data - - A TCP MUST include a SWS avoidance algorithm in the sender. - - A TCP SHOULD implement the Nagle Algorithm [TCP:9] to - coalesce short segments. However, there MUST be a way for - an application to disable the Nagle algorithm on an - individual connection. In all cases, sending data is also - subject to the limitation imposed by the Slow Start - algorithm (Section 4.2.2.15). 
- - DISCUSSION: - The Nagle algorithm is generally as follows: - - If there is unacknowledged data (i.e., SND.NXT > - SND.UNA), then the sending TCP buffers all user - - - -Internet Engineering Task Force [Page 98] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - data (regardless of the PSH bit), until the - outstanding data has been acknowledged or until - the TCP can send a full-sized segment (Eff.snd.MSS - bytes; see Section 4.2.2.6). - - Some applications (e.g., real-time display window - updates) require that the Nagle algorithm be turned - off, so small data segments can be streamed out at the - maximum rate. - - IMPLEMENTATION: - The sender's SWS avoidance algorithm is more difficult - than the receiver's, because the sender does not know - (directly) the receiver's total buffer space RCV.BUFF. - An approach which has been found to work well is for - the sender to calculate Max(SND.WND), the maximum send - window it has seen so far on the connection, and to use - this value as an estimate of RCV.BUFF. Unfortunately, - this can only be an estimate; the receiver may at any - time reduce the size of RCV.BUFF. To avoid a resulting - deadlock, it is necessary to have a timeout to force - transmission of data, overriding the SWS avoidance - algorithm. In practice, this timeout should seldom - occur. - - The "useable window" [TCP:5] is: - - U = SND.UNA + SND.WND - SND.NXT - - i.e., the offered window less the amount of data sent - but not acknowledged. If D is the amount of data - queued in the sending TCP but not yet sent, then the - following set of rules is recommended.
- - Send data: - - (1) if a maximum-sized segment can be sent, i.e., if: - - min(D,U) >= Eff.snd.MSS; - - - (2) or if the data is pushed and all queued data can - be sent now, i.e., if: - - [SND.NXT = SND.UNA and] PUSHED and D <= U - - (the bracketed condition is imposed by the Nagle - algorithm); - - - -Internet Engineering Task Force [Page 99] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - (3) or if at least a fraction Fs of the maximum window - can be sent, i.e., if: - - [SND.NXT = SND.UNA and] - - min(D,U) >= Fs * Max(SND.WND); - - - (4) or if data is PUSHed and the override timeout - occurs. - - Here Fs is a fraction whose recommended value is 1/2. - The override timeout should be in the range 0.1 - 1.0 - seconds. It may be convenient to combine this timer - with the timer used to probe zero windows (Section - 4.2.2.17). - - Finally, note that the SWS avoidance algorithm just - specified is to be used instead of the sender-side - algorithm contained in [TCP:5]. - - 4.2.3.5 TCP Connection Failures - - Excessive retransmission of the same segment by TCP - indicates some failure of the remote host or the Internet - path. This failure may be of short or long duration. The - following procedure MUST be used to handle excessive - retransmissions of data segments [IP:11]: - - (a) There are two thresholds R1 and R2 measuring the amount - of retransmission that has occurred for the same - segment. R1 and R2 might be measured in time units or - as a count of retransmissions. - - (b) When the number of transmissions of the same segment - reaches or exceeds threshold R1, pass negative advice - (see Section 3.3.1.4) to the IP layer, to trigger - dead-gateway diagnosis. - - (c) When the number of transmissions of the same segment - reaches a threshold R2 greater than R1, close the - connection. - - (d) An application MUST be able to set the value for R2 for - a particular connection.
 For example, an interactive - application might set R2 to "infinity," giving the user - control over when to disconnect. - - - - -Internet Engineering Task Force [Page 100] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - (e) TCP SHOULD inform the application of the delivery - problem (unless such information has been disabled by - the application; see Section 4.2.4.1), when R1 is - reached and before R2. This will allow a remote login - (User Telnet) application program to inform the user, - for example. - - The value of R1 SHOULD correspond to at least 3 - retransmissions, at the current RTO. The value of R2 SHOULD - correspond to at least 100 seconds. - - An attempt to open a TCP connection could fail with - excessive retransmissions of the SYN segment or by receipt - of a RST segment or an ICMP Port Unreachable. SYN - retransmissions MUST be handled in the general way just - described for data retransmissions, including notification - of the application layer. - - However, the values of R1 and R2 may be different for SYN - and data segments. In particular, R2 for a SYN segment MUST - be set large enough to provide retransmission of the segment - for at least 3 minutes. The application can close the - connection (i.e., give up on the open attempt) sooner, of - course. - - DISCUSSION: - Some Internet paths have significant setup times, and - the number of such paths is likely to increase in the - future. - - 4.2.3.6 TCP Keep-Alives - - Implementors MAY include "keep-alives" in their TCP - implementations, although this practice is not universally - accepted. If keep-alives are included, the application MUST - be able to turn them on or off for each TCP connection, and - they MUST default to off. - - Keep-alive packets MUST only be sent when no data or - acknowledgement packets have been received for the - connection within an interval. This interval MUST be - configurable and MUST default to no less than two hours.
- - It is extremely important to remember that ACK segments that - contain no data are not reliably transmitted by TCP. - Consequently, if a keep-alive mechanism is implemented it - MUST NOT interpret failure to respond to any specific probe - as a dead connection. - - - -Internet Engineering Task Force [Page 101] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - An implementation SHOULD send a keep-alive segment with no - data; however, it MAY be configurable to send a keep-alive - segment containing one garbage octet, for compatibility with - erroneous TCP implementations. - - DISCUSSION: - A "keep-alive" mechanism periodically probes the other - end of a connection when the connection is otherwise - idle, even when there is no data to be sent. The TCP - specification does not include a keep-alive mechanism - because it could: (1) cause perfectly good connections - to break during transient Internet failures; (2) - consume unnecessary bandwidth ("if no one is using the - connection, who cares if it is still good?"); and (3) - cost money for an Internet path that charges for - packets. - - Some TCP implementations, however, have included a - keep-alive mechanism. To confirm that an idle - connection is still active, these implementations send - a probe segment designed to elicit a response from the - peer TCP. Such a segment generally contains SEG.SEQ = - SND.NXT-1 and may or may not contain one garbage octet - of data. Note that on a quiet connection SND.NXT = - RCV.NXT, so that this SEG.SEQ will be outside the - window. Therefore, the probe causes the receiver to - return an acknowledgment segment, confirming that the - connection is still live. If the peer has dropped the - connection due to a network partition or a crash, it - will respond with a RST instead of an acknowledgment - segment. - - Unfortunately, some misbehaved TCP implementations fail - to respond to a segment with SEG.SEQ = SND.NXT-1 unless - the segment contains data. 
 Alternatively, an - implementation could determine whether a peer responded - correctly to keep-alive packets with no garbage data - octet. - - A TCP keep-alive mechanism should only be invoked in - server applications that might otherwise hang - indefinitely and consume resources unnecessarily if a - client crashes or aborts a connection during a network - failure. - - - - - - - -Internet Engineering Task Force [Page 102] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - 4.2.3.7 TCP Multihoming - - If an application on a multihomed host does not specify the - local IP address when actively opening a TCP connection, - then the TCP MUST ask the IP layer to select a local IP - address before sending the (first) SYN. See the function - GET_SRCADDR() in Section 3.4. - - At all other times, a previous segment has either been sent - or received on this connection, and TCP MUST use the same - local address that was used in those previous - segments. - - 4.2.3.8 IP Options - - When received options are passed up to TCP from the IP - layer, TCP MUST ignore options that it does not understand. - - A TCP MAY support the Time Stamp and Record Route options. - - An application MUST be able to specify a source route when - it actively opens a TCP connection, and this MUST take - precedence over a source route received in a datagram. - - When a TCP connection is OPENed passively and a packet - arrives with a completed IP Source Route option (containing - a return route), TCP MUST save the return route and use it - for all segments sent on this connection. If a different - source route arrives in a later segment, the later - definition SHOULD override the earlier one. - - 4.2.3.9 ICMP Messages - - TCP MUST act on an ICMP error message passed up from the IP - layer, directing it to the connection that created the - error. The necessary demultiplexing information can be - found in the IP header contained within the ICMP message.
- - o Source Quench - - TCP MUST react to a Source Quench by slowing - transmission on the connection. The RECOMMENDED - procedure is for a Source Quench to trigger a "slow - start," as if a retransmission timeout had occurred. - - o Destination Unreachable -- codes 0, 1, 5 - - Since these Unreachable messages indicate soft error - - - -Internet Engineering Task Force [Page 103] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - conditions, TCP MUST NOT abort the connection, and it - SHOULD make the information available to the - application. - - DISCUSSION: - TCP could report the soft error condition directly - to the application layer with an upcall to the - ERROR_REPORT routine, or it could merely note the - message and report it to the application only when - and if the TCP connection times out. - - o Destination Unreachable -- codes 2-4 - - These are hard error conditions, so TCP SHOULD abort - the connection. - - o Time Exceeded -- codes 0, 1 - - This should be handled the same way as Destination - Unreachable codes 0, 1, 5 (see above). - - o Parameter Problem - - This should be handled the same way as Destination - Unreachable codes 0, 1, 5 (see above). - - - 4.2.3.10 Remote Address Validation - - A TCP implementation MUST reject as an error a local OPEN - call for an invalid remote IP address (e.g., a broadcast or - multicast address). - - An incoming SYN with an invalid source address must be - ignored either by TCP or by the IP layer (see Section - 3.2.1.3). - - A TCP implementation MUST silently discard an incoming SYN - segment that is addressed to a broadcast or multicast - address. 
- - 4.2.3.11 TCP Traffic Patterns - - IMPLEMENTATION: - The TCP protocol specification [TCP:1] gives the - implementor much freedom in designing the algorithms - that control the message flow over the connection -- - packetizing, managing the window, sending - - - -Internet Engineering Task Force [Page 104] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - acknowledgments, etc. These design decisions are - difficult because a TCP must adapt to a wide range of - traffic patterns. Experience has shown that a TCP - implementor needs to verify the design on two extreme - traffic patterns: - - o Single-character Segments - - Even if the sender is using the Nagle Algorithm, - when a TCP connection carries remote login traffic - across a low-delay LAN the receiver will generally - get a stream of single-character segments. If - remote terminal echo mode is in effect, the - receiver's system will generally echo each - character as it is received. - - o Bulk Transfer - - When TCP is used for bulk transfer, the data - stream should be made up (almost) entirely of - segments of the size of the effective MSS. - Although TCP uses a sequence number space with - byte (octet) granularity, in bulk-transfer mode - its operation should be as if TCP used a sequence - space that counted only segments. - - Experience has furthermore shown that a single TCP can - effectively and efficiently handle these two extremes. - - The most important tool for verifying a new TCP - implementation is a packet trace program. There is a - large volume of experience showing the importance of - tracing a variety of traffic patterns with other TCP - implementations and studying the results carefully. - - - 4.2.3.12 Efficiency - - IMPLEMENTATION: - Extensive experience has led to the following - suggestions for efficient implementation of TCP: - - (a) Don't Copy Data - - In bulk data transfer, the primary CPU-intensive - tasks are copying data from one place to another - and checksumming the data. 
It is vital to - minimize the number of copies of TCP data. Since - - - -Internet Engineering Task Force [Page 105] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - the ultimate speed limitation may be fetching data - across the memory bus, it may be useful to combine - the copy with checksumming, doing both with a - single memory fetch. - - (b) Hand-Craft the Checksum Routine - - A good TCP checksumming routine is typically two - to five times faster than a simple and direct - implementation of the definition. Great care and - clever coding are often required and advisable to - make the checksumming code "blazing fast". See - [TCP:10]. - - (c) Code for the Common Case - - TCP protocol processing can be complicated, but - for most segments there are only a few simple - decisions to be made. Per-segment processing will - be greatly speeded up by coding the main line to - minimize the number of decisions in the most - common case. - - - 4.2.4 TCP/APPLICATION LAYER INTERFACE - - 4.2.4.1 Asynchronous Reports - - There MUST be a mechanism for reporting soft TCP error - conditions to the application. Generically, we assume this - takes the form of an application-supplied ERROR_REPORT - routine that may be upcalled [INTRO:7] asynchronously from - the transport layer: - - ERROR_REPORT(local connection name, reason, subreason) - - The precise encoding of the reason and subreason parameters - is not specified here. However, the conditions that are - reported asynchronously to the application MUST include: - - * ICMP error message arrived (see 4.2.3.9) - - * Excessive retransmissions (see 4.2.3.5) - - * Urgent pointer advance (see 4.2.2.4). - - However, an application program that does not want to - receive such ERROR_REPORT calls SHOULD be able to - - - -Internet Engineering Task Force [Page 106] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - effectively disable these calls. 
- - DISCUSSION: - These error reports generally reflect soft errors that - can be ignored without harm by many applications. It - has been suggested that these error report calls should - default to "disabled," but this is not required. - - 4.2.4.2 Type-of-Service - - The application layer MUST be able to specify the Type-of- - Service (TOS) for segments that are sent on a connection. - It not required, but the application SHOULD be able to - change the TOS during the connection lifetime. TCP SHOULD - pass the current TOS value without change to the IP layer, - when it sends segments on the connection. - - The TOS will be specified independently in each direction on - the connection, so that the receiver application will - specify the TOS used for ACK segments. - - TCP MAY pass the most recently received TOS up to the - application. - - DISCUSSION - Some applications (e.g., SMTP) change the nature of - their communication during the lifetime of a - connection, and therefore would like to change the TOS - specification. - - Note also that the OPEN call specified in RFC-793 - includes a parameter ("options") in which the caller - can specify IP options such as source route, record - route, or timestamp. - - 4.2.4.3 Flush Call - - Some TCP implementations have included a FLUSH call, which - will empty the TCP send queue of any data for which the user - has issued SEND calls but which is still to the right of the - current send window. That is, it flushes as much queued - send data as possible without losing sequence number - synchronization. This is useful for implementing the "abort - output" function of Telnet. - - - - - - - -Internet Engineering Task Force [Page 107] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - 4.2.4.4 Multihoming - - The user interface outlined in sections 2.7 and 3.8 of RFC- - 793 needs to be extended for multihoming. The OPEN call - MUST have an optional parameter: - - OPEN( ... [local IP address,] ... 
) - - to allow the specification of the local IP address. - - DISCUSSION: - Some TCP-based applications need to specify the local - IP address to be used to open a particular connection; - FTP is an example. - - IMPLEMENTATION: - A passive OPEN call with a specified "local IP address" - parameter will await an incoming connection request to - that address. If the parameter is unspecified, a - passive OPEN will await an incoming connection request - to any local IP address, and then bind the local IP - address of the connection to the particular address - that is used. - - For an active OPEN call, a specified "local IP address" - parameter will be used for opening the connection. If - the parameter is unspecified, the networking software - will choose an appropriate local IP address (see - Section 3.3.4.2) for the connection - - 4.2.5 TCP REQUIREMENT SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e --------------------------------------------------|--------|-|-|-|-|-|-- - | | | | | | | -Push flag | | | | | | | - Aggregate or queue un-pushed data |4.2.2.2 | | |x| | | - Sender collapse successive PSH flags |4.2.2.2 | |x| | | | - SEND call can specify PUSH |4.2.2.2 | | |x| | | - - - -Internet Engineering Task Force [Page 108] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - If cannot: sender buffer indefinitely |4.2.2.2 | | | | |x| - If cannot: PSH last segment |4.2.2.2 |x| | | | | - Notify receiving ALP of PSH |4.2.2.2 | | |x| | |1 - Send max size segment when possible |4.2.2.2 | |x| | | | - | | | | | | | -Window | | | | | | | - Treat as unsigned number |4.2.2.3 |x| | | | | - Handle as 32-bit number |4.2.2.3 | |x| | | | - Shrink window from right |4.2.2.16| | | |x| | - Robust against shrinking window |4.2.2.16|x| | | | | - Receiver's window closed indefinitely |4.2.2.17| | |x| | | - Sender probe zero window 
|4.2.2.17|x| | | | | - First probe after RTO |4.2.2.17| |x| | | | - Exponential backoff |4.2.2.17| |x| | | | - Allow window stay zero indefinitely |4.2.2.17|x| | | | | - Sender timeout OK conn with zero wind |4.2.2.17| | | | |x| - | | | | | | | -Urgent Data | | | | | | | - Pointer points to last octet |4.2.2.4 |x| | | | | - Arbitrary length urgent data sequence |4.2.2.4 |x| | | | | - Inform ALP asynchronously of urgent data |4.2.2.4 |x| | | | |1 - ALP can learn if/how much urgent data Q'd |4.2.2.4 |x| | | | |1 - | | | | | | | -TCP Options | | | | | | | - Receive TCP option in any segment |4.2.2.5 |x| | | | | - Ignore unsupported options |4.2.2.5 |x| | | | | - Cope with illegal option length |4.2.2.5 |x| | | | | - Implement sending & receiving MSS option |4.2.2.6 |x| | | | | - Send MSS option unless 536 |4.2.2.6 | |x| | | | - Send MSS option always |4.2.2.6 | | |x| | | - Send-MSS default is 536 |4.2.2.6 |x| | | | | - Calculate effective send seg size |4.2.2.6 |x| | | | | - | | | | | | | -TCP Checksums | | | | | | | - Sender compute checksum |4.2.2.7 |x| | | | | - Receiver check checksum |4.2.2.7 |x| | | | | - | | | | | | | -Use clock-driven ISN selection |4.2.2.9 |x| | | | | - | | | | | | | -Opening Connections | | | | | | | - Support simultaneous open attempts |4.2.2.10|x| | | | | - SYN-RCVD remembers last state |4.2.2.11|x| | | | | - Passive Open call interfere with others |4.2.2.18| | | | |x| - Function: simultan. LISTENs for same port |4.2.2.18|x| | | | | - Ask IP for src address for SYN if necc. |4.2.3.7 |x| | | | | - Otherwise, use local addr of conn. 
|4.2.3.7 |x| | | | | - OPEN to broadcast/multicast IP Address |4.2.3.14| | | | |x| - Silently discard seg to bcast/mcast addr |4.2.3.14|x| | | | | - - - -Internet Engineering Task Force [Page 109] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - | | | | | | | -Closing Connections | | | | | | | - RST can contain data |4.2.2.12| |x| | | | - Inform application of aborted conn |4.2.2.13|x| | | | | - Half-duplex close connections |4.2.2.13| | |x| | | - Send RST to indicate data lost |4.2.2.13| |x| | | | - In TIME-WAIT state for 2xMSL seconds |4.2.2.13|x| | | | | - Accept SYN from TIME-WAIT state |4.2.2.13| | |x| | | - | | | | | | | -Retransmissions | | | | | | | - Jacobson Slow Start algorithm |4.2.2.15|x| | | | | - Jacobson Congestion-Avoidance algorithm |4.2.2.15|x| | | | | - Retransmit with same IP ident |4.2.2.15| | |x| | | - Karn's algorithm |4.2.3.1 |x| | | | | - Jacobson's RTO estimation alg. |4.2.3.1 |x| | | | | - Exponential backoff |4.2.3.1 |x| | | | | - SYN RTO calc same as data |4.2.3.1 | |x| | | | - Recommended initial values and bounds |4.2.3.1 | |x| | | | - | | | | | | | -Generating ACK's: | | | | | | | - Queue out-of-order segments |4.2.2.20| |x| | | | - Process all Q'd before send ACK |4.2.2.20|x| | | | | - Send ACK for out-of-order segment |4.2.2.21| | |x| | | - Delayed ACK's |4.2.3.2 | |x| | | | - Delay < 0.5 seconds |4.2.3.2 |x| | | | | - Every 2nd full-sized segment ACK'd |4.2.3.2 |x| | | | | - Receiver SWS-Avoidance Algorithm |4.2.3.3 |x| | | | | - | | | | | | | -Sending data | | | | | | | - Configurable TTL |4.2.2.19|x| | | | | - Sender SWS-Avoidance Algorithm |4.2.3.4 |x| | | | | - Nagle algorithm |4.2.3.4 | |x| | | | - Application can disable Nagle algorithm |4.2.3.4 |x| | | | | - | | | | | | | -Connection Failures: | | | | | | | - Negative advice to IP on R1 retxs |4.2.3.5 |x| | | | | - Close connection on R2 retxs |4.2.3.5 |x| | | | | - ALP can set R2 |4.2.3.5 |x| | | | |1 - Inform ALP of R1<=retxs inform ALP |4.2.3.9 | |x| | | | - 
Dest. Unreach (0,1,5) => abort conn |4.2.3.9 | | | | |x| - Dest. Unreach (2-4) => abort conn |4.2.3.9 | |x| | | | - Source Quench => slow start |4.2.3.9 | |x| | | | - Time Exceeded => tell ALP, don't abort |4.2.3.9 | |x| | | | - Param Problem => tell ALP, don't abort |4.2.3.9 | |x| | | | - | | | | | | | -Address Validation | | | | | | | - Reject OPEN call to invalid IP address |4.2.3.10|x| | | | | - Reject SYN from invalid IP address |4.2.3.10|x| | | | | - Silently discard SYN to bcast/mcast addr |4.2.3.10|x| | | | | - | | | | | | | -TCP/ALP Interface Services | | | | | | | - Error Report mechanism |4.2.4.1 |x| | | | | - ALP can disable Error Report Routine |4.2.4.1 | |x| | | | - ALP can specify TOS for sending |4.2.4.2 |x| | | | | - Passed unchanged to IP |4.2.4.2 | |x| | | | - ALP can change TOS during connection |4.2.4.2 | |x| | | | - Pass received TOS up to ALP |4.2.4.2 | | |x| | | - FLUSH call |4.2.4.3 | | |x| | | - Optional local IP addr parm. in OPEN |4.2.4.4 |x| | | | | --------------------------------------------------|--------|-|-|-|-|-|-- --------------------------------------------------|--------|-|-|-|-|-|-- - -FOOTNOTES: - -(1) "ALP" means Application-Layer program. - - - - - - - - - - -Internet Engineering Task Force [Page 111] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - -5. REFERENCES - -INTRODUCTORY REFERENCES - - -[INTRO:1] "Requirements for Internet Hosts -- Application and Support," - IETF Host Requirements Working Group, R. Braden, Ed., RFC-1123, - October 1989. - -[INTRO:2] "Requirements for Internet Gateways," R. Braden and J. - Postel, RFC-1009, June 1987. - -[INTRO:3] "DDN Protocol Handbook," NIC-50004, NIC-50005, NIC-50006, - (three volumes), SRI International, December 1985. - -[INTRO:4] "Official Internet Protocols," J. Reynolds and J. Postel, - RFC-1011, May 1987. - - This document is republished periodically with new RFC numbers; the - latest version must be used. - -[INTRO:5] "Protocol Document Order Information," O. 
Jacobsen and J. - Postel, RFC-980, March 1986. - -[INTRO:6] "Assigned Numbers," J. Reynolds and J. Postel, RFC-1010, May - 1987. - - This document is republished periodically with new RFC numbers; the - latest version must be used. - -[INTRO:7] "Modularity and Efficiency in Protocol Implementations," D. - Clark, RFC-817, July 1982. - -[INTRO:8] "The Structuring of Systems Using Upcalls," D. Clark, 10th ACM - SOSP, Orcas Island, Washington, December 1985. - - -Secondary References: - - -[INTRO:9] "A Protocol for Packet Network Intercommunication," V. Cerf - and R. Kahn, IEEE Transactions on Communication, May 1974. - -[INTRO:10] "The ARPA Internet Protocol," J. Postel, C. Sunshine, and D. - Cohen, Computer Networks, Vol. 5, No. 4, July 1981. - -[INTRO:11] "The DARPA Internet Protocol Suite," B. Leiner, J. Postel, - R. Cole and D. Mills, Proceedings INFOCOM 85, IEEE, Washington DC, - - - -Internet Engineering Task Force [Page 112] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - March 1985. Also in: IEEE Communications Magazine, March 1985. - Also available as ISI-RS-85-153. - -[INTRO:12] "Final Text of DIS8473, Protocol for Providing the - Connectionless Mode Network Service," ANSI, published as RFC-994, - March 1986. - -[INTRO:13] "End System to Intermediate System Routing Exchange - Protocol," ANSI X3S3.3, published as RFC-995, April 1986. - - -LINK LAYER REFERENCES - - -[LINK:1] "Trailer Encapsulations," S. Leffler and M. Karels, RFC-893, - April 1984. - -[LINK:2] "An Ethernet Address Resolution Protocol," D. Plummer, RFC-826, - November 1982. - -[LINK:3] "A Standard for the Transmission of IP Datagrams over Ethernet - Networks," C. Hornig, RFC-894, April 1984. - -[LINK:4] "A Standard for the Transmission of IP Datagrams over IEEE 802 - "Networks," J. Postel and J. Reynolds, RFC-1042, February 1988. - - This RFC contains a great deal of information of importance to - Internet implementers planning to use IEEE 802 networks. 
- - -IP LAYER REFERENCES - - -[IP:1] "Internet Protocol (IP)," J. Postel, RFC-791, September 1981. - -[IP:2] "Internet Control Message Protocol (ICMP)," J. Postel, RFC-792, - September 1981. - -[IP:3] "Internet Standard Subnetting Procedure," J. Mogul and J. Postel, - RFC-950, August 1985. - -[IP:4] "Host Extensions for IP Multicasting," S. Deering, RFC-1112, - August 1989. - -[IP:5] "Military Standard Internet Protocol," MIL-STD-1777, Department - of Defense, August 1983. - - This specification, as amended by RFC-963, is intended to describe - - - -Internet Engineering Task Force [Page 113] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - - the Internet Protocol but has some serious omissions (e.g., the - mandatory subnet extension [IP:3] and the optional multicasting - extension [IP:4]). It is also out of date. If there is a - conflict, RFC-791, RFC-792, and RFC-950 must be taken as - authoritative, while the present document is authoritative over - all. - -[IP:6] "Some Problems with the Specification of the Military Standard - Internet Protocol," D. Sidhu, RFC-963, November 1985. - -[IP:7] "The TCP Maximum Segment Size and Related Topics," J. Postel, - RFC-879, November 1983. - - Discusses and clarifies the relationship between the TCP Maximum - Segment Size option and the IP datagram size. - -[IP:8] "Internet Protocol Security Options," B. Schofield, RFC-1108, - October 1989. - -[IP:9] "Fragmentation Considered Harmful," C. Kent and J. Mogul, ACM - SIGCOMM-87, August 1987. Published as ACM Comp Comm Review, Vol. - 17, no. 5. - - This useful paper discusses the problems created by Internet - fragmentation and presents alternative solutions. - -[IP:10] "IP Datagram Reassembly Algorithms," D. Clark, RFC-815, July - 1982. - - This and the following paper should be read by every implementor. - -[IP:11] "Fault Isolation and Recovery," D. Clark, RFC-816, July 1982. 
- -SECONDARY IP REFERENCES: - - -[IP:12] "Broadcasting Internet Datagrams in the Presence of Subnets," J. - Mogul, RFC-922, October 1984. - -[IP:13] "Name, Addresses, Ports, and Routes," D. Clark, RFC-814, July - 1982. - -[IP:14] "Something a Host Could Do with Source Quench: The Source Quench - Introduced Delay (SQUID)," W. Prue and J. Postel, RFC-1016, July - 1987. - - This RFC first described directed broadcast addresses. However, - the bulk of the RFC is concerned with gateways, not hosts. - - - -Internet Engineering Task Force [Page 114] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - -UDP REFERENCES: - - -[UDP:1] "User Datagram Protocol," J. Postel, RFC-768, August 1980. - - -TCP REFERENCES: - - -[TCP:1] "Transmission Control Protocol," J. Postel, RFC-793, September - 1981. - - -[TCP:2] "Transmission Control Protocol," MIL-STD-1778, US Department of - Defense, August 1984. - - This specification as amended by RFC-964 is intended to describe - the same protocol as RFC-793 [TCP:1]. If there is a conflict, - RFC-793 takes precedence, and the present document is authoritative - over both. - - -[TCP:3] "Some Problems with the Specification of the Military Standard - Transmission Control Protocol," D. Sidhu and T. Blumer, RFC-964, - November 1985. - - -[TCP:4] "The TCP Maximum Segment Size and Related Topics," J. Postel, - RFC-879, November 1983. - - -[TCP:5] "Window and Acknowledgment Strategy in TCP," D. Clark, RFC-813, - July 1982. - - -[TCP:6] "Round Trip Time Estimation," P. Karn & C. Partridge, ACM - SIGCOMM-87, August 1987. - - -[TCP:7] "Congestion Avoidance and Control," V. Jacobson, ACM SIGCOMM-88, - August 1988. - - -SECONDARY TCP REFERENCES: - - -[TCP:8] "Modularity and Efficiency in Protocol Implementation," D. - Clark, RFC-817, July 1982. - - - -Internet Engineering Task Force [Page 115] - - - - -RFC1122 TRANSPORT LAYER -- TCP October 1989 - - -[TCP:9] "Congestion Control in IP/TCP," J. Nagle, RFC-896, January 1984. 
- - -[TCP:10] "Computing the Internet Checksum," R. Braden, D. Borman, and C. - Partridge, RFC-1071, September 1988. - - -[TCP:11] "TCP Extensions for Long-Delay Paths," V. Jacobson & R. Braden, - RFC-1072, October 1988. - - -Security Considerations - - There are many security issues in the communication layers of host - software, but a full discussion is beyond the scope of this RFC. - - The Internet architecture generally provides little protection - against spoofing of IP source addresses, so any security mechanism - that is based upon verifying the IP source address of a datagram - should be treated with suspicion. However, in restricted - environments some source-address checking may be possible. For - example, there might be a secure LAN whose gateway to the rest of the - Internet discarded any incoming datagram with a source address that - spoofed the LAN address. In this case, a host on the LAN could use - the source address to test for local vs. remote source. This problem - is complicated by source routing, and some have suggested that - source-routed datagram forwarding by hosts (see Section 3.3.5) should - be outlawed for security reasons. - - Security-related issues are mentioned in sections concerning the IP - Security option (Section 3.2.1.8), the ICMP Parameter Problem message - (Section 3.2.2.5), IP options in UDP datagrams (Section 4.1.3.2), and - reserved TCP ports (Section 4.2.2.1). - -Author's Address - - Robert Braden - USC/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292-6695 - - Phone: (213) 822 1511 - - EMail: Braden@ISI.EDU - - - - - - - -Internet Engineering Task Force [Page 116] - diff --git a/kernel/picotcp/RFC/rfc1123.txt b/kernel/picotcp/RFC/rfc1123.txt deleted file mode 100644 index 51cdf83..0000000 --- a/kernel/picotcp/RFC/rfc1123.txt +++ /dev/null @@ -1,5782 +0,0 @@ - - - - - - -Network Working Group Internet Engineering Task Force -Request for Comments: 1123 R. 
Braden, Editor - October 1989 - - - Requirements for Internet Hosts -- Application and Support - -Status of This Memo - - This RFC is an official specification for the Internet community. It - incorporates by reference, amends, corrects, and supplements the - primary protocol standards documents relating to hosts. Distribution - of this document is unlimited. - -Summary - - This RFC is one of a pair that defines and discusses the requirements - for Internet host software. This RFC covers the application and - support protocols; its companion RFC-1122 covers the communication - protocol layers: link layer, IP layer, and transport layer. - - - - Table of Contents - - - - - 1. INTRODUCTION ............................................... 5 - 1.1 The Internet Architecture .............................. 6 - 1.2 General Considerations ................................. 6 - 1.2.1 Continuing Internet Evolution ..................... 6 - 1.2.2 Robustness Principle .............................. 7 - 1.2.3 Error Logging ..................................... 8 - 1.2.4 Configuration ..................................... 8 - 1.3 Reading this Document .................................. 10 - 1.3.1 Organization ...................................... 10 - 1.3.2 Requirements ...................................... 10 - 1.3.3 Terminology ....................................... 11 - 1.4 Acknowledgments ........................................ 12 - - 2. GENERAL ISSUES ............................................. 13 - 2.1 Host Names and Numbers ................................. 13 - 2.2 Using Domain Name Service .............................. 13 - 2.3 Applications on Multihomed hosts ....................... 14 - 2.4 Type-of-Service ........................................ 14 - 2.5 GENERAL APPLICATION REQUIREMENTS SUMMARY ............... 15 - - - - -Internet Engineering Task Force [Page 1] - - - - -RFC1123 INTRODUCTION October 1989 - - - 3. 
REMOTE LOGIN -- TELNET PROTOCOL ............................ 16 - 3.1 INTRODUCTION ........................................... 16 - 3.2 PROTOCOL WALK-THROUGH .................................. 16 - 3.2.1 Option Negotiation ................................ 16 - 3.2.2 Telnet Go-Ahead Function .......................... 16 - 3.2.3 Control Functions ................................. 17 - 3.2.4 Telnet "Synch" Signal ............................. 18 - 3.2.5 NVT Printer and Keyboard .......................... 19 - 3.2.6 Telnet Command Structure .......................... 20 - 3.2.7 Telnet Binary Option .............................. 20 - 3.2.8 Telnet Terminal-Type Option ....................... 20 - 3.3 SPECIFIC ISSUES ........................................ 21 - 3.3.1 Telnet End-of-Line Convention ..................... 21 - 3.3.2 Data Entry Terminals .............................. 23 - 3.3.3 Option Requirements ............................... 24 - 3.3.4 Option Initiation ................................. 24 - 3.3.5 Telnet Linemode Option ............................ 25 - 3.4 TELNET/USER INTERFACE .................................. 25 - 3.4.1 Character Set Transparency ........................ 25 - 3.4.2 Telnet Commands ................................... 26 - 3.4.3 TCP Connection Errors ............................. 26 - 3.4.4 Non-Default Telnet Contact Port ................... 26 - 3.4.5 Flushing Output ................................... 26 - 3.5. TELNET REQUIREMENTS SUMMARY ........................... 27 - - 4. FILE TRANSFER .............................................. 29 - 4.1 FILE TRANSFER PROTOCOL -- FTP .......................... 29 - 4.1.1 INTRODUCTION ...................................... 29 - 4.1.2. PROTOCOL WALK-THROUGH ............................ 29 - 4.1.2.1 LOCAL Type ................................... 29 - 4.1.2.2 Telnet Format Control ........................ 30 - 4.1.2.3 Page Structure ............................... 
30 - 4.1.2.4 Data Structure Transformations ............... 30 - 4.1.2.5 Data Connection Management ................... 31 - 4.1.2.6 PASV Command ................................. 31 - 4.1.2.7 LIST and NLST Commands ....................... 31 - 4.1.2.8 SITE Command ................................. 32 - 4.1.2.9 STOU Command ................................. 32 - 4.1.2.10 Telnet End-of-line Code ..................... 32 - 4.1.2.11 FTP Replies ................................. 33 - 4.1.2.12 Connections ................................. 34 - 4.1.2.13 Minimum Implementation; RFC-959 Section ..... 34 - 4.1.3 SPECIFIC ISSUES ................................... 35 - 4.1.3.1 Non-standard Command Verbs ................... 35 - 4.1.3.2 Idle Timeout ................................. 36 - 4.1.3.3 Concurrency of Data and Control .............. 36 - 4.1.3.4 FTP Restart Mechanism ........................ 36 - 4.1.4 FTP/USER INTERFACE ................................ 39 - - - -Internet Engineering Task Force [Page 2] - - - - -RFC1123 INTRODUCTION October 1989 - - - 4.1.4.1 Pathname Specification ....................... 39 - 4.1.4.2 "QUOTE" Command .............................. 40 - 4.1.4.3 Displaying Replies to User ................... 40 - 4.1.4.4 Maintaining Synchronization .................. 40 - 4.1.5 FTP REQUIREMENTS SUMMARY ......................... 41 - 4.2 TRIVIAL FILE TRANSFER PROTOCOL -- TFTP ................. 44 - 4.2.1 INTRODUCTION ...................................... 44 - 4.2.2 PROTOCOL WALK-THROUGH ............................. 44 - 4.2.2.1 Transfer Modes ............................... 44 - 4.2.2.2 UDP Header ................................... 44 - 4.2.3 SPECIFIC ISSUES ................................... 44 - 4.2.3.1 Sorcerer's Apprentice Syndrome ............... 44 - 4.2.3.2 Timeout Algorithms ........................... 46 - 4.2.3.3 Extensions ................................... 46 - 4.2.3.4 Access Control ............................... 
46 - 4.2.3.5 Broadcast Request ............................ 46 - 4.2.4 TFTP REQUIREMENTS SUMMARY ......................... 47 - - 5. ELECTRONIC MAIL -- SMTP and RFC-822 ........................ 48 - 5.1 INTRODUCTION ........................................... 48 - 5.2 PROTOCOL WALK-THROUGH .................................. 48 - 5.2.1 The SMTP Model .................................... 48 - 5.2.2 Canonicalization .................................. 49 - 5.2.3 VRFY and EXPN Commands ............................ 50 - 5.2.4 SEND, SOML, and SAML Commands ..................... 50 - 5.2.5 HELO Command ...................................... 50 - 5.2.6 Mail Relay ........................................ 51 - 5.2.7 RCPT Command ...................................... 52 - 5.2.8 DATA Command ...................................... 53 - 5.2.9 Command Syntax .................................... 54 - 5.2.10 SMTP Replies ..................................... 54 - 5.2.11 Transparency ..................................... 55 - 5.2.12 WKS Use in MX Processing ......................... 55 - 5.2.13 RFC-822 Message Specification .................... 55 - 5.2.14 RFC-822 Date and Time Specification .............. 55 - 5.2.15 RFC-822 Syntax Change ............................ 56 - 5.2.16 RFC-822 Local-part .............................. 56 - 5.2.17 Domain Literals .................................. 57 - 5.2.18 Common Address Formatting Errors ................. 58 - 5.2.19 Explicit Source Routes ........................... 58 - 5.3 SPECIFIC ISSUES ........................................ 59 - 5.3.1 SMTP Queueing Strategies .......................... 59 - 5.3.1.1 Sending Strategy .............................. 59 - 5.3.1.2 Receiving strategy ........................... 61 - 5.3.2 Timeouts in SMTP .................................. 61 - 5.3.3 Reliable Mail Receipt ............................. 63 - 5.3.4 Reliable Mail Transmission ........................ 
63 - 5.3.5 Domain Name Support ............................... 65 - - - -Internet Engineering Task Force [Page 3] - - - - -RFC1123 INTRODUCTION October 1989 - - - 5.3.6 Mailing Lists and Aliases ......................... 65 - 5.3.7 Mail Gatewaying ................................... 66 - 5.3.8 Maximum Message Size .............................. 68 - 5.4 SMTP REQUIREMENTS SUMMARY .............................. 69 - - 6. SUPPORT SERVICES ............................................ 72 - 6.1 DOMAIN NAME TRANSLATION ................................. 72 - 6.1.1 INTRODUCTION ....................................... 72 - 6.1.2 PROTOCOL WALK-THROUGH ............................. 72 - 6.1.2.1 Resource Records with Zero TTL ............... 73 - 6.1.2.2 QCLASS Values ................................ 73 - 6.1.2.3 Unused Fields ................................ 73 - 6.1.2.4 Compression .................................. 73 - 6.1.2.5 Misusing Configuration Info .................. 73 - 6.1.3 SPECIFIC ISSUES ................................... 74 - 6.1.3.1 Resolver Implementation ...................... 74 - 6.1.3.2 Transport Protocols .......................... 75 - 6.1.3.3 Efficient Resource Usage ..................... 77 - 6.1.3.4 Multihomed Hosts ............................. 78 - 6.1.3.5 Extensibility ................................ 79 - 6.1.3.6 Status of RR Types ........................... 79 - 6.1.3.7 Robustness ................................... 80 - 6.1.3.8 Local Host Table ............................. 80 - 6.1.4 DNS USER INTERFACE ................................ 81 - 6.1.4.1 DNS Administration ........................... 81 - 6.1.4.2 DNS User Interface ........................... 81 - 6.1.4.3 Interface Abbreviation Facilities ............. 82 - 6.1.5 DOMAIN NAME SYSTEM REQUIREMENTS SUMMARY ........... 84 - 6.2 HOST INITIALIZATION .................................... 87 - 6.2.1 INTRODUCTION ...................................... 
87 - 6.2.2 REQUIREMENTS ...................................... 87 - 6.2.2.1 Dynamic Configuration ........................ 87 - 6.2.2.2 Loading Phase ................................ 89 - 6.3 REMOTE MANAGEMENT ...................................... 90 - 6.3.1 INTRODUCTION ...................................... 90 - 6.3.2 PROTOCOL WALK-THROUGH ............................. 90 - 6.3.3 MANAGEMENT REQUIREMENTS SUMMARY ................... 92 - - 7. REFERENCES ................................................. 93 - - - - - - - - - - - - -Internet Engineering Task Force [Page 4] - - - - -RFC1123 INTRODUCTION October 1989 - - -1. INTRODUCTION - - This document is one of a pair that defines and discusses the - requirements for host system implementations of the Internet protocol - suite. This RFC covers the applications layer and support protocols. - Its companion RFC, "Requirements for Internet Hosts -- Communications - Layers" [INTRO:1] covers the lower layer protocols: transport layer, - IP layer, and link layer. - - These documents are intended to provide guidance for vendors, - implementors, and users of Internet communication software. They - represent the consensus of a large body of technical experience and - wisdom, contributed by members of the Internet research and vendor - communities. - - This RFC enumerates standard protocols that a host connected to the - Internet must use, and it incorporates by reference the RFCs and - other documents describing the current specifications for these - protocols. It corrects errors in the referenced documents and adds - additional discussion and guidance for an implementor. - - For each protocol, this document also contains an explicit set of - requirements, recommendations, and options. 
The reader must - understand that the list of requirements in this document is - incomplete by itself; the complete set of requirements for an - Internet host is primarily defined in the standard protocol - specification documents, with the corrections, amendments, and - supplements contained in this RFC. - - A good-faith implementation of the protocols that was produced after - careful reading of the RFC's and with some interaction with the - Internet technical community, and that followed good communications - software engineering practices, should differ from the requirements - of this document in only minor ways. Thus, in many cases, the - "requirements" in this RFC are already stated or implied in the - standard protocol documents, so that their inclusion here is, in a - sense, redundant. However, they were included because some past - implementation has made the wrong choice, causing problems of - interoperability, performance, and/or robustness. - - This document includes discussion and explanation of many of the - requirements and recommendations. A simple list of requirements - would be dangerous, because: - - o Some required features are more important than others, and some - features are optional. - - o There may be valid reasons why particular vendor products that - - - -Internet Engineering Task Force [Page 5] - - - - -RFC1123 INTRODUCTION October 1989 - - - are designed for restricted contexts might choose to use - different specifications. - - However, the specifications of this document must be followed to meet - the general goal of arbitrary host interoperation across the - diversity and complexity of the Internet system. Although most - current implementations fail to meet these requirements in various - ways, some minor and some major, this specification is the ideal - towards which we need to move. - - These requirements are based on the current level of Internet - architecture. 
This document will be updated as required to provide - additional clarifications or to include additional information in - those areas in which specifications are still evolving. - - This introductory section begins with general advice to host software - vendors, and then gives some guidance on reading the rest of the - document. Section 2 contains general requirements that may be - applicable to all application and support protocols. Sections 3, 4, - and 5 contain the requirements on protocols for the three major - applications: Telnet, file transfer, and electronic mail, - respectively. Section 6 covers the support applications: the domain - name system, system initialization, and management. Finally, all - references will be found in Section 7. - - 1.1 The Internet Architecture - - For a brief introduction to the Internet architecture from a host - viewpoint, see Section 1.1 of [INTRO:1]. That section also - contains recommended references for general background on the - Internet architecture. - - 1.2 General Considerations - - There are two important lessons that vendors of Internet host - software have learned and which a new vendor should consider - seriously. - - 1.2.1 Continuing Internet Evolution - - The enormous growth of the Internet has revealed problems of - management and scaling in a large datagram-based packet - communication system. These problems are being addressed, and - as a result there will be continuing evolution of the - specifications described in this document. These changes will - be carefully planned and controlled, since there is extensive - participation in this planning by the vendors and by the - organizations responsible for operations of the networks. - - - -Internet Engineering Task Force [Page 6] - - - - -RFC1123 INTRODUCTION October 1989 - - - Development, evolution, and revision are characteristic of - computer network protocols today, and this situation will - persist for some years. 
A vendor who develops computer - communication software for the Internet protocol suite (or any - other protocol suite!) and then fails to maintain and update - that software for changing specifications is going to leave a - trail of unhappy customers. The Internet is a large - communication network, and the users are in constant contact - through it. Experience has shown that knowledge of - deficiencies in vendor software propagates quickly through the - Internet technical community. - - 1.2.2 Robustness Principle - - At every layer of the protocols, there is a general rule whose - application can lead to enormous benefits in robustness and - interoperability: - - "Be liberal in what you accept, and - conservative in what you send" - - Software should be written to deal with every conceivable - error, no matter how unlikely; sooner or later a packet will - come in with that particular combination of errors and - attributes, and unless the software is prepared, chaos can - ensue. In general, it is best to assume that the network is - filled with malevolent entities that will send in packets - designed to have the worst possible effect. This assumption - will lead to suitable protective design, although the most - serious problems in the Internet have been caused by - unenvisaged mechanisms triggered by low-probability events; - mere human malice would never have taken so devious a course! - - Adaptability to change must be designed into all levels of - Internet host software. As a simple example, consider a - protocol specification that contains an enumeration of values - for a particular header field -- e.g., a type field, a port - number, or an error code; this enumeration must be assumed to - be incomplete. Thus, if a protocol specification defines four - possible error codes, the software must not break when a fifth - code shows up. An undefined code might be logged (see below), - but it must not cause a failure. 
- - The second part of the principle is almost as important: - software on other hosts may contain deficiencies that make it - unwise to exploit legal but obscure protocol features. It is - unwise to stray far from the obvious and simple, lest untoward - effects result elsewhere. A corollary of this is "watch out - - - -Internet Engineering Task Force [Page 7] - - - - -RFC1123 INTRODUCTION October 1989 - - - for misbehaving hosts"; host software should be prepared, not - just to survive other misbehaving hosts, but also to cooperate - to limit the amount of disruption such hosts can cause to the - shared communication facility. - - 1.2.3 Error Logging - - The Internet includes a great variety of host and gateway - systems, each implementing many protocols and protocol layers, - and some of these contain bugs and mis-features in their - Internet protocol software. As a result of complexity, - diversity, and distribution of function, the diagnosis of user - problems is often very difficult. - - Problem diagnosis will be aided if host implementations include - a carefully designed facility for logging erroneous or - "strange" protocol events. It is important to include as much - diagnostic information as possible when an error is logged. In - particular, it is often useful to record the header(s) of a - packet that caused an error. However, care must be taken to - ensure that error logging does not consume prohibitive amounts - of resources or otherwise interfere with the operation of the - host. - - There is a tendency for abnormal but harmless protocol events - to overflow error logging files; this can be avoided by using a - "circular" log, or by enabling logging only while diagnosing a - known failure. It may be useful to filter and count duplicate - successive messages. 
One strategy that seems to work well is: - (1) always count abnormalities and make such counts accessible - through the management protocol (see Section 6.3); and (2) - allow the logging of a great variety of events to be - selectively enabled. For example, it might be useful to be able - to "log everything" or to "log everything for host X". - - Note that different managements may have differing policies - about the amount of error logging that they want normally - enabled in a host. Some will say, "if it doesn't hurt me, I - don't want to know about it", while others will want to take a - more watchful and aggressive attitude about detecting and - removing protocol abnormalities. - - 1.2.4 Configuration - - It would be ideal if a host implementation of the Internet - protocol suite could be entirely self-configuring. This would - allow the whole suite to be implemented in ROM or cast into - silicon, it would simplify diskless workstations, and it would - - - -Internet Engineering Task Force [Page 8] - - - - -RFC1123 INTRODUCTION October 1989 - - - be an immense boon to harried LAN administrators as well as - system vendors. We have not reached this ideal; in fact, we - are not even close. - - At many points in this document, you will find a requirement - that a parameter be a configurable option. There are several - different reasons behind such requirements. In a few cases, - there is current uncertainty or disagreement about the best - value, and it may be necessary to update the recommended value - in the future. In other cases, the value really depends on - external factors -- e.g., the size of the host and the - distribution of its communication load, or the speeds and - topology of nearby networks -- and self-tuning algorithms are - unavailable and may be insufficient. In some cases, - configurability is needed because of administrative - requirements.
- - Finally, some configuration options are required to communicate - with obsolete or incorrect implementations of the protocols, - distributed without sources, that unfortunately persist in many - parts of the Internet. To make correct systems coexist with - these faulty systems, administrators often have to "mis- - configure" the correct systems. This problem will correct - itself gradually as the faulty systems are retired, but it - cannot be ignored by vendors. - - When we say that a parameter must be configurable, we do not - intend to require that its value be explicitly read from a - configuration file at every boot time. We recommend that - implementors set up a default for each parameter, so a - configuration file is only necessary to override those defaults - that are inappropriate in a particular installation. Thus, the - configurability requirement is an assurance that it will be - POSSIBLE to override the default when necessary, even in a - binary-only or ROM-based product. - - This document requires a particular value for such defaults in - some cases. The choice of default is a sensitive issue when - the configuration item controls the accommodation to existing - faulty systems. If the Internet is to converge successfully to - complete interoperability, the default values built into - implementations must implement the official protocol, not - "mis-configurations" to accommodate faulty implementations. - Although marketing considerations have led some vendors to - choose mis-configuration defaults, we urge vendors to choose - defaults that will conform to the standard. - - Finally, we note that a vendor needs to provide adequate - - - -Internet Engineering Task Force [Page 9] - - - - -RFC1123 INTRODUCTION October 1989 - - - documentation on all configuration parameters, their limits and - effects. 
- - - 1.3 Reading this Document - - 1.3.1 Organization - - In general, each major section is organized into the following - subsections: - - (1) Introduction - - (2) Protocol Walk-Through -- considers the protocol - specification documents section-by-section, correcting - errors, stating requirements that may be ambiguous or - ill-defined, and providing further clarification or - explanation. - - (3) Specific Issues -- discusses protocol design and - implementation issues that were not included in the walk- - through. - - (4) Interfaces -- discusses the service interface to the next - higher layer. - - (5) Summary -- contains a summary of the requirements of the - section. - - Under many of the individual topics in this document, there is - parenthetical material labeled "DISCUSSION" or - "IMPLEMENTATION". This material is intended to give - clarification and explanation of the preceding requirements - text. It also includes some suggestions on possible future - directions or developments. The implementation material - contains suggested approaches that an implementor may want to - consider. - - The summary sections are intended to be guides and indexes to - the text, but are necessarily cryptic and incomplete. The - summaries should never be used or referenced separately from - the complete RFC. - - 1.3.2 Requirements - - In this document, the words that are used to define the - significance of each particular requirement are capitalized. - These words are: - - - -Internet Engineering Task Force [Page 10] - - - - -RFC1123 INTRODUCTION October 1989 - - - * "MUST" - - This word or the adjective "REQUIRED" means that the item - is an absolute requirement of the specification. - - * "SHOULD" - - This word or the adjective "RECOMMENDED" means that there - may exist valid reasons in particular circumstances to - ignore this item, but the full implications should be - understood and the case carefully weighed before choosing - a different course. 
- - * "MAY" - - This word or the adjective "OPTIONAL" means that this item - is truly optional. One vendor may choose to include the - item because a particular marketplace requires it or - because it enhances the product, for example; another - vendor may omit the same item. - - - An implementation is not compliant if it fails to satisfy one - or more of the MUST requirements for the protocols it - implements. An implementation that satisfies all the MUST and - all the SHOULD requirements for its protocols is said to be - "unconditionally compliant"; one that satisfies all the MUST - requirements but not all the SHOULD requirements for its - protocols is said to be "conditionally compliant". - - 1.3.3 Terminology - - This document uses the following technical terms: - - Segment - A segment is the unit of end-to-end transmission in the - TCP protocol. A segment consists of a TCP header followed - by application data. A segment is transmitted by - encapsulation in an IP datagram. - - Message - This term is used by some application layer protocols - (particularly SMTP) for an application data unit. - - Datagram - A [UDP] datagram is the unit of end-to-end transmission in - the UDP protocol. - - - - -Internet Engineering Task Force [Page 11] - - - - -RFC1123 INTRODUCTION October 1989 - - - Multihomed - A host is said to be multihomed if it has multiple IP - addresses to connected networks. - - - - 1.4 Acknowledgments - - This document incorporates contributions and comments from a large - group of Internet protocol experts, including representatives of - university and research labs, vendors, and government agencies. - It was assembled primarily by the Host Requirements Working Group - of the Internet Engineering Task Force (IETF). 
- - The Editor would especially like to acknowledge the tireless - dedication of the following people, who attended many long - meetings and generated 3 million bytes of electronic mail over the - past 18 months in pursuit of this document: Philip Almquist, Dave - Borman (Cray Research), Noel Chiappa, Dave Crocker (DEC), Steve - Deering (Stanford), Mike Karels (Berkeley), Phil Karn (Bellcore), - John Lekashman (NASA), Charles Lynn (BBN), Keith McCloghrie (TWG), - Paul Mockapetris (ISI), Thomas Narten (Purdue), Craig Partridge - (BBN), Drew Perkins (CMU), and James Van Bokkelen (FTP Software). - - In addition, the following people made major contributions to the - effort: Bill Barns (Mitre), Steve Bellovin (AT&T), Mike Brescia - (BBN), Ed Cain (DCA), Annette DeSchon (ISI), Martin Gross (DCA), - Phill Gross (NRI), Charles Hedrick (Rutgers), Van Jacobson (LBL), - John Klensin (MIT), Mark Lottor (SRI), Milo Medin (NASA), Bill - Melohn (Sun Microsystems), Greg Minshall (Kinetics), Jeff Mogul - (DEC), John Mullen (CMC), Jon Postel (ISI), John Romkey (Epilogue - Technology), and Mike StJohns (DCA). The following also made - significant contributions to particular areas: Eric Allman - (Berkeley), Rob Austein (MIT), Art Berggreen (ACC), Keith Bostic - (Berkeley), Vint Cerf (NRI), Wayne Hathaway (NASA), Matt Korn - (IBM), Erik Naggum (Naggum Software, Norway), Robert Ullmann - (Prime Computer), David Waitzman (BBN), Frank Wancho (USA), Arun - Welch (Ohio State), Bill Westfield (Cisco), and Rayan Zachariassen - (Toronto). - - We are grateful to all, including any contributors who may have - been inadvertently omitted from this list. - - - - - - - - - -Internet Engineering Task Force [Page 12] - - - - -RFC1123 APPLICATIONS LAYER -- GENERAL October 1989 - - -2. GENERAL ISSUES - - This section contains general requirements that may be applicable to - all application-layer protocols. 
- - 2.1 Host Names and Numbers - - The syntax of a legal Internet host name was specified in RFC-952 - [DNS:4]. One aspect of host name syntax is hereby changed: the - restriction on the first character is relaxed to allow either a - letter or a digit. Host software MUST support this more liberal - syntax. - - Host software MUST handle host names of up to 63 characters and - SHOULD handle host names of up to 255 characters. - - Whenever a user inputs the identity of an Internet host, it SHOULD - be possible to enter either (1) a host domain name or (2) an IP - address in dotted-decimal ("#.#.#.#") form. The host SHOULD check - the string syntactically for a dotted-decimal number before - looking it up in the Domain Name System. - - DISCUSSION: - This last requirement is not intended to specify the complete - syntactic form for entering a dotted-decimal host number; - that is considered to be a user-interface issue. For - example, a dotted-decimal number must be enclosed within - "[ ]" brackets for SMTP mail (see Section 5.2.17). This - notation could be made universal within a host system, - simplifying the syntactic checking for a dotted-decimal - number. - - If a dotted-decimal number can be entered without such - identifying delimiters, then a full syntactic check must be - made, because a segment of a host domain name is now allowed - to begin with a digit and could legally be entirely numeric - (see Section 6.1.2.4). However, a valid host name can never - have the dotted-decimal form #.#.#.#, since at least the - highest-level component label will be alphabetic. - - 2.2 Using Domain Name Service - - Host domain names MUST be translated to IP addresses as described - in Section 6.1. - - Applications using domain name services MUST be able to cope with - soft error conditions. 
Applications MUST wait a reasonable - interval between successive retries due to a soft error, and MUST - - - -Internet Engineering Task Force [Page 13] - - - - -RFC1123 APPLICATIONS LAYER -- GENERAL October 1989 - - - allow for the possibility that network problems may deny service - for hours or even days. - - An application SHOULD NOT rely on the ability to locate a WKS - record containing an accurate listing of all services at a - particular host address, since the WKS RR type is not often used - by Internet sites. To confirm that a service is present, simply - attempt to use it. - - 2.3 Applications on Multihomed hosts - - When the remote host is multihomed, the name-to-address - translation will return a list of alternative IP addresses. As - specified in Section 6.1.3.4, this list should be in order of - decreasing preference. Application protocol implementations - SHOULD be prepared to try multiple addresses from the list until - success is obtained. More specific requirements for SMTP are - given in Section 5.3.4. - - When the local host is multihomed, a UDP-based request/response - application SHOULD send the response with an IP source address - that is the same as the specific destination address of the UDP - request datagram. The "specific destination address" is defined - in the "IP Addressing" section of the companion RFC [INTRO:1]. - - Similarly, a server application that opens multiple TCP - connections to the same client SHOULD use the same local IP - address for all. - - 2.4 Type-of-Service - - Applications MUST select appropriate TOS values when they invoke - transport layer services, and these values MUST be configurable. - Note that a TOS value contains 5 bits, of which only the most- - significant 3 bits are currently defined; the other two bits MUST - be zero. - - DISCUSSION: - As gateway algorithms are developed to implement Type-of- - Service, the recommended values for various application - protocols may change. 
In addition, it is likely that - particular combinations of users and Internet paths will want - non-standard TOS values. For these reasons, the TOS values - must be configurable. - - See the latest version of the "Assigned Numbers" RFC - [INTRO:5] for the recommended TOS values for the major - application protocols. - - - -Internet Engineering Task Force [Page 14] - - - - -RFC1123 APPLICATIONS LAYER -- GENERAL October 1989 - - - 2.5 GENERAL APPLICATION REQUIREMENTS SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e ------------------------------------------------|----------|-|-|-|-|-|-- - | | | | | | | -User interfaces: | | | | | | | - Allow host name to begin with digit |2.1 |x| | | | | - Host names of up to 63 characters |2.1 |x| | | | | - Host names of up to 255 characters |2.1 | |x| | | | - Support dotted-decimal host numbers |2.1 | |x| | | | - Check syntactically for dotted-dec first |2.1 | |x| | | | - | | | | | | | -Map domain names per Section 6.1 |2.2 |x| | | | | -Cope with soft DNS errors |2.2 |x| | | | | - Reasonable interval between retries |2.2 |x| | | | | - Allow for long outages |2.2 |x| | | | | -Expect WKS records to be available |2.2 | | | |x| | - | | | | | | | -Try multiple addr's for remote multihomed host |2.3 | |x| | | | -UDP reply src addr is specific dest of request |2.3 | |x| | | | -Use same IP addr for related TCP connections |2.3 | |x| | | | -Specify appropriate TOS values |2.4 |x| | | | | - TOS values configurable |2.4 |x| | | | | - Unused TOS bits zero |2.4 |x| | | | | - | | | | | | | - | | | | | | | - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 15] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - -3. REMOTE LOGIN -- TELNET PROTOCOL - - 3.1 INTRODUCTION - - Telnet is the standard Internet application protocol for remote - login.
It provides the encoding rules to link a user's - keyboard/display on a client ("user") system with a command - interpreter on a remote server system. A subset of the Telnet - protocol is also incorporated within other application protocols, - e.g., FTP and SMTP. - - Telnet uses a single TCP connection, and its normal data stream - ("Network Virtual Terminal" or "NVT" mode) is 7-bit ASCII with - escape sequences to embed control functions. Telnet also allows - the negotiation of many optional modes and functions. - - The primary Telnet specification is to be found in RFC-854 - [TELNET:1], while the options are defined in many other RFCs; see - Section 7 for references. - - 3.2 PROTOCOL WALK-THROUGH - - 3.2.1 Option Negotiation: RFC-854, pp. 2-3 - - Every Telnet implementation MUST include option negotiation and - subnegotiation machinery [TELNET:2]. - - A host MUST carefully follow the rules of RFC-854 to avoid - option-negotiation loops. A host MUST refuse (i.e, reply - WONT/DONT to a DO/WILL) an unsupported option. Option - negotiation SHOULD continue to function (even if all requests - are refused) throughout the lifetime of a Telnet connection. - - If all option negotiations fail, a Telnet implementation MUST - default to, and support, an NVT. - - DISCUSSION: - Even though more sophisticated "terminals" and supporting - option negotiations are becoming the norm, all - implementations must be prepared to support an NVT for any - user-server communication. - - 3.2.2 Telnet Go-Ahead Function: RFC-854, p. 5, and RFC-858 - - On a host that never sends the Telnet command Go Ahead (GA), - the Telnet Server MUST attempt to negotiate the Suppress Go - Ahead option (i.e., send "WILL Suppress Go Ahead"). A User or - Server Telnet MUST always accept negotiation of the Suppress Go - - - -Internet Engineering Task Force [Page 16] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - Ahead option. 
- - When it is driving a full-duplex terminal for which GA has no - meaning, a User Telnet implementation MAY ignore GA commands. - - DISCUSSION: - Half-duplex ("locked-keyboard") line-at-a-time terminals - for which the Go-Ahead mechanism was designed have largely - disappeared from the scene. It turned out to be difficult - to implement sending the Go-Ahead signal in many operating - systems, even some systems that support native half-duplex - terminals. The difficulty is typically that the Telnet - server code does not have access to information about - whether the user process is blocked awaiting input from - the Telnet connection, i.e., it cannot reliably determine - when to send a GA command. Therefore, most Telnet Server - hosts do not send GA commands. - - The effect of the rules in this section is to allow either - end of a Telnet connection to veto the use of GA commands. - - There is a class of half-duplex terminals that is still - commercially important: "data entry terminals," which - interact in a full-screen manner. However, supporting - data entry terminals using the Telnet protocol does not - require the Go Ahead signal; see Section 3.3.2. - - 3.2.3 Control Functions: RFC-854, pp. 7-8 - - The list of Telnet commands has been extended to include EOR - (End-of-Record), with code 239 [TELNET:9]. - - Both User and Server Telnets MAY support the control functions - EOR, EC, EL, and Break, and MUST support AO, AYT, DM, IP, NOP, - SB, and SE. - - A host MUST be able to receive and ignore any Telnet control - functions that it does not support. - - DISCUSSION: - Note that a Server Telnet is required to support the - Telnet IP (Interrupt Process) function, even if the server - host has an equivalent in-stream function (e.g., Control-C - in many systems). The Telnet IP function may be stronger - than an in-stream interrupt command, because of the out- - of-band effect of TCP urgent data. 
- - The EOR control function may be used to delimit the - - - -Internet Engineering Task Force [Page 17] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - stream. An important application is data entry terminal - support (see Section 3.3.2). There was concern that since - EOR had not been defined in RFC-854, a host that was not - prepared to correctly ignore unknown Telnet commands might - crash if it received an EOR. To protect such hosts, the - End-of-Record option [TELNET:9] was introduced; however, a - properly implemented Telnet program will not require this - protection. - - 3.2.4 Telnet "Synch" Signal: RFC-854, pp. 8-10 - - When it receives "urgent" TCP data, a User or Server Telnet - MUST discard all data except Telnet commands until the DM (and - end of urgent) is reached. - - When it sends Telnet IP (Interrupt Process), a User Telnet - SHOULD follow it by the Telnet "Synch" sequence, i.e., send as - TCP urgent data the sequence "IAC IP IAC DM". The TCP urgent - pointer points to the DM octet. - - When it receives a Telnet IP command, a Server Telnet MAY send - a Telnet "Synch" sequence back to the user, to flush the output - stream. The choice ought to be consistent with the way the - server operating system behaves when a local user interrupts a - process. - - When it receives a Telnet AO command, a Server Telnet MUST send - a Telnet "Synch" sequence back to the user, to flush the output - stream. - - A User Telnet SHOULD have the capability of flushing output - when it sends a Telnet IP; see also Section 3.4.5. - - DISCUSSION: - There are three possible ways for a User Telnet to flush - the stream of server output data: - - (1) Send AO after IP. - - This will cause the server host to send a "flush- - buffered-output" signal to its operating system. - However, the AO may not take effect locally, i.e., - stop terminal output at the User Telnet end, until - the Server Telnet has received and processed the AO - and has sent back a "Synch". 
- - (2) Send DO TIMING-MARK [TELNET:7] after IP, and discard - all output locally until a WILL/WONT TIMING-MARK is - - - -Internet Engineering Task Force [Page 18] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - received from the Server Telnet. - - Since the DO TIMING-MARK will be processed after the - IP at the server, the reply to it should be in the - right place in the output data stream. However, the - TIMING-MARK will not send a "flush buffered output" - signal to the server operating system. Whether or - not this is needed is dependent upon the server - system. - - (3) Do both. - - The best method is not entirely clear, since it must - accommodate a number of existing server hosts that do not - follow the Telnet standards in various ways. The safest - approach is probably to provide a user-controllable option - to select (1), (2), or (3). - - 3.2.5 NVT Printer and Keyboard: RFC-854, p. 11 - - In NVT mode, a Telnet SHOULD NOT send characters with the - high-order bit 1, and MUST NOT send it as a parity bit. - Implementations that pass the high-order bit to applications - SHOULD negotiate binary mode (see Section 3.2.6). - - - DISCUSSION: - Implementors should be aware that a strict reading of - RFC-854 allows a client or server expecting NVT ASCII to - ignore characters with the high-order bit set. In - general, binary mode is expected to be used for - transmission of an extended (beyond 7-bit) character set - with Telnet. - - However, there exist applications that really need an 8- - bit NVT mode, which is currently not defined, and these - existing applications do set the high-order bit during - part or all of the life of a Telnet connection. Note that - binary mode is not the same as 8-bit NVT mode, since - binary mode turns off end-of-line processing. For this - reason, the requirements on the high-order bit are stated - as SHOULD, not MUST. 
- - RFC-854 defines a minimal set of properties of a "network - virtual terminal" or NVT; this is not meant to preclude - additional features in a real terminal. A Telnet - connection is fully transparent to all 7-bit ASCII - characters, including arbitrary ASCII control characters. - - - -Internet Engineering Task Force [Page 19] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - For example, a terminal might support full-screen commands - coded as ASCII escape sequences; a Telnet implementation - would pass these sequences as uninterpreted data. Thus, - an NVT should not be conceived as a terminal type of a - highly-restricted device. - - 3.2.6 Telnet Command Structure: RFC-854, p. 13 - - Since options may appear at any point in the data stream, a - Telnet escape character (known as IAC, with the value 255) to - be sent as data MUST be doubled. - - 3.2.7 Telnet Binary Option: RFC-856 - - When the Binary option has been successfully negotiated, - arbitrary 8-bit characters are allowed. However, the data - stream MUST still be scanned for IAC characters, any embedded - Telnet commands MUST be obeyed, and data bytes equal to IAC - MUST be doubled. Other character processing (e.g., replacing - CR by CR NUL or by CR LF) MUST NOT be done. In particular, - there is no end-of-line convention (see Section 3.3.1) in - binary mode. - - DISCUSSION: - The Binary option is normally negotiated in both - directions, to change the Telnet connection from NVT mode - to "binary mode". - - The sequence IAC EOR can be used to delimit blocks of data - within a binary-mode Telnet stream. - - 3.2.8 Telnet Terminal-Type Option: RFC-1091 - - The Terminal-Type option MUST use the terminal type names - officially defined in the Assigned Numbers RFC [INTRO:5], when - they are available for the particular terminal. However, the - receiver of a Terminal-Type option MUST accept any name. 
- - DISCUSSION: - RFC-1091 [TELNET:10] updates an earlier version of the - Terminal-Type option defined in RFC-930. The earlier - version allowed a server host capable of supporting - multiple terminal types to learn the type of a particular - client's terminal, assuming that each physical terminal - had an intrinsic type. However, today a "terminal" is - often really a terminal emulator program running in a PC, - perhaps capable of emulating a range of terminal types. - Therefore, RFC-1091 extends the specification to allow a - - - -Internet Engineering Task Force [Page 20] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - more general terminal-type negotiation between User and - Server Telnets. - - 3.3 SPECIFIC ISSUES - - 3.3.1 Telnet End-of-Line Convention - - The Telnet protocol defines the sequence CR LF to mean "end- - of-line". For terminal input, this corresponds to a command- - completion or "end-of-line" key being pressed on a user - terminal; on an ASCII terminal, this is the CR key, but it may - also be labelled "Return" or "Enter". - - When a Server Telnet receives the Telnet end-of-line sequence - CR LF as input from a remote terminal, the effect MUST be the - same as if the user had pressed the "end-of-line" key on a - local terminal. On server hosts that use ASCII, in particular, - receipt of the Telnet sequence CR LF must cause the same effect - as a local user pressing the CR key on a local terminal. Thus, - CR LF and CR NUL MUST have the same effect on an ASCII server - host when received as input over a Telnet connection. - - A User Telnet MUST be able to send any of the forms: CR LF, CR - NUL, and LF. A User Telnet on an ASCII host SHOULD have a - user-controllable mode to send either CR LF or CR NUL when the - user presses the "end-of-line" key, and CR LF SHOULD be the - default. 
- - The Telnet end-of-line sequence CR LF MUST be used to send - Telnet data that is not terminal-to-computer (e.g., for Server - Telnet sending output, or the Telnet protocol incorporated in - another application protocol). - - DISCUSSION: - To allow interoperability between arbitrary Telnet clients - and servers, the Telnet protocol defined a standard - representation for a line terminator. Since the ASCII - character set includes no explicit end-of-line character, - systems have chosen various representations, e.g., CR, LF, - and the sequence CR LF. The Telnet protocol chose the CR - LF sequence as the standard for network transmission. - - Unfortunately, the Telnet protocol specification in RFC- - 854 [TELNET:1] has turned out to be somewhat ambiguous on - what character(s) should be sent from client to server for - the "end-of-line" key. The result has been a massive and - continuing interoperability headache, made worse by - various faulty implementations of both User and Server - - - -Internet Engineering Task Force [Page 21] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - Telnets. - - Although the Telnet protocol is based on a perfectly - symmetric model, in a remote login session the role of the - user at a terminal differs from the role of the server - host. For example, RFC-854 defines the meaning of CR, LF, - and CR LF as output from the server, but does not specify - what the User Telnet should send when the user presses the - "end-of-line" key on the terminal; this turns out to be - the point at issue. - - When a user presses the "end-of-line" key, some User - Telnet implementations send CR LF, while others send CR - NUL (based on a different interpretation of the same - sentence in RFC-854). These will be equivalent for a - correctly-implemented ASCII server host, as discussed - above. For other servers, a mode in the User Telnet is - needed.
- - The existence of User Telnets that send only CR NUL when - CR is pressed creates a dilemma for non-ASCII hosts: they - can either treat CR NUL as equivalent to CR LF in input, - thus precluding the possibility of entering a "bare" CR, - or else lose complete interworking. - - Suppose a user on host A uses Telnet to log into a server - host B, and then execute B's User Telnet program to log - into server host C. It is desirable for the Server/User - Telnet combination on B to be as transparent as possible, - i.e., to appear as if A were connected directly to C. In - particular, correct implementation will make B transparent - to Telnet end-of-line sequences, except that CR LF may be - translated to CR NUL or vice versa. - - IMPLEMENTATION: - To understand Telnet end-of-line issues, one must have at - least a general model of the relationship of Telnet to the - local operating system. The Server Telnet process is - typically coupled into the terminal driver software of the - operating system as a pseudo-terminal. A Telnet end-of- - line sequence received by the Server Telnet must have the - same effect as pressing the end-of-line key on a real - locally-connected terminal. - - Operating systems that support interactive character-at- - a-time applications (e.g., editors) typically have two - internal modes for their terminal I/O: a formatted mode, - in which local conventions for end-of-line and other - - - -Internet Engineering Task Force [Page 22] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - formatting rules have been applied to the data stream, and - a "raw" mode, in which the application has direct access - to every character as it was entered. A Server Telnet - must be implemented in such a way that these modes have - the same effect for remote as for local terminals. For - example, suppose a CR LF or CR NUL is received by the - Server Telnet on an ASCII host. 
In raw mode, a CR - character is passed to the application; in formatted mode, - the local system's end-of-line convention is used. - - 3.3.2 Data Entry Terminals - - DISCUSSION: - In addition to the line-oriented and character-oriented - ASCII terminals for which Telnet was designed, there are - several families of video display terminals that are - sometimes known as "data entry terminals" or DETs. The - IBM 3270 family is a well-known example. - - Two Internet protocols have been designed to support - generic DETs: SUPDUP [TELNET:16, TELNET:17], and the DET - option [TELNET:18, TELNET:19]. The DET option drives a - data entry terminal over a Telnet connection using (sub-) - negotiation. SUPDUP is a completely separate terminal - protocol, which can be entered from Telnet by negotiation. - Although both SUPDUP and the DET option have been used - successfully in particular environments, neither has - gained general acceptance or wide implementation. - - A different approach to DET interaction has been developed - for supporting the IBM 3270 family through Telnet, - although the same approach would be applicable to any DET. - The idea is to enter a "native DET" mode, in which the - native DET input/output stream is sent as binary data. - The Telnet EOR command is used to delimit logical records - (e.g., "screens") within this binary stream. - - IMPLEMENTATION: - The rules for entering and leaving native DET mode are as - follows: - - o The Server uses the Terminal-Type option [TELNET:10] - to learn that the client is a DET. - - o It is conventional, but not required, that both ends - negotiate the EOR option [TELNET:9]. - - o Both ends negotiate the Binary option [TELNET:3] to - - - -Internet Engineering Task Force [Page 23] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - enter native DET mode. - - o When either end negotiates out of binary mode, the - other end does too, and the mode then reverts to - normal NVT. 
- - - 3.3.3 Option Requirements - - Every Telnet implementation MUST support the Binary option - [TELNET:3] and the Suppress Go Ahead option [TELNET:5], and - SHOULD support the Echo [TELNET:4], Status [TELNET:6], End-of- - Record [TELNET:9], and Extended Options List [TELNET:8] - options. - - A User or Server Telnet SHOULD support the Window Size Option - [TELNET:12] if the local operating system provides the - corresponding capability. - - DISCUSSION: - Note that the End-of-Record option only signifies that a - Telnet can receive a Telnet EOR without crashing; - therefore, every Telnet ought to be willing to accept - negotiation of the End-of-Record option. See also the - discussion in Section 3.2.3. - - 3.3.4 Option Initiation - - When the Telnet protocol is used in a client/server situation, - the server SHOULD initiate negotiation of the terminal - interaction mode it expects. - - DISCUSSION: - The Telnet protocol was defined to be perfectly - symmetrical, but its application is generally asymmetric. - Remote login has been known to fail because NEITHER side - initiated negotiation of the required non-default terminal - modes. It is generally the server that determines the - preferred mode, so the server needs to initiate the - negotiation; since the negotiation is symmetric, the user - can also initiate it. - - A client (User Telnet) SHOULD provide a means for users to - enable and disable the initiation of option negotiation. - - DISCUSSION: - A user sometimes needs to connect to an application - service (e.g., FTP or SMTP) that uses Telnet for its - - - -Internet Engineering Task Force [Page 24] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - control stream but does not support Telnet options. User - Telnet may be used for this purpose if initiation of - option negotiation is disabled. - - 3.3.5 Telnet Linemode Option - - DISCUSSION: - An important new Telnet option, LINEMODE [TELNET:12], has - been proposed. 
The LINEMODE option provides a standard - way for a User Telnet and a Server Telnet to agree that - the client rather than the server will perform terminal - character processing. When the client has prepared a - complete line of text, it will send it to the server in - (usually) one TCP packet. This option will greatly - decrease the packet cost of Telnet sessions and will also - give much better user response over congested or long- - delay networks. - - The LINEMODE option allows dynamic switching between local - and remote character processing. For example, the Telnet - connection will automatically negotiate into single- - character mode while a full screen editor is running, and - then return to linemode when the editor is finished. - - We expect that when this RFC is released, hosts should - implement the client side of this option, and may - implement the server side of this option. To properly - implement the server side, the server needs to be able to - tell the local system not to do any input character - processing, but to remember its current terminal state and - notify the Server Telnet process whenever the state - changes. This will allow password echoing and full screen - editors to be handled properly, for example. - - 3.4 TELNET/USER INTERFACE - - 3.4.1 Character Set Transparency - - User Telnet implementations SHOULD be able to send or receive - any 7-bit ASCII character. Where possible, any special - character interpretations by the user host's operating system - SHOULD be bypassed so that these characters can conveniently be - sent and received on the connection. - - Some character value MUST be reserved as "escape to command - mode"; conventionally, doubling this character allows it to be - entered as data. The specific character used SHOULD be user - selectable. 
- - - -Internet Engineering Task Force [Page 25] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - On binary-mode connections, a User Telnet program MAY provide - an escape mechanism for entering arbitrary 8-bit values, if the - host operating system doesn't allow them to be entered directly - from the keyboard. - - IMPLEMENTATION: - The transparency issues are less pressing on servers, but - implementors should take care in dealing with issues like: - masking off parity bits (sent by an older, non-conforming - client) before they reach programs that expect only NVT - ASCII, and properly handling programs that request 8-bit - data streams. - - 3.4.2 Telnet Commands - - A User Telnet program MUST provide a user the capability of - entering any of the Telnet control functions IP, AO, or AYT, - and SHOULD provide the capability of entering EC, EL, and - Break. - - 3.4.3 TCP Connection Errors - - A User Telnet program SHOULD report to the user any TCP errors - that are reported by the transport layer (see "TCP/Application - Layer Interface" section in [INTRO:1]). - - 3.4.4 Non-Default Telnet Contact Port - - A User Telnet program SHOULD allow the user to optionally - specify a non-standard contact port number at the Server Telnet - host. - - 3.4.5 Flushing Output - - A User Telnet program SHOULD provide the user the ability to - specify whether or not output should be flushed when an IP is - sent; see Section 3.2.4. - - For any output flushing scheme that causes the User Telnet to - flush output locally until a Telnet signal is received from the - Server, there SHOULD be a way for the user to manually restore - normal output, in case the Server fails to send the expected - signal. - - - - - - - - -Internet Engineering Task Force [Page 26] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - 3.5. 
TELNET REQUIREMENTS SUMMARY - - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e --------------------------------------------------|--------|-|-|-|-|-|-- - | | | | | | | -Option Negotiation |3.2.1 |x| | | | | - Avoid negotiation loops |3.2.1 |x| | | | | - Refuse unsupported options |3.2.1 |x| | | | | - Negotiation OK anytime on connection |3.2.1 | |x| | | | - Default to NVT |3.2.1 |x| | | | | - Send official name in Term-Type option |3.2.8 |x| | | | | - Accept any name in Term-Type option |3.2.8 |x| | | | | - Implement Binary, Suppress-GA options |3.3.3 |x| | | | | - Echo, Status, EOR, Ext-Opt-List options |3.3.3 | |x| | | | - Implement Window-Size option if appropriate |3.3.3 | |x| | | | - Server initiate mode negotiations |3.3.4 | |x| | | | - User can enable/disable init negotiations |3.3.4 | |x| | | | - | | | | | | | -Go-Aheads | | | | | | | - Non-GA server negotiate SUPPRESS-GA option |3.2.2 |x| | | | | - User or Server accept SUPPRESS-GA option |3.2.2 |x| | | | | - User Telnet ignore GA's |3.2.2 | | |x| | | - | | | | | | | -Control Functions | | | | | | | - Support SE NOP DM IP AO AYT SB |3.2.3 |x| | | | | - Support EOR EC EL Break |3.2.3 | | |x| | | - Ignore unsupported control functions |3.2.3 |x| | | | | - User, Server discard urgent data up to DM |3.2.4 |x| | | | | - User Telnet send "Synch" after IP, AO, AYT |3.2.4 | |x| | | | - Server Telnet reply Synch to IP |3.2.4 | | |x| | | - Server Telnet reply Synch to AO |3.2.4 |x| | | | | - User Telnet can flush output when send IP |3.2.4 | |x| | | | - | | | | | | | -Encoding | | | | | | | - Send high-order bit in NVT mode |3.2.5 | | | |x| | - Send high-order bit as parity bit |3.2.5 | | | | |x| - Negot. BINARY if pass high-ord.
bit to applic |3.2.5 | |x| | | | - Always double IAC data byte |3.2.6 |x| | | | | - - - -Internet Engineering Task Force [Page 27] - - - - -RFC1123 REMOTE LOGIN -- TELNET October 1989 - - - Double IAC data byte in binary mode |3.2.7 |x| | | | | - Obey Telnet cmds in binary mode |3.2.7 |x| | | | | - End-of-line, CR NUL in binary mode |3.2.7 | | | | |x| - | | | | | | | -End-of-Line | | | | | | | - EOL at Server same as local end-of-line |3.3.1 |x| | | | | - ASCII Server accept CR LF or CR NUL for EOL |3.3.1 |x| | | | | - User Telnet able to send CR LF, CR NUL, or LF |3.3.1 |x| | | | | - ASCII user able to select CR LF/CR NUL |3.3.1 | |x| | | | - User Telnet default mode is CR LF |3.3.1 | |x| | | | - Non-interactive uses CR LF for EOL |3.3.1 |x| | | | | - | | | | | | | -User Telnet interface | | | | | | | - Input & output all 7-bit characters |3.4.1 | |x| | | | - Bypass local op sys interpretation |3.4.1 | |x| | | | - Escape character |3.4.1 |x| | | | | - User-settable escape character |3.4.1 | |x| | | | - Escape to enter 8-bit values |3.4.1 | | |x| | | - Can input IP, AO, AYT |3.4.2 |x| | | | | - Can input EC, EL, Break |3.4.2 | |x| | | | - Report TCP connection errors to user |3.4.3 | |x| | | | - Optional non-default contact port |3.4.4 | |x| | | | - Can spec: output flushed when IP sent |3.4.5 | |x| | | | - Can manually restore output mode |3.4.5 | |x| | | | - | | | | | | | - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 28] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - -4. FILE TRANSFER - - 4.1 FILE TRANSFER PROTOCOL -- FTP - - 4.1.1 INTRODUCTION - - The File Transfer Protocol FTP is the primary Internet standard - for file transfer. The current specification is contained in - RFC-959 [FTP:1]. - - FTP uses separate simultaneous TCP connections for control and - for data transfer. The FTP protocol includes many features, - some of which are not commonly implemented. 
However, for every - feature in FTP, there exists at least one implementation. The - minimum implementation defined in RFC-959 was too small, so a - somewhat larger minimum implementation is defined here. - - Internet users have been unnecessarily burdened for years by - deficient FTP implementations. Protocol implementors have - suffered from the erroneous opinion that implementing FTP ought - to be a small and trivial task. This is wrong, because FTP has - a user interface, because it has to deal (correctly) with the - whole variety of communication and operating system errors that - may occur, and because it has to handle the great diversity of - real file systems in the world. - - 4.1.2. PROTOCOL WALK-THROUGH - - 4.1.2.1 LOCAL Type: RFC-959 Section 3.1.1.4 - - An FTP program MUST support TYPE I ("IMAGE" or binary type) - as well as TYPE L 8 ("LOCAL" type with logical byte size 8). - A machine whose memory is organized into m-bit words, where - m is not a multiple of 8, MAY also support TYPE L m. - - DISCUSSION: - The command "TYPE L 8" is often required to transfer - binary data between a machine whose memory is organized - into (e.g.) 36-bit words and a machine with an 8-bit - byte organization. For an 8-bit byte machine, TYPE L 8 - is equivalent to IMAGE. - - "TYPE L m" is sometimes specified to the FTP programs - on two m-bit word machines to ensure the correct - transfer of a native-mode binary file from one machine - to the other. However, this command should have the - same effect on these machines as "TYPE I". - - - - -Internet Engineering Task Force [Page 29] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - 4.1.2.2 Telnet Format Control: RFC-959 Section 3.1.1.5.2 - - A host that makes no distinction between TYPE N and TYPE T - SHOULD implement TYPE T to be identical to TYPE N. - - DISCUSSION: - This provision should ease interoperation with hosts - that do make this distinction. 
- - Many hosts represent text files internally as strings - of ASCII characters, using the embedded ASCII format - effector characters (LF, BS, FF, ...) to control the - format when a file is printed. For such hosts, there - is no distinction between "print" files and other - files. However, systems that use record structured - files typically need a special format for printable - files (e.g., ASA carriage control). For the latter - hosts, FTP allows a choice of TYPE N or TYPE T. - - 4.1.2.3 Page Structure: RFC-959 Section 3.1.2.3 and Appendix I - - Implementation of page structure is NOT RECOMMENDED in - general. However, if a host system does need to implement - FTP for "random access" or "holey" files, it MUST use the - defined page structure format rather than define a new - private FTP format. - - 4.1.2.4 Data Structure Transformations: RFC-959 Section 3.1.2 - - An FTP transformation between record-structure and file- - structure SHOULD be invertible, to the extent possible while - making the result useful on the target host. - - DISCUSSION: - RFC-959 required strict invertibility between record- - structure and file-structure, but in practice, - efficiency and convenience often preclude it. - Therefore, the requirement is being relaxed. There are - two different objectives for transferring a file: - processing it on the target host, or just storage. For - storage, strict invertibility is important. For - processing, the file created on the target host needs - to be in the format expected by application programs on - that host. - - As an example of the conflict, imagine a record- - oriented operating system that requires some data files - to have exactly 80 bytes in each record. While STORing - - - -Internet Engineering Task Force [Page 30] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - a file on such a host, an FTP Server must be able to - pad each line or record to 80 bytes; a later retrieval - of such a file cannot be strictly invertible. 
- - 4.1.2.5 Data Connection Management: RFC-959 Section 3.3 - - A User-FTP that uses STREAM mode SHOULD send a PORT command - to assign a non-default data port before each transfer - command is issued. - - DISCUSSION: - This is required because of the long delay after a TCP - connection is closed until its socket pair can be - reused, to allow multiple transfers during a single FTP - session. Sending a port command can be avoided if a - transfer mode other than stream is used, by leaving the - data transfer connection open between transfers. - - 4.1.2.6 PASV Command: RFC-959 Section 4.1.2 - - A server-FTP MUST implement the PASV command. - - If multiple third-party transfers are to be executed during - the same session, a new PASV command MUST be issued before - each transfer command, to obtain a unique port pair. - - IMPLEMENTATION: - The format of the 227 reply to a PASV command is not - well standardized. In particular, an FTP client cannot - assume that the parentheses shown on page 40 of RFC-959 - will be present (and in fact, Figure 3 on page 43 omits - them). Therefore, a User-FTP program that interprets - the PASV reply must scan the reply for the first digit - of the host and port numbers. - - Note that the host number h1,h2,h3,h4 is the IP address - of the server host that is sending the reply, and that - p1,p2 is a non-default data transfer port that PASV has - assigned. - - 4.1.2.7 LIST and NLST Commands: RFC-959 Section 4.1.3 - - The data returned by an NLST command MUST contain only a - simple list of legal pathnames, such that the server can use - them directly as the arguments of subsequent data transfer - commands for the individual files. - - The data returned by a LIST or NLST command SHOULD use an - - - -Internet Engineering Task Force [Page 31] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - implied TYPE AN, unless the current type is EBCDIC, in which - case an implied TYPE EN SHOULD be used.
- - DISCUSSION: - Many FTP clients support macro-commands that will get - or put files matching a wildcard specification, using - NLST to obtain a list of pathnames. The expansion of - "multiple-put" is local to the client, but "multiple- - get" requires cooperation by the server. - - The implied type for LIST and NLST is designed to - provide compatibility with existing User-FTPs, and in - particular with multiple-get commands. - - 4.1.2.8 SITE Command: RFC-959 Section 4.1.3 - - A Server-FTP SHOULD use the SITE command for non-standard - features, rather than invent new private commands or - unstandardized extensions to existing commands. - - 4.1.2.9 STOU Command: RFC-959 Section 4.1.3 - - The STOU command stores into a uniquely named file. When it - receives an STOU command, a Server-FTP MUST return the - actual file name in the "125 Transfer Starting" or the "150 - Opening Data Connection" message that precedes the transfer - (the 250 reply code mentioned in RFC-959 is incorrect). The - exact format of these messages is hereby defined to be as - follows: - - 125 FILE: pppp - 150 FILE: pppp - - where pppp represents the unique pathname of the file that - will be written. - - 4.1.2.10 Telnet End-of-line Code: RFC-959, Page 34 - - Implementors MUST NOT assume any correspondence between READ - boundaries on the control connection and the Telnet EOL - sequences (CR LF). - - DISCUSSION: - Thus, a server-FTP (or User-FTP) must continue reading - characters from the control connection until a complete - Telnet EOL sequence is encountered, before processing - the command (or response, respectively). Conversely, a - single READ from the control connection may include - - - -Internet Engineering Task Force [Page 32] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - more than one FTP command. - - 4.1.2.11 FTP Replies: RFC-959 Section 4.2, Page 35 - - A Server-FTP MUST send only correctly formatted replies on - the control connection. 
Note that RFC-959 (unlike earlier - versions of the FTP spec) contains no provision for a - "spontaneous" reply message. - - A Server-FTP SHOULD use the reply codes defined in RFC-959 - whenever they apply. However, a server-FTP MAY use a - different reply code when needed, as long as the general - rules of Section 4.2 are followed. When the implementor has - a choice between a 4xx and 5xx reply code, a Server-FTP - SHOULD send a 4xx (temporary failure) code when there is any - reasonable possibility that a failed FTP will succeed a few - hours later. - - A User-FTP SHOULD generally use only the highest-order digit - of a 3-digit reply code for making a procedural decision, to - prevent difficulties when a Server-FTP uses non-standard - reply codes. - - A User-FTP MUST be able to handle multi-line replies. If - the implementation imposes a limit on the number of lines - and if this limit is exceeded, the User-FTP MUST recover, - e.g., by ignoring the excess lines until the end of the - multi-line reply is reached. - - A User-FTP SHOULD NOT interpret a 421 reply code ("Service - not available, closing control connection") specially, but - SHOULD detect closing of the control connection by the - server. - - DISCUSSION: - Server implementations that fail to strictly follow the - reply rules often cause FTP user programs to hang. - Note that RFC-959 resolved ambiguities in the reply - rules found in earlier FTP specifications and must be - followed. - - It is important to choose FTP reply codes that properly - distinguish between temporary and permanent failures, - to allow the successful use of file transfer client - daemons. These programs depend on the reply codes to - decide whether or not to retry a failed transfer; using - a permanent failure code (5xx) for a temporary error - will cause these programs to give up unnecessarily. 
- - - -Internet Engineering Task Force [Page 33] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - When the meaning of a reply matches exactly the text - shown in RFC-959, uniformity will be enhanced by using - the RFC-959 text verbatim. However, a Server-FTP - implementor is encouraged to choose reply text that - conveys specific system-dependent information, when - appropriate. - - 4.1.2.12 Connections: RFC-959 Section 5.2 - - The words "and the port used" in the second paragraph of - this section of RFC-959 are erroneous (historical), and they - should be ignored. - - On a multihomed server host, the default data transfer port - (L-1) MUST be associated with the same local IP address as - the corresponding control connection to port L. - - A user-FTP MUST NOT send any Telnet controls other than - SYNCH and IP on an FTP control connection. In particular, it - MUST NOT attempt to negotiate Telnet options on the control - connection. However, a server-FTP MUST be capable of - accepting and refusing Telnet negotiations (i.e., sending - DONT/WONT). - - DISCUSSION: - Although the RFC says: "Server- and User- processes - should follow the conventions for the Telnet - protocol...[on the control connection]", it is not the - intent that Telnet option negotiation is to be - employed. - - 4.1.2.13 Minimum Implementation; RFC-959 Section 5.1 - - The following commands and options MUST be supported by - every server-FTP and user-FTP, except in cases where the - underlying file system or operating system does not allow or - support a particular command. - - Type: ASCII Non-print, IMAGE, LOCAL 8 - Mode: Stream - Structure: File, Record* - Commands: - USER, PASS, ACCT, - PORT, PASV, - TYPE, MODE, STRU, - RETR, STOR, APPE, - RNFR, RNTO, DELE, - CWD, CDUP, RMD, MKD, PWD, - - - -Internet Engineering Task Force [Page 34] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - LIST, NLST, - SYST, STAT, - HELP, NOOP, QUIT. 
- - *Record structure is REQUIRED only for hosts whose file - systems support record structure. - - DISCUSSION: - Vendors are encouraged to implement a larger subset of - the protocol. For example, there are important - robustness features in the protocol (e.g., Restart, - ABOR, block mode) that would be an aid to some Internet - users but are not widely implemented. - - A host that does not have record structures in its file - system may still accept files with STRU R, recording - the byte stream literally. - - 4.1.3 SPECIFIC ISSUES - - 4.1.3.1 Non-standard Command Verbs - - FTP allows "experimental" commands, whose names begin with - "X". If these commands are subsequently adopted as - standards, there may still be existing implementations using - the "X" form. At present, this is true for the directory - commands: - - RFC-959 "Experimental" - - MKD XMKD - RMD XRMD - PWD XPWD - CDUP XCUP - CWD XCWD - - All FTP implementations SHOULD recognize both forms of these - commands, by simply equating them with extra entries in the - command lookup table. - - IMPLEMENTATION: - A User-FTP can access a server that supports only the - "X" forms by implementing a mode switch, or - automatically using the following procedure: if the - RFC-959 form of one of the above commands is rejected - with a 500 or 502 response code, then try the - experimental form; any other response would be passed - to the user. - - - -Internet Engineering Task Force [Page 35] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - 4.1.3.2 Idle Timeout - - A Server-FTP process SHOULD have an idle timeout, which will - terminate the process and close the control connection if - the server is inactive (i.e., no command or data transfer in - progress) for a long period of time. The idle timeout time - SHOULD be configurable, and the default should be at least 5 - minutes. - - A client FTP process ("User-PI" in RFC-959) will need - timeouts on responses only if it is invoked from a program. 
- - DISCUSSION: - Without a timeout, a Server-FTP process may be left - pending indefinitely if the corresponding client - crashes without closing the control connection. - - 4.1.3.3 Concurrency of Data and Control - - DISCUSSION: - The intent of the designers of FTP was that a user - should be able to send a STAT command at any time while - data transfer was in progress and that the server-FTP - would reply immediately with status -- e.g., the number - of bytes transferred so far. Similarly, an ABOR - command should be possible at any time during a data - transfer. - - Unfortunately, some small-machine operating systems - make such concurrent programming difficult, and some - other implementers seek minimal solutions, so some FTP - implementations do not allow concurrent use of the data - and control connections. Even such a minimal server - must be prepared to accept and defer a STAT or ABOR - command that arrives during data transfer. - - 4.1.3.4 FTP Restart Mechanism - - The description of the 110 reply on pp. 40-41 of RFC-959 is - incorrect; the correct description is as follows. A restart - reply message, sent over the control connection from the - receiving FTP to the User-FTP, has the format: - - 110 MARK ssss = rrrr - - Here: - - * ssss is a text string that appeared in a Restart Marker - - - -Internet Engineering Task Force [Page 36] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - in the data stream and encodes a position in the - sender's file system; - - * rrrr encodes the corresponding position in the - receiver's file system. - - The encoding, which is specific to a particular file system - and network implementation, is always generated and - interpreted by the same system, either sender or receiver. - - When an FTP that implements restart receives a Restart - Marker in the data stream, it SHOULD force the data to that - point to be written to stable storage before encoding the - corresponding position rrrr. 
An FTP sending Restart Markers - MUST NOT assume that 110 replies will be returned - synchronously with the data, i.e., it must not await a 110 - reply before sending more data. - - Two new reply codes are hereby defined for errors - encountered in restarting a transfer: - - 554 Requested action not taken: invalid REST parameter. - - A 554 reply may result from a FTP service command that - follows a REST command. The reply indicates that the - existing file at the Server-FTP cannot be repositioned - as specified in the REST. - - 555 Requested action not taken: type or stru mismatch. - - A 555 reply may result from an APPE command or from any - FTP service command following a REST command. The - reply indicates that there is some mismatch between the - current transfer parameters (type and stru) and the - attributes of the existing file. - - DISCUSSION: - Note that the FTP Restart mechanism requires that Block - or Compressed mode be used for data transfer, to allow - the Restart Markers to be included within the data - stream. The frequency of Restart Markers can be low. - - Restart Markers mark a place in the data stream, but - the receiver may be performing some transformation on - the data as it is stored into stable storage. In - general, the receiver's encoding must include any state - information necessary to restart this transformation at - any point of the FTP data stream. For example, in TYPE - - - -Internet Engineering Task Force [Page 37] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - A transfers, some receiver hosts transform CR LF - sequences into a single LF character on disk. If a - Restart Marker happens to fall between CR and LF, the - receiver must encode in rrrr that the transfer must be - restarted in a "CR has been seen and discarded" state. - - Note that the Restart Marker is required to be encoded - as a string of printable ASCII characters, regardless - of the type of the data. 
- - RFC-959 says that restart information is to be returned - "to the user". This should not be taken literally. In - general, the User-FTP should save the restart - information (ssss,rrrr) in stable storage, e.g., append - it to a restart control file. An empty restart control - file should be created when the transfer first starts - and deleted automatically when the transfer completes - successfully. It is suggested that this file have a - name derived in an easily-identifiable manner from the - name of the file being transferred and the remote host - name; this is analogous to the means used by many text - editors for naming "backup" files. - - There are three cases for FTP restart. - - (1) User-to-Server Transfer - - The User-FTP puts Restart Markers at - convenient places in the data stream. When the - Server-FTP receives a Marker, it writes all prior - data to disk, encodes its file system position and - transformation state as rrrr, and returns a "110 - MARK ssss = rrrr" reply over the control - connection. The User-FTP appends the pair - (ssss,rrrr) to its restart control file. - - To restart the transfer, the User-FTP fetches the - last (ssss,rrrr) pair from the restart control - file, repositions its local file system and - transformation state using ssss, and sends the - command "REST rrrr" to the Server-FTP. - - (2) Server-to-User Transfer - - The Server-FTP puts Restart Markers at - convenient places in the data stream. When the - User-FTP receives a Marker, it writes all prior - data to disk, encodes its file system position and - - - -Internet Engineering Task Force [Page 38] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - transformation state as rrrr, and appends the pair - (rrrr,ssss) to its restart control file. 
- - To restart the transfer, the User-FTP fetches the - last (rrrr,ssss) pair from the restart control - file, repositions its local file system and - transformation state using rrrr, and sends the - command "REST ssss" to the Server-FTP. - - (3) Server-to-Server ("Third-Party") Transfer - - The sending Server-FTP puts Restart Markers - at convenient places in the data stream. When it - receives a Marker, the receiving Server-FTP writes - all prior data to disk, encodes its file system - position and transformation state as rrrr, and - sends a "110 MARK ssss = rrrr" reply over the - control connection to the User. The User-FTP - appends the pair (ssss,rrrr) to its restart - control file. - - To restart the transfer, the User-FTP fetches the - last (ssss,rrrr) pair from the restart control - file, sends "REST ssss" to the sending Server-FTP, - and sends "REST rrrr" to the receiving Server-FTP. - - - 4.1.4 FTP/USER INTERFACE - - This section discusses the user interface for a User-FTP - program. - - 4.1.4.1 Pathname Specification - - Since FTP is intended for use in a heterogeneous - environment, User-FTP implementations MUST support remote - pathnames as arbitrary character strings, so that their form - and content are not limited by the conventions of the local - operating system. - - DISCUSSION: - In particular, remote pathnames can be of arbitrary - length, and all the printing ASCII characters as well - as space (0x20) must be allowed. RFC-959 allows a - pathname to contain any 7-bit ASCII character except CR - or LF. - - - - - -Internet Engineering Task Force [Page 39] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - 4.1.4.2 "QUOTE" Command - - A User-FTP program MUST implement a "QUOTE" command that - will pass an arbitrary character string to the server and - display all resulting response messages to the user. 
- - To make the "QUOTE" command useful, a User-FTP SHOULD send - transfer control commands to the server as the user enters - them, rather than saving all the commands and sending them - to the server only when a data transfer is started. - - DISCUSSION: - The "QUOTE" command is essential to allow the user to - access servers that require system-specific commands - (e.g., SITE or ALLO), or to invoke new or optional - features that are not implemented by the User-FTP. For - example, "QUOTE" may be used to specify "TYPE A T" to - send a print file to hosts that require the - distinction, even if the User-FTP does not recognize - that TYPE. - - 4.1.4.3 Displaying Replies to User - - A User-FTP SHOULD display to the user the full text of all - error reply messages it receives. It SHOULD have a - "verbose" mode in which all commands it sends and the full - text and reply codes it receives are displayed, for - diagnosis of problems. - - 4.1.4.4 Maintaining Synchronization - - The state machine in a User-FTP SHOULD be forgiving of - missing and unexpected reply messages, in order to maintain - command synchronization with the server. - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 40] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - 4.1.5 FTP REQUIREMENTS SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e --------------------------------------------|---------------|-|-|-|-|-|-- -Implement TYPE T if same as TYPE N |4.1.2.2 | |x| | | | -File/Record transform invertible if poss. 
|4.1.2.4 | |x| | | | -User-FTP send PORT cmd for stream mode |4.1.2.5 | |x| | | | -Server-FTP implement PASV |4.1.2.6 |x| | | | | - PASV is per-transfer |4.1.2.6 |x| | | | | -NLST reply usable in RETR cmds |4.1.2.7 |x| | | | | -Implied type for LIST and NLST |4.1.2.7 | |x| | | | -SITE cmd for non-standard features |4.1.2.8 | |x| | | | -STOU cmd return pathname as specified |4.1.2.9 |x| | | | | -Use TCP READ boundaries on control conn. |4.1.2.10 | | | | |x| - | | | | | | | -Server-FTP send only correct reply format |4.1.2.11 |x| | | | | -Server-FTP use defined reply code if poss. |4.1.2.11 | |x| | | | - New reply code following Section 4.2 |4.1.2.11 | | |x| | | -User-FTP use only high digit of reply |4.1.2.11 | |x| | | | -User-FTP handle multi-line reply lines |4.1.2.11 |x| | | | | -User-FTP handle 421 reply specially |4.1.2.11 | | | |x| | - | | | | | | | -Default data port same IP addr as ctl conn |4.1.2.12 |x| | | | | -User-FTP send Telnet cmds exc. SYNCH, IP |4.1.2.12 | | | | |x| -User-FTP negotiate Telnet options |4.1.2.12 | | | | |x| -Server-FTP handle Telnet options |4.1.2.12 |x| | | | | -Handle "Experimental" directory cmds |4.1.3.1 | |x| | | | -Idle timeout in server-FTP |4.1.3.2 | |x| | | | - Configurable idle timeout |4.1.3.2 | |x| | | | -Receiver checkpoint data at Restart Marker |4.1.3.4 | |x| | | | -Sender assume 110 replies are synchronous |4.1.3.4 | | | | |x| - | | | | | | | -Support TYPE: | | | | | | | - ASCII - Non-Print (AN) |4.1.2.13 |x| | | | | - ASCII - Telnet (AT) -- if same as AN |4.1.2.2 | |x| | | | - ASCII - Carriage Control (AC) |959 3.1.1.5.2 | | |x| | | - EBCDIC - (any form) |959 3.1.1.2 | | |x| | | - IMAGE |4.1.2.1 |x| | | | | - LOCAL 8 |4.1.2.1 |x| | | | | - - - -Internet Engineering Task Force [Page 41] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - - LOCAL m |4.1.2.1 | | |x| | |2 - | | | | | | | -Support MODE: | | | | | | | - Stream |4.1.2.13 |x| | | | | - Block |959 3.4.2 | | |x| | | - | | | | | | | -Support STRUCTURE: | | | 
| | | | - File |4.1.2.13 |x| | | | | - Record |4.1.2.13 |x| | | | |3 - Page |4.1.2.3 | | | |x| | - | | | | | | | -Support commands: | | | | | | | - USER |4.1.2.13 |x| | | | | - PASS |4.1.2.13 |x| | | | | - ACCT |4.1.2.13 |x| | | | | - CWD |4.1.2.13 |x| | | | | - CDUP |4.1.2.13 |x| | | | | - SMNT |959 5.3.1 | | |x| | | - REIN |959 5.3.1 | | |x| | | - QUIT |4.1.2.13 |x| | | | | - | | | | | | | - PORT |4.1.2.13 |x| | | | | - PASV |4.1.2.6 |x| | | | | - TYPE |4.1.2.13 |x| | | | |1 - STRU |4.1.2.13 |x| | | | |1 - MODE |4.1.2.13 |x| | | | |1 - | | | | | | | - RETR |4.1.2.13 |x| | | | | - STOR |4.1.2.13 |x| | | | | - STOU |959 5.3.1 | | |x| | | - APPE |4.1.2.13 |x| | | | | - ALLO |959 5.3.1 | | |x| | | - REST |959 5.3.1 | | |x| | | - RNFR |4.1.2.13 |x| | | | | - RNTO |4.1.2.13 |x| | | | | - ABOR |959 5.3.1 | | |x| | | - DELE |4.1.2.13 |x| | | | | - RMD |4.1.2.13 |x| | | | | - MKD |4.1.2.13 |x| | | | | - PWD |4.1.2.13 |x| | | | | - LIST |4.1.2.13 |x| | | | | - NLST |4.1.2.13 |x| | | | | - SITE |4.1.2.8 | | |x| | | - STAT |4.1.2.13 |x| | | | | - SYST |4.1.2.13 |x| | | | | - HELP |4.1.2.13 |x| | | | | - NOOP |4.1.2.13 |x| | | | | - | | | | | | | - - - -Internet Engineering Task Force [Page 42] - - - - -RFC1123 FILE TRANSFER -- FTP October 1989 - - -User Interface: | | | | | | | - Arbitrary pathnames |4.1.4.1 |x| | | | | - Implement "QUOTE" command |4.1.4.2 |x| | | | | - Transfer control commands immediately |4.1.4.2 | |x| | | | - Display error messages to user |4.1.4.3 | |x| | | | - Verbose mode |4.1.4.3 | |x| | | | - Maintain synchronization with server |4.1.4.4 | |x| | | | - -Footnotes: - -(1) For the values shown earlier. - -(2) Here m is number of bits in a memory word. - -(3) Required for host with record-structured file system, optional - otherwise. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 43] - - - - -RFC1123 FILE TRANSFER -- TFTP October 1989 - - - 4.2 TRIVIAL FILE TRANSFER PROTOCOL -- TFTP - - 4.2.1 INTRODUCTION - - The Trivial File Transfer Protocol TFTP is defined in RFC-783 - [TFTP:1]. - - TFTP provides its own reliable delivery with UDP as its - transport protocol, using a simple stop-and-wait acknowledgment - system. Since TFTP has an effective window of only one 512 - octet segment, it can provide good performance only over paths - that have a small delay*bandwidth product. The TFTP file - interface is very simple, providing no access control or - security. - - TFTP's most important application is bootstrapping a host over - a local network, since it is simple and small enough to be - easily implemented in EPROM [BOOT:1, BOOT:2]. Vendors are - urged to support TFTP for booting. - - 4.2.2 PROTOCOL WALK-THROUGH - - The TFTP specification [TFTP:1] is written in an open style, - and does not fully specify many parts of the protocol. - - 4.2.2.1 Transfer Modes: RFC-783, Page 3 - - The transfer mode "mail" SHOULD NOT be supported. - - 4.2.2.2 UDP Header: RFC-783, Page 17 - - The Length field of a UDP header is incorrectly defined; it - includes the UDP header length (8). - - 4.2.3 SPECIFIC ISSUES - - 4.2.3.1 Sorcerer's Apprentice Syndrome - - There is a serious bug, known as the "Sorcerer's Apprentice - Syndrome," in the protocol specification. While it does not - cause incorrect operation of the transfer (the file will - always be transferred correctly if the transfer completes), - this bug may cause excessive retransmission, which may cause - the transfer to time out. 
- - Implementations MUST contain the fix for this problem: the - sender (i.e., the side originating the DATA packets) must - never resend the current DATA packet on receipt of a - - - -Internet Engineering Task Force [Page 44] - - - - -RFC1123 FILE TRANSFER -- TFTP October 1989 - - - duplicate ACK. - - DISCUSSION: - The bug is caused by the protocol rule that either - side, on receiving an old duplicate datagram, may - resend the current datagram. If a packet is delayed in - the network but later successfully delivered after - either side has timed out and retransmitted a packet, a - duplicate copy of the response may be generated. If - the other side responds to this duplicate with a - duplicate of its own, then every datagram will be sent - in duplicate for the remainder of the transfer (unless - a datagram is lost, breaking the repetition). Worse - yet, since the delay is often caused by congestion, - this duplicate transmission will usually causes more - congestion, leading to more delayed packets, etc. - - The following example may help to clarify this problem. - - TFTP A TFTP B - - (1) Receive ACK X-1 - Send DATA X - (2) Receive DATA X - Send ACK X - (ACK X is delayed in network, - and A times out): - (3) Retransmit DATA X - - (4) Receive DATA X again - Send ACK X again - (5) Receive (delayed) ACK X - Send DATA X+1 - (6) Receive DATA X+1 - Send ACK X+1 - (7) Receive ACK X again - Send DATA X+1 again - (8) Receive DATA X+1 again - Send ACK X+1 again - (9) Receive ACK X+1 - Send DATA X+2 - (10) Receive DATA X+2 - Send ACK X+3 - (11) Receive ACK X+1 again - Send DATA X+2 again - (12) Receive DATA X+2 again - Send ACK X+3 again - - - - -Internet Engineering Task Force [Page 45] - - - - -RFC1123 FILE TRANSFER -- TFTP October 1989 - - - Notice that once the delayed ACK arrives, the protocol - settles down to duplicate all further packets - (sequences 5-8 and 9-12). 
The problem is caused not by - either side timing out, but by both sides - retransmitting the current packet when they receive a - duplicate. - - The fix is to break the retransmission loop, as - indicated above. This is analogous to the behavior of - TCP. It is then possible to remove the retransmission - timer on the receiver, since the resent ACK will never - cause any action; this is a useful simplification where - TFTP is used in a bootstrap program. It is OK to allow - the timer to remain, and it may be helpful if the - retransmitted ACK replaces one that was genuinely lost - in the network. The sender still requires a retransmit - timer, of course. - - 4.2.3.2 Timeout Algorithms - - A TFTP implementation MUST use an adaptive timeout. - - IMPLEMENTATION: - TCP retransmission algorithms provide a useful base to - work from. At least an exponential backoff of - retransmission timeout is necessary. - - 4.2.3.3 Extensions - - A variety of non-standard extensions have been made to TFTP, - including additional transfer modes and a secure operation - mode (with passwords). None of these have been - standardized. - - 4.2.3.4 Access Control - - A server TFTP implementation SHOULD include some - configurable access control over what pathnames are allowed - in TFTP operations. - - 4.2.3.5 Broadcast Request - - A TFTP request directed to a broadcast address SHOULD be - silently ignored. - - DISCUSSION: - Due to the weak access control capability of TFTP, - directed broadcasts of TFTP requests to random networks - - - -Internet Engineering Task Force [Page 46] - - - - -RFC1123 FILE TRANSFER -- TFTP October 1989 - - - could create a significant security hole. 
- - 4.2.4 TFTP REQUIREMENTS SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e --------------------------------------------------|--------|-|-|-|-|-|-- -Fix Sorcerer's Apprentice Syndrome |4.2.3.1 |x| | | | | -Transfer modes: | | | | | | | - netascii |RFC-783 |x| | | | | - octet |RFC-783 |x| | | | | - mail |4.2.2.1 | | | |x| | - extensions |4.2.3.3 | | |x| | | -Use adaptive timeout |4.2.3.2 |x| | | | | -Configurable access control |4.2.3.4 | |x| | | | -Silently ignore broadcast request |4.2.3.5 | |x| | | | --------------------------------------------------|--------|-|-|-|-|-|-- --------------------------------------------------|--------|-|-|-|-|-|-- - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 47] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - -5. ELECTRONIC MAIL -- SMTP and RFC-822 - - 5.1 INTRODUCTION - - In the TCP/IP protocol suite, electronic mail in a format - specified in RFC-822 [SMTP:2] is transmitted using the Simple Mail - Transfer Protocol (SMTP) defined in RFC-821 [SMTP:1]. - - While SMTP has remained unchanged over the years, the Internet - community has made several changes in the way SMTP is used. In - particular, the conversion to the Domain Name System (DNS) has - caused changes in address formats and in mail routing. In this - section, we assume familiarity with the concepts and terminology - of the DNS, whose requirements are given in Section 6.1. - - RFC-822 specifies the Internet standard format for electronic mail - messages. RFC-822 supercedes an older standard, RFC-733, that may - still be in use in a few places, although it is obsolete. The two - formats are sometimes referred to simply by number ("822" and - "733"). 
- - RFC-822 is used in some non-Internet mail environments with - different mail transfer protocols than SMTP, and SMTP has also - been adapted for use in some non-Internet environments. Note that - this document presents the rules for the use of SMTP and RFC-822 - for the Internet environment only; other mail environments that - use these protocols may be expected to have their own rules. - - 5.2 PROTOCOL WALK-THROUGH - - This section covers both RFC-821 and RFC-822. - - The SMTP specification in RFC-821 is clear and contains numerous - examples, so implementors should not find it difficult to - understand. This section simply updates or annotates portions of - RFC-821 to conform with current usage. - - RFC-822 is a long and dense document, defining a rich syntax. - Unfortunately, incomplete or defective implementations of RFC-822 - are common. In fact, nearly all of the many formats of RFC-822 - are actually used, so an implementation generally needs to - recognize and correctly interpret all of the RFC-822 syntax. - - 5.2.1 The SMTP Model: RFC-821 Section 2 - - DISCUSSION: - Mail is sent by a series of request/response transactions - between a client, the "sender-SMTP," and a server, the - - - -Internet Engineering Task Force [Page 48] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - "receiver-SMTP". These transactions pass (1) the message - proper, which is composed of header and body, and (2) SMTP - source and destination addresses, referred to as the - "envelope". - - The SMTP programs are analogous to Message Transfer Agents - (MTAs) of X.400. There will be another level of protocol - software, closer to the end user, that is responsible for - composing and analyzing RFC-822 message headers; this - component is known as the "User Agent" in X.400, and we - use that term in this document. There is a clear logical - distinction between the User Agent and the SMTP - implementation, since they operate on different levels of - protocol. 
Note, however, that this distinction is may not - be exactly reflected the structure of typical - implementations of Internet mail. Often there is a - program known as the "mailer" that implements SMTP and - also some of the User Agent functions; the rest of the - User Agent functions are included in a user interface used - for entering and reading mail. - - The SMTP envelope is constructed at the originating site, - typically by the User Agent when the message is first - queued for the Sender-SMTP program. The envelope - addresses may be derived from information in the message - header, supplied by the user interface (e.g., to implement - a bcc: request), or derived from local configuration - information (e.g., expansion of a mailing list). The SMTP - envelope cannot in general be re-derived from the header - at a later stage in message delivery, so the envelope is - transmitted separately from the message itself using the - MAIL and RCPT commands of SMTP. - - The text of RFC-821 suggests that mail is to be delivered - to an individual user at a host. With the advent of the - domain system and of mail routing using mail-exchange (MX) - resource records, implementors should now think of - delivering mail to a user at a domain, which may or may - not be a particular host. This DOES NOT change the fact - that SMTP is a host-to-host mail exchange protocol. - - 5.2.2 Canonicalization: RFC-821 Section 3.1 - - The domain names that a Sender-SMTP sends in MAIL and RCPT - commands MUST have been "canonicalized," i.e., they must be - fully-qualified principal names or domain literals, not - nicknames or domain abbreviations. A canonicalized name either - identifies a host directly or is an MX name; it cannot be a - - - -Internet Engineering Task Force [Page 49] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - CNAME. 
- - 5.2.3 VRFY and EXPN Commands: RFC-821 Section 3.3 - - A receiver-SMTP MUST implement VRFY and SHOULD implement EXPN - (this requirement overrides RFC-821). However, there MAY be - configuration information to disable VRFY and EXPN in a - particular installation; this might even allow EXPN to be - disabled for selected lists. - - A new reply code is defined for the VRFY command: - - 252 Cannot VRFY user (e.g., info is not local), but will - take message for this user and attempt delivery. - - DISCUSSION: - SMTP users and administrators make regular use of these - commands for diagnosing mail delivery problems. With the - increasing use of multi-level mailing list expansion - (sometimes more than two levels), EXPN has been - increasingly important for diagnosing inadvertent mail - loops. On the other hand, some feel that EXPN represents - a significant privacy, and perhaps even a security, - exposure. - - 5.2.4 SEND, SOML, and SAML Commands: RFC-821 Section 3.4 - - An SMTP MAY implement the commands to send a message to a - user's terminal: SEND, SOML, and SAML. - - DISCUSSION: - It has been suggested that the use of mail relaying - through an MX record is inconsistent with the intent of - SEND to deliver a message immediately and directly to a - user's terminal. However, an SMTP receiver that is unable - to write directly to the user terminal can return a "251 - User Not Local" reply to the RCPT following a SEND, to - inform the originator of possibly deferred delivery. - - 5.2.5 HELO Command: RFC-821 Section 3.5 - - The sender-SMTP MUST ensure that the <domain> parameter in a - HELO command is a valid principal host domain name for the - client host. As a result, the receiver-SMTP will not have to - perform MX resolution on this name in order to validate the - HELO parameter.
- - The HELO receiver MAY verify that the HELO parameter really - - - -Internet Engineering Task Force [Page 50] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - corresponds to the IP address of the sender. However, the - receiver MUST NOT refuse to accept a message, even if the - sender's HELO command fails verification. - - DISCUSSION: - Verifying the HELO parameter requires a domain name lookup - and may therefore take considerable time. An alternative - tool for tracking bogus mail sources is suggested below - (see "DATA Command"). - - Note also that the HELO argument is still required to have - valid <domain> syntax, since it will appear in a Received: - line; otherwise, a 501 error is to be sent. - - IMPLEMENTATION: - When HELO parameter validation fails, a suggested - procedure is to insert a note about the unknown - authenticity of the sender into the message header (e.g., - in the "Received:" line). - - 5.2.6 Mail Relay: RFC-821 Section 3.6 - - We distinguish three types of mail (store-and-) forwarding: - - (1) A simple forwarder or "mail exchanger" forwards a message - using private knowledge about the recipient; see section - 3.2 of RFC-821. - - (2) An SMTP mail "relay" forwards a message within an SMTP - mail environment as the result of an explicit source route - (as defined in section 3.6 of RFC-821). The SMTP relay - function uses the "@...:" form of source route from RFC- - 822 (see Section 5.2.19 below). - - (3) A mail "gateway" passes a message between different - environments. The rules for mail gateways are discussed - below in Section 5.3.7. - - An Internet host that is forwarding a message but is not a - gateway to a different mail environment (i.e., it falls under - (1) or (2)) SHOULD NOT alter any existing header fields, - although the host will add an appropriate Received: line as - required in Section 5.2.8. - - A Sender-SMTP SHOULD NOT send a RCPT TO: command containing an - explicit source route using the "@...:" address form.
Thus, - the relay function defined in section 3.6 of RFC-821 should - not be used. - - - -Internet Engineering Task Force [Page 51] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - DISCUSSION: - The intent is to discourage all source routing and to - abolish explicit source routing for mail delivery within - the Internet environment. Source-routing is unnecessary; - the simple target address "user@domain" should always - suffice. This is the result of an explicit architectural - decision to use universal naming rather than source - routing for mail. Thus, SMTP provides end-to-end - connectivity, and the DNS provides globally-unique, - location-independent names. MX records handle the major - case where source routing might otherwise be needed. - - A receiver-SMTP MUST accept the explicit source route syntax in - the envelope, but it MAY implement the relay function as - defined in section 3.6 of RFC-821. If it does not implement - the relay function, it SHOULD attempt to deliver the message - directly to the host to the right of the right-most "@" sign. - - DISCUSSION: - For example, suppose a host that does not implement the - relay function receives a message with the SMTP command: - "RCPT TO:<@ALPHA,@BETA:joe@GAMMA>", where ALPHA, BETA, and - GAMMA represent domain names. Rather than immediately - refusing the message with a 550 error reply as suggested - on page 20 of RFC-821, the host should try to forward the - message to GAMMA directly, using: "RCPT TO:<joe@GAMMA>". - Since this host does not support relaying, it is not - required to update the reverse path. - - Some have suggested that source routing may be needed - occasionally for manually routing mail around failures; - however, the reality and importance of this need is - controversial. The use of explicit SMTP mail relaying for - this purpose is discouraged, and in fact it may not be - successful, as many host systems do not support it.
Some - have used the "%-hack" (see Section 5.2.16) for this - purpose. - - 5.2.7 RCPT Command: RFC-821 Section 4.1.1 - - A host that supports a receiver-SMTP MUST support the reserved - mailbox "Postmaster". - - The receiver-SMTP MAY verify RCPT parameters as they arrive; - however, RCPT responses MUST NOT be delayed beyond a reasonable - time (see Section 5.3.2). - - Therefore, a "250 OK" response to a RCPT does not necessarily - - - -Internet Engineering Task Force [Page 52] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - imply that the delivery address(es) are valid. Errors found - after message acceptance will be reported by mailing a - notification message to an appropriate address (see Section - 5.3.3). - - DISCUSSION: - The set of conditions under which a RCPT parameter can be - validated immediately is an engineering design choice. - Reporting destination mailbox errors to the Sender-SMTP - before mail is transferred is generally desirable to save - time and network bandwidth, but this advantage is lost if - RCPT verification is lengthy. - - For example, the receiver can verify immediately any - simple local reference, such as a single locally- - registered mailbox. On the other hand, the "reasonable - time" limitation generally implies deferring verification - of a mailing list until after the message has been - transferred and accepted, since verifying a large mailing - list can take a very long time. An implementation might - or might not choose to defer validation of addresses that - are non-local and therefore require a DNS lookup. If a - DNS lookup is performed but a soft domain system error - (e.g., timeout) occurs, validity must be assumed. - - 5.2.8 DATA Command: RFC-821 Section 4.1.1 - - Every receiver-SMTP (not just one that "accepts a message for - relaying or for final delivery" [SMTP:1]) MUST insert a - "Received:" line at the beginning of a message. 
In this line, - called a "time stamp line" in RFC-821: - - * The FROM field SHOULD contain both (1) the name of the - source host as presented in the HELO command and (2) a - domain literal containing the IP address of the source, - determined from the TCP connection. - - * The ID field MAY contain an "@" as suggested in RFC-822, - but this is not required. - - * The FOR field MAY contain a list of <path> entries when - multiple RCPT commands have been given. - - - An Internet mail program MUST NOT change a Received: line that - was previously added to the message header. - - - - - -Internet Engineering Task Force [Page 53] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - DISCUSSION: - Including both the source host and the IP source address - in the Received: line may provide enough information for - tracking illicit mail sources and eliminate a need to - explicitly verify the HELO parameter. - - Received: lines are primarily intended for humans tracing - mail routes, primarily of diagnosis of faults. See also - the discussion under 5.3.7. - - When the receiver-SMTP makes "final delivery" of a message, - then it MUST pass the MAIL FROM: address from the SMTP envelope - with the message, for use if an error notification message must - be sent later (see Section 5.3.3). There is an analogous - requirement when gatewaying from the Internet into a different - mail environment; see Section 5.3.7. - - DISCUSSION: - Note that the final reply to the DATA command depends only - upon the successful transfer and storage of the message. - Any problem with the destination address(es) must either - (1) have been reported in an SMTP error reply to the RCPT - command(s), or (2) be reported in a later error message - mailed to the originator. - - IMPLEMENTATION: - The MAIL FROM: information may be passed as a parameter or - in a Return-Path: line inserted at the beginning of the - message.
- - 5.2.9 Command Syntax: RFC-821 Section 4.1.2 - - The syntax shown in RFC-821 for the MAIL FROM: command omits - the case of an empty path: "MAIL FROM: <>" (see RFC-821 Page - 15). An empty reverse path MUST be supported. - - 5.2.10 SMTP Replies: RFC-821 Section 4.2 - - A receiver-SMTP SHOULD send only the reply codes listed in - section 4.2.2 of RFC-821 or in this document. A receiver-SMTP - SHOULD use the text shown in examples in RFC-821 whenever - appropriate. - - A sender-SMTP MUST determine its actions only by the reply - code, not by the text (except for 251 and 551 replies); any - text, including no text at all, must be acceptable. The space - (blank) following the reply code is considered part of the - text. Whenever possible, a sender-SMTP SHOULD test only the - - - -Internet Engineering Task Force [Page 54] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - first digit of the reply code, as specified in Appendix E of - RFC-821. - - DISCUSSION: - Interoperability problems have arisen with SMTP systems - using reply codes that are not listed explicitly in RFC- - 821 Section 4.3 but are legal according to the theory of - reply codes explained in Appendix E. - - 5.2.11 Transparency: RFC-821 Section 4.5.2 - - Implementors MUST be sure that their mail systems always add - and delete periods to ensure message transparency. - - 5.2.12 WKS Use in MX Processing: RFC-974, p. 5 - - RFC-974 [SMTP:3] recommended that the domain system be queried - for WKS ("Well-Known Service") records, to verify that each - proposed mail target does support SMTP. Later experience has - shown that WKS is not widely supported, so the WKS step in MX - processing SHOULD NOT be used. - - The following are notes on RFC-822, organized by section of that - document. 
- - 5.2.13 RFC-822 Message Specification: RFC-822 Section 4 - - The syntax shown for the Return-path line omits the possibility - of a null return path, which is used to prevent looping of - error notifications (see Section 5.3.3). The complete syntax - is: - - return = "Return-path" ":" route-addr - / "Return-path" ":" "<" ">" - - The set of optional header fields is hereby expanded to include - the Content-Type field defined in RFC-1049 [SMTP:7]. This - field "allows mail reading systems to automatically identify - the type of a structured message body and to process it for - display accordingly". [SMTP:7] A User Agent MAY support this - field. - - 5.2.14 RFC-822 Date and Time Specification: RFC-822 Section 5 - - The syntax for the date is hereby changed to: - - date = 1*2DIGIT month 2*4DIGIT - - - - -Internet Engineering Task Force [Page 55] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - All mail software SHOULD use 4-digit years in dates, to ease - the transition to the next century. - - There is a strong trend towards the use of numeric timezone - indicators, and implementations SHOULD use numeric timezones - instead of timezone names. However, all implementations MUST - accept either notation. If timezone names are used, they MUST - be exactly as defined in RFC-822. - - The military time zones are specified incorrectly in RFC-822: - they count the wrong way from UT (the signs are reversed). As - a result, military time zones in RFC-822 headers carry no - information. - - Finally, note that there is a typo in the definition of "zone" - in the syntax summary of appendix D; the correct definition - occurs in Section 3 of RFC-822. - - 5.2.15 RFC-822 Syntax Change: RFC-822 Section 6.1 - - The syntactic definition of "mailbox" in RFC-822 is hereby - changed to: - - mailbox = addr-spec ; simple address - / [phrase] route-addr ; name & addr-spec - - That is, the phrase preceding a route address is now OPTIONAL. 
- This change makes the following header field legal, for - example: - - From: <craig@nnsc.nsf.net> - - 5.2.16 RFC-822 Local-part: RFC-822 Section 6.2 - - The basic mailbox address specification has the form: "local- - part@domain". Here "local-part", sometimes called the "left- - hand side" of the address, is domain-dependent. - - A host that is forwarding the message but is not the - destination host implied by the right-hand side "domain" MUST - NOT interpret or modify the "local-part" of the address. - - When mail is to be gatewayed from the Internet mail environment - into a foreign mail environment (see Section 5.3.7), routing - information for that foreign environment MAY be embedded within - the "local-part" of the address. The gateway will then - interpret this local part appropriately for the foreign mail - environment. - - - -Internet Engineering Task Force [Page 56] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - DISCUSSION: - Although source routes are discouraged within the Internet - (see Section 5.2.6), there are non-Internet mail - environments whose delivery mechanisms do depend upon - source routes. Source routes for extra-Internet - environments can generally be buried in the "local-part" - of the address (see Section 5.2.16) while mail traverses - the Internet. When the mail reaches the appropriate - Internet mail gateway, the gateway will interpret the - local-part and build the necessary address or route for - the target mail environment. - - For example, an Internet host might send mail to: - "a!b!c!user@gateway-domain". The complex local part - "a!b!c!user" would be uninterpreted within the Internet - domain, but could be parsed and understood by the - specified mail gateway. - - An embedded source route is sometimes encoded in the - "local-part" using "%" as a right-binding routing - operator.
For example, in: - - user%domain%relay3%relay2@relay1 - - the "%" convention implies that the mail is to be routed - from "relay1" through "relay2", "relay3", and finally to - "user" at "domain". This is commonly known as the "%- - hack". It is suggested that "%" have lower precedence - than any other routing operator (e.g., "!") hidden in the - local-part; for example, "a!b%c" would be interpreted as - "(a!b)%c". - - Only the target host (in this case, "relay1") is permitted - to analyze the local-part "user%domain%relay3%relay2". - - 5.2.17 Domain Literals: RFC-822 Section 6.2.3 - - A mailer MUST be able to accept and parse an Internet domain - literal whose content ("dtext"; see RFC-822) is a dotted- - decimal host address. This satisfies the requirement of - Section 2.1 for the case of mail. - - An SMTP MUST accept and recognize a domain literal for any of - its own IP addresses. - - - - - - - -Internet Engineering Task Force [Page 57] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - 5.2.18 Common Address Formatting Errors: RFC-822 Section 6.1 - - Errors in formatting or parsing 822 addresses are unfortunately - common. This section mentions only the most common errors. A - User Agent MUST accept all valid RFC-822 address formats, and - MUST NOT generate illegal address syntax. - - o A common error is to leave out the semicolon after a group - identifier. - - o Some systems fail to fully-qualify domain names in - messages they generate. The right-hand side of an "@" - sign in a header address field MUST be a fully-qualified - domain name. - - For example, some systems fail to fully-qualify the From: - address; this prevents a "reply" command in the user - interface from automatically constructing a return - address. - - DISCUSSION: - Although RFC-822 allows the local use of abbreviated - domain names within a domain, the application of - RFC-822 in Internet mail does not allow this. 
The - intent is that an Internet host must not send an SMTP - message header containing an abbreviated domain name - in an address field. This allows the address fields - of the header to be passed without alteration across - the Internet, as required in Section 5.2.6. - - o Some systems mis-parse multiple-hop explicit source routes - such as: - - @relay1,@relay2,@relay3:user@domain. - - - o Some systems over-qualify domain names by adding a - trailing dot to some or all domain names in addresses or - message-ids. This violates RFC-822 syntax. - - - 5.2.19 Explicit Source Routes: RFC-822 Section 6.2.7 - - Internet host software SHOULD NOT create an RFC-822 header - containing an address with an explicit source route, but MUST - accept such headers for compatibility with earlier systems. - - DISCUSSION: - - - -Internet Engineering Task Force [Page 58] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - In an understatement, RFC-822 says "The use of explicit - source routing is discouraged". Many hosts implemented - RFC-822 source routes incorrectly, so the syntax cannot be - used unambiguously in practice. Many users feel the - syntax is ugly. Explicit source routes are not needed in - the mail envelope for delivery; see Section 5.2.6. For - all these reasons, explicit source routes using the RFC- - 822 notations are not to be used in Internet mail headers. - - As stated in Section 5.2.16, it is necessary to allow an - explicit source route to be buried in the local-part of an - address, e.g., using the "%-hack", in order to allow mail - to be gatewayed into another environment in which explicit - source routing is necessary. The vigilant will observe - that there is no way for a User Agent to detect and - prevent the use of such implicit source routing when the - destination is within the Internet. We can only - discourage source routing of any kind within the Internet, - as unnecessary and undesirable. 
- - 5.3 SPECIFIC ISSUES - - 5.3.1 SMTP Queueing Strategies - - The common structure of a host SMTP implementation includes - user mailboxes, one or more areas for queueing messages in - transit, and one or more daemon processes for sending and - receiving mail. The exact structure will vary depending on the - needs of the users on the host and the number and size of - mailing lists supported by the host. We describe several - optimizations that have proved helpful, particularly for - mailers supporting high traffic levels. - - Any queueing strategy MUST include: - - o Timeouts on all activities. See Section 5.3.2. - - o Never sending error messages in response to error - messages. - - - 5.3.1.1 Sending Strategy - - The general model of a sender-SMTP is one or more processes - that periodically attempt to transmit outgoing mail. In a - typical system, the program that composes a message has some - method for requesting immediate attention for a new piece of - outgoing mail, while mail that cannot be transmitted - - - -Internet Engineering Task Force [Page 59] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - immediately MUST be queued and periodically retried by the - sender. A mail queue entry will include not only the - message itself but also the envelope information. - - The sender MUST delay retrying a particular destination - after one attempt has failed. In general, the retry - interval SHOULD be at least 30 minutes; however, more - sophisticated and variable strategies will be beneficial - when the sender-SMTP can determine the reason for non- - delivery. - - Retries continue until the message is transmitted or the - sender gives up; the give-up time generally needs to be at - least 4-5 days. The parameters to the retry algorithm MUST - be configurable. - - A sender SHOULD keep a list of hosts it cannot reach and - corresponding timeouts, rather than just retrying queued - mail items. 
- - DISCUSSION: - Experience suggests that failures are typically - transient (the target system has crashed), favoring a - policy of two connection attempts in the first hour the - message is in the queue, and then backing off to once - every two or three hours. - - The sender-SMTP can shorten the queueing delay by - cooperation with the receiver-SMTP. In particular, if - mail is received from a particular address, it is good - evidence that any mail queued for that host can now be - sent. - - The strategy may be further modified as a result of - multiple addresses per host (see Section 5.3.4), to - optimize delivery time vs. resource usage. - - A sender-SMTP may have a large queue of messages for - each unavailable destination host, and if it retried - all these messages in every retry cycle, there would be - excessive Internet overhead and the daemon would be - blocked for a long period. Note that an SMTP can - generally determine that a delivery attempt has failed - only after a timeout of a minute or more; a one minute - timeout per connection will result in a very large - delay if it is repeated for dozens or even hundreds of - queued messages. - - - - -Internet Engineering Task Force [Page 60] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - When the same message is to be delivered to several users on - the same host, only one copy of the message SHOULD be - transmitted. That is, the sender-SMTP should use the - command sequence: RCPT, RCPT,... RCPT, DATA instead of the - sequence: RCPT, DATA, RCPT, DATA,... RCPT, DATA. - Implementation of this efficiency feature is strongly urged. - - Similarly, the sender-SMTP MAY support multiple concurrent - outgoing mail transactions to achieve timely delivery. - However, some limit SHOULD be imposed to protect the host - from devoting all its resources to mail. - - The use of the different addresses of a multihomed host is - discussed below. 
- - 5.3.1.2 Receiving strategy - - The receiver-SMTP SHOULD attempt to keep a pending listen on - the SMTP port at all times. This will require the support - of multiple incoming TCP connections for SMTP. Some limit - MAY be imposed. - - IMPLEMENTATION: - When the receiver-SMTP receives mail from a particular - host address, it could notify the sender-SMTP to retry - any mail pending for that host address. - - 5.3.2 Timeouts in SMTP - - There are two approaches to timeouts in the sender-SMTP: (a) - limit the time for each SMTP command separately, or (b) limit - the time for the entire SMTP dialogue for a single mail - message. A sender-SMTP SHOULD use option (a), per-command - timeouts. Timeouts SHOULD be easily reconfigurable, preferably - without recompiling the SMTP code. - - DISCUSSION: - Timeouts are an essential feature of an SMTP - implementation. If the timeouts are too long (or worse, - there are no timeouts), Internet communication failures or - software bugs in receiver-SMTP programs can tie up SMTP - processes indefinitely. If the timeouts are too short, - resources will be wasted with attempts that time out part - way through message delivery. - - If option (b) is used, the timeout has to be very large, - e.g., an hour, to allow time to expand very large mailing - lists. The timeout may also need to increase linearly - - - -Internet Engineering Task Force [Page 61] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - with the size of the message, to account for the time to - transmit a very large message. A large fixed timeout - leads to two problems: a failure can still tie up the - sender for a very long time, and very large messages may - still spuriously time out (which is a wasteful failure!). - - Using the recommended option (a), a timer is set for each - SMTP command and for each buffer of the data transfer. - The latter means that the overall timeout is inherently - proportional to the size of the message. 
- - Based on extensive experience with busy mail-relay hosts, the - minimum per-command timeout values SHOULD be as follows: - - o Initial 220 Message: 5 minutes - - A Sender-SMTP process needs to distinguish between a - failed TCP connection and a delay in receiving the initial - 220 greeting message. Many receiver-SMTPs will accept a - TCP connection but delay delivery of the 220 message until - their system load will permit more mail to be processed. - - o MAIL Command: 5 minutes - - - o RCPT Command: 5 minutes - - A longer timeout would be required if processing of - mailing lists and aliases were not deferred until after - the message was accepted. - - o DATA Initiation: 2 minutes - - This is while awaiting the "354 Start Input" reply to a - DATA command. - - o Data Block: 3 minutes - - This is while awaiting the completion of each TCP SEND - call transmitting a chunk of data. - - o DATA Termination: 10 minutes. - - This is while awaiting the "250 OK" reply. When the - receiver gets the final period terminating the message - data, it typically performs processing to deliver the - message to a user mailbox. A spurious timeout at this - point would be very wasteful, since the message has been - - - -Internet Engineering Task Force [Page 62] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - successfully sent. - - A receiver-SMTP SHOULD have a timeout of at least 5 minutes - while it is awaiting the next command from the sender. - - 5.3.3 Reliable Mail Receipt - - When the receiver-SMTP accepts a piece of mail (by sending a - "250 OK" message in response to DATA), it is accepting - responsibility for delivering or relaying the message. It must - take this responsibility seriously, i.e., it MUST NOT lose the - message for frivolous reasons, e.g., because the host later - crashes or because of a predictable resource shortage. 
- - If there is a delivery failure after acceptance of a message, - the receiver-SMTP MUST formulate and mail a notification - message. This notification MUST be sent using a null ("<>") - reverse path in the envelope; see Section 3.6 of RFC-821. The - recipient of this notification SHOULD be the address from the - envelope return path (or the Return-Path: line). However, if - this address is null ("<>"), the receiver-SMTP MUST NOT send a - notification. If the address is an explicit source route, it - SHOULD be stripped down to its final hop. - - DISCUSSION: - For example, suppose that an error notification must be - sent for a message that arrived with: - "MAIL FROM:<@a,@b:user@d>". The notification message - should be sent to: "RCPT TO:<user@d>". - - Some delivery failures after the message is accepted by - SMTP will be unavoidable. For example, it may be - impossible for the receiver-SMTP to validate all the - delivery addresses in RCPT command(s) due to a "soft" - domain system error or because the target is a mailing - list (see earlier discussion of RCPT). - - To avoid receiving duplicate messages as the result of - timeouts, a receiver-SMTP MUST seek to minimize the time - required to respond to the final "." that ends a message - transfer. See RFC-1047 [SMTP:4] for a discussion of this - problem. - - 5.3.4 Reliable Mail Transmission - - To transmit a message, a sender-SMTP determines the IP address - of the target host from the destination address in the - envelope. Specifically, it maps the string to the right of the - - - -Internet Engineering Task Force [Page 63] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - "@" sign into an IP address. This mapping or the transfer - itself may fail with a soft error, in which case the sender- - SMTP will requeue the outgoing mail for a later retry, as - required in Section 5.3.1.1.
- - When it succeeds, the mapping can result in a list of - alternative delivery addresses rather than a single address, - because of (a) multiple MX records, (b) multihoming, or both. - To provide reliable mail transmission, the sender-SMTP MUST be - able to try (and retry) each of the addresses in this list in - order, until a delivery attempt succeeds. However, there MAY - also be a configurable limit on the number of alternate - addresses that can be tried. In any case, a host SHOULD try at - least two addresses. - - The following information is to be used to rank the host - addresses: - - (1) Multiple MX Records -- these contain a preference - indication that should be used in sorting. If there are - multiple destinations with the same preference and there - is no clear reason to favor one (e.g., by address - preference), then the sender-SMTP SHOULD pick one at - random to spread the load across multiple mail exchanges - for a specific organization; note that this is a - refinement of the procedure in [DNS:3]. - - (2) Multihomed host -- The destination host (perhaps taken - from the preferred MX record) may be multihomed, in which - case the domain name resolver will return a list of - alternative IP addresses. It is the responsibility of the - domain name resolver interface (see Section 6.1.3.4 below) - to have ordered this list by decreasing preference, and - SMTP MUST try them in the order presented. - - DISCUSSION: - Although the capability to try multiple alternative - addresses is required, there may be circumstances where - specific installations want to limit or disable the use of - alternative addresses. The question of whether a sender - should attempt retries using the different addresses of a - multihomed host has been controversial. 
The main argument - for using the multiple addresses is that it maximizes the - probability of timely delivery, and indeed sometimes the - probability of any delivery; the counter argument is that - it may result in unnecessary resource use. - - Note that resource use is also strongly determined by the - - - -Internet Engineering Task Force [Page 64] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - sending strategy discussed in Section 5.3.1. - - 5.3.5 Domain Name Support - - SMTP implementations MUST use the mechanism defined in Section - 6.1 for mapping between domain names and IP addresses. This - means that every Internet SMTP MUST include support for the - Internet DNS. - - In particular, a sender-SMTP MUST support the MX record scheme - [SMTP:3]. See also Section 7.4 of [DNS:2] for information on - domain name support for SMTP. - - 5.3.6 Mailing Lists and Aliases - - An SMTP-capable host SHOULD support both the alias and the list - form of address expansion for multiple delivery. When a - message is delivered or forwarded to each address of an - expanded list form, the return address in the envelope - ("MAIL FROM:") MUST be changed to be the address of a person - who administers the list, but the message header MUST be left - unchanged; in particular, the "From" field of the message is - unaffected. - - DISCUSSION: - An important mail facility is a mechanism for multi- - destination delivery of a single message, by transforming - or "expanding" a pseudo-mailbox address into a list of - destination mailbox addresses. When a message is sent to - such a pseudo-mailbox (sometimes called an "exploder"), - copies are forwarded or redistributed to each mailbox in - the expanded list. 
We classify such a pseudo-mailbox as - an "alias" or a "list", depending upon the expansion - rules: - - (a) Alias - - To expand an alias, the recipient mailer simply - replaces the pseudo-mailbox address in the envelope - with each of the expanded addresses in turn; the rest - of the envelope and the message body are left - unchanged. The message is then delivered or - forwarded to each expanded address. - - (b) List - - A mailing list may be said to operate by - "redistribution" rather than by "forwarding". To - - - -Internet Engineering Task Force [Page 65] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - expand a list, the recipient mailer replaces the - pseudo-mailbox address in the envelope with each of - the expanded addresses in turn. The return address in - the envelope is changed so that all error messages - generated by the final deliveries will be returned to - a list administrator, not to the message originator, - who generally has no control over the contents of the - list and will typically find error messages annoying. - - - 5.3.7 Mail Gatewaying - - Gatewaying mail between different mail environments, i.e., - different mail formats and protocols, is complex and does not - easily yield to standardization. See for example [SMTP:5a], - [SMTP:5b]. However, some general requirements may be given for - a gateway between the Internet and another mail environment. - - (A) Header fields MAY be rewritten when necessary as messages - are gatewayed across mail environment boundaries. - - DISCUSSION: - This may involve interpreting the local-part of the - destination address, as suggested in Section 5.2.16. - - The other mail systems gatewayed to the Internet - generally use a subset of RFC-822 headers, but some - of them do not have an equivalent to the SMTP - envelope. Therefore, when a message leaves the - Internet environment, it may be necessary to fold the - SMTP envelope information into the message header. 
A - possible solution would be to create new header - fields to carry the envelope information (e.g., "X- - SMTP-MAIL:" and "X-SMTP-RCPT:"); however, this would - require changes in mail programs in the foreign - environment. - - (B) When forwarding a message into or out of the Internet - environment, a gateway MUST prepend a Received: line, but - it MUST NOT alter in any way a Received: line that is - already in the header. - - DISCUSSION: - This requirement is a subset of the general - "Received:" line requirement of Section 5.2.8; it is - restated here for emphasis. - - Received: fields of messages originating from other - - - -Internet Engineering Task Force [Page 66] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - environments may not conform exactly to RFC822. - However, the most important use of Received: lines is - for debugging mail faults, and this debugging can be - severely hampered by well-meaning gateways that try - to "fix" a Received: line. - - The gateway is strongly encouraged to indicate the - environment and protocol in the "via" clauses of - Received field(s) that it supplies. - - (C) From the Internet side, the gateway SHOULD accept all - valid address formats in SMTP commands and in RFC-822 - headers, and all valid RFC-822 messages. Although a - gateway must accept an RFC-822 explicit source route - ("@...:" format) in either the RFC-822 header or in the - envelope, it MAY or may not act on the source route; see - Sections 5.2.6 and 5.2.19. - - DISCUSSION: - It is often tempting to restrict the range of - addresses accepted at the mail gateway to simplify - the translation into addresses for the remote - environment. This practice is based on the - assumption that mail users have control over the - addresses their mailers send to the mail gateway. In - practice, however, users have little control over the - addresses that are finally sent; their mailers are - free to change addresses into any legal RFC-822 - format. 
- - (D) The gateway MUST ensure that all header fields of a - message that it forwards into the Internet meet the - requirements for Internet mail. In particular, all - addresses in "From:", "To:", "Cc:", etc., fields must be - transformed (if necessary) to satisfy RFC-822 syntax, and - they must be effective and useful for sending replies. - - - (E) The translation algorithm used to convert mail from the - Internet protocols to another environment's protocol - SHOULD try to ensure that error messages from the foreign - mail environment are delivered to the return path from the - SMTP envelope, not to the sender listed in the "From:" - field of the RFC-822 message. - - DISCUSSION: - Internet mail lists usually place the address of the - mail list maintainer in the envelope but leave the - - - -Internet Engineering Task Force [Page 67] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - original message header intact (with the "From:" - field containing the original sender). This yields - the behavior the average recipient expects: a reply - to the header gets sent to the original sender, not - to a mail list maintainer; however, errors get sent - to the maintainer (who can fix the problem) and not - the sender (who probably cannot). - - (F) Similarly, when forwarding a message from another - environment into the Internet, the gateway SHOULD set the - envelope return path in accordance with an error message - return address, if any, supplied by the foreign - environment. - - - 5.3.8 Maximum Message Size - - Mailer software MUST be able to send and receive messages of at - least 64K bytes in length (including header), and a much larger - maximum size is highly desirable. - - DISCUSSION: - Although SMTP does not define the maximum size of a - message, many systems impose implementation limits. - - The current de facto minimum limit in the Internet is 64K - bytes. However, electronic mail is used for a variety of - purposes that create much larger messages. 
For example, - mail is often used instead of FTP for transmitting ASCII - files, and in particular to transmit entire documents. As - a result, messages can be 1 megabyte or even larger. We - note that the present document together with its lower- - layer companion contains 0.5 megabytes. - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 68] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - 5.4 SMTP REQUIREMENTS SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e ------------------------------------------------|----------|-|-|-|-|-|-- - | | | | | | | -RECEIVER-SMTP: | | | | | | | - Implement VRFY |5.2.3 |x| | | | | - Implement EXPN |5.2.3 | |x| | | | - EXPN, VRFY configurable |5.2.3 | | |x| | | - Implement SEND, SOML, SAML |5.2.4 | | |x| | | - Verify HELO parameter |5.2.5 | | |x| | | - Refuse message with bad HELO |5.2.5 | | | | |x| - Accept explicit src-route syntax in env. |5.2.6 |x| | | | | - Support "postmaster" |5.2.7 |x| | | | | - Process RCPT when received (except lists) |5.2.7 | | |x| | | - Long delay of RCPT responses |5.2.7 | | | | |x| - | | | | | | | - Add Received: line |5.2.8 |x| | | | | - Received: line include domain literal |5.2.8 | |x| | | | - Change previous Received: line |5.2.8 | | | | |x| - Pass Return-Path info (final deliv/gwy) |5.2.8 |x| | | | | - Support empty reverse path |5.2.9 |x| | | | | - Send only official reply codes |5.2.10 | |x| | | | - Send text from RFC-821 when appropriate |5.2.10 | |x| | | | - Delete "." 
for transparency |5.2.11 |x| | | | | - Accept and recognize self domain literal(s) |5.2.17 |x| | | | | - | | | | | | | - Error message about error message |5.3.1 | | | | |x| - Keep pending listen on SMTP port |5.3.1.2 | |x| | | | - Provide limit on recv concurrency |5.3.1.2 | | |x| | | - Wait at least 5 mins for next sender cmd |5.3.2 | |x| | | | - Avoidable delivery failure after "250 OK" |5.3.3 | | | | |x| - Send error notification msg after accept |5.3.3 |x| | | | | - Send using null return path |5.3.3 |x| | | | | - Send to envelope return path |5.3.3 | |x| | | | - Send to null address |5.3.3 | | | | |x| - Strip off explicit src route |5.3.3 | |x| | | | - Minimize acceptance delay (RFC-1047) |5.3.3 |x| | | | | ------------------------------------------------|----------|-|-|-|-|-|-- - - - -Internet Engineering Task Force [Page 69] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - | | | | | | | -SENDER-SMTP: | | | | | | | - Canonicalized domain names in MAIL, RCPT |5.2.2 |x| | | | | - Implement SEND, SOML, SAML |5.2.4 | | |x| | | - Send valid principal host name in HELO |5.2.5 |x| | | | | - Send explicit source route in RCPT TO: |5.2.6 | | | |x| | - Use only reply code to determine action |5.2.10 |x| | | | | - Use only high digit of reply code when poss. |5.2.10 | |x| | | | - Add "." 
for transparency |5.2.11 |x| | | | | - | | | | | | | - Retry messages after soft failure |5.3.1.1 |x| | | | | - Delay before retry |5.3.1.1 |x| | | | | - Configurable retry parameters |5.3.1.1 |x| | | | | - Retry once per each queued dest host |5.3.1.1 | |x| | | | - Multiple RCPT's for same DATA |5.3.1.1 | |x| | | | - Support multiple concurrent transactions |5.3.1.1 | | |x| | | - Provide limit on concurrency |5.3.1.1 | |x| | | | - | | | | | | | - Timeouts on all activities |5.3.1 |x| | | | | - Per-command timeouts |5.3.2 | |x| | | | - Timeouts easily reconfigurable |5.3.2 | |x| | | | - Recommended times |5.3.2 | |x| | | | - Try alternate addr's in order |5.3.4 |x| | | | | - Configurable limit on alternate tries |5.3.4 | | |x| | | - Try at least two alternates |5.3.4 | |x| | | | - Load-split across equal MX alternates |5.3.4 | |x| | | | - Use the Domain Name System |5.3.5 |x| | | | | - Support MX records |5.3.5 |x| | | | | - Use WKS records in MX processing |5.2.12 | | | |x| | ------------------------------------------------|----------|-|-|-|-|-|-- - | | | | | | | -MAIL FORWARDING: | | | | | | | - Alter existing header field(s) |5.2.6 | | | |x| | - Implement relay function: 821/section 3.6 |5.2.6 | | |x| | | - If not, deliver to RHS domain |5.2.6 | |x| | | | - Interpret 'local-part' of addr |5.2.16 | | | | |x| - | | | | | | | -MAILING LISTS AND ALIASES | | | | | | | - Support both |5.3.6 | |x| | | | - Report mail list error to local admin. 
|5.3.6 |x| | | | | - | | | | | | | -MAIL GATEWAYS: | | | | | | | - Embed foreign mail route in local-part |5.2.16 | | |x| | | - Rewrite header fields when necessary |5.3.7 | | |x| | | - Prepend Received: line |5.3.7 |x| | | | | - Change existing Received: line |5.3.7 | | | | |x| - Accept full RFC-822 on Internet side |5.3.7 | |x| | | | - Act on RFC-822 explicit source route |5.3.7 | | |x| | | - - - -Internet Engineering Task Force [Page 70] - - - - -RFC1123 MAIL -- SMTP & RFC-822 October 1989 - - - Send only valid RFC-822 on Internet side |5.3.7 |x| | | | | - Deliver error msgs to envelope addr |5.3.7 | |x| | | | - Set env return path from err return addr |5.3.7 | |x| | | | - | | | | | | | -USER AGENT -- RFC-822 | | | | | | | - Allow user to enter address |5.2.6 | | | |x| | - Support RFC-1049 Content Type field |5.2.13 | | |x| | | - Use 4-digit years |5.2.14 | |x| | | | - Generate numeric timezones |5.2.14 | |x| | | | - Accept all timezones |5.2.14 |x| | | | | - Use non-num timezones from RFC-822 |5.2.14 |x| | | | | - Omit phrase before route-addr |5.2.15 | | |x| | | - Accept and parse dot.dec. domain literals |5.2.17 |x| | | | | - Accept all RFC-822 address formats |5.2.18 |x| | | | | - Generate invalid RFC-822 address format |5.2.18 | | | | |x| - Fully-qualified domain names in header |5.2.18 |x| | | | | - Create explicit src route in header |5.2.19 | | | |x| | - Accept explicit src route in header |5.2.19 |x| | | | | - | | | | | | | -Send/recv at least 64KB messages |5.3.8 |x| | | | | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 71] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - -6. SUPPORT SERVICES - - 6.1 DOMAIN NAME TRANSLATION - - 6.1.1 INTRODUCTION - - Every host MUST implement a resolver for the Domain Name System - (DNS), and it MUST implement a mechanism using this DNS - resolver to convert host names to IP addresses and vice-versa - [DNS:1, DNS:2]. 
- - In addition to the DNS, a host MAY also implement a host name - translation mechanism that searches a local Internet host - table. See Section 6.1.3.8 for more information on this - option. - - DISCUSSION: - Internet host name translation was originally performed by - searching local copies of a table of all hosts. This - table became too large to update and distribute in a - timely manner and too large to fit into many hosts, so the - DNS was invented. - - The DNS creates a distributed database used primarily for - the translation between host names and host addresses. - Implementation of DNS software is required. The DNS - consists of two logically distinct parts: name servers and - resolvers (although implementations often combine these - two logical parts in the interest of efficiency) [DNS:2]. - - Domain name servers store authoritative data about certain - sections of the database and answer queries about the - data. Domain resolvers query domain name servers for data - on behalf of user processes. Every host therefore needs a - DNS resolver; some host machines will also need to run - domain name servers. Since no name server has complete - information, in general it is necessary to obtain - information from more than one name server to resolve a - query. - - 6.1.2 PROTOCOL WALK-THROUGH - - An implementor must study references [DNS:1] and [DNS:2] - carefully. They provide a thorough description of the theory, - protocol, and implementation of the domain name system, and - reflect several years of experience. - - - - - -Internet Engineering Task Force [Page 72] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - 6.1.2.1 Resource Records with Zero TTL: RFC-1035 Section 3.2.1 - - All DNS name servers and resolvers MUST properly handle RRs - with a zero TTL: return the RR to the client but do not - cache it. 
- - DISCUSSION: - Zero TTL values are interpreted to mean that the RR can - only be used for the transaction in progress, and - should not be cached; they are useful for extremely - volatile data. - - 6.1.2.2 QCLASS Values: RFC-1035 Section 3.2.5 - - A query with "QCLASS=*" SHOULD NOT be used unless the - requestor is seeking data from more than one class. In - particular, if the requestor is only interested in Internet - data types, QCLASS=IN MUST be used. - - 6.1.2.3 Unused Fields: RFC-1035 Section 4.1.1 - - Unused fields in a query or response message MUST be zero. - - 6.1.2.4 Compression: RFC-1035 Section 4.1.4 - - Name servers MUST use compression in responses. - - DISCUSSION: - Compression is essential to avoid overflowing UDP - datagrams; see Section 6.1.3.2. - - 6.1.2.5 Misusing Configuration Info: RFC-1035 Section 6.1.2 - - Recursive name servers and full-service resolvers generally - have some configuration information containing hints about - the location of root or local name servers. An - implementation MUST NOT include any of these hints in a - response. - - DISCUSSION: - Many implementors have found it convenient to store - these hints as if they were cached data, but some - neglected to ensure that this "cached data" was not - included in responses. This has caused serious - problems in the Internet when the hints were obsolete - or incorrect. - - - - - -Internet Engineering Task Force [Page 73] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - 6.1.3 SPECIFIC ISSUES - - 6.1.3.1 Resolver Implementation - - A name resolver SHOULD be able to multiplex concurrent - requests if the host supports concurrent processes. - - In implementing a DNS resolver, one of two different models - MAY optionally be chosen: a full-service resolver, or a stub - resolver. 
- - - (A) Full-Service Resolver - - A full-service resolver is a complete implementation of - the resolver service, and is capable of dealing with - communication failures, failure of individual name - servers, location of the proper name server for a given - name, etc. It must satisfy the following requirements: - - o The resolver MUST implement a local caching - function to avoid repeated remote access for - identical requests, and MUST time out information - in the cache. - - o The resolver SHOULD be configurable with start-up - information pointing to multiple root name servers - and multiple name servers for the local domain. - This insures that the resolver will be able to - access the whole name space in normal cases, and - will be able to access local domain information - should the local network become disconnected from - the rest of the Internet. - - - (B) Stub Resolver - - A "stub resolver" relies on the services of a recursive - name server on the connected network or a "nearby" - network. This scheme allows the host to pass on the - burden of the resolver function to a name server on - another host. This model is often essential for less - capable hosts, such as PCs, and is also recommended - when the host is one of several workstations on a local - network, because it allows all of the workstations to - share the cache of the recursive name server and hence - reduce the number of domain requests exported by the - local network. - - - -Internet Engineering Task Force [Page 74] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - At a minimum, the stub resolver MUST be capable of - directing its requests to redundant recursive name - servers. Note that recursive name servers are allowed - to restrict the sources of requests that they will - honor, so the host administrator must verify that the - service will be provided. Stub resolvers MAY implement - caching if they choose, but if so, MUST timeout cached - information. 
- - - 6.1.3.2 Transport Protocols - - DNS resolvers and recursive servers MUST support UDP, and - SHOULD support TCP, for sending (non-zone-transfer) queries. - Specifically, a DNS resolver or server that is sending a - non-zone-transfer query MUST send a UDP query first. If the - Answer section of the response is truncated and if the - requester supports TCP, it SHOULD try the query again using - TCP. - - DNS servers MUST be able to service UDP queries and SHOULD - be able to service TCP queries. A name server MAY limit the - resources it devotes to TCP queries, but it SHOULD NOT - refuse to service a TCP query just because it would have - succeeded with UDP. - - Truncated responses MUST NOT be saved (cached) and later - used in such a way that the fact that they are truncated is - lost. - - DISCUSSION: - UDP is preferred over TCP for queries because UDP - queries have much lower overhead, both in packet count - and in connection state. The use of UDP is essential - for heavily-loaded servers, especially the root - servers. UDP also offers additional robustness, since - a resolver can attempt several UDP queries to different - servers for the cost of a single TCP query. - - It is possible for a DNS response to be truncated, - although this is a very rare occurrence in the present - Internet DNS. Practically speaking, truncation cannot - be predicted, since it is data-dependent. The - dependencies include the number of RRs in the answer, - the size of each RR, and the savings in space realized - by the name compression algorithm. As a rule of thumb, - truncation in NS and MX lists should not occur for - answers containing 15 or fewer RRs. - - - -Internet Engineering Task Force [Page 75] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - Whether it is possible to use a truncated answer - depends on the application. A mailer must not use a - truncated MX response, since this could lead to mail - loops. 
- - Responsible practices can make UDP suffice in the vast - majority of cases. Name servers must use compression - in responses. Resolvers must differentiate truncation - of the Additional section of a response (which only - loses extra information) from truncation of the Answer - section (which for MX records renders the response - unusable by mailers). Database administrators should - list only a reasonable number of primary names in lists - of name servers, MX alternatives, etc. - - However, it is also clear that some new DNS record - types defined in the future will contain information - exceeding the 512 byte limit that applies to UDP, and - hence will require TCP. Thus, resolvers and name - servers should implement TCP services as a backup to - UDP today, with the knowledge that they will require - the TCP service in the future. - - By private agreement, name servers and resolvers MAY arrange - to use TCP for all traffic between themselves. TCP MUST be - used for zone transfers. - - A DNS server MUST have sufficient internal concurrency that - it can continue to process UDP queries while awaiting a - response or performing a zone transfer on an open TCP - connection [DNS:2]. - - A server MAY support a UDP query that is delivered using an - IP broadcast or multicast address. However, the Recursion - Desired bit MUST NOT be set in a query that is multicast, - and MUST be ignored by name servers receiving queries via a - broadcast or multicast address. A host that sends broadcast - or multicast DNS queries SHOULD send them only as occasional - probes, caching the IP address(es) it obtains from the - response(s) so it can normally send unicast queries. - - DISCUSSION: - Broadcast or (especially) IP multicast can provide a - way to locate nearby name servers without knowing their - IP addresses in advance. However, general broadcasting - of recursive queries can result in excessive and - unnecessary load on both network and servers. 
- - - - -Internet Engineering Task Force [Page 76] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - 6.1.3.3 Efficient Resource Usage - - The following requirements on servers and resolvers are very - important to the health of the Internet as a whole, - particularly when DNS services are invoked repeatedly by - higher level automatic servers, such as mailers. - - (1) The resolver MUST implement retransmission controls to - insure that it does not waste communication bandwidth, - and MUST impose finite bounds on the resources consumed - to respond to a single request. See [DNS:2] pages 43- - 44 for specific recommendations. - - (2) After a query has been retransmitted several times - without a response, an implementation MUST give up and - return a soft error to the application. - - (3) All DNS name servers and resolvers SHOULD cache - temporary failures, with a timeout period of the order - of minutes. - - DISCUSSION: - This will prevent applications that immediately - retry soft failures (in violation of Section 2.2 - of this document) from generating excessive DNS - traffic. - - (4) All DNS name servers and resolvers SHOULD cache - negative responses that indicate the specified name, or - data of the specified type, does not exist, as - described in [DNS:2]. - - (5) When a DNS server or resolver retries a UDP query, the - retry interval SHOULD be constrained by an exponential - backoff algorithm, and SHOULD also have upper and lower - bounds. - - IMPLEMENTATION: - A measured RTT and variance (if available) should - be used to calculate an initial retransmission - interval. If this information is not available, a - default of no less than 5 seconds should be used. - Implementations may limit the retransmission - interval, but this limit must exceed twice the - Internet maximum segment lifetime plus service - delay at the name server. 
- - (6) When a resolver or server receives a Source Quench for - - - -Internet Engineering Task Force [Page 77] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - a query it has issued, it SHOULD take steps to reduce - the rate of querying that server in the near future. A - server MAY ignore a Source Quench that it receives as - the result of sending a response datagram. - - IMPLEMENTATION: - One recommended action to reduce the rate is to - send the next query attempt to an alternate - server, if there is one available. Another is to - backoff the retry interval for the same server. - - - 6.1.3.4 Multihomed Hosts - - When the host name-to-address function encounters a host - with multiple addresses, it SHOULD rank or sort the - addresses using knowledge of the immediately connected - network number(s) and any other applicable performance or - history information. - - DISCUSSION: - The different addresses of a multihomed host generally - imply different Internet paths, and some paths may be - preferable to others in performance, reliability, or - administrative restrictions. There is no general way - for the domain system to determine the best path. A - recommended approach is to base this decision on local - configuration information set by the system - administrator. - - IMPLEMENTATION: - The following scheme has been used successfully: - - (a) Incorporate into the host configuration data a - Network-Preference List, that is simply a list of - networks in preferred order. This list may be - empty if there is no preference. - - (b) When a host name is mapped into a list of IP - addresses, these addresses should be sorted by - network number, into the same order as the - corresponding networks in the Network-Preference - List. IP addresses whose networks do not appear - in the Network-Preference List should be placed at - the end of the list. 
- - - - - - -Internet Engineering Task Force [Page 78] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - 6.1.3.5 Extensibility - - DNS software MUST support all well-known, class-independent - formats [DNS:2], and SHOULD be written to minimize the - trauma associated with the introduction of new well-known - types and local experimentation with non-standard types. - - DISCUSSION: - The data types and classes used by the DNS are - extensible, and thus new types will be added and old - types deleted or redefined. Introduction of new data - types ought to be dependent only upon the rules for - compression of domain names inside DNS messages, and - the translation between printable (i.e., master file) - and internal formats for Resource Records (RRs). - - Compression relies on knowledge of the format of data - inside a particular RR. Hence compression must only be - used for the contents of well-known, class-independent - RRs, and must never be used for class-specific RRs or - RR types that are not well-known. The owner name of an - RR is always eligible for compression. - - A name server may acquire, via zone transfer, RRs that - the server doesn't know how to convert to printable - format. A resolver can receive similar information as - the result of queries. For proper operation, this data - must be preserved, and hence the implication is that - DNS software cannot use textual formats for internal - storage. - - The DNS defines domain name syntax very generally -- a - string of labels each containing up to 63 8-bit octets, - separated by dots, and with a maximum total of 255 - octets. Particular applications of the DNS are - permitted to further constrain the syntax of the domain - names they use, although the DNS deployment has led to - some applications allowing more general names. In - particular, Section 2.1 of this document liberalizes - slightly the syntax of a legal Internet host name that - was defined in RFC-952 [DNS:4]. 
- - 6.1.3.6 Status of RR Types - - Name servers MUST be able to load all RR types except MD and - MF from configuration files. The MD and MF types are - obsolete and MUST NOT be implemented; in particular, name - servers MUST NOT load these types from configuration files. - - - -Internet Engineering Task Force [Page 79] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - DISCUSSION: - The RR types MB, MG, MR, NULL, MINFO and RP are - considered experimental, and applications that use the - DNS cannot expect these RR types to be supported by - most domains. Furthermore these types are subject to - redefinition. - - The TXT and WKS RR types have not been widely used by - Internet sites; as a result, an application cannot rely - on the existence of a TXT or WKS RR in most - domains. - - 6.1.3.7 Robustness - - DNS software may need to operate in environments where the - root servers or other servers are unavailable due to network - connectivity or other problems. In this situation, DNS name - servers and resolvers MUST continue to provide service for - the reachable part of the name space, while giving temporary - failures for the rest. - - DISCUSSION: - Although the DNS is meant to be used primarily in the - connected Internet, it should be possible to use the - system in networks which are unconnected to the - Internet. Hence implementations must not depend on - access to root servers before providing service for - local names. - - 6.1.3.8 Local Host Table - - DISCUSSION: - A host may use a local host table as a backup or - supplement to the DNS. This raises the question of - which takes precedence, the DNS or the host table; the - most flexible approach would make this a configuration - option. - - Typically, the contents of such a supplementary host - table will be determined locally by the site.
However, - a publicly-available table of Internet hosts is - maintained by the DDN Network Information Center (DDN - NIC), with a format documented in [DNS:4]. This table - can be retrieved from the DDN NIC using a protocol - described in [DNS:5]. It must be noted that this table - contains only a small fraction of all Internet hosts. - Hosts using this protocol to retrieve the DDN NIC host - table should use the VERSION command to check if the - - - -Internet Engineering Task Force [Page 80] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - table has changed before requesting the entire table - with the ALL command. The VERSION identifier should be - treated as an arbitrary string and tested only for - equality; no numerical sequence may be assumed. - - The DDN NIC host table includes administrative - information that is not needed for host operation and - is therefore not currently included in the DNS - database; examples include network and gateway entries. - However, much of this additional information will be - added to the DNS in the future. Conversely, the DNS - provides essential services (in particular, MX records) - that are not available from the DDN NIC host table. - - 6.1.4 DNS USER INTERFACE - - 6.1.4.1 DNS Administration - - This document is concerned with design and implementation - issues in host software, not with administrative or - operational issues. However, administrative issues are of - particular importance in the DNS, since errors in particular - segments of this large distributed database can cause poor - or erroneous performance for many sites. These issues are - discussed in [DNS:6] and [DNS:7]. - - 6.1.4.2 DNS User Interface - - Hosts MUST provide an interface to the DNS for all - application programs running on the host. This interface - will typically direct requests to a system process to - perform the resolver function [DNS:1, 6.1:2].
- - At a minimum, the basic interface MUST support a request for - all information of a specific type and class associated with - a specific name, and it MUST return either all of the - requested information, a hard error code, or a soft error - indication. When there is no error, the basic interface - returns the complete response information without - modification, deletion, or ordering, so that the basic - interface will not need to be changed to accommodate new - data types. - - DISCUSSION: - The soft error indication is an essential part of the - interface, since it may not always be possible to - access particular information from the DNS; see Section - 6.1.3.3. - - - -Internet Engineering Task Force [Page 81] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - A host MAY provide other DNS interfaces tailored to - particular functions, transforming the raw domain data into - formats more suited to these functions. In particular, a - host MUST provide a DNS interface to facilitate translation - between host addresses and host names. - - 6.1.4.3 Interface Abbreviation Facilities - - User interfaces MAY provide a method for users to enter - abbreviations for commonly-used names. Although the - definition of such methods is outside of the scope of the - DNS specification, certain rules are necessary to insure - that these methods allow access to the entire DNS name space - and to prevent excessive use of Internet resources. - - If an abbreviation method is provided, then: - - (a) There MUST be some convention for denoting that a name - is already complete, so that the abbreviation method(s) - are suppressed. A trailing dot is the usual method. - - (b) Abbreviation expansion MUST be done exactly once, and - MUST be done in the context in which the name was - entered. 
- - - DISCUSSION: - For example, if an abbreviation is used in a mail - program for a destination, the abbreviation should be - expanded into a full domain name and stored in the - queued message with an indication that it is already - complete. Otherwise, the abbreviation might be - expanded with a mail system search list, not the - user's, or a name could grow due to repeated - canonicalization attempts interacting with wildcards. - - The two most common abbreviation methods are: - - (1) Interface-level aliases - - Interface-level aliases are conceptually implemented as - a list of alias/domain name pairs. The list can be - per-user or per-host, and separate lists can be - associated with different functions, e.g. one list for - host name-to-address translation, and a different list - for mail domains. When the user enters a name, the - interface attempts to match the name to the alias - component of a list entry, and if a matching entry can - - - -Internet Engineering Task Force [Page 82] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - be found, the name is replaced by the domain name found - in the pair. - - Note that interface-level aliases and CNAMEs are - completely separate mechanisms; interface-level aliases - are a local matter while CNAMEs are an Internet-wide - aliasing mechanism which is a required part of any DNS - implementation. - - (2) Search Lists - - A search list is conceptually implemented as an ordered - list of domain names. When the user enters a name, the - domain names in the search list are used as suffixes to - the user-supplied name, one by one, until a domain name - with the desired associated data is found, or the - search list is exhausted. Search lists often contain - the name of the local host's parent domain or other - ancestor domains. Search lists are often per-user or - per-process. - - It SHOULD be possible for an administrator to disable a - DNS search-list facility.
Administrative denial may be - warranted in some cases, to prevent abuse of the DNS. - - There is danger that a search-list mechanism will - generate excessive queries to the root servers while - testing whether user input is a complete domain name, - lacking a final period to mark it as complete. A - search-list mechanism MUST have one of, and SHOULD have - both of, the following two provisions to prevent this: - - (a) The local resolver/name server can implement - caching of negative responses (see Section - 6.1.3.3). - - (b) The search list expander can require two or more - interior dots in a generated domain name before it - tries using the name in a query to non-local - domain servers, such as the root. - - DISCUSSION: - The intent of this requirement is to avoid - excessive delay for the user as the search list is - tested, and more importantly to prevent excessive - traffic to the root and other high-level servers. - For example, if the user supplied a name "X" and - the search list contained the root as a component, - - - -Internet Engineering Task Force [Page 83] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - a query would have to consult a root server before - the next search list alternative could be tried. - The resulting load seen by the root servers and - gateways near the root would be multiplied by the - number of hosts in the Internet. - - The negative caching alternative limits the effect - to the first time a name is used. The interior - dot rule is simpler to implement but can prevent - easy use of some top-level names. 
- - - 6.1.5 DOMAIN NAME SYSTEM REQUIREMENTS SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e ------------------------------------------------|-----------|-|-|-|-|-|-- -GENERAL ISSUES | | | | | | | - | | | | | | | -Implement DNS name-to-address conversion |6.1.1 |x| | | | | -Implement DNS address-to-name conversion |6.1.1 |x| | | | | -Support conversions using host table |6.1.1 | | |x| | | -Properly handle RR with zero TTL |6.1.2.1 |x| | | | | -Use QCLASS=* unnecessarily |6.1.2.2 | |x| | | | - Use QCLASS=IN for Internet class |6.1.2.2 |x| | | | | -Unused fields zero |6.1.2.3 |x| | | | | -Use compression in responses |6.1.2.4 |x| | | | | - | | | | | | | -Include config info in responses |6.1.2.5 | | | | |x| -Support all well-known, class-indep. types |6.1.3.5 |x| | | | | -Easily expand type list |6.1.3.5 | |x| | | | -Load all RR types (except MD and MF) |6.1.3.6 |x| | | | | -Load MD or MF type |6.1.3.6 | | | | |x| -Operate when root servers, etc. 
unavailable |6.1.3.7 |x| | | | | ------------------------------------------------|-----------|-|-|-|-|-|-- -RESOLVER ISSUES: | | | | | | | - | | | | | | | -Resolver support multiple concurrent requests |6.1.3.1 | |x| | | | -Full-service resolver: |6.1.3.1 | | |x| | | - Local caching |6.1.3.1 |x| | | | | - - - -Internet Engineering Task Force [Page 84] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - Information in local cache times out |6.1.3.1 |x| | | | | - Configurable with starting info |6.1.3.1 | |x| | | | -Stub resolver: |6.1.3.1 | | |x| | | - Use redundant recursive name servers |6.1.3.1 |x| | | | | - Local caching |6.1.3.1 | | |x| | | - Information in local cache times out |6.1.3.1 |x| | | | | -Support for remote multi-homed hosts: | | | | | | | - Sort multiple addresses by preference list |6.1.3.4 | |x| | | | - | | | | | | | ------------------------------------------------|-----------|-|-|-|-|-|-- -TRANSPORT PROTOCOLS: | | | | | | | - | | | | | | | -Support UDP queries |6.1.3.2 |x| | | | | -Support TCP queries |6.1.3.2 | |x| | | | - Send query using UDP first |6.1.3.2 |x| | | | |1 - Try TCP if UDP answers are truncated |6.1.3.2 | |x| | | | -Name server limit TCP query resources |6.1.3.2 | | |x| | | - Punish unnecessary TCP query |6.1.3.2 | | | |x| | -Use truncated data as if it were not |6.1.3.2 | | | | |x| -Private agreement to use only TCP |6.1.3.2 | | |x| | | -Use TCP for zone transfers |6.1.3.2 |x| | | | | -TCP usage not block UDP queries |6.1.3.2 |x| | | | | -Support broadcast or multicast queries |6.1.3.2 | | |x| | | - RD bit set in query |6.1.3.2 | | | | |x| - RD bit ignored by server is b'cast/m'cast |6.1.3.2 |x| | | | | - Send only as occasional probe for addr's |6.1.3.2 | |x| | | | ------------------------------------------------|-----------|-|-|-|-|-|-- -RESOURCE USAGE: | | | | | | | - | | | | | | | -Transmission controls, per [DNS:2] |6.1.3.3 |x| | | | | - Finite bounds per request |6.1.3.3 |x| | | | | -Failure after retries => 
soft error |6.1.3.3 |x| | | | | -Cache temporary failures |6.1.3.3 | |x| | | | -Cache negative responses |6.1.3.3 | |x| | | | -Retries use exponential backoff |6.1.3.3 | |x| | | | - Upper, lower bounds |6.1.3.3 | |x| | | | -Client handle Source Quench |6.1.3.3 | |x| | | | -Server ignore Source Quench |6.1.3.3 | | |x| | | ------------------------------------------------|-----------|-|-|-|-|-|-- -USER INTERFACE: | | | | | | | - | | | | | | | -All programs have access to DNS interface |6.1.4.2 |x| | | | | -Able to request all info for given name |6.1.4.2 |x| | | | | -Returns complete info or error |6.1.4.2 |x| | | | | -Special interfaces |6.1.4.2 | | |x| | | - Name<->Address translation |6.1.4.2 |x| | | | | - | | | | | | | -Abbreviation Facilities: |6.1.4.3 | | |x| | | - - - -Internet Engineering Task Force [Page 85] - - - - -RFC1123 SUPPORT SERVICES -- DOMAINS October 1989 - - - Convention for complete names |6.1.4.3 |x| | | | | - Conversion exactly once |6.1.4.3 |x| | | | | - Conversion in proper context |6.1.4.3 |x| | | | | - Search list: |6.1.4.3 | | |x| | | - Administrator can disable |6.1.4.3 | |x| | | | - Prevention of excessive root queries |6.1.4.3 |x| | | | | - Both methods |6.1.4.3 | |x| | | | ------------------------------------------------|-----------|-|-|-|-|-|-- ------------------------------------------------|-----------|-|-|-|-|-|-- - -1. Unless there is private agreement between particular resolver and - particular server. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 86] - - - - -RFC1123 SUPPORT SERVICES -- INITIALIZATION October 1989 - - - 6.2 HOST INITIALIZATION - - 6.2.1 INTRODUCTION - - This section discusses the initialization of host software - across a connected network, or more generally across an - Internet path. This is necessary for a diskless host, and may - optionally be used for a host with disk drives. 
For a diskless - host, the initialization process is called "network booting" - and is controlled by a bootstrap program located in a boot ROM. - - To initialize a diskless host across the network, there are two - distinct phases: - - (1) Configure the IP layer. - - Diskless machines often have no permanent storage in which - to store network configuration information, so that - sufficient configuration information must be obtained - dynamically to support the loading phase that follows. - This information must include at least the IP addresses of - the host and of the boot server. To support booting - across a gateway, the address mask and a list of default - gateways are also required. - - (2) Load the host system code. - - During the loading phase, an appropriate file transfer - protocol is used to copy the system code across the - network from the boot server. - - A host with a disk may perform the first step, dynamic - configuration. This is important for microcomputers, whose - floppy disks allow network configuration information to be - mistakenly duplicated on more than one host. Also, - installation of new hosts is much simpler if they automatically - obtain their configuration information from a central server, - saving administrator time and decreasing the probability of - mistakes. - - 6.2.2 REQUIREMENTS - - 6.2.2.1 Dynamic Configuration - - A number of protocol provisions have been made for dynamic - configuration. - - o ICMP Information Request/Reply messages - - - -Internet Engineering Task Force [Page 87] - - - - -RFC1123 SUPPORT SERVICES -- INITIALIZATION October 1989 - - - This obsolete message pair was designed to allow a host - to find the number of the network it is on. - Unfortunately, it was useful only if the host already - knew the host number part of its IP address, - information that hosts requiring dynamic configuration - seldom had. 
- - o Reverse Address Resolution Protocol (RARP) [BOOT:4] - - RARP is a link-layer protocol for a broadcast medium - that allows a host to find its IP address given its - link layer address. Unfortunately, RARP does not work - across IP gateways and therefore requires a RARP server - on every network. In addition, RARP does not provide - any other configuration information. - - o ICMP Address Mask Request/Reply messages - - These ICMP messages allow a host to learn the address - mask for a particular network interface. - - o BOOTP Protocol [BOOT:2] - - This protocol allows a host to determine the IP - addresses of the local host and the boot server, the - name of an appropriate boot file, and optionally the - address mask and list of default gateways. To locate a - BOOTP server, the host broadcasts a BOOTP request using - UDP. Ad hoc gateway extensions have been used to - transmit the BOOTP broadcast through gateways, and in - the future the IP Multicasting facility will provide a - standard mechanism for this purpose. - - - The suggested approach to dynamic configuration is to use - the BOOTP protocol with the extensions defined in "BOOTP - Vendor Information Extensions" RFC-1084 [BOOT:3]. RFC-1084 - defines some important general (not vendor-specific) - extensions. In particular, these extensions allow the - address mask to be supplied in BOOTP; we RECOMMEND that the - address mask be supplied in this manner. - - DISCUSSION: - Historically, subnetting was defined long after IP, and - so a separate mechanism (ICMP Address Mask messages) - was designed to supply the address mask to a host. - However, the IP address mask and the corresponding IP - address conceptually form a pair, and for operational - - - -Internet Engineering Task Force [Page 88] - - - - -RFC1123 SUPPORT SERVICES -- INITIALIZATION October 1989 - - - simplicity they ought to be defined at the same time - and by the same mechanism, whether a configuration file - or a dynamic mechanism like BOOTP. 
- - Note that BOOTP is not sufficiently general to specify - the configurations of all interfaces of a multihomed - host. A multihomed host must either use BOOTP - separately for each interface, or configure one - interface using BOOTP to perform the loading, and - perform the complete initialization from a file later. - - Application layer configuration information is expected - to be obtained from files after loading of the system - code. - - 6.2.2.2 Loading Phase - - A suggested approach for the loading phase is to use TFTP - [BOOT:1] between the IP addresses established by BOOTP. - - TFTP to a broadcast address SHOULD NOT be used, for reasons - explained in Section 4.2.3.4. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 89] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - - 6.3 REMOTE MANAGEMENT - - 6.3.1 INTRODUCTION - - The Internet community has recently put considerable effort - into the development of network management protocols. The - result has been a two-pronged approach [MGT:1, MGT:6]: the - Simple Network Management Protocol (SNMP) [MGT:4] and the - Common Management Information Protocol over TCP (CMOT) [MGT:5]. - - In order to be managed using SNMP or CMOT, a host will need to - implement an appropriate management agent. An Internet host - SHOULD include an agent for either SNMP or CMOT. - - Both SNMP and CMOT operate on a Management Information Base - (MIB) that defines a collection of management values. By - reading and setting these values, a remote application may - query and change the state of the managed system. - - A standard MIB [MGT:3] has been defined for use by both - management protocols, using data types defined by the Structure - of Management Information (SMI) defined in [MGT:2]. Additional - MIB variables can be introduced under the "enterprises" and - "experimental" subtrees of the MIB naming space [MGT:2]. 
- - Every protocol module in the host SHOULD implement the relevant - MIB variables. A host SHOULD implement the MIB variables as - defined in the most recent standard MIB, and MAY implement - other MIB variables when appropriate and useful. - - 6.3.2 PROTOCOL WALK-THROUGH - - The MIB is intended to cover both hosts and gateways, although - there may be detailed differences in MIB application to the two - cases. This section contains the appropriate interpretation of - the MIB for hosts. It is likely that later versions of the MIB - will include more entries for host management. - - A managed host must implement the following groups of MIB - object definitions: System, Interfaces, Address Translation, - IP, ICMP, TCP, and UDP. - - The following specific interpretations apply to hosts: - - o ipInHdrErrors - - Note that the error "time-to-live exceeded" can occur in a - host only when it is forwarding a source-routed datagram. - - - -Internet Engineering Task Force [Page 90] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - - o ipOutNoRoutes - - This object counts datagrams discarded because no route - can be found. This may happen in a host if all the - default gateways in the host's configuration are down. - - o ipFragOKs, ipFragFails, ipFragCreates - - A host that does not implement intentional fragmentation - (see "Fragmentation" section of [INTRO:1]) MUST return the - value zero for these three objects. - - o icmpOutRedirects - - For a host, this object MUST always be zero, since hosts - do not send Redirects. - - o icmpOutAddrMaskReps - - For a host, this object MUST always be zero, unless the - host is an authoritative source of address mask - information. - - o ipAddrTable - - For a host, the "IP Address Table" object is effectively a - table of logical interfaces. 
- - o ipRoutingTable - - For a host, the "IP Routing Table" object is effectively a - combination of the host's Routing Cache and the static - route table described in "Routing Outbound Datagrams" - section of [INTRO:1]. - - Within each ipRouteEntry, ipRouteMetric1...4 normally will - have no meaning for a host and SHOULD always be -1, while - ipRouteType will normally have the value "remote". - - If destinations on the connected network do not appear in - the Route Cache (see "Routing Outbound Datagrams section - of [INTRO:1]), there will be no entries with ipRouteType - of "direct". - - - DISCUSSION: - The current MIB does not include Type-of-Service in an - ipRouteEntry, but a future revision is expected to make - - - -Internet Engineering Task Force [Page 91] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - - this addition. - - We also expect the MIB to be expanded to allow the remote - management of applications (e.g., the ability to partially - reconfigure mail systems). Network service applications - such as mail systems should therefore be written with the - "hooks" for remote management. - - 6.3.3 MANAGEMENT REQUIREMENTS SUMMARY - - | | | | |S| | - | | | | |H| |F - | | | | |O|M|o - | | |S| |U|U|o - | | |H| |L|S|t - | |M|O| |D|T|n - | |U|U|M| | |o - | |S|L|A|N|N|t - | |T|D|Y|O|O|t -FEATURE |SECTION | | | |T|T|e ------------------------------------------------|-----------|-|-|-|-|-|-- -Support SNMP or CMOT agent |6.3.1 | |x| | | | -Implement specified objects in standard MIB |6.3.1 | |x| | | | - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 92] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - -7. REFERENCES - - This section lists the primary references with which every - implementer must be thoroughly familiar. It also lists some - secondary references that are suggested additional reading. 
- - INTRODUCTORY REFERENCES: - - - [INTRO:1] "Requirements for Internet Hosts -- Communication Layers," - IETF Host Requirements Working Group, R. Braden, Ed., RFC-1122, - October 1989. - - [INTRO:2] "DDN Protocol Handbook," NIC-50004, NIC-50005, NIC-50006, - (three volumes), SRI International, December 1985. - - [INTRO:3] "Official Internet Protocols," J. Reynolds and J. Postel, - RFC-1011, May 1987. - - This document is republished periodically with new RFC numbers; - the latest version must be used. - - [INTRO:4] "Protocol Document Order Information," O. Jacobsen and J. - Postel, RFC-980, March 1986. - - [INTRO:5] "Assigned Numbers," J. Reynolds and J. Postel, RFC-1010, - May 1987. - - This document is republished periodically with new RFC numbers; - the latest version must be used. - - - TELNET REFERENCES: - - - [TELNET:1] "Telnet Protocol Specification," J. Postel and J. - Reynolds, RFC-854, May 1983. - - [TELNET:2] "Telnet Option Specification," J. Postel and J. Reynolds, - RFC-855, May 1983. - - [TELNET:3] "Telnet Binary Transmission," J. Postel and J. Reynolds, - RFC-856, May 1983. - - [TELNET:4] "Telnet Echo Option," J. Postel and J. Reynolds, RFC-857, - May 1983. - - [TELNET:5] "Telnet Suppress Go Ahead Option," J. Postel and J. - - - -Internet Engineering Task Force [Page 93] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - - Reynolds, RFC-858, May 1983. - - [TELNET:6] "Telnet Status Option," J. Postel and J. Reynolds, RFC- - 859, May 1983. - - [TELNET:7] "Telnet Timing Mark Option," J. Postel and J. Reynolds, - RFC-860, May 1983. - - [TELNET:8] "Telnet Extended Options List," J. Postel and J. - Reynolds, RFC-861, May 1983. - - [TELNET:9] "Telnet End-Of-Record Option," J. Postel, RFC-855, - December 1983. - - [TELNET:10] "Telnet Terminal-Type Option," J. VanBokkelen, RFC-1091, - February 1989. - - This document supercedes RFC-930. - - [TELNET:11] "Telnet Window Size Option," D. Waitzman, RFC-1073, - October 1988. 
- - [TELNET:12] "Telnet Linemode Option," D. Borman, RFC-1116, August - 1989. - - [TELNET:13] "Telnet Terminal Speed Option," C. Hedrick, RFC-1079, - December 1988. - - [TELNET:14] "Telnet Remote Flow Control Option," C. Hedrick, RFC- - 1080, November 1988. - - - SECONDARY TELNET REFERENCES: - - - [TELNET:15] "Telnet Protocol," MIL-STD-1782, U.S. Department of - Defense, May 1984. - - This document is intended to describe the same protocol as RFC- - 854. In case of conflict, RFC-854 takes precedence, and the - present document takes precedence over both. - - [TELNET:16] "SUPDUP Protocol," M. Crispin, RFC-734, October 1977. - - [TELNET:17] "Telnet SUPDUP Option," M. Crispin, RFC-736, October - 1977. - - [TELNET:18] "Data Entry Terminal Option," J. Day, RFC-732, June 1977. - - - -Internet Engineering Task Force [Page 94] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - - [TELNET:19] "TELNET Data Entry Terminal option -- DODIIS - Implementation," A. Yasuda and T. Thompson, RFC-1043, February - 1988. - - - FTP REFERENCES: - - - [FTP:1] "File Transfer Protocol," J. Postel and J. Reynolds, RFC- - 959, October 1985. - - [FTP:2] "Document File Format Standards," J. Postel, RFC-678, - December 1974. - - [FTP:3] "File Transfer Protocol," MIL-STD-1780, U.S. Department of - Defense, May 1984. - - This document is based on an earlier version of the FTP - specification (RFC-765) and is obsolete. - - - TFTP REFERENCES: - - - [TFTP:1] "The TFTP Protocol Revision 2," K. Sollins, RFC-783, June - 1981. - - - MAIL REFERENCES: - - - [SMTP:1] "Simple Mail Transfer Protocol," J. Postel, RFC-821, August - 1982. - - [SMTP:2] "Standard For The Format of ARPA Internet Text Messages," - D. Crocker, RFC-822, August 1982. - - This document obsoleted an earlier specification, RFC-733. - - [SMTP:3] "Mail Routing and the Domain System," C. Partridge, RFC- - 974, January 1986. - - This RFC describes the use of MX records, a mandatory extension - to the mail delivery process. 
- - [SMTP:4] "Duplicate Messages and SMTP," C. Partridge, RFC-1047, - February 1988. - - - - -Internet Engineering Task Force [Page 95] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - - [SMTP:5a] "Mapping between X.400 and RFC 822," S. Kille, RFC-987, - June 1986. - - [SMTP:5b] "Addendum to RFC-987," S. Kille, RFC-???, September 1987. - - The two preceding RFC's define a proposed standard for - gatewaying mail between the Internet and the X.400 environments. - - [SMTP:6] "Simple Mail Transfer Protocol," MIL-STD-1781, U.S. - Department of Defense, May 1984. - - This specification is intended to describe the same protocol as - does RFC-821. However, MIL-STD-1781 is incomplete; in - particular, it does not include MX records [SMTP:3]. - - [SMTP:7] "A Content-Type Field for Internet Messages," M. Sirbu, - RFC-1049, March 1988. - - - DOMAIN NAME SYSTEM REFERENCES: - - - [DNS:1] "Domain Names - Concepts and Facilities," P. Mockapetris, - RFC-1034, November 1987. - - This document and the following one obsolete RFC-882, RFC-883, - and RFC-973. - - [DNS:2] "Domain Names - Implementation and Specification," RFC-1035, - P. Mockapetris, November 1987. - - - [DNS:3] "Mail Routing and the Domain System," C. Partridge, RFC-974, - January 1986. - - - [DNS:4] "DoD Internet Host Table Specification," K. Harrenstein, - RFC-952, M. Stahl, E. Feinler, October 1985. - - SECONDARY DNS REFERENCES: - - - [DNS:5] "Hostname Server," K. Harrenstein, M. Stahl, E. Feinler, - RFC-953, October 1985. - - [DNS:6] "Domain Administrators Guide," M. Stahl, RFC-1032, November - 1987. - - - - -Internet Engineering Task Force [Page 96] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - - [DNS:7] "Domain Administrators Operations Guide," M. Lottor, RFC- - 1033, November 1987. - - [DNS:8] "The Domain Name System Handbook," Vol. 4 of Internet - Protocol Handbook, NIC 50007, SRI Network Information Center, - August 1989. 
- - - SYSTEM INITIALIZATION REFERENCES: - - - [BOOT:1] "Bootstrap Loading Using TFTP," R. Finlayson, RFC-906, June - 1984. - - [BOOT:2] "Bootstrap Protocol (BOOTP)," W. Croft and J. Gilmore, RFC- - 951, September 1985. - - [BOOT:3] "BOOTP Vendor Information Extensions," J. Reynolds, RFC- - 1084, December 1988. - - Note: this RFC revised and obsoleted RFC-1048. - - [BOOT:4] "A Reverse Address Resolution Protocol," R. Finlayson, T. - Mann, J. Mogul, and M. Theimer, RFC-903, June 1984. - - - MANAGEMENT REFERENCES: - - - [MGT:1] "IAB Recommendations for the Development of Internet Network - Management Standards," V. Cerf, RFC-1052, April 1988. - - [MGT:2] "Structure and Identification of Management Information for - TCP/IP-based internets," M. Rose and K. McCloghrie, RFC-1065, - August 1988. - - [MGT:3] "Management Information Base for Network Management of - TCP/IP-based internets," M. Rose and K. McCloghrie, RFC-1066, - August 1988. - - [MGT:4] "A Simple Network Management Protocol," J. Case, M. Fedor, - M. Schoffstall, and C. Davin, RFC-1098, April 1989. - - [MGT:5] "The Common Management Information Services and Protocol - over TCP/IP," U. Warrier and L. Besaw, RFC-1095, April 1989. - - [MGT:6] "Report of the Second Ad Hoc Network Management Review - Group," V. Cerf, RFC-1109, August 1989. - - - -Internet Engineering Task Force [Page 97] - - - - -RFC1123 SUPPORT SERVICES -- MANAGEMENT October 1989 - - -Security Considerations - - There are many security issues in the application and support - programs of host software, but a full discussion is beyond the scope - of this RFC. Security-related issues are mentioned in sections - concerning TFTP (Sections 4.2.1, 4.2.3.4, 4.2.3.5), the SMTP VRFY and - EXPN commands (Section 5.2.3), the SMTP HELO command (5.2.5), and the - SMTP DATA command (Section 5.2.8). 
- -Author's Address - - Robert Braden - USC/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292-6695 - - Phone: (213) 822 1511 - - EMail: Braden@ISI.EDU - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Engineering Task Force [Page 98] - diff --git a/kernel/picotcp/RFC/rfc1146.txt b/kernel/picotcp/RFC/rfc1146.txt deleted file mode 100644 index c65b9d2..0000000 --- a/kernel/picotcp/RFC/rfc1146.txt +++ /dev/null @@ -1,283 +0,0 @@ - - - - - - -Network Working Group J. Zweig -Request for Comments: 1146 UIUC -Obsoletes: RFC 1145 C. Partridge - BBN - March 1990 - - - TCP Alternate Checksum Options - -Status of This Memo - - This memo suggests a pair of TCP options to allow use of alternate - data checksum algorithms in the TCP header. The use of these options - is experimental, and not recommended for production use. - - Note: This RFC corrects errors introduced in the editing process in - RFC 1145. - - Distribution of this memo is unlimited. - -Introduction - - Some members of the networking community have expressed interest in - using checksum-algorithms with different error detection and - correction properties than the standard TCP checksum. The option - described in this memo provides a mechanism to negotiate the use of - an alternate checksum at connection-establishment time, as well as a - mechanism to carry additional checksum information for algorithms - that utilize checksums that are longer than 16 bits. - -Definition of the Options - - The TCP Alternate Checksum Request Option may be sent in a SYN - segment by a TCP to indicate that the TCP is prepared to both - generate and receive checksums based on an alternate algorithm. - During communication, the alternate checksum replaces the regular TCP - checksum in the checksum field of the TCP header. 
Should the - alternate checksum require more than 2 octets to transmit, the - checksum may either be moved into a TCP Alternate Checksum Data - Option and the checksum field of the TCP header be sent as 0, or the - data may be split between the header field and the option. Alternate - checksums are computed over the same data as the regular TCP checksum - (see TCP Alternate Checksum Data Option discussion below). - -TCP Alternate Checksum Request Option - - The format of the TCP Alternate Checksum Request Option is: - - - - -Zweig & Partridge [Page 1] - -RFC 1146 TCP Alternate Checksum Options March 1990 - - - +----------+----------+----------+ - | Kind=14 | Length=3 | chksum | - +----------+----------+----------+ - - Here chksum is a number identifying the type of checksum to be used. - - The currently defined values of chksum are: - - 0 -- TCP checksum - 1 -- 8-bit Fletcher's algorithm (see Appendix I) - 2 -- 16-bit Fletcher's algorithm (see Appendix II) - - Note that the 8-bit Fletcher algorithm gives a 16-bit checksum and - the 16-bit algorithm gives a 32-bit checksum. - - Alternate checksum negotiation proceeds as follows: - - A SYN segment used to originate a connection may contain the - Alternate Checksum Request Option, which specifies an alternate - checksum-calculation algorithm to be used for the connection. The - acknowledging SYN-ACK segment may also carry the option. - - If both SYN segments carry the Alternate Checksum Request option, - and both specify the same algorithm, that algorithm must be used - for the remainder of the connection. Otherwise, the standard TCP - checksum algorithm must be used for the entire connection. Thus, - for example, if one TCP specifies type 1 checksums, and the other - specifies type 2 checksums, then they will use type 0 (the regular - TCP checksum). Note that in practice, one TCP will typically be - responding to the other's SYN, and thus either accepting or - rejecting the proposed alternate checksum algorithm. 
- - Any segment with the SYN bit set must always use the standard TCP - checksum algorithm. Thus the SYN segment will always be - understood by the receiving TCP. The alternate checksum must not - be used until the first non-SYN segment. In addition, because RST - segments may also be received or sent without complete state - information, any segment with the RST bit set must use the - standard TCP checksum. - - The option may not be sent in any segment that does not have the - SYN bit set. - - An implementation of TCP which does not support the option should - silently ignore it (as RFC 1122 requires). Ignoring the option - will force any TCP attempting to use an alternate checksum to use - the standard TCP checksum algorithm, thus ensuring - interoperability. - - - -Zweig & Partridge [Page 2] - -RFC 1146 TCP Alternate Checksum Options March 1990 - - -TCP Alternate Checksum Data Option - - The format of the TCP Alternate Checksum Data Option is: - - +---------+---------+---------+ +---------+ - | Kind=15 |Length=N | data | ... | data | - +---------+---------+---------+ +---------+ - - This field is used only when the alternate checksum that is - negotiated is longer than 16 bits. These checksums will not fit in - the checksum field of the TCP header and thus at least part of them - must be put in an option. Whether the checksum is split between the - checksum field in the TCP header and the option or the entire - checksum is placed in the option is determined on a checksum by - checksum basis. - - The length of this option will depend on the choice of alternate - checksum algorithm for this connection. - - While computing the alternate checksum, the TCP checksum field and - the data portion TCP Alternate Checksum Data Option are replaced with - zeros. 
- - An otherwise acceptable segment carrying this option on a connection - using a 16-bit checksum algorithm, or carrying this option with an - inappropriate number of data octets for the chosen alternate checksum - algorithm is in error and must be discarded; a RST-segment must be - generated, and the connection aborted. - - Note the requirement above that RST and SYN segments must always use - the standard TCP checksum. - -APPENDIX I: The 8-bit Fletcher Checksum Algorithm - - The 8-bit Fletcher Checksum Algorithm is calculated over a sequence - of data octets (call them D[1] through D[N]) by maintaining 2 - unsigned 1's-complement 8-bit accumulators A and B whose contents are - initially zero, and performing the following loop where i ranges from - 1 to N: - - A := A + D[i] - B := B + A - - It can be shown that at the end of the loop A will contain the 8-bit - 1's complement sum of all octets in the datagram, and that B will - contain (N)D[1] + (N-1)D[2] + ... + D[N]. - - The octets covered by this algorithm should be the same as those over - - - -Zweig & Partridge [Page 3] - -RFC 1146 TCP Alternate Checksum Options March 1990 - - - which the standard TCP checksum calculation is performed, with the - pseudoheader being D[1] through D[12] and the TCP header beginning at - D[13]. Note that, for purposes of the checksum computation, the - checksum field itself must be equal to zero. - - At the end of the loop, the A goes in the first byte of the TCP - checksum and B goes in the second byte. - - Note that, unlike the OSI version of the Fletcher checksum, this - checksum does not adjust the check bytes so that the receiver - checksum is 0. - - There are a number of much faster algorithms for calculating the two - octets of the 8-bit Fletcher checksum. For more information see - [Sklower89], [Nakassis88] and [Fletcher82]. Naturally, any - computation which computes the same number as would be calculated by - the loop above may be used to calculate the checksum. 
One advantage - of the Fletcher algorithms over the standard TCP checksum algorithm - is the ability to detect the transposition of octets/words of any - size within a datagram. - -APPENDIX II: The 16-bit Fletcher Checksum Algorithm - - The 16-bit Fletcher Checksum algorithm proceeds in precisely the same - manner as the 8-bit checksum algorithm,, except that A, B and the - D[i] are 16-bit quantities. It is necessary (as it is with the - standard TCP checksum algorithm) to pad a datagram containing an odd - number of octets with a zero octet. - - Result A should be placed in the TCP header checksum field and Result - B should appear in an TCP Alternate Checksum Data option. This - option must be present in every TCP header. The two bytes reserved - for B should be set to zero during the calculation of the checksum. - - The checksum field of the TCP header shall contain the contents of A - at the end of the loop. The TCP Alternate Checksum Data option must - be present and contain the contents of B at the end of the loop. - -BIBLIOGRAPHY: - - [BrBoPa89] Braden, R., Borman, D., and C. Partridge, "Computing - the Internet Checksum", ACM Computer Communication - Review, Vol. 19, No. 2, pp. 86-101, April 1989. - [Note that this includes Plummer, W. "IEN-45: TCP - Checksum Function Design" (1978) as an appendix.] - - [Fletcher82] Fletcher, J., "An Arithmetic Checksum for Serial - Transmissions", IEEE Transactions on Communication, - - - -Zweig & Partridge [Page 4] - -RFC 1146 TCP Alternate Checksum Options March 1990 - - - Vol. COM-30, No. 1, pp. 247-252, January 1982. - - [Nakassis88] Nakassis, T., "Fletcher's Error Detection Algorithm: - How to implement it efficiently and how to avoid the - most common pitfalls", ACM Computer Communication - Review, Vol. 18, No. 5, pp. 86-94, October 1988. - - [Sklower89] Sklower, K., "Improving the Efficiency of the OSI - Checksum Calculation", ACM Computer Communication - Review, Vol. 19, No. 5, pp. 32-43, October 1989. 
- -Security Considerations - - Security issues are not addressed in this memo. - -Authors' Addresses - - Johnny Zweig - Digital Computer Lab - University of Illinois (UIUC) - 1304 West Springfield Avenue - CAMPUS MC 258 - Urbana, IL 61801 - - Phone: (217) 333-7937 - - EMail: zweig@CS.UIUC.EDU - - - Craig Partridge - Bolt Beranek and Newman Inc. - 50 Moulton Street - Cambridge, MA 02138 - - Phone: (617) 873-2459 - - EMail: craig@BBN.COM - - - - - - - - - - - - - - -Zweig & Partridge [Page 5] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1156.txt b/kernel/picotcp/RFC/rfc1156.txt deleted file mode 100644 index 6a72a2e..0000000 --- a/kernel/picotcp/RFC/rfc1156.txt +++ /dev/null @@ -1,5099 +0,0 @@ - - - - - - -Network Working Group K. McCloghrie -Request For Comments: 1156 Hughes LAN Systems -Obsoletes: RFC 1066 M. Rose - Performance Systems International - May 1990 - - - Management Information Base for Network Management - of TCP/IP-based internets - - Table of Contents - - 1. Status of this Memo ................................... 1 - 2. IAB Policy Statement .................................. 2 - 3. Introduction .......................................... 2 - 4. Objects ............................................... 6 - 4.1 Object Groups ........................................ 6 - 4.2 Format of Definitions ................................ 7 - 5. Object Definitions .................................... 8 - 5.1 The System Group ..................................... 9 - 5.2 The Interfaces Group ................................. 11 - 5.2.1 The Interfaces Table ............................... 11 - 5.3 The Address Translation Group ........................ 23 - 5.4 The IP Group ......................................... 26 - 5.4.1 The IP Address Table ............................... 34 - 5.4.2 The IP Routing Table ............................... 36 - 5.5 The ICMP Group ....................................... 
43 - 5.6 The TCP Group ........................................ 53 - 5.7 The UDP Group ........................................ 62 - 5.8 The EGP Group ........................................ 64 - 5.8.1 The EGP Neighbor Table ............................. 65 - 6. Definitions ........................................... 68 - 7. Acknowledgements ...................................... 89 - 8. References ............................................ 90 - 9. Security Considerations................................ 91 - 10. Authors' Addresses.................................... 91 - -1. Status of this Memo - - This RFC is a re-release of RFC 1066, with a changed "Status of this - Memo", "IAB Policy Statement", and "Introduction" sections plus a few - minor typographical corrections. The technical content of the - document is unchanged from RFC 1066. - - This memo provides the initial version of the Management Information - Base (MIB) for use with network management protocols in TCP/IP-based - internets in the short-term. In particular, together with its - companion memos which describe the structure of management - - - -McCloghrie & Rose [Page 1] - -RFC 1156 MIB May 1990 - - - information along with the initial network management protocol, these - documents provide a simple, workable architecture and system for - managing TCP/IP-based internets and in particular the Internet. - - This memo specifies a Standard Protocol for the Internet community. - TCP/IP implementations in the Internet which are network manageable - are expected to adopt and implement this specification. - - The Internet Activities Board recommends that all IP and TCP - implementations be network manageable. This implies implementation - of the Internet MIB (RFC-1156) and at least one of the two - recommended management protocols SNMP (RFC-1157) or CMOT (RFC-1095). - It should be noted that, at this time, SNMP is a full Internet - standard and CMOT is a draft standard. 
See also the Host and Gateway - Requirements RFCs for more specific information on the applicability - of this standard. - - Please refer to the latest edition of the "IAB Official Protocol - Standards" RFC for current information on the state and status of - standard Internet protocols. - - Distribution of this memo is unlimited. - -2. IAB Policy Statement - - This MIB specification is the first edition of an evolving document - defining variables needed for monitoring and control of various - components of the Internet. Not all groups of defined variables are - mandatory for all Internet components. - - For example, the EGP group is mandatory for gateways using EGP but - not for hosts which should not be running EGP. Similarly, the TCP - group is mandatory for hosts running TCP but not for gateways which - aren't running it. What IS mandatory, however, is that all variables - of a group be supported if any element of the group is supported. - - It is expected that additional MIB groups and variables will be - defined over time to accommodate the monitoring and control needs of - new or changing components of the Internet. The responsible working - group(s) will continue to refine this specification. - -3. Introduction - - As reported in RFC 1052, IAB Recommendations for the Development of - Internet Network Management Standards [1], the Internet Activities - Board has directed the Internet Engineering Task Force (IETF) to - create two new working groups in the area of network management. One - group was charged with the further specification and definition of - - - -McCloghrie & Rose [Page 2] - -RFC 1156 MIB May 1990 - - - elements to be included in the Management Information Base. The - other was charged with defining the modifications to the Simple - Network Management Protocol (SNMP) to accommodate the short-term - needs of the network vendor and operator communities. 
In the long- - term, the use of the OSI network management framework was to be - examined using the ISO CMIS/CMIP [2,3] framework as a basis. Two - documents were produced to define the management information: RFC - 1065, which defined the Structure of Management Information (SMI) - [4], and RFC 1066, which defined the Management Information Base - (MIB) [5]. Both of these documents were designed so as to be - compatible with both the SNMP and the OSI network management - framework. - - This strategy was quite successful in the short-term: Internet-based - network management technology was fielded, by both the research and - commercial communities, within a few months. As a result of this, - portions of the Internet community became network manageable in a - timely fashion. - - As reported in RFC 1109, Report of the Second Ad Hoc Network - Management Review Group [6], the requirements of the SNMP and the OSI - network management frameworks were more different than anticipated. - As such, the requirement for compatibility between the SMI/MIB and - both frameworks was suspended. - - The IAB has designated the SNMP, SMI, and the initial Internet MIB to - be full "Standard Protocols" with "Recommended" status. By this - action, the IAB recommends that all IP and TCP implementations be - network manageable and that the implementations that are network - manageable are expected to adopt and implement the SMI, MIB, and - SNMP. 
- - As such, the current network management framework for TCP/IP- based - internets consists of: Structure and Identification of Management - Information for TCP/IP-based Internets, which describes how managed - objects contained in the MIB are defined as set forth in RFC 1155 - [7]; Management Information Base for Network Management of TCP/IP- - based Internets, which describes the managed objects contained in the - MIB as set forth in this memo; and, the Simple Network Management - Protocol, which defines the protocol used to manage these objects, as - set forth in RFC 1157 [8]. - - The IAB also urged the working groups to be "extremely sensitive to - the need to keep SNMP simple," and recommends that the MIB working - group take as its starting inputs the MIB definitions found in the - High-Level Entity Management Systems (HEMS) RFC 1024 [9], the initial - SNMP specification [10], and the CMIS/CMIP memos [11,12]. - - - - -McCloghrie & Rose [Page 3] - -RFC 1156 MIB May 1990 - - - Thus, the list of managed objects defined here, has been derived by - taking only those elements which are considered essential. Since - such elements are essential, there is no need to allow the - implementation of individual objects, to be optional. Rather, all - compliant implementations will contain all applicable (see below) - objects defined in this memo. - - This approach of taking only the essential objects is NOT - restrictive, since the SMI defined in the companion memo provides - three extensibility mechanisms: one, the addition of new standard - objects through the definitions of new versions of the MIB; two, the - addition of widely-available but non-standard objects through the - multilateral subtree; and three, the addition of private objects - through the enterprises subtree. Such additional objects can not only - be used for vendor-specific elements, but also for experimentation as - required to further the knowledge of which other objects are - essential. 
- - The primary criterion for being considered essential was for an - object to be contained in all of the above referenced MIB - definitions. A few other objects have been included, but only if the - MIB working group believed they are truly essential. The detailed - list of criteria against which potential inclusions in this (initial) - MIB were considered, was: - - 1) An object needed to be essential for either fault or - configuration management. - - 2) Only weak control objects were permitted (by weak, it - is meant that tampering with them can do only limited - damage). This criterion reflects the fact that the - current management protocols are not sufficiently secure - to do more powerful control operations. - - 3) Evidence of current use and utility was required. - - 4) An attempt was made to limit the number of objects to - about 100 to make it easier for vendors to fully - instrument their software. - - 5) To avoid redundant variables, it was required that no - object be included that can be derived from others in the - MIB. - - 6) Implementation specific objects (e.g., for BSD UNIX) - were excluded. - - 7) It was agreed to avoid heavily instrumenting critical - - - -McCloghrie & Rose [Page 4] - -RFC 1156 MIB May 1990 - - - sections of code. The general guideline was one counter - per critical section per layer. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 5] - -RFC 1156 MIB May 1990 - - -4. Objects - - Managed objects are accessed via a virtual information store, termed - the Management Information Base or MIB. Objects in the MIB are - defined using Abstract Syntax Notation One (ASN.1) [13]. - - The mechanisms used for describing these objects are specified in the - companion memo. In particular, each object has a name, a syntax, and - an encoding. The name is an object identifier, an administratively - assigned name, which specifies an object type. 
The object type - together with an object instance serves to uniquely identify a - specific instantiation of the object. For human convenience, we - often use a textual string, termed the OBJECT DESCRIPTOR, to also - refer to the object type. - - The syntax of an object type defines the abstract data structure - corresponding to that object type. The ASN.1 language is used for - this purpose. However, the companion memo purposely restricts the - ASN.1 constructs which may be used. These restrictions are - explicitly made for simplicity. - - The encoding of an object type is simply how that object type is - represented using the object type's syntax. Implicitly tied to the - notion of an object type's syntax and encoding is how the object type - is represented when being transmitted on the network. This memo - specifies the use of the basic encoding rules of ASN.1 [14]. - -4.1. Object Groups - - Since this list of managed objects contains only the essential - elements, there is no need to allow individual objects to be - optional. Rather, the objects are arranged into the following - groups: - - - System - - Interfaces - - Address Translation - - IP - - ICMP - - TCP - - UDP - - EGP - - There are two reasons for defining these groups: one, to provide a - means of assigning object identifiers; two, to provide a method for - implementations of managed agents to know which objects they must - implement. This method is as follows: if the semantics of a group is - applicable to an implementation, then it must implement all objects - - - -McCloghrie & Rose [Page 6] - -RFC 1156 MIB May 1990 - - - in that group. For example, an implementation must implement the EGP - group if and only if it implements the EGP protocol. - -4.2. Format of Definitions - - The next section contains the specification of all object types - contained in the MIB. 
Following the conventions of the companion - memo, the object types are defined using the following fields: - - OBJECT: - ------- - A textual name, termed the OBJECT DESCRIPTOR, for the - object type, along with its corresponding OBJECT - IDENTIFIER. - - Syntax: - The abstract syntax for the object type, presented using - ASN.1. This must resolve to an instance of the ASN.1 - type ObjectSyntax defined in the SMI. - - Definition: - A textual description of the semantics of the object - type. Implementations should ensure that their - interpretation of the object type fulfills this - definition since this MIB is intended for use in multi- - vendor environments. As such it is vital that object - types have consistent meaning across all machines. - - Access: - One of read-only, read-write, write-only, or - not-accessible. - - Status: - One of mandatory, optional, or obsolete. - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 7] - -RFC 1156 MIB May 1990 - - -5. Object Definitions - - RFC1156-MIB - - DEFINITIONS ::= BEGIN - - IMPORTS - mgmt, OBJECT-TYPE, NetworkAddress, IpAddress, - Counter, Gauge, TimeTicks - FROM RFC1155-SMI; - - mib OBJECT IDENTIFIER ::= { mgmt 1 } - - system OBJECT IDENTIFIER ::= { mib 1 } - interfaces OBJECT IDENTIFIER ::= { mib 2 } - at OBJECT IDENTIFIER ::= { mib 3 } - ip OBJECT IDENTIFIER ::= { mib 4 } - icmp OBJECT IDENTIFIER ::= { mib 5 } - tcp OBJECT IDENTIFIER ::= { mib 6 } - udp OBJECT IDENTIFIER ::= { mib 7 } - egp OBJECT IDENTIFIER ::= { mib 8 } - - END - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 8] - -RFC 1156 MIB May 1990 - - -5.1. The System Group - - Implementation of the System group is mandatory for all - systems. - - OBJECT: - ------- - sysDescr { system 1 } - - Syntax: - OCTET STRING - - Definition: - A textual description of the entity. 
This value should - include the full name and version identification of the - system's hardware type, software operating-system, and - networking software. It is mandatory that this only - contain printable ASCII characters. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - sysObjectID { system 2 } - - Syntax: - OBJECT IDENTIFIER - - Definition: - The vendor's authoritative identification of the network - management subsystem contained in the entity. This value - is allocated within the SMI enterprises subtree - (1.3.6.1.4.1) and provides an easy and unambiguous means - for determining "what kind of box" is being managed. For - example, if vendor "Flintstones, Inc." was assigned the - subtree 1.3.6.1.4.1.42, it could assign the identifier - 1.3.6.1.4.1.42.1.1 to its "Fred Router". - - Access: - read-only. - - Status: - mandatory. - - - -McCloghrie & Rose [Page 9] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - sysUpTime { system 3 } - - Syntax: - TimeTicks - - Definition: - The time (in hundredths of a second) since the network - management portion of the system was last re-initialized. - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 10] - -RFC 1156 MIB May 1990 - - -5.2. The Interfaces Group - - Implementation of the Interfaces group is mandatory for all - systems. - - OBJECT: - ------- - ifNumber { interfaces 1 } - - Syntax: - INTEGER - - Definition: - The number of network interfaces (regardless of their - current state) on which this system can send/receive IP - datagrams. - - Access: - read-only. - - Status: - mandatory. - -5.2.1. The Interfaces Table - - OBJECT: - ------- - ifTable { interfaces 2 } - - Syntax: - SEQUENCE OF IfEntry - - Definition: - A list of interface entries. The number of entries is - given by the value of ifNumber. - - Access: - read-write. - - Status: - mandatory. 
- - OBJECT: - ------- - ifEntry { ifTable 1 } - - Syntax: - IfEntry ::= SEQUENCE { - - - -McCloghrie & Rose [Page 11] - -RFC 1156 MIB May 1990 - - - ifIndex - INTEGER, - ifDescr - OCTET STRING, - ifType - INTEGER, - ifMtu - INTEGER, - ifSpeed - Gauge, - ifPhysAddress - OCTET STRING, - ifAdminStatus - INTEGER, - ifOperStatus - INTEGER, - ifLastChange - TimeTicks, - ifInOctets - Counter, - ifInUcastPkts - Counter, - ifInNUcastPkts - Counter, - ifInDiscards - Counter, - ifInErrors - Counter, - ifInUnknownProtos - Counter, - ifOutOctets - Counter, - ifOutUcastPkts - Counter, - ifOutNUcastPkts - Counter, - ifOutDiscards - Counter, - ifOutErrors - Counter, - ifOutQLen - Gauge - } - - Definition: - An interface entry containing objects at the subnetwork - layer and below for a particular interface. - - - - -McCloghrie & Rose [Page 12] - -RFC 1156 MIB May 1990 - - - Access: - read-write. - - Status: - mandatory. - - - We now consider the individual components of each interface - entry: - - - OBJECT: - ------- - ifIndex { ifEntry 1 } - - Syntax: - INTEGER - - Definition: - A unique value for each interface. Its value ranges - between 1 and the value of ifNumber. The value for each - interface must remain constant at least from one re- - initialization of the entity's network management system - to the next re-initialization. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifDescr { ifEntry 2 } - - Syntax: - OCTET STRING - - Definition: - A text string containing information about the interface. - This string should include the name of the manufacturer, - the product name and the version of the hardware - interface. The string is intended for presentation to a - human; it must not contain anything but printable ASCII - characters. - - - - - -McCloghrie & Rose [Page 13] - -RFC 1156 MIB May 1990 - - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - ifType { ifEntry 3 } - - Syntax: - INTEGER { - other(1), -- none of the following - regular1822(2), - hdh1822(3), - ddn-x25(4), - rfc877-x25(5), - ethernet-csmacd(6), - iso88023-csmacd(7), - iso88024-tokenBus(8), - iso88025-tokenRing(9), - iso88026-man(10), - starLan(11), - proteon-10MBit(12), - proteon-80MBit(13), - hyperchannel(14), - fddi(15), - lapb(16), - sdlc(17), - t1-carrier(18), - cept(19), -- european equivalent of T-1 - basicIsdn(20), - primaryIsdn(21), - -- proprietary serial - propPointToPointSerial(22) - } - - Definition: - The type of interface, distinguished according to the - physical/link/network protocol(s) immediately "below" IP - in the protocol stack. - - Access: - read-only. - - Status: - mandatory. - - - -McCloghrie & Rose [Page 14] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ifMtu { ifEntry 4 } - - Syntax: - INTEGER - - Definition: - The size of the largest IP datagram which can be - sent/received on the interface, specified in octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifSpeed { ifEntry 5 } - - Syntax: - Gauge - - Definition: - An estimate of the interface's current bandwidth in bits - per second. For interfaces which do not vary in - bandwidth or for those where no accurate estimation can - be made, this object should contain the nominal - bandwidth. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifPhysAddress { ifEntry 6 } - - Syntax: - OCTET STRING - - Definition: - The interface's address at the protocol layer immediately - - - -McCloghrie & Rose [Page 15] - -RFC 1156 MIB May 1990 - - - "below" IP in the protocol stack. For interfaces which - do not have such an address (e.g., a serial line), this - object should contain an octet string of zero length. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - ifAdminStatus { ifEntry 7 } - - Syntax: - INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - - Definition: - The desired state of the interface. The testing(3) state - indicates that no operational packets can be passed. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ifOperStatus { ifEntry 8 } - - Syntax: - INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - - Definition: - The current operational state of the interface. The - testing(3) state indicates that no operational packets - can be passed. - - - -McCloghrie & Rose [Page 16] - -RFC 1156 MIB May 1990 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifLastChange { ifEntry 9 } - - Syntax: - TimeTicks - - Definition: - The value of sysUpTime at the time the interface entered - its current operational state. If the current state was - entered prior to the last re-initialization of the local - network management subsystem, then this object contains a - zero value. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInOctets { ifEntry 10 } - - Syntax: - Counter - - Definition: - The total number of octets received on the interface, - including framing characters. - - Access: - read-only. - - Status: - mandatory. - - - - - - - -McCloghrie & Rose [Page 17] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ifInUcastPkts { ifEntry 11 } - - Syntax: - Counter - - Definition: - The number of (subnet) unicast packets delivered to a - higher-layer protocol. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInNUcastPkts { ifEntry 12 } - - Syntax: - Counter - - Definition: - The number of non-unicast (i.e., subnet broadcast or - subnet multicast) packets delivered to a higher-layer - protocol. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - ifInDiscards { ifEntry 13 } - - Syntax: - Counter - - Definition: - The number of inbound packets which were chosen to be - discarded even though no errors had been detected to - prevent their being deliverable to a higher-layer - - - -McCloghrie & Rose [Page 18] - -RFC 1156 MIB May 1990 - - - protocol. One possible reason for discarding such a - packet could be to free up buffer space. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInErrors { ifEntry 14 } - - Syntax: - Counter - - Definition: - The number of inbound packets that contained errors - preventing them from being deliverable to a higher-layer - protocol. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifInUnknownProtos { ifEntry 15 } - - Syntax: - Counter - - Definition: - The number of packets received via the interface which - were discarded because of an unknown or unsupported - protocol. - - Access: - read-only. - - Status: - mandatory. - - - - - -McCloghrie & Rose [Page 19] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ifOutOctets { ifEntry 16 } - - Syntax: - Counter - - Definition: - The total number of octets transmitted out of the - interface, including framing characters. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutUcastPkts { ifEntry 17 } - - Syntax: - Counter - - Definition: - The total number of packets that higher-level protocols - requested be transmitted to a subnet-unicast address, - including those that were discarded or not sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutNUcastPkts { ifEntry 18 } - - Syntax: - Counter - - Definition: - The total number of packets that higher-level protocols - requested be transmitted to a non-unicast (i.e., a subnet - broadcast or subnet multicast) address, including those - - - -McCloghrie & Rose [Page 20] - -RFC 1156 MIB May 1990 - - - that were discarded or not sent. 
- - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutDiscards { ifEntry 19 } - - Syntax: - Counter - - Definition: - The number of outbound packets which were chosen to be - discarded even though no errors had been detected to - prevent their being transmitted. One possible reason for - discarding such a packet could be to free up buffer - space. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ifOutErrors { ifEntry 20 } - - Syntax: - Counter - - Definition: - The number of outbound packets that could not be - transmitted because of errors. - - Access: - read-only. - - Status: - mandatory. - - - - - -McCloghrie & Rose [Page 21] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ifOutQLen { ifEntry 21 } - - Syntax: - Gauge - - Definition: - The length of the output packet queue (in packets). - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 22] - -RFC 1156 MIB May 1990 - - -5.3. The Address Translation Group - - Implementation of the Address Translation group is mandatory - for all systems. - - The Address Translation group contains one table which is the - union across all interfaces of the translation tables for - converting a NetworkAddress (e.g., an IP address) into a - subnetwork-specific address. For lack of a better term, this - document refers to such a subnetwork-specific address as a - "physical" address. - - Examples of such translation tables are: for broadcast media - where ARP is in use, the translation table is equivalent to - the ARP cache; or, on an X.25 network where non-algorithmic - translation to X.121 addresses is required, the translation - table contains the NetworkAddress to X.121 address - equivalences. - - OBJECT: - ------- - atTable { at 1 } - - Syntax: - SEQUENCE OF AtEntry - - Definition: - The Address Translation tables contain the NetworkAddress - to "physical" address equivalences. 
Some interfaces do - not use translation tables for determining address - equivalences (e.g., DDN-X.25 has an algorithmic method); - if all interfaces are of this type, then the Address - Translation table is empty, i.e., has zero entries. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - atEntry { atTable 1 } - - Syntax: - AtEntry ::= SEQUENCE { - atIfIndex - - - -McCloghrie & Rose [Page 23] - -RFC 1156 MIB May 1990 - - - INTEGER, - atPhysAddress - OCTET STRING, - atNetAddress - NetworkAddress - } - - Definition: - Each entry contains one NetworkAddress to "physical" - address equivalence. - - Access: - read-write. - - Status: - mandatory. - - We now consider the individual components of each Address - Translation table entry: - - - OBJECT: - ------- - atIfIndex { atEntry 1 } - - Syntax: - INTEGER - - Definition: - The interface on which this entry's equivalence is - effective. The interface identified by a particular - value of this index is the same interface as identified - by the same value of ifIndex. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - atPhysAddress { atEntry 2 } - - Syntax: - OCTET STRING - - - - -McCloghrie & Rose [Page 24] - -RFC 1156 MIB May 1990 - - - Definition: - The media-dependent "physical" address. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - atNetAddress { atEntry 3 } - - Syntax: - NetworkAddress - - Definition: - The NetworkAddress (e.g., the IP address) corresponding to - the media-dependent "physical" address. - - Access: - read-write. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 25] - -RFC 1156 MIB May 1990 - - -5.4. The IP Group - - Implementation of the IP group is mandatory for all systems. 
- - - OBJECT: - ------- - ipForwarding { ip 1 } - - Syntax: - INTEGER { - gateway(1), -- entity forwards datagrams - host(2) -- entity does NOT forward datagrams - } - - Definition: - The indication of whether this entity is acting as an IP - gateway in respect to the forwarding of datagrams - received by, but not addressed to, this entity. IP - gateways forward datagrams; Hosts do not (except those - Source-Routed via the host). - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipDefaultTTL { ip 2 } - - Syntax: - INTEGER - - Definition: - The default value inserted into the Time-To-Live field of - the IP header of datagrams originated at this entity, - whenever a TTL value is not supplied by the transport - layer protocol. - - Access: - read-write. - - Status: - mandatory. - - - - -McCloghrie & Rose [Page 26] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ipInReceives { ip 3 } - - Syntax: - Counter - - Definition: - The total number of input datagrams received from - interfaces, including those received in error. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInHdrErrors { ip 4 } - - Syntax: - Counter - - Definition: - The number of input datagrams discarded due to errors in - their IP headers, including bad checksums, version number - mismatch, other format errors, time-to-live exceeded, - errors discovered in processing their IP options, etc. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInAddrErrors { ip 5 } - - Syntax: - Counter - - Definition: - The number of input datagrams discarded because the IP - address in their IP header's destination field was not a - - - -McCloghrie & Rose [Page 27] - -RFC 1156 MIB May 1990 - - - valid address to be received at this entity. This count - includes invalid addresses (e.g., 0.0.0.0) and addresses - of unsupported Classes (e.g., Class E). 
For entities - which are not IP Gateways and therefore do not forward - datagrams, this counter includes datagrams discarded - because the destination address was not a local address. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipForwDatagrams { ip 6 } - - Syntax: - Counter - - Definition: - The number of input datagrams for which this entity was - not their final IP destination, as a result of which an - attempt was made to find a route to forward them to that - final destination. In entities which do not act as IP - Gateways, this counter will include only those packets - which were Source-Routed via this entity, and the - Source-Route option processing was successful. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInUnknownProtos { ip 7 } - - Syntax: - Counter - - Definition: - The number of locally-addressed datagrams received - successfully but discarded because of an unknown or - unsupported protocol. - - - -McCloghrie & Rose [Page 28] - -RFC 1156 MIB May 1990 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInDiscards { ip 8 } - - Syntax: - Counter - - Definition: - The number of input IP datagrams for which no problems - were encountered to prevent their continued processing, - but which were discarded (e.g. for lack of buffer space). - Note that this counter does not include any datagrams - discarded while awaiting re-assembly. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipInDelivers { ip 9 } - - Syntax: - Counter - - Definition: - The total number of input datagrams successfully - delivered to IP user-protocols (including ICMP). - - Access: - read-only. - - Status: - mandatory. 
- - OBJECT: - ------- - ipOutRequests { ip 10 } - - - -McCloghrie & Rose [Page 29] - -RFC 1156 MIB May 1990 - - - Syntax: - Counter - - Definition: - The total number of IP datagrams which local IP user- - protocols (including ICMP) supplied to IP in requests for - transmission. Note that this counter does not include - any datagrams counted in ipForwDatagrams. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipOutDiscards { ip 11 } - - Syntax: - Counter - - Definition: - The number of output IP datagrams for which no problem - was encountered to prevent their transmission to their - destination, but which were discarded (e.g., for lack of - buffer space). Note that this counter would include - datagrams counted in ipForwDatagrams if any such packets - met this (discretionary) discard criterion. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipOutNoRoutes { ip 12 } - - Syntax: - Counter - - - - - - - -McCloghrie & Rose [Page 30] - -RFC 1156 MIB May 1990 - - - Definition: - The number of IP datagrams discarded because no route - could be found to transmit them to their destination. - Note that this counter includes any packets counted in - ipForwDatagrams which meet this "no-route" criterion. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipReasmTimeout { ip 13 } - - Syntax: - INTEGER - - Definition: - The maximum number of seconds which received fragments - are held while they are awaiting reassembly at this - entity. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipReasmReqds { ip 14 } - - Syntax: - Counter - - Definition: - The number of IP fragments received which needed to be - reassembled at this entity. - - Access: - read-only. - - Status: - mandatory. 
- - - -McCloghrie & Rose [Page 31] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ipReasmOKs { ip 15 } - - Syntax: - Counter - - Definition: - The number of IP datagrams successfully re-assembled. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipReasmFails { ip 16 } - - Syntax: - Counter - - Definition: - The number of failures detected by the IP re-assembly - algorithm (for whatever reason: timed out, errors, etc.). - - Note that this is not necessarily a count of discarded IP - fragments since some algorithms (notably RFC 815's) can - lose track of the number of fragments by combining them - as they are received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipFragOKs { ip 17 } - - Syntax: - Counter - - - - - -McCloghrie & Rose [Page 32] - -RFC 1156 MIB May 1990 - - - Definition: - The number of IP datagrams that have been successfully - fragmented at this entity. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipFragFails { ip 18 } - - Syntax: - Counter - - Definition: - The number of IP datagrams that have been discarded - because they needed to be fragmented at this entity but - could not be, e.g., because their "Don't Fragment" flag - was set. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipFragCreates { ip 19 } - - Syntax: - Counter - - Definition: - The number of IP datagram fragments that have been - generated as a result of fragmentation at this entity. - - Access: - read-only. - - Status: - mandatory. - - - - -McCloghrie & Rose [Page 33] - -RFC 1156 MIB May 1990 - - -5.4.1. The IP Address Table - - The IP Address table contains this entity's IP addressing - information. - - - OBJECT: - ------- - ipAddrTable { ip 20 } - - Syntax: - SEQUENCE OF IpAddrEntry - - Definition: - The table of addressing information relevant to this - entity's IP addresses. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - ipAddrEntry { ipAddrTable 1 } - - Syntax: - IpAddrEntry ::= SEQUENCE { - ipAdEntAddr - IpAddress, - ipAdEntIfIndex - INTEGER, - ipAdEntNetMask - IpAddress, - ipAdEntBcastAddr - INTEGER - } - - Definition: - The addressing information for one of this entity's IP - addresses. - - Access: - read-only. - - - - - -McCloghrie & Rose [Page 34] - -RFC 1156 MIB May 1990 - - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntAddr { ipAddrEntry 1 } - - Syntax: - IpAddress - - Definition: - The IP address to which this entry's addressing - information pertains. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntIfIndex { ipAddrEntry 2 } - - Syntax: - INTEGER - - Definition: - The index value which uniquely identifies the interface - to which this entry is applicable. The interface - identified by a particular value of this index is the - same interface as identified by the same value of - ifIndex. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntNetMask { ipAddrEntry 3 } - - - - - -McCloghrie & Rose [Page 35] - -RFC 1156 MIB May 1990 - - - Syntax: - IpAddress - - Definition: - The subnet mask associated with the IP address of this - entry. The value of the mask is an IP address with all - the network bits set to 1 and all the hosts bits set to - 0. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipAdEntBcastAddr { ipAddrEntry 4 } - - Syntax: - INTEGER - - Definition: - The value of the least-significant bit in the IP - broadcast address used for sending datagrams on the - (logical) interface associated with the IP address of - this entry. For example, when the Internet standard - all-ones broadcast address is used, the value will be 1. - - Access: - read-only. - - Status: - mandatory. - -5.4.2. The IP Routing Table - - The IP Routing Table contains an entry for each route - presently known to this entity. 
Note that the action to be - taken in response to a request to read a non-existent entry, - is specific to the network management protocol being used. - - - OBJECT: - ------- - ipRoutingTable { ip 21 } - - - - -McCloghrie & Rose [Page 36] - -RFC 1156 MIB May 1990 - - - Syntax: - SEQUENCE OF IpRouteEntry - - Definition: - This entity's IP Routing table. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteEntry { ipRoutingTable 1 } - - Syntax: - IpRouteEntry ::= SEQUENCE { - ipRouteDest - IpAddress, - ipRouteIfIndex - INTEGER, - ipRouteMetric1 - INTEGER, - ipRouteMetric2 - INTEGER, - ipRouteMetric3 - INTEGER, - ipRouteMetric4 - INTEGER, - ipRouteNextHop - IpAddress, - ipRouteType - INTEGER, - ipRouteProto - INTEGER, - ipRouteAge - INTEGER - } - - Definition: - A route to a particular destination. - - Access: - read-write. - - - - - -McCloghrie & Rose [Page 37] - -RFC 1156 MIB May 1990 - - - Status: - mandatory. - - We now consider the individual components of each route in the - IP Routing Table: - - - OBJECT: - ------- - ipRouteDest { ipRouteEntry 1 } - - Syntax: - IpAddress - - Definition: - The destination IP address of this route. An entry with - a value of 0.0.0.0 is considered a default route. - Multiple such default routes can appear in the table, but - access to such multiple entries is dependent on the - table-access mechanisms defined by the network management - protocol in use. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteIfIndex { ipRouteEntry 2 } - - Syntax: - INTEGER - - Definition: - The index value which uniquely identifies the local - interface through which the next hop of this route should - be reached. The interface identified by a particular - value of this index is the same interface as identified - by the same value of ifIndex. - - Access: - read-write. - - Status: - mandatory. 
- - - -McCloghrie & Rose [Page 38] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ipRouteMetric1 { ipRouteEntry 3 } - - Syntax: - INTEGER - - Definition: - The primary routing metric for this route. The semantics - of this metric are determined by the routing-protocol - specified in the route's ipRouteProto value. If this - metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteMetric2 { ipRouteEntry 4 } - - Syntax: - INTEGER - - Definition: - An alternate routing metric for this route. The - semantics of this metric are determined by the routing- - protocol specified in the route's ipRouteProto value. If - this metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteMetric3 { ipRouteEntry 5 } - - Syntax: - INTEGER - - - - - -McCloghrie & Rose [Page 39] - -RFC 1156 MIB May 1990 - - - Definition: - An alternate routing metric for this route. The - semantics of this metric are determined by the routing- - protocol specified in the route's ipRouteProto value. If - this metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteMetric4 { ipRouteEntry 6 } - - Syntax: - INTEGER - - Definition: - An alternate routing metric for this route. The - semantics of this metric are determined by the routing- - protocol specified in the route's ipRouteProto value. If - this metric is not used, its value should be set to -1. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteNextHop { ipRouteEntry 7 } - - Syntax: - IpAddress - - Definition: - The IP address of the next hop of this route. - - Access: - read-write. - - Status: - mandatory. 
- - - -McCloghrie & Rose [Page 40] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - ipRouteType { ipRouteEntry 8 } - - Syntax: - INTEGER { - other(1), -- none of the following - - invalid(2), -- an invalidated route - - -- route to directly - direct(3), -- connected (sub-)network - - -- route to a non-local - remote(4) -- host/network/sub-network - } - - Definition: - The type of route. - - Access: - read-write. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteProto { ipRouteEntry 9 } - - Syntax: - INTEGER { - other(1), -- none of the following - - -- non-protocol information, - -- e.g., manually configured - local(2), -- entries - - -- set via a network management - netmgmt(3), -- protocol - - -- obtained via ICMP, - icmp(4), -- e.g., Redirect - - -- the remaining values are - -- all gateway routing protocols - egp(5), - - - -McCloghrie & Rose [Page 41] - -RFC 1156 MIB May 1990 - - - ggp(6), - hello(7), - rip(8), - is-is(9), - es-is(10), - ciscoIgrp(11), - bbnSpfIgp(12), - oigp(13) - } - - Definition: - The routing mechanism via which this route was learned. - Inclusion of values for gateway routing protocols is not - intended to imply that hosts should support those - protocols. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - ipRouteAge { ipRouteEntry 10 } - - Syntax: - INTEGER - - Definition: - The number of seconds since this route was last updated - or otherwise determined to be correct. Note that no - semantics of "too old" can be implied except through - knowledge of the routing protocol by which the route was - learned. - - Access: - read-write. - - Status: - mandatory. - - - - - - - - - -McCloghrie & Rose [Page 42] - -RFC 1156 MIB May 1990 - - -5.5. The ICMP Group - - Implementation of the ICMP group is mandatory for all systems. - - The ICMP group contains the ICMP input and output statistics. 
- - Note that individual counters for ICMP message (sub-)codes have been - omitted from this (version of the) MIB for simplicity. - - - OBJECT: - ------- - icmpInMsgs { icmp 1 } - - Syntax: - Counter - - Definition: - The total number of ICMP messages which the entity - received. Note that this counter includes all those - counted by icmpInErrors. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInErrors { icmp 2 } - - Syntax: - Counter - - Definition: - The number of ICMP messages which the entity received but - determined as having errors (bad ICMP checksums, bad - length, etc.). - - Access: - read-only. - - Status: - mandatory. - - - - - -McCloghrie & Rose [Page 43] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - icmpInDestUnreachs { icmp 3 } - - Syntax: - Counter - - Definition: - The number of ICMP Destination Unreachable messages - received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInTimeExcds { icmp 4 } - - Syntax: - Counter - - Definition: - The number of ICMP Time Exceeded messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInParmProbs { icmp 5 } - - Syntax: - Counter - - Definition: - The number of ICMP Parameter Problem messages received. - - Access: - read-only. - - - - -McCloghrie & Rose [Page 44] - -RFC 1156 MIB May 1990 - - - Status: - mandatory. - - - OBJECT: - ------- - icmpInSrcQuenchs { icmp 6 } - - Syntax: - Counter - - Definition: - The number of ICMP Source Quench messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInRedirects { icmp 7 } - - Syntax: - Counter - - Definition: - The number of ICMP Redirect messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInEchos { icmp 8 } - - Syntax: - Counter - - Definition: - The number of ICMP Echo (request) messages received. 
- - - - -McCloghrie & Rose [Page 45] - -RFC 1156 MIB May 1990 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInEchoReps { icmp 9 } - - Syntax: - Counter - - Definition: - The number of ICMP Echo Reply messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInTimestamps { icmp 10 } - - Syntax: - Counter - - Definition: - The number of ICMP Timestamp (request) messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInTimestampReps { icmp 11 } - - Syntax: - Counter - - - - -McCloghrie & Rose [Page 46] - -RFC 1156 MIB May 1990 - - - Definition: - The number of ICMP Timestamp Reply messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInAddrMasks { icmp 12 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Request messages - received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpInAddrMaskReps { icmp 13 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Reply messages received. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutMsgs { icmp 14 } - - - -McCloghrie & Rose [Page 47] - -RFC 1156 MIB May 1990 - - - Syntax: - Counter - - Definition: - The total number of ICMP messages which this entity - attempted to send. Note that this counter includes all - those counted by icmpOutErrors. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutErrors { icmp 15 } - - Syntax: - Counter - - Definition: - The number of ICMP messages which this entity did not - send due to problems discovered within ICMP such as a - lack of buffers. This value should not include errors - discovered outside the ICMP layer such as the inability - of IP to route the resultant datagram. In some - implementations there may be no types of error which - contribute to this counter's value. 
- - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutDestUnreachs { icmp 16 } - - Syntax: - Counter - - Definition: - The number of ICMP Destination Unreachable messages sent. - - - - -McCloghrie & Rose [Page 48] - -RFC 1156 MIB May 1990 - - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutTimeExcds { icmp 17 } - - Syntax: - Counter - - Definition: - The number of ICMP Time Exceeded messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutParmProbs { icmp 18 } - - Syntax: - Counter - - Definition: - The number of ICMP Parameter Problem messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutSrcQuenchs { icmp 19 } - - Syntax: - Counter - - - - -McCloghrie & Rose [Page 49] - -RFC 1156 MIB May 1990 - - - Definition: - The number of ICMP Source Quench messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutRedirects { icmp 20 } - - Syntax: - Counter - - Definition: - The number of ICMP Redirect messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutEchos { icmp 21 } - - Syntax: - Counter - - Definition: - The number of ICMP Echo (request) messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutEchoReps { icmp 22 } - - - - -McCloghrie & Rose [Page 50] - -RFC 1156 MIB May 1990 - - - Syntax: - Counter - - Definition: - The number of ICMP Echo Reply messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutTimestamps { icmp 23 } - - Syntax: - Counter - - Definition: - The number of ICMP Timestamp (request) messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutTimestampReps { icmp 24 } - - Syntax: - Counter - - Definition: - The number of ICMP Timestamp Reply messages sent. - - Access: - read-only. - - Status: - mandatory. 
- - - - - - -McCloghrie & Rose [Page 51] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - icmpOutAddrMasks { icmp 25 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Request messages sent. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - icmpOutAddrMaskReps { icmp 26 } - - Syntax: - Counter - - Definition: - The number of ICMP Address Mask Reply messages sent. - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 52] - -RFC 1156 MIB May 1990 - - -5.6. The TCP Group - - Implementation of the TCP group is mandatory for all systems - that implement the TCP protocol. - - Note that instances of object types that represent information - about a particular TCP connection are transient; they persist - only as long as the connection in question. - - OBJECT: - ------- - tcpRtoAlgorithm { tcp 1 } - - Syntax: - INTEGER { - other(1), -- none of the following - constant(2), -- a constant rto - rsre(3), -- MIL-STD-1778, Appendix B - vanj(4) -- Van Jacobson's algorithm [15] - } - - Definition: - The algorithm used to determine the timeout value used - for retransmitting unacknowledged octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpRtoMin { tcp 2 } - - Syntax: - INTEGER - - Definition: - The minimum value permitted by a TCP implementation - for the retransmission timeout, measured in - milliseconds. More refined semantics for objects - of this type depend upon the algorithm used to - determine the retransmission timeout. In particular, - when the timeout algorithm is rsre(3), an object - of this type has the semantics of the LBOUND - quantity described in RFC 793. - - - -McCloghrie & Rose [Page 53] - -RFC 1156 MIB May 1990 - - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - tcpRtoMax { tcp 3 } - - Syntax: - INTEGER - - Definition: - The maximum value permitted by a TCP implementation - for the retransmission timeout, measured - in milliseconds. More refined semantics for objects - of this type depend upon the algorithm used to - determine the retransmission timeout. In particular, - when the timeout algorithm is rsre(3), an object of - this type has the semantics of the UBOUND quantity - described in RFC 793. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpMaxConn { tcp 4 } - - Syntax: - INTEGER - - Definition: - The limit on the total number of TCP connections the - entity can support. In entities where the maximum - number of connections is dynamic, this object should - contain the value "-1". - - Access: - read-only. - - - - - -McCloghrie & Rose [Page 54] - -RFC 1156 MIB May 1990 - - - Status: - mandatory. - - - OBJECT: - ------- - tcpActiveOpens { tcp 5 } - - Syntax: - Counter - - Definition: - The number of times TCP connections have made a direct - transition to the SYN-SENT state from the CLOSED - state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpPassiveOpens { tcp 6 } - - Syntax: - Counter - - Definition: - The number of times TCP connections have made a direct - transition to the SYN-RCVD state from the LISTEN - state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpAttemptFails { tcp 7 } - - Syntax: - Counter - - - -McCloghrie & Rose [Page 55] - -RFC 1156 MIB May 1990 - - - Definition: - The number of times TCP connections have made a direct - transition to the CLOSED state from either the - SYN-SENT state or the SYN-RCVD state, plus the number - of times TCP connections have made a direct transition - to the LISTEN state from the SYN-RCVD state. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - tcpEstabResets { tcp 8 } - - Syntax: - Counter - - Definition: - The number of times TCP connections have made a direct - transition to the CLOSED state from either the - ESTABLISHED state or the CLOSE-WAIT state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpCurrEstab { tcp 9 } - - Syntax: - Gauge - - Definition: - The number of TCP connections for which the current - state is either ESTABLISHED or CLOSE-WAIT. - - Access: - read-only. - - - - - -McCloghrie & Rose [Page 56] - -RFC 1156 MIB May 1990 - - - Status: - mandatory. - - - OBJECT: - ------- - tcpInSegs { tcp 10 } - - Syntax: - Counter - - Definition: - The total number of segments received, including those - received in error. This count includes segments - received on currently established connections. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpOutSegs { tcp 11 } - - Syntax: - Counter - - Definition: - The total number of segments sent, including those on - current connections but excluding those containing - only retransmitted octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpRetransSegs { tcp 12 } - - Syntax: - Counter - - - -McCloghrie & Rose [Page 57] - -RFC 1156 MIB May 1990 - - - Definition: - The total number of segments retransmitted - that is, - the number of TCP segments transmitted containing one - or more previously transmitted octets. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnTable { tcp 13 } - - Syntax: - SEQUENCE OF TcpConnEntry - - Definition: - A table containing TCP connection-specific - information. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - tcpConnEntry { tcpConnTable 1 } - - Syntax: - TcpConnEntry ::= SEQUENCE { - tcpConnState - INTEGER, - tcpConnLocalAddress - IpAddress, - tcpConnLocalPort - INTEGER (0..65535), - tcpConnRemAddress - IpAddress, - tcpConnRemPort - INTEGER (0..65535) - } - - - - -McCloghrie & Rose [Page 58] - -RFC 1156 MIB May 1990 - - - Definition: - Information about a particular current TCP connection. - An object of this type is transient, in that it ceases - to exist when (or soon after) the connection makes the - transition to the CLOSED state. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnState { tcpConnEntry 1 } - - Syntax: - INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11) - } - - Definition: - The state of this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnLocalAddress { tcpConnEntry 2 } - - Syntax: - IpAddress - - - -McCloghrie & Rose [Page 59] - -RFC 1156 MIB May 1990 - - - Definition: - The local IP address for this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnLocalPort { tcpConnEntry 3 } - - Syntax: - INTEGER (0..65535) - - Definition: - The local port number for this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnRemAddress { tcpConnEntry 4 } - - Syntax: - IpAddress - - Definition: - The remote IP address for this TCP connection. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - tcpConnRemPort { tcpConnEntry 5 } - - - - -McCloghrie & Rose [Page 60] - -RFC 1156 MIB May 1990 - - - Syntax: - INTEGER (0..65535) - - Definition: - The remote port number for this TCP connection. - - Access: - read-only. - - Status: - mandatory. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 61] - -RFC 1156 MIB May 1990 - - -5.7. The UDP Group - - Implementation of the UDP group is mandatory for all systems - which implement the UDP protocol. - - OBJECT: - ------- - udpInDatagrams { udp 1 } - - Syntax: - Counter - - Definition: - The total number of UDP datagrams delivered to UDP - users. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - udpNoPorts { udp 2 } - - Syntax: - Counter - - Definition: - The total number of received UDP datagrams for which - there was no application at the destination port. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - udpInErrors { udp 3 } - - Syntax: - Counter - - - - -McCloghrie & Rose [Page 62] - -RFC 1156 MIB May 1990 - - - Definition: - The number of received UDP datagrams that could not be - delivered for reasons other than the lack of an - application at the destination port. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - udpOutDatagrams { udp 4 } - - Syntax: - Counter - - Definition: - The total number of UDP datagrams sent from this - entity. - - Access: - read-only. - - Status: - mandatory. - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 63] - -RFC 1156 MIB May 1990 - - -5.8. The EGP Group - - Implementation of the EGP group is mandatory for all systems - which implement the EGP protocol. - - OBJECT: - ------- - egpInMsgs { egp 1 } - - Syntax: - Counter - - Definition: - The number of EGP messages received without error. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - egpInErrors { egp 2 } - - Syntax: - Counter - - Definition: - The number of EGP messages received that proved to be - in error. - - Access: - read-only. - - Status: - mandatory. 
- - - OBJECT: - ------- - egpOutMsgs { egp 3 } - - Syntax: - Counter - - - - - -McCloghrie & Rose [Page 64] - -RFC 1156 MIB May 1990 - - - Definition: - The total number of locally generated EGP messages. - - Access: - read-only. - - Status: - mandatory. - - - OBJECT: - ------- - egpOutErrors { egp 4 } - - Syntax: - Counter - - Definition: - The number of locally generated EGP messages not sent - due to resource limitations within an EGP entity. - - Access: - read-only. - - Status: - mandatory. - -5.8.1. The EGP Neighbor Table - - The Egp Neighbor table contains information about this entity's EGP - neighbors. - - - OBJECT: - ------- - egpNeighTable { egp 5 } - - Syntax: - SEQUENCE OF EgpNeighEntry - - Definition: - The EGP neighbor table. - - Access: - read-only. - - Status: - mandatory. - - - -McCloghrie & Rose [Page 65] - -RFC 1156 MIB May 1990 - - - OBJECT: - ------- - egpNeighEntry { egpNeighTable 1 } - - Syntax: - EgpNeighEntry ::= SEQUENCE { - egpNeighState - INTEGER, - egpNeighAddr - IpAddress - } - - Definition: - Information about this entity's relationship with a - particular EGP neighbor. - - Access: - read-only. - - Status: - mandatory. - - - We now consider the individual components of each EGP - neighbor entry: - - - OBJECT: - ------- - egpNeighState { egpNeighEntry 1 } - - Syntax: - INTEGER { - idle(1), - acquisition(2), - down(3), - up(4), - cease(5) - } - - Definition: - The EGP state of the local system with respect to this - entry's EGP neighbor. Each EGP state is represented - by a value that is one greater than the numerical - value associated with said state in RFC 904. - - Access: - read-only. - - - -McCloghrie & Rose [Page 66] - -RFC 1156 MIB May 1990 - - - Status: - mandatory. - - - OBJECT: - ------- - egpNeighAddr { egpNeighEntry 2 } - - Syntax: - IpAddress - - Definition: - The IP address of this entry's EGP neighbor. - - Access: - read-only. - - Status: - mandatory. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 67] - -RFC 1156 MIB May 1990 - - -6. Definitions - - RFC1156-MIB - - DEFINITIONS ::= BEGIN - - IMPORTS - mgmt, OBJECT-TYPE, NetworkAddress, IpAddress, - Counter, Gauge, TimeTicks - FROM RFC1155-SMI; - - mib OBJECT IDENTIFIER ::= { mgmt 1 } - - system OBJECT IDENTIFIER ::= { mib 1 } - interfaces OBJECT IDENTIFIER ::= { mib 2 } - at OBJECT IDENTIFIER ::= { mib 3 } - ip OBJECT IDENTIFIER ::= { mib 4 } - icmp OBJECT IDENTIFIER ::= { mib 5 } - tcp OBJECT IDENTIFIER ::= { mib 6 } - udp OBJECT IDENTIFIER ::= { mib 7 } - egp OBJECT IDENTIFIER ::= { mib 8 } - - -- object types - - -- the System group - - sysDescr OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-only - STATUS mandatory - ::= { system 1 } - - sysObjectID OBJECT-TYPE - SYNTAX OBJECT IDENTIFIER - ACCESS read-only - STATUS mandatory - ::= { system 2 } - - sysUpTime OBJECT-TYPE - SYNTAX TimeTicks - ACCESS read-only - STATUS mandatory - ::= { system 3 } - - -- the Interfaces group - - ifNumber OBJECT-TYPE - SYNTAX INTEGER - - - -McCloghrie & Rose [Page 68] - -RFC 1156 MIB May 1990 - - - ACCESS read-only - STATUS mandatory - ::= { interfaces 1 } - - -- the Interfaces table - - ifTable OBJECT-TYPE - SYNTAX SEQUENCE OF IfEntry - ACCESS read-write - STATUS mandatory - ::= { interfaces 2 } - - ifEntry OBJECT-TYPE - SYNTAX IfEntry - ACCESS read-write - STATUS mandatory - ::= { ifTable 1 } - - IfEntry ::= SEQUENCE { - ifIndex - INTEGER, - ifDescr - OCTET STRING, - ifType - INTEGER, - ifMtu - INTEGER, - ifSpeed - Gauge, - ifPhysAddress - OCTET STRING, - ifAdminStatus - INTEGER, - ifOperStatus - INTEGER, - ifLastChange - TimeTicks, - ifInOctets - Counter, - ifInUcastPkts - Counter, - ifInNUcastPkts - Counter, - ifInDiscards - Counter, - ifInErrors - Counter, - ifInUnknownProtos - - - -McCloghrie & Rose [Page 69] - -RFC 1156 MIB May 1990 - - - Counter, - ifOutOctets - Counter, - ifOutUcastPkts - Counter, - ifOutNUcastPkts - Counter, - 
ifOutDiscards - Counter, - ifOutErrors - Counter, - ifOutQLen - Gauge - } - - ifIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ifEntry 1 } - - ifDescr OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-only - STATUS mandatory - ::= { ifEntry 2 } - - ifType OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - regular1822(2), - hdh1822(3), - ddn-x25(4), - rfc877-x25(5), - ethernet-csmacd(6), - iso88023-csmacd(7), - iso88024-tokenBus(8), - iso88025-tokenRing(9), - iso88026-man(10), - starLan(11), - proteon-10MBit(12), - proteon-80MBit(13), - hyperchannel(14), - fddi(15), - lapb(16), - sdlc(17), - t1-carrier(18), - cept(19), - - - -McCloghrie & Rose [Page 70] - -RFC 1156 MIB May 1990 - - - basicIsdn(20), - primaryIsdn(21), - -- proprietary serial - propPointToPointSerial(22) - } - ACCESS read-only - STATUS mandatory - ::= { ifEntry 3 } - - ifMtu OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ifEntry 4 } - - ifSpeed OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - ::= { ifEntry 5 } - - ifPhysAddress OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-only - STATUS mandatory - ::= { ifEntry 6 } - - ifAdminStatus OBJECT-TYPE - SYNTAX INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - ACCESS read-write - STATUS mandatory - ::= { ifEntry 7 } - - ifOperStatus OBJECT-TYPE - SYNTAX INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - ACCESS read-only - STATUS mandatory - ::= { ifEntry 8 } - - ifLastChange OBJECT-TYPE - - - -McCloghrie & Rose [Page 71] - -RFC 1156 MIB May 1990 - - - SYNTAX TimeTicks - ACCESS read-only - STATUS mandatory - ::= { ifEntry 9 } - - ifInOctets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 10 } - - ifInUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 11 } - - ifInNUcastPkts OBJECT-TYPE - SYNTAX 
Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 12 } - - ifInDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 13 } - - ifInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 14 } - - ifInUnknownProtos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 15 } - - ifOutOctets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 16 } - - ifOutUcastPkts OBJECT-TYPE - - - -McCloghrie & Rose [Page 72] - -RFC 1156 MIB May 1990 - - - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 17 } - - ifOutNUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 18 } - - ifOutDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 19 } - - ifOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ifEntry 20 } - - ifOutQLen OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - ::= { ifEntry 21 } - - -- the Address Translation group - - atTable OBJECT-TYPE - SYNTAX SEQUENCE OF AtEntry - ACCESS read-write - STATUS mandatory - ::= { at 1 } - - atEntry OBJECT-TYPE - SYNTAX AtEntry - ACCESS read-write - STATUS mandatory - ::= { atTable 1 } - - AtEntry ::= SEQUENCE { - atIfIndex - INTEGER, - atPhysAddress - OCTET STRING, - - - -McCloghrie & Rose [Page 73] - -RFC 1156 MIB May 1990 - - - atNetAddress - NetworkAddress - } - - atIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { atEntry 1 } - - atPhysAddress OBJECT-TYPE - SYNTAX OCTET STRING - ACCESS read-write - STATUS mandatory - ::= { atEntry 2 } - - atNetAddress OBJECT-TYPE - SYNTAX NetworkAddress - ACCESS read-write - STATUS mandatory - ::= { atEntry 3 } - - -- the IP group - - ipForwarding OBJECT-TYPE - SYNTAX INTEGER { - gateway(1), -- entity forwards datagrams - host(2) -- entity does NOT forward datagrams - } - ACCESS 
read-only - STATUS mandatory - ::= { ip 1 } - - ipDefaultTTL OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ip 2 } - - ipInReceives OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 3 } - - ipInHdrErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - - - -McCloghrie & Rose [Page 74] - -RFC 1156 MIB May 1990 - - - STATUS mandatory - ::= { ip 4 } - - ipInAddrErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 5 } - - ipForwDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 6 } - - ipInUnknownProtos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 7 } - - ipInDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 8 } - - ipInDelivers OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 9 } - - ipOutRequests OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 10 } - - ipOutDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 11 } - - ipOutNoRoutes OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - - - -McCloghrie & Rose [Page 75] - -RFC 1156 MIB May 1990 - - - STATUS mandatory - ::= { ip 12 } - - ipReasmTimeout OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ip 13 } - - ipReasmReqds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 14 } - - ipReasmOKs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 15 } - - ipReasmFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 16 } - - ipFragOKs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 17 } - - ipFragFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 18 } - - ipFragCreates OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { ip 19 } - - -- the IP Interface 
table - - ipAddrTable OBJECT-TYPE - - - -McCloghrie & Rose [Page 76] - -RFC 1156 MIB May 1990 - - - SYNTAX SEQUENCE OF IpAddrEntry - ACCESS read-only - STATUS mandatory - ::= { ip 20 } - - ipAddrEntry OBJECT-TYPE - SYNTAX IpAddrEntry - ACCESS read-only - STATUS mandatory - ::= { ipAddrTable 1 } - - IpAddrEntry ::= SEQUENCE { - ipAdEntAddr - IpAddress, - ipAdEntIfIndex - INTEGER, - ipAdEntNetMask - IpAddress, - ipAdEntBcastAddr - INTEGER - } - - ipAdEntAddr OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 1 } - - ipAdEntIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 2 } - - ipAdEntNetMask OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 3 } - - ipAdEntBcastAddr OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { ipAddrEntry 4 } - - - - - - -McCloghrie & Rose [Page 77] - -RFC 1156 MIB May 1990 - - - -- the IP Routing table - - ipRoutingTable OBJECT-TYPE - SYNTAX SEQUENCE OF IpRouteEntry - ACCESS read-write - STATUS mandatory - ::= { ip 21 } - - ipRouteEntry OBJECT-TYPE - SYNTAX IpRouteEntry - ACCESS read-write - STATUS mandatory - ::= { ipRoutingTable 1 } - - IpRouteEntry ::= SEQUENCE { - ipRouteDest - IpAddress, - ipRouteIfIndex - INTEGER, - ipRouteMetric1 - INTEGER, - ipRouteMetric2 - INTEGER, - ipRouteMetric3 - INTEGER, - ipRouteMetric4 - INTEGER, - ipRouteNextHop - IpAddress, - ipRouteType - INTEGER, - ipRouteProto - INTEGER, - ipRouteAge - INTEGER - } - - ipRouteDest OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 1 } - - ipRouteIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 2 } - - - -McCloghrie & Rose [Page 78] - -RFC 1156 MIB May 1990 - - - ipRouteMetric1 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 3 } - - ipRouteMetric2 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - 
STATUS mandatory - ::= { ipRouteEntry 4 } - - ipRouteMetric3 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 5 } - - ipRouteMetric4 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 6 } - - ipRouteNextHop OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 7 } - - ipRouteType OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - - invalid(2), -- an invalidated route - - -- route to directly - direct(3), -- connected (sub-)network - - -- route to a non-local - remote(4), -- host/network/sub-network - } - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 8 } - - ipRouteProto OBJECT-TYPE - SYNTAX INTEGER { - - - -McCloghrie & Rose [Page 79] - -RFC 1156 MIB May 1990 - - - other(1), -- none of the following - - -- non-protocol information - -- e.g., manually - local(2), -- configured entries - - -- set via a network - netmgmt(3), -- management protocol - - -- obtained via ICMP, - icmp(4), -- e.g., Redirect - - -- the following are - -- gateway routing protocols - egp(5), - ggp(6), - hello(7), - rip(8), - is-is(9), - es-is(10), - ciscoIgrp(11), - bbnSpfIgp(12), - oigp(13) - } - ACCESS read-only - STATUS mandatory - ::= { ipRouteEntry 9 } - - ipRouteAge OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - ::= { ipRouteEntry 10 } - - -- the ICMP group - - icmpInMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 1 } - - icmpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 2 } - - - - -McCloghrie & Rose [Page 80] - -RFC 1156 MIB May 1990 - - - icmpInDestUnreachs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 3 } - - icmpInTimeExcds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 4 } - - icmpInParmProbs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 
5 } - - icmpInSrcQuenchs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 6 } - - icmpInRedirects OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 7 } - - icmpInEchos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 8 } - - icmpInEchoReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 9 } - - icmpInTimestamps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 10 } - - - - -McCloghrie & Rose [Page 81] - -RFC 1156 MIB May 1990 - - - icmpInTimestampReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 11 } - - icmpInAddrMasks OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 12 } - - icmpInAddrMaskReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 13 } - - icmpOutMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 14 } - - icmpOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 15 } - - icmpOutDestUnreachs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 16 } - - icmpOutTimeExcds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 17 } - - icmpOutParmProbs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 18 } - - - - -McCloghrie & Rose [Page 82] - -RFC 1156 MIB May 1990 - - - icmpOutSrcQuenchs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 19 } - - icmpOutRedirects OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 20 } - - icmpOutEchos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 21 } - - icmpOutEchoReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 22 } - - icmpOutTimestamps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS 
mandatory - ::= { icmp 23 } - - icmpOutTimestampReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 24 } - - icmpOutAddrMasks OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 25 } - - icmpOutAddrMaskReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { icmp 26 } - - - - -McCloghrie & Rose [Page 83] - -RFC 1156 MIB May 1990 - - - -- the TCP group - - tcpRtoAlgorithm OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - constant(2), -- a constant rto - rsre(3), -- MIL-STD-1778, Appendix B - vanj(4) -- Van Jacobson's algorithm [15] - } - ACCESS read-only - STATUS mandatory - ::= { tcp 1 } - - tcpRtoMin OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { tcp 2 } - - tcpRtoMax OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { tcp 3 } - - tcpMaxConn OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - ::= { tcp 4 } - - tcpActiveOpens OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 5 } - - tcpPassiveOpens OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 6 } - - tcpAttemptFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 7 } - - - -McCloghrie & Rose [Page 84] - -RFC 1156 MIB May 1990 - - - tcpEstabResets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 8 } - - tcpCurrEstab OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - ::= { tcp 9 } - - tcpInSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 10 } - - tcpOutSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 11 } - - tcpRetransSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { tcp 12 } - - -- the TCP connections table - - tcpConnTable OBJECT-TYPE - SYNTAX SEQUENCE OF TcpConnEntry - ACCESS read-only - STATUS mandatory - 
::= { tcp 13 } - - tcpConnEntry OBJECT-TYPE - SYNTAX TcpConnEntry - ACCESS read-only - STATUS mandatory - ::= { tcpConnTable 1 } - - TcpConnEntry ::= SEQUENCE { - tcpConnState - INTEGER, - tcpConnLocalAddress - - - -McCloghrie & Rose [Page 85] - -RFC 1156 MIB May 1990 - - - IpAddress, - tcpConnLocalPort - INTEGER (0..65535), - tcpConnRemAddress - IpAddress, - tcpConnRemPort - INTEGER (0..65535) - } - - tcpConnState OBJECT-TYPE - SYNTAX INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11) - } - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 1 } - - tcpConnLocalAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 2 } - - tcpConnLocalPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 3 } - - tcpConnRemAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { tcpConnEntry 4 } - - tcpConnRemPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - - - -McCloghrie & Rose [Page 86] - -RFC 1156 MIB May 1990 - - - STATUS mandatory - ::= { tcpConnEntry 5 } - - -- the UDP group - - udpInDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 1 } - - udpNoPorts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 2 } - - udpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 3 } - - udpOutDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { udp 4 } - - -- the EGP group - - egpInMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 1 } - - egpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 2 } - - egpOutMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 3 } - - - -McCloghrie & Rose [Page 87] 
- -RFC 1156 MIB May 1990 - - - egpOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - ::= { egp 4 } - - -- the EGP Neighbor table - - egpNeighTable OBJECT-TYPE - SYNTAX SEQUENCE OF EgpNeighEntry - ACCESS read-only - STATUS mandatory - ::= { egp 5 } - - egpNeighEntry OBJECT-TYPE - SYNTAX EgpNeighEntry - ACCESS read-only - STATUS mandatory - ::= { egpNeighTable 1 } - - EgpNeighEntry ::= SEQUENCE { - egpNeighState - INTEGER, - egpNeighAddr - IpAddress - } - - egpNeighState OBJECT-TYPE - SYNTAX INTEGER { - idle(1), - acquisition(2), - down(3), - up(4), - cease(5) - } - ACCESS read-only - STATUS mandatory - ::= { egpNeighEntry 1 } - - egpNeighAddr OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - ::= { egpNeighEntry 2 } - - END - - - - - -McCloghrie & Rose [Page 88] - -RFC 1156 MIB May 1990 - - -7. Acknowledgements - - The initial draft of this memo was heavily influenced by the the HEMS - [9] and SNMP [10] MIBs. - - Its final form is the result of the suggestions, the dicussions, and - the compromises reached by the members of the IETF MIB working group: - - Karl Auerbach, Epilogue Technology - K. Ramesh Babu, Excelan - Lawrence Besaw, Hewlett-Packard - Jeffrey D. Case, University of Tennessee at Knoxville - James R. Davin, Proteon - Mark S. Fedor, NYSERNet - Robb Foster, BBN - Phill Gross, The MITRE Corporation - Bent Torp Jensen, Convergent Technology - Lee Labarre, The MITRE Corporation - Dan Lynch, Advanced Computing Environments - Keith McCloghrie, The Wollongong Group - Dave Mackie, 3Com/Bridge - Craig Partridge, BBN (chair) - Jim Robertson, 3Com/Bridge - Marshall T. Rose, The Wollongong Group - Greg Satz, cisco - Martin Lee Schoffstall, Rensselaer Polytechnic Institute - Lou Steinberg, IBM - Dean Throop, Data General - Unni Warrier, Unisys - - - - - - - - - - - - - - - - - - - - - - -McCloghrie & Rose [Page 89] - -RFC 1156 MIB May 1990 - - -8. 
References - - [1] Cerf, V., "IAB Recommendations for the Development of Internet - Network Management Standards", RFC 1052, IAB, April 1988. - - [2] Information processing systems - Open Systems Interconnection, - "Management Information Services Definition", International - Organization for Standardization, Draft Proposal 9595/2, - December 1987. - - [3] Information processing systems - Open Systems Interconnection, - "Management Information Protocol Specification", International - Organization for Standardization, Draft Proposal 9596/2, - December 1987. - - [4] Rose M., and K. McCloghrie, "Structure and Identification of - Management Information for TCP/IP-based internets", RFC 1065, - TWG, August 1988. - - [5] Partridge C., and G. Trewitt, "The High-Level Entity Management - System (HEMS)", RFCs 1021-1024, BBN and Stanford, October 1987. - - [6] Cerf, V., "Report of the Second Ad Hoc Network Management Review - Group", RFC 1109, IAB, August 1989. - - [7] Rose, M., and K. McCloghrie, "Structure and Identification of - Management Information for TCP/IP-based Internets", RFC 1155, - Performance Systems International and Hughes LAN Systems, May - 1990. - - [8] Case, J., M. Fedor, M. Schoffstall, and J. Davin, The Simple - Network Management Protocol", RFC 1157, University of Tennessee - at Knoxville, Performance Systems International, Performance - Systems International, and the MIT Laboratory for Computer - Science, May 1990. - - [9] Partridge C., and G. Trewitt, "HEMS Variable Definitions", RFC - 1024, BBN and Stanford, October 1987. - - [10] Case, J., M. Fedor, M. Schoffstall, and J. Davin, "A Simple - Network Management Protocol", RFC 1067, University of Tennessee - At Knoxville, NYSERNet, Rensselaer Polytechnic, Proteon, August - 1988. 
- - [11] LaBarre, L., "Structure and Identification of Management - Information for the Internet", Internet Engineering Task Force - working note, Network Information Center, SRI International, - Menlo Park, California, April 1988. - - - -McCloghrie & Rose [Page 90] - -RFC 1156 MIB May 1990 - - - [12] LaBarre, L., "Transport Layer Management Information: TCP", - Internet Engineering Task Force working note in preparation. - Network Information Center, SRI International, Menlo Park, - California, (unpublished). - - [13] Information processing systems - Open Systems Interconnection, - "Specification of Abstract Syntax Notation One (ASN.1)", - International Organization for Standardization, International - Standard 8824, December 1987. - - [14] Information processing systems - Open Systems Interconnection, - "Specification of Basic Encoding Rules for Abstract Notation One - (ASN.1)", International Organization for Standardization, - International Standard 8825, December 1987. - - [15] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM, 1988, - Stanford, California. - -Security Considerations - - Security issues are not discussed in this memo. - -Authors' Addresses - - Keith McCloghrie - The Wollongong Group - 1129 San Antonio Road - Palo Alto, CA 04303 - - Phone: (415) 962-7160 - - EMail: sytek!kzm@HPLABS.HP.COM - - - Marshall T. Rose - PSI, Inc. - PSI California Office - P.O. Box 391776 - Mountain View, CA 94039 - - Phone: (415) 961-3380 - - EMail: mrose@PSI.COM - - - - - - - - -McCloghrie & Rose [Page 91] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1180.txt b/kernel/picotcp/RFC/rfc1180.txt deleted file mode 100644 index 6bbb6d9..0000000 --- a/kernel/picotcp/RFC/rfc1180.txt +++ /dev/null @@ -1,1571 +0,0 @@ - - - - - - -Network Working Group T. Socolofsky -Request for Comments: 1180 C. 
Kale - Spider Systems Limited - January 1991 - - - A TCP/IP Tutorial - -Status of this Memo - - This RFC is a tutorial on the TCP/IP protocol suite, focusing - particularly on the steps in forwarding an IP datagram from source - host to destination host through a router. It does not specify an - Internet standard. Distribution of this memo is unlimited. - -Table of Contents - - 1. Introduction................................................ 1 - 2. TCP/IP Overview............................................. 2 - 3. Ethernet.................................................... 8 - 4. ARP......................................................... 9 - 5. Internet Protocol........................................... 12 - 6. User Datagram Protocol...................................... 22 - 7. Transmission Control Protocol............................... 24 - 8. Network Applications........................................ 25 - 9. Other Information........................................... 27 - 10. References.................................................. 27 - 11. Relation to other RFCs...................................... 27 - 12. Security Considerations..................................... 27 - 13. Authors' Addresses.......................................... 28 - -1. Introduction - - This tutorial contains only one view of the salient points of TCP/IP, - and therefore it is the "bare bones" of TCP/IP technology. It omits - the history of development and funding, the business case for its - use, and its future as compared to ISO OSI. Indeed, a great deal of - technical information is also omitted. What remains is a minimum of - information that must be understood by the professional working in a - TCP/IP environment. These professionals include the systems - administrator, the systems programmer, and the network manager. - - This tutorial uses examples from the UNIX TCP/IP environment, however - the main points apply across all implementations of TCP/IP. 
- - Note that the purpose of this memo is explanation, not definition. - If any question arises about the correct specification of a protocol, - please refer to the actual standards defining RFC. - - - -Socolofsky & Kale [Page 1] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - The next section is an overview of TCP/IP, followed by detailed - descriptions of individual components. - -2. TCP/IP Overview - - The generic term "TCP/IP" usually means anything and everything - related to the specific protocols of TCP and IP. It can include - other protocols, applications, and even the network medium. A sample - of these protocols are: UDP, ARP, and ICMP. A sample of these - applications are: TELNET, FTP, and rcp. A more accurate term is - "internet technology". A network that uses internet technology is - called an "internet". - -2.1 Basic Structure - - To understand this technology you must first understand the following - logical structure: - - ---------------------------- - | network applications | - | | - |... \ | / .. \ | / ...| - | ----- ----- | - | |TCP| |UDP| | - | ----- ----- | - | \ / | - | -------- | - | | IP | | - | ----- -*------ | - | |ARP| | | - | ----- | | - | \ | | - | ------ | - | |ENET| | - | ---@-- | - ----------|----------------- - | - ----------------------o--------- - Ethernet Cable - - Figure 1. Basic TCP/IP Network Node - - This is the logical structure of the layered protocols inside a - computer on an internet. Each computer that can communicate using - internet technology has such a logical structure. It is this logical - structure that determines the behavior of the computer on the - internet. The boxes represent processing of the data as it passes - through the computer, and the lines connecting boxes show the path of - - - -Socolofsky & Kale [Page 2] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - data. The horizontal line at the bottom represents the Ethernet - cable; the "o" is the transceiver. 
The "*" is the IP address and the - "@" is the Ethernet address. Understanding this logical structure is - essential to understanding internet technology; it is referred to - throughout this tutorial. - -2.2 Terminology - - The name of a unit of data that flows through an internet is - dependent upon where it exists in the protocol stack. In summary: if - it is on an Ethernet it is called an Ethernet frame; if it is between - the Ethernet driver and the IP module it is called a IP packet; if it - is between the IP module and the UDP module it is called a UDP - datagram; if it is between the IP module and the TCP module it is - called a TCP segment (more generally, a transport message); and if it - is in a network application it is called a application message. - - These definitions are imperfect. Actual definitions vary from one - publication to the next. More specific definitions can be found in - RFC 1122, section 1.3.3. - - A driver is software that communicates directly with the network - interface hardware. A module is software that communicates with a - driver, with network applications, or with another module. - - The terms driver, module, Ethernet frame, IP packet, UDP datagram, - TCP message, and application message are used where appropriate - throughout this tutorial. - -2.3 Flow of Data - - Let's follow the data as it flows down through the protocol stack - shown in Figure 1. For an application that uses TCP (Transmission - Control Protocol), data passes between the application and the TCP - module. For applications that use UDP (User Datagram Protocol), data - passes between the application and the UDP module. FTP (File - Transfer Protocol) is a typical application that uses TCP. Its - protocol stack in this example is FTP/TCP/IP/ENET. SNMP (Simple - Network Management Protocol) is an application that uses UDP. Its - protocol stack in this example is SNMP/UDP/IP/ENET. - - The TCP module, UDP module, and the Ethernet driver are n-to-1 - multiplexers. 
As multiplexers they switch many inputs to one output. - They are also 1-to-n de-multiplexers. As de-multiplexers they switch - one input to many outputs according to the type field in the protocol - header. - - - - - -Socolofsky & Kale [Page 3] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - 1 2 3 ... n 1 2 3 ... n - \ | / | \ | | / ^ - \ | | / | \ | | / | - ------------- flow ---------------- flow - |multiplexer| of |de-multiplexer| of - ------------- data ---------------- data - | | | | - | v | | - 1 1 - - Figure 2. n-to-1 multiplexer and 1-to-n de-multiplexer - - If an Ethernet frame comes up into the Ethernet driver off the - network, the packet can be passed upwards to either the ARP (Address - Resolution Protocol) module or to the IP (Internet Protocol) module. - The value of the type field in the Ethernet frame determines whether - the Ethernet frame is passed to the ARP or the IP module. - - If an IP packet comes up into IP, the unit of data is passed upwards - to either TCP or UDP, as determined by the value of the protocol - field in the IP header. - - If the UDP datagram comes up into UDP, the application message is - passed upwards to the network application based on the value of the - port field in the UDP header. If the TCP message comes up into TCP, - the application message is passed upwards to the network application - based on the value of the port field in the TCP header. - - The downwards multiplexing is simple to perform because from each - starting point there is only the one downward path; each protocol - module adds its header information so the packet can be de- - multiplexed at the destination computer. - - Data passing out from the applications through either TCP or UDP - converges on the IP module and is sent downwards through the lower - network interface driver. 
- - Although internet technology supports many different network media, - Ethernet is used for all examples in this tutorial because it is the - most common physical network used under IP. The computer in Figure 1 - has a single Ethernet connection. The 6-byte Ethernet address is - unique for each interface on an Ethernet and is located at the lower - interface of the Ethernet driver. - - The computer also has a 4-byte IP address. This address is located - at the lower interface to the IP module. The IP address must be - unique for an internet. - - - - -Socolofsky & Kale [Page 4] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - A running computer always knows its own IP address and Ethernet - address. - -2.4 Two Network Interfaces - - If a computer is connected to 2 separate Ethernets it is as in Figure - 3. - - ---------------------------- - | network applications | - | | - |... \ | / .. \ | / ...| - | ----- ----- | - | |TCP| |UDP| | - | ----- ----- | - | \ / | - | -------- | - | | IP | | - | ----- -*----*- ----- | - | |ARP| | | |ARP| | - | ----- | | ----- | - | \ | | / | - | ------ ------ | - | |ENET| |ENET| | - | ---@-- ---@-- | - ----------|-------|--------- - | | - | ---o--------------------------- - | Ethernet Cable 2 - ---------------o---------- - Ethernet Cable 1 - - Figure 3. TCP/IP Network Node on 2 Ethernets - - Please note that this computer has 2 Ethernet addresses and 2 IP - addresses. - - It is seen from this structure that for computers with more than one - physical network interface, the IP module is both a n-to-m - multiplexer and an m-to-n de-multiplexer. - - - - - - - - - - - -Socolofsky & Kale [Page 5] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - 1 2 3 ... n 1 2 3 ... n - \ | | / | \ | | / ^ - \ | | / | \ | | / | - ------------- flow ---------------- flow - |multiplexer| of |de-multiplexer| of - ------------- data ---------------- data - / | | \ | / | | \ | - / | | \ v / | | \ | - 1 2 3 ... m 1 2 3 ... m - - Figure 4. 
n-to-m multiplexer and m-to-n de-multiplexer - - It performs this multiplexing in either direction to accommodate - incoming and outgoing data. An IP module with more than 1 network - interface is more complex than our original example in that it can - forward data onto the next network. Data can arrive on any network - interface and be sent out on any other. - - TCP UDP - \ / - \ / - -------------- - | IP | - | | - | --- | - | / \ | - | / v | - -------------- - / \ - / \ - data data - comes in goes out - here here - - Figure 5. Example of IP Forwarding a IP Packet - - The process of sending an IP packet out onto another network is - called "forwarding" an IP packet. A computer that has been dedicated - to the task of forwarding IP packets is called an "IP-router". - - As you can see from the figure, the forwarded IP packet never touches - the TCP and UDP modules on the IP-router. Some IP-router - implementations do not have a TCP or UDP module. - -2.5 IP Creates a Single Logical Network - - The IP module is central to the success of internet technology. Each - module or driver adds its header to the message as the message passes - - - -Socolofsky & Kale [Page 6] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - down through the protocol stack. Each module or driver strips the - corresponding header from the message as the message climbs the - protocol stack up towards the application. The IP header contains - the IP address, which builds a single logical network from multiple - physical networks. This interconnection of physical networks is the - source of the name: internet. A set of interconnected physical - networks that limit the range of an IP packet is called an - "internet". - -2.6 Physical Network Independence - - IP hides the underlying network hardware from the network - applications. If you invent a new physical network, you can put it - into service by implementing a new driver that connects to the - internet underneath IP. 
Thus, the network applications remain intact - and are not vulnerable to changes in hardware technology. - -2.7 Interoperability - - If two computers on an internet can communicate, they are said to - "interoperate"; if an implementation of internet technology is good, - it is said to have "interoperability". Users of general-purpose - computers benefit from the installation of an internet because of the - interoperability in computers on the market. Generally, when you buy - a computer, it will interoperate. If the computer does not have - interoperability, and interoperability can not be added, it occupies - a rare and special niche in the market. - -2.8 After the Overview - - With the background set, we will answer the following questions: - - When sending out an IP packet, how is the destination Ethernet - address determined? - - How does IP know which of multiple lower network interfaces to use - when sending out an IP packet? - - How does a client on one computer reach the server on another? - - Why do both TCP and UDP exist, instead of just one or the other? - - What network applications are available? - - These will be explained, in turn, after an Ethernet refresher. - - - - - - -Socolofsky & Kale [Page 7] - -RFC 1180 A TCP/IP Tutorial January 1991 - - -3. Ethernet - - This section is a short review of Ethernet technology. - - An Ethernet frame contains the destination address, source address, - type field, and data. - - An Ethernet address is 6 bytes. Every device has its own Ethernet - address and listens for Ethernet frames with that destination - address. All devices also listen for Ethernet frames with a wild- - card destination address of "FF-FF-FF-FF-FF-FF" (in hexadecimal), - called a "broadcast" address. - - Ethernet uses CSMA/CD (Carrier Sense and Multiple Access with - Collision Detection). CSMA/CD means that all devices communicate on - a single medium, that only one can transmit at a time, and that they - can all receive simultaneously. 
If 2 devices try to transmit at the - same instant, the transmit collision is detected, and both devices - wait a random (but short) period before trying to transmit again. - -3.1 A Human Analogy - - A good analogy of Ethernet technology is a group of people talking in - a small, completely dark room. In this analogy, the physical network - medium is sound waves on air in the room instead of electrical - signals on a coaxial cable. - - Each person can hear the words when another is talking (Carrier - Sense). Everyone in the room has equal capability to talk (Multiple - Access), but none of them give lengthy speeches because they are - polite. If a person is impolite, he is asked to leave the room - (i.e., thrown off the net). - - No one talks while another is speaking. But if two people start - speaking at the same instant, each of them know this because each - hears something they haven't said (Collision Detection). When these - two people notice this condition, they wait for a moment, then one - begins talking. The other hears the talking and waits for the first - to finish before beginning his own speech. - - Each person has an unique name (unique Ethernet address) to avoid - confusion. Every time one of them talks, he prefaces the message - with the name of the person he is talking to and with his own name - (Ethernet destination and source address, respectively), i.e., "Hello - Jane, this is Jack, ..blah blah blah...". If the sender wants to - talk to everyone he might say "everyone" (broadcast address), i.e., - "Hello Everyone, this is Jack, ..blah blah blah...". - - - - -Socolofsky & Kale [Page 8] - -RFC 1180 A TCP/IP Tutorial January 1991 - - -4. ARP - - When sending out an IP packet, how is the destination Ethernet - address determined? - - ARP (Address Resolution Protocol) is used to translate IP addresses - to Ethernet addresses. 
The translation is done only for outgoing IP - packets, because this is when the IP header and the Ethernet header - are created. - -4.1 ARP Table for Address Translation - - The translation is performed with a table look-up. The table, called - the ARP table, is stored in memory and contains a row for each - computer. There is a column for IP address and a column for Ethernet - address. When translating an IP address to an Ethernet address, the - table is searched for a matching IP address. The following is a - simplified ARP table: - - ------------------------------------ - |IP address Ethernet address | - ------------------------------------ - |223.1.2.1 08-00-39-00-2F-C3| - |223.1.2.3 08-00-5A-21-A7-22| - |223.1.2.4 08-00-10-99-AC-54| - ------------------------------------ - TABLE 1. Example ARP Table - - The human convention when writing out the 4-byte IP address is each - byte in decimal and separating bytes with a period. When writing out - the 6-byte Ethernet address, the conventions are each byte in - hexadecimal and separating bytes with either a minus sign or a colon. - - The ARP table is necessary because the IP address and Ethernet - address are selected independently; you can not use an algorithm to - translate IP address to Ethernet address. The IP address is selected - by the network manager based on the location of the computer on the - internet. When the computer is moved to a different part of an - internet, its IP address must be changed. The Ethernet address is - selected by the manufacturer based on the Ethernet address space - licensed by the manufacturer. When the Ethernet hardware interface - board changes, the Ethernet address changes. - -4.2 Typical Translation Scenario - - During normal operation a network application, such as TELNET, sends - an application message to TCP, then TCP sends the corresponding TCP - message to the IP module. 
The destination IP address is known by the - - - -Socolofsky & Kale [Page 9] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - application, the TCP module, and the IP module. At this point the IP - packet has been constructed and is ready to be given to the Ethernet - driver, but first the destination Ethernet address must be - determined. - - The ARP table is used to look-up the destination Ethernet address. - - 4.3 ARP Request/Response Pair - - But how does the ARP table get filled in the first place? The answer - is that it is filled automatically by ARP on an "as-needed" basis. - - Two things happen when the ARP table can not be used to translate an - address: - - 1. An ARP request packet with a broadcast Ethernet address is sent - out on the network to every computer. - - 2. The outgoing IP packet is queued. - - Every computer's Ethernet interface receives the broadcast Ethernet - frame. Each Ethernet driver examines the Type field in the Ethernet - frame and passes the ARP packet to the ARP module. The ARP request - packet says "If your IP address matches this target IP address, then - please tell me your Ethernet address". An ARP request packet looks - something like this: - - --------------------------------------- - |Sender IP Address 223.1.2.1 | - |Sender Enet Address 08-00-39-00-2F-C3| - --------------------------------------- - |Target IP Address 223.1.2.2 | - |Target Enet Address | - --------------------------------------- - TABLE 2. Example ARP Request - - Each ARP module examines the IP address and if the Target IP address - matches its own IP address, it sends a response directly to the - source Ethernet address. The ARP response packet says "Yes, that - target IP address is mine, let me give you my Ethernet address". An - ARP response packet has the sender/target field contents swapped as - compared to the request. 
It looks something like this: - - - - - - - - - -Socolofsky & Kale [Page 10] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - --------------------------------------- - |Sender IP Address 223.1.2.2 | - |Sender Enet Address 08-00-28-00-38-A9| - --------------------------------------- - |Target IP Address 223.1.2.1 | - |Target Enet Address 08-00-39-00-2F-C3| - --------------------------------------- - TABLE 3. Example ARP Response - - The response is received by the original sender computer. The - Ethernet driver looks at the Type field in the Ethernet frame then - passes the ARP packet to the ARP module. The ARP module examines the - ARP packet and adds the sender's IP and Ethernet addresses to its ARP - table. - - The updated table now looks like this: - - ---------------------------------- - |IP address Ethernet address | - ---------------------------------- - |223.1.2.1 08-00-39-00-2F-C3| - |223.1.2.2 08-00-28-00-38-A9| - |223.1.2.3 08-00-5A-21-A7-22| - |223.1.2.4 08-00-10-99-AC-54| - ---------------------------------- - TABLE 4. ARP Table after Response - -4.4 Scenario Continued - - The new translation has now been installed automatically in the - table, just milli-seconds after it was needed. As you remember from - step 2 above, the outgoing IP packet was queued. Next, the IP - address to Ethernet address translation is performed by look-up in - the ARP table then the Ethernet frame is transmitted on the Ethernet. - Therefore, with the new steps 3, 4, and 5, the scenario for the - sender computer is: - - 1. An ARP request packet with a broadcast Ethernet address is sent - out on the network to every computer. - - 2. The outgoing IP packet is queued. - - 3. The ARP response arrives with the IP-to-Ethernet address - translation for the ARP table. - - - - - - - -Socolofsky & Kale [Page 11] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - 4. For the queued IP packet, the ARP table is used to translate the - IP address to the Ethernet address. - - 5. 
The Ethernet frame is transmitted on the Ethernet. - - In summary, when the translation is missing from the ARP table, one - IP packet is queued. The translation data is quickly filled in with - ARP request/response and the queued IP packet is transmitted. - - Each computer has a separate ARP table for each of its Ethernet - interfaces. If the target computer does not exist, there will be no - ARP response and no entry in the ARP table. IP will discard outgoing - IP packets sent to that address. The upper layer protocols can't - tell the difference between a broken Ethernet and the absence of a - computer with the target IP address. - - Some implementations of IP and ARP don't queue the IP packet while - waiting for the ARP response. Instead the IP packet is discarded and - the recovery from the IP packet loss is left to the TCP module or the - UDP network application. This recovery is performed by time-out and - retransmission. The retransmitted message is successfully sent out - onto the network because the first copy of the message has already - caused the ARP table to be filled. - -5. Internet Protocol - - The IP module is central to internet technology and the essence of IP - is its route table. IP uses this in-memory table to make all - decisions about routing an IP packet. The content of the route table - is defined by the network administrator. Mistakes block - communication. - - To understand how a route table is used is to understand - internetworking. This understanding is necessary for the successful - administration and maintenance of an IP network. - - The route table is best understood by first having an overview of - routing, then learning about IP network addresses, and then looking - at the details. - -5.1 Direct Routing - - The figure below is of a tiny internet with 3 computers: A, B, and C. - Each computer has the same TCP/IP protocol stack as in Figure 1. - Each computer's Ethernet interface has its own Ethernet address. 
- Each computer has an IP address assigned to the IP interface by the - network manager, who also has assigned an IP network number to the - Ethernet. - - - -Socolofsky & Kale [Page 12] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - A B C - | | | - --o------o------o-- - Ethernet 1 - IP network "development" - - Figure 6. One IP Network - - When A sends an IP packet to B, the IP header contains A's IP address - as the source IP address, and the Ethernet header contains A's - Ethernet address as the source Ethernet address. Also, the IP header - contains B's IP address as the destination IP address and the - Ethernet header contains B's Ethernet address as the destination - Ethernet address. - - ---------------------------------------- - |address source destination| - ---------------------------------------- - |IP header A B | - |Ethernet header A B | - ---------------------------------------- - TABLE 5. Addresses in an Ethernet frame for an IP packet - from A to B - - For this simple case, IP is overhead because the IP adds little to - the service offered by Ethernet. However, IP does add cost: the - extra CPU processing and network bandwidth to generate, transmit, and - parse the IP header. - - When B's IP module receives the IP packet from A, it checks the - destination IP address against its own, looking for a match, then it - passes the datagram to the upper-level protocol. - - This communication between A and B uses direct routing. - -5.2 Indirect Routing - - The figure below is a more realistic view of an internet. It is - composed of 3 Ethernets and 3 IP networks connected by an IP-router - called computer D. Each IP network has 4 computers; each computer - has its own IP address and Ethernet address. 
- - - - - - - - - - -Socolofsky & Kale [Page 13] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - A B C ----D---- E F G - | | | | | | | | | - --o------o------o------o- | -o------o------o------o-- - Ethernet 1 | Ethernet 2 - IP network "development" | IP network "accounting" - | - | - | H I J - | | | | - --o-----o------o------o-- - Ethernet 3 - IP network "factory" - - Figure 7. Three IP Networks; One internet - - Except for computer D, each computer has a TCP/IP protocol stack like - that in Figure 1. Computer D is the IP-router; it is connected to - all 3 networks and therefore has 3 IP addresses and 3 Ethernet - addresses. Computer D has a TCP/IP protocol stack similar to that in - Figure 3, except that it has 3 ARP modules and 3 Ethernet drivers - instead of 2. Please note that computer D has only one IP module. - - The network manager has assigned a unique number, called an IP - network number, to each of the Ethernets. The IP network numbers are - not shown in this diagram, just the network names. - - When computer A sends an IP packet to computer B, the process is - identical to the single network example above. Any communication - between computers located on a single IP network matches the direct - routing example discussed previously. - - When computer D and A communicate, it is direct communication. When - computer D and E communicate, it is direct communication. When - computer D and H communicate, it is direct communication. This is - because each of these pairs of computers is on the same IP network. - - However, when computer A communicates with a computer on the far side - of the IP-router, communication is no longer direct. A must use D to - forward the IP packet to the next IP network. This communication is - called "indirect". - - This routing of IP packets is done by IP modules and happens - transparently to TCP, UDP, and the network applications. - - If A sends an IP packet to E, the source IP address and the source - Ethernet address are A's. 
The destination IP address is E's, but - because A's IP module sends the IP packet to D for forwarding, the - destination Ethernet address is D's. - - - -Socolofsky & Kale [Page 14] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - ---------------------------------------- - |address source destination| - ---------------------------------------- - |IP header A E | - |Ethernet header A D | - ---------------------------------------- - TABLE 6. Addresses in an Ethernet frame for an IP packet - from A to E (before D) - - D's IP module receives the IP packet and upon examining the - destination IP address, says "This is not my IP address," and sends - the IP packet directly to E. - - ---------------------------------------- - |address source destination| - ---------------------------------------- - |IP header A E | - |Ethernet header D E | - ---------------------------------------- - TABLE 7. Addresses in an Ethernet frame for an IP packet - from A to E (after D) - - In summary, for direct communication, both the source IP address and - the source Ethernet address is the sender's, and the destination IP - address and the destination Ethernet address is the recipient's. For - indirect communication, the IP address and Ethernet addresses do not - pair up in this way. - - This example internet is a very simple one. Real networks are often - complicated by many factors, resulting in multiple IP-routers and - several types of physical networks. This example internet might have - come about because the network manager wanted to split a large - Ethernet in order to localize Ethernet broadcast traffic. - -5.3 IP Module Routing Rules - - This overview of routing has shown what happens, but not how it - happens. Now let's examine the rules, or algorithm, used by the IP - module. - - For an outgoing IP packet, entering IP from an upper layer, IP must - decide whether to send the IP packet directly or indirectly, and IP - must choose a lower network interface. 
These choices are made by - consulting the route table. - - For an incoming IP packet, entering IP from a lower interface, IP - must decide whether to forward the IP packet or pass it to an upper - layer. If the IP packet is being forwarded, it is treated as an - - - -Socolofsky & Kale [Page 15] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - outgoing IP packet. - - When an incoming IP packet arrives it is never forwarded back out - through the same network interface. - - These decisions are made before the IP packet is handed to the lower - interface and before the ARP table is consulted. - -5.4 IP Address - - The network manager assigns IP addresses to computers according to - the IP network to which the computer is attached. One part of a 4- - byte IP address is the IP network number, the other part is the IP - computer number (or host number). For the computer in table 1, with - an IP address of 223.1.2.1, the network number is 223.1.2 and the - host number is number 1. - - The portion of the address that is used for network number and for - host number is defined by the upper bits in the 4-byte address. All - example IP addresses in this tutorial are of type class C, meaning - that the upper 3 bits indicate that 21 bits are the network number - and 8 bits are the host number. This allows 2,097,152 class C - networks up to 254 hosts on each network. - - The IP address space is administered by the NIC (Network Information - Center). All internets that are connected to the single world-wide - Internet must use network numbers assigned by the NIC. If you are - setting up your own internet and you are not intending to connect it - to the Internet, you should still obtain your network numbers from - the NIC. If you pick your own number, you run the risk of confusion - and chaos in the eventuality that your internet is connected to - another internet. - -5.5 Names - - People refer to computers by names, not numbers. 
A computer called - alpha might have the IP address of 223.1.2.1. For small networks, - this name-to-address translation data is often kept on each computer - in the "hosts" file. For larger networks, this translation data file - is stored on a server and accessed across the network when needed. A - few lines from that file might look like this: - - 223.1.2.1 alpha - 223.1.2.2 beta - 223.1.2.3 gamma - 223.1.2.4 delta - 223.1.3.2 epsilon - 223.1.4.2 iota - - - -Socolofsky & Kale [Page 16] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - The IP address is the first column and the computer name is the - second column. - - In most cases, you can install identical "hosts" files on all - computers. You may notice that "delta" has only one entry in this - file even though it has 3 IP addresses. Delta can be reached with - any of its IP addresses; it does not matter which one is used. When - delta receives an IP packet and looks at the destination address, it - will recognize any of its own IP addresses. - - IP networks are also given names. If you have 3 IP networks, your - "networks" file for documenting these names might look something like - this: - - 223.1.2 development - 223.1.3 accounting - 223.1.4 factory - - The IP network number is in the first column and its name is in the - second column. - - From this example you can see that alpha is computer number 1 on the - development network, beta is computer number 2 on the development - network and so on. You might also say that alpha is development.1, - Beta is development.2, and so on. - - The above hosts file is adequate for the users, but the network - manager will probably replace the line for delta with: - - 223.1.2.4 devnetrouter delta - 223.1.3.1 facnetrouter - 223.1.4.1 accnetrouter - - These three new lines for the hosts file give each of delta's IP - addresses a meaningful name. In fact, the first IP address listed - has 2 names; "delta" and "devnetrouter" are synonyms. 
In practice - "delta" is the general-purpose name of the computer and the other 3 - names are only used when administering the IP route table. - - These files are used by network administration commands and network - applications to provide meaningful names. They are not required for - operation of an internet, but they do make it easier for us. - -5.6 IP Route Table - - How does IP know which lower network interface to use when sending - out a IP packet? IP looks it up in the route table using a search - key of the IP network number extracted from the IP destination - - - -Socolofsky & Kale [Page 17] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - address. - - The route table contains one row for each route. The primary columns - in the route table are: IP network number, direct/indirect flag, - router IP address, and interface number. This table is referred to - by IP for each outgoing IP packet. - - On most computers the route table can be modified with the "route" - command. The content of the route table is defined by the network - manager, because the network manager assigns the IP addresses to the - computers. - -5.7 Direct Routing Details - - To explain how it is used, let us visit in detail the routing - situations we have reviewed previously. - - --------- --------- - | alpha | | beta | - | 1 | | 1 | - --------- --------- - | | - --------o---------------o- - Ethernet 1 - IP network "development" - - Figure 8. Close-up View of One IP Network - - The route table inside alpha looks like this: - - -------------------------------------------------------------- - |network direct/indirect flag router interface number| - -------------------------------------------------------------- - |development direct 1 | - -------------------------------------------------------------- - TABLE 8. Example Simple Route Table - - This view can be seen on some UNIX systems with the "netstat -r" - command. With this simple network, all computers have identical - routing tables. 
- - For discussion, the table is printed again without the network number - translated to its network name. - - - - - - - - -Socolofsky & Kale [Page 18] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - -------------------------------------------------------------- - |network direct/indirect flag router interface number| - -------------------------------------------------------------- - |223.1.2 direct 1 | - -------------------------------------------------------------- - TABLE 9. Example Simple Route Table with Numbers - -5.8 Direct Scenario - - Alpha is sending an IP packet to beta. The IP packet is in alpha's - IP module and the destination IP address is beta or 223.1.2.2. IP - extracts the network portion of this IP address and scans the first - column of the table looking for a match. With this network a match - is found on the first entry. - - The other information in this entry indicates that computers on this - network can be reached directly through interface number 1. An ARP - table translation is done on beta's IP address then the Ethernet - frame is sent directly to beta via interface number 1. - - If an application tries to send data to an IP address that is not on - the development network, IP will be unable to find a match in the - route table. IP then discards the IP packet. Some computers provide - a "Network not reachable" error message. - -5.9 Indirect Routing Details - - Now, let's take a closer look at the more complicated routing - scenario that we examined previously. 
- - - - - - - - - - - - - - - - - - - - - - -Socolofsky & Kale [Page 19] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - --------- --------- --------- - | alpha | | delta | |epsilon| - | 1 | |1 2 3| | 1 | - --------- --------- --------- - | | | | | - --------o---------------o- | -o----------------o-------- - Ethernet 1 | Ethernet 2 - IP network "Development" | IP network "accounting" - | - | -------- - | | iota | - | | 1 | - | -------- - | | - --o--------o-------- - Ethernet 3 - IP network "factory" - - Figure 9. Close-up View of Three IP Networks - - The route table inside alpha looks like this: - - --------------------------------------------------------------------- - |network direct/indirect flag router interface number| - --------------------------------------------------------------------- - |development direct 1 | - |accounting indirect devnetrouter 1 | - |factory indirect devnetrouter 1 | - --------------------------------------------------------------------- - TABLE 10. Alpha Route Table - - For discussion the table is printed again using numbers instead of - names. - - -------------------------------------------------------------------- - |network direct/indirect flag router interface number| - -------------------------------------------------------------------- - |223.1.2 direct 1 | - |223.1.3 indirect 223.1.2.4 1 | - |223.1.4 indirect 223.1.2.4 1 | - -------------------------------------------------------------------- - TABLE 11. Alpha Route Table with Numbers - - The router in Alpha's route table is the IP address of delta's - connection to the development network. - - - - - - -Socolofsky & Kale [Page 20] - -RFC 1180 A TCP/IP Tutorial January 1991 - - -5.10 Indirect Scenario - - Alpha is sending an IP packet to epsilon. The IP packet is in - alpha's IP module and the destination IP address is epsilon - (223.1.3.2). IP extracts the network portion of this IP address - (223.1.3) and scans the first column of the table looking for a - match. 
A match is found on the second entry. - - This entry indicates that computers on the 223.1.3 network can be - reached through the IP-router devnetrouter. Alpha's IP module then - does an ARP table translation for devnetrouter's IP address and sends - the IP packet directly to devnetrouter through Alpha's interface - number 1. The IP packet still contains the destination address of - epsilon. - - The IP packet arrives at delta's development network interface and is - passed up to delta's IP module. The destination IP address is - examined and because it does not match any of delta's own IP - addresses, delta decides to forward the IP packet. - - Delta's IP module extracts the network portion of the destination IP - address (223.1.3) and scans its route table for a matching network - field. Delta's route table looks like this: - - ---------------------------------------------------------------------- - |network direct/indirect flag router interface number| - ---------------------------------------------------------------------- - |development direct 1 | - |factory direct 3 | - |accounting direct 2 | - ---------------------------------------------------------------------- - TABLE 12. Delta's Route Table - - Below is delta's table printed again, without the translation to - names. - - ---------------------------------------------------------------------- - |network direct/indirect flag router interface number| - ---------------------------------------------------------------------- - |223.1.2 direct 1 | - |223.1.3 direct 3 | - |223.1.4 direct 2 | - ---------------------------------------------------------------------- - TABLE 13. Delta's Route Table with Numbers - - The match is found on the second entry. IP then sends the IP packet - directly to epsilon through interface number 3. 
The IP packet - contains the IP destination address of epsilon and the Ethernet - - - -Socolofsky & Kale [Page 21] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - destination address of epsilon. - - The IP packet arrives at epsilon and is passed up to epsilon's IP - module. The destination IP address is examined and found to match - with epsilon's IP address, so the IP packet is passed to the upper - protocol layer. - -5.11 Routing Summary - - When a IP packet travels through a large internet it may go through - many IP-routers before it reaches its destination. The path it takes - is not determined by a central source but is a result of consulting - each of the routing tables used in the journey. Each computer - defines only the next hop in the journey and relies on that computer - to send the IP packet on its way. - -5.12 Managing the Routes - - Maintaining correct routing tables on all computers in a large - internet is a difficult task; network configuration is being modified - constantly by the network managers to meet changing needs. Mistakes - in routing tables can block communication in ways that are - excruciatingly tedious to diagnose. - - Keeping a simple network configuration goes a long way towards making - a reliable internet. For instance, the most straightforward method - of assigning IP networks to Ethernet is to assign a single IP network - number to each Ethernet. - - Help is also available from certain protocols and network - applications. ICMP (Internet Control Message Protocol) can report - some routing problems. For small networks the route table is filled - manually on each computer by the network administrator. For larger - networks the network administrator automates this manual operation - with a routing protocol to distribute routes throughout a network. - - When a computer is moved from one IP network to another, its IP - address must change. When a computer is removed from an IP network - its old address becomes invalid. 
These changes require frequent - updates to the "hosts" file. This flat file can become difficult to - maintain for even medium-size networks. The Domain Name System helps - solve these problems. - -6. User Datagram Protocol - - UDP is one of the two main protocols to reside on top of IP. It - offers service to the user's network applications. Example network - applications that use UDP are: Network File System (NFS) and Simple - - - -Socolofsky & Kale [Page 22] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - Network Management Protocol (SNMP). The service is little more than - an interface to IP. - - UDP is a connectionless datagram delivery service that does not - guarantee delivery. UDP does not maintain an end-to-end connection - with the remote UDP module; it merely pushes the datagram out on the - net and accepts incoming datagrams off the net. - - UDP adds two values to what is provided by IP. One is the - multiplexing of information between applications based on port - number. The other is a checksum to check the integrity of the data. - -6.1 Ports - - How does a client on one computer reach the server on another? - - The path of communication between an application and UDP is through - UDP ports. These ports are numbered, beginning with zero. An - application that is offering service (the server) waits for messages - to come in on a specific port dedicated to that service. The server - waits patiently for any client to request service. - - For instance, the SNMP server, called an SNMP agent, always waits on - port 161. There can be only one SNMP agent per computer because - there is only one UDP port number 161. This port number is well - known; it is a fixed number, an internet assigned number. If an SNMP - client wants service, it sends its request to port number 161 of UDP - on the destination computer. - - When an application sends data out through UDP it arrives at the far - end as a single unit. 
For example, if an application does 5 writes - to the UDP port, the application at the far end will do 5 reads from - the UDP port. Also, the size of each write matches the size of each - read. - - UDP preserves the message boundary defined by the application. It - never joins two application messages together, or divides a single - application message into parts. - -6.2 Checksum - - An incoming IP packet with an IP header type field indicating "UDP" - is passed up to the UDP module by IP. When the UDP module receives - the UDP datagram from IP it examines the UDP checksum. If the - checksum is zero, it means that checksum was not calculated by the - sender and can be ignored. Thus the sending computer's UDP module - may or may not generate checksums. If Ethernet is the only network - between the 2 UDP modules communicating, then you may not need - - - -Socolofsky & Kale [Page 23] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - checksumming. However, it is recommended that checksum generation - always be enabled because at some point in the future a route table - change may send the data across less reliable media. - - If the checksum is valid (or zero), the destination port number is - examined and if an application is bound to that port, an application - message is queued for the application to read. Otherwise the UDP - datagram is discarded. If the incoming UDP datagrams arrive faster - than the application can read them and if the queue fills to a - maximum value, UDP datagrams are discarded by UDP. UDP will continue - to discard UDP datagrams until there is space in the queue. - -7. Transmission Control Protocol - - TCP provides a different service than UDP. TCP offers a connection- - oriented byte stream, instead of a connectionless datagram delivery - service. TCP guarantees delivery, whereas UDP does not. - - TCP is used by network applications that require guaranteed delivery - and cannot be bothered with doing time-outs and retransmissions. 
The - two most typical network applications that use TCP are File Transfer - Protocol (FTP) and the TELNET. Other popular TCP network - applications include X-Window System, rcp (remote copy), and the r- - series commands. TCP's greater capability is not without cost: it - requires more CPU and network bandwidth. The internals of the TCP - module are much more complicated than those in a UDP module. - - Similar to UDP, network applications connect to TCP ports. Well- - defined port numbers are dedicated to specific applications. For - instance, the TELNET server uses port number 23. The TELNET client - can find the server simply by connecting to port 23 of TCP on the - specified computer. - - When the application first starts using TCP, the TCP module on the - client's computer and the TCP module on the server's computer start - communicating with each other. These two end-point TCP modules - contain state information that defines a virtual circuit. This - virtual circuit consumes resources in both TCP end-points. The - virtual circuit is full duplex; data can go in both directions - simultaneously. The application writes data to the TCP port, the - data traverses the network and is read by the application at the far - end. - - TCP packetizes the byte stream at will; it does not retain the - boundaries between writes. For example, if an application does 5 - writes to the TCP port, the application at the far end might do 10 - reads to get all the data. Or it might get all the data with a - single read. There is no correlation between the number and size of - - - -Socolofsky & Kale [Page 24] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - writes at one end to the number and size of reads at the other end. - - TCP is a sliding window protocol with time-out and retransmits. - Outgoing data must be acknowledged by the far-end TCP. - Acknowledgements can be piggybacked on data. Both receiving ends can - flow control the far end, thus preventing a buffer overrun. 
- - As with all sliding window protocols, the protocol has a window size. - The window size determines the amount of data that can be transmitted - before an acknowledgement is required. For TCP, this amount is not a - number of TCP segments but a number of bytes. - -8. Network Applications - - Why do both TCP and UDP exist, instead of just one or the other? - - They supply different services. Most applications are implemented to - use only one or the other. You, the programmer, choose the protocol - that best meets your needs. If you need a reliable stream delivery - service, TCP might be best. If you need a datagram service, UDP - might be best. If you need efficiency over long-haul circuits, TCP - might be best. If you need efficiency over fast networks with short - latency, UDP might be best. If your needs do not fall nicely into - these categories, then the "best" choice is unclear. However, - applications can make up for deficiencies in the choice. For - instance if you choose UDP and you need reliability, then the - application must provide reliability. If you choose TCP and you need - a record oriented service, then the application must insert markers - in the byte stream to delimit records. - - What network applications are available? - - There are far too many to list. The number is growing continually. - Some of the applications have existed since the beginning of internet - technology: TELNET and FTP. Others are relatively new: X-Windows and - SNMP. The following is a brief description of the applications - mentioned in this tutorial. - -8.1 TELNET - - TELNET provides a remote login capability on TCP. The operation and - appearance is similar to keyboard dialing through a telephone switch. - On the command line the user types "telnet delta" and receives a - login prompt from the computer called "delta". - - TELNET works well; it is an old application and has widespread - interoperability. 
Implementations of TELNET usually work between - different operating systems. For instance, a TELNET client may be on - - - -Socolofsky & Kale [Page 25] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - VAX/VMS and the server on UNIX System V. - -8.2 FTP - - File Transfer Protocol (FTP), as old as TELNET, also uses TCP and has - widespread interoperability. The operation and appearance is as if - you TELNETed to the remote computer. But instead of typing your - usual commands, you have to make do with a short list of commands for - directory listings and the like. FTP commands allow you to copy - files between computers. - -8.3 rsh - - Remote shell (rsh or remsh) is one of an entire family of remote UNIX - style commands. The UNIX copy command, cp, becomes rcp. The UNIX - "who is logged in" command, who, becomes rwho. The list continues - and is referred to collectively to as the "r" series commands or the - "r*" (r star) commands. - - The r* commands mainly work between UNIX systems and are designed for - interaction between trusted hosts. Little consideration is given to - security, but they provide a convenient user environment. - - To execute the "cc file.c" command on a remote computer called delta, - type "rsh delta cc file.c". To copy the "file.c" file to delta, type - "rcp file.c delta:". To login to delta, type "rlogin delta", and if - you administered the computers in a certain way, you will not be - challenged with a password prompt. - -8.4 NFS - - Network File System, first developed by Sun Microsystems Inc, uses - UDP and is excellent for mounting UNIX file systems on multiple - computers. A diskless workstation can access its server's hard disk - as if the disk were local to the workstation. A single disk copy of - a database on mainframe "alpha" can also be used by mainframe "beta" - if the database's file system is NFS mounted on "beta". - - NFS adds significant load to a network and has poor utility across - slow links, but the benefits are strong. 
The NFS client is - implemented in the kernel, allowing all applications and commands to - use the NFS mounted disk as if it were local disk. - -8.5 SNMP - - Simple Network Management Protocol (SNMP) uses UDP and is designed - for use by central network management stations. It is a well known - fact that if given enough data, a network manager can detect and - - - -Socolofsky & Kale [Page 26] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - diagnose network problems. The central station uses SNMP to collect - this data from other computers on the network. SNMP defines the - format for the data; it is left to the central station or network - manager to interpret the data. - -8.6 X-Window - - The X Window System uses the X Window protocol on TCP to draw windows - on a workstation's bitmap display. X Window is much more than a - utility for drawing windows; it is entire philosophy for designing a - user interface. - -9. Other Information - - Much information about internet technology was not included in this - tutorial. This section lists information that is considered the next - level of detail for the reader who wishes to learn more. - - o administration commands: arp, route, and netstat - o ARP: permanent entry, publish entry, time-out entry, spoofing - o IP route table: host entry, default gateway, subnets - o IP: time-to-live counter, fragmentation, ICMP - o RIP, routing loops - o Domain Name System - -10. References - - [1] Comer, D., "Internetworking with TCP/IP Principles, Protocols, - and Architecture", Prentice Hall, Englewood Cliffs, New Jersey, - U.S.A., 1988. - - [2] Feinler, E., et al, DDN Protocol Handbook, Volume 2 and 3, DDN - Network Information Center, SRI International, 333 Ravenswood - Avenue, Room EJ291, Menlow Park, California, U.S.A., 1985. - - [3] Spider Systems, Ltd., "Packets and Protocols", Spider Systems - Ltd., Stanwell Street, Edinburgh, U.K. EH6 5NG, 1990. - -11. 
Relation to other RFCs - - This RFC is a tutorial and it does not UPDATE or OBSOLETE any other - RFC. - -12. Security Considerations - - There are security considerations within the TCP/IP protocol suite. - To some people these considerations are serious problems, to others - they are not; it depends on the user requirements. - - - -Socolofsky & Kale [Page 27] - -RFC 1180 A TCP/IP Tutorial January 1991 - - - This tutorial does not discuss these issues, but if you want to learn - more you should start with the topic of ARP-spoofing, then use the - "Security Considerations" section of RFC 1122 to lead you to more - information. - -13. Authors' Addresses - - Theodore John Socolofsky - Spider Systems Limited - Spider Park - Stanwell Street - Edinburgh EH6 5NG - United Kingdom - - Phone: - from UK 031-554-9424 - from USA 011-44-31-554-9424 - Fax: - from UK 031-554-0649 - from USA 011-44-31-554-0649 - - EMail: TEDS@SPIDER.CO.UK - - - Claudia Jeanne Kale - 12 Gosford Place - Edinburgh EH6 4BJ - United Kingdom - - Phone: - from UK 031-554-7432 - from USA 011-44-31-554-7432 - - EMail: CLAUDIAK@SPIDER.CO.UK - - - - - - - - - - - - - - - - - -Socolofsky & Kale [Page 28] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1185.txt b/kernel/picotcp/RFC/rfc1185.txt deleted file mode 100644 index 4f467f5..0000000 --- a/kernel/picotcp/RFC/rfc1185.txt +++ /dev/null @@ -1,1179 +0,0 @@ - - - - - - -Network Working Group V. Jacobson -Request for Comments: 1185 LBL - R. Braden - ISI - L. Zhang - PARC - October 1990 - - - TCP Extension for High-Speed Paths - -Status of This Memo - - This memo describes an Experimental Protocol extension to TCP for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "IAB - Official Protocol Standards" for the standardization state and status - of this protocol. Distribution of this memo is unlimited. 
- -Summary - - This memo describes a small extension to TCP to support reliable - operation over very high-speed paths, using sender timestamps - transmitted using the TCP Echo option proposed in RFC-1072. - -1. INTRODUCTION - - TCP uses positive acknowledgments and retransmissions to provide - reliable end-to-end delivery over a full-duplex virtual circuit - called a connection [Postel81]. A connection is defined by its two - end points; each end point is a "socket", i.e., a (host,port) pair. - To protect against data corruption, TCP uses an end-to-end checksum. - Duplication and reordering are handled using a fine-grained sequence - number space, with each octet receiving a distinct sequence number. - - The TCP protocol [Postel81] was designed to operate reliably over - almost any transmission medium regardless of transmission rate, - delay, corruption, duplication, or reordering of segments. In - practice, proper TCP implementations have demonstrated remarkable - robustness in adapting to a wide range of network characteristics. - For example, TCP implementations currently adapt to transfer rates in - the range of 100 bps to 10**7 bps and round-trip delays in the range - 1 ms to 100 seconds. - - However, the introduction of fiber optics is resulting in ever-higher - transmission speeds, and the fastest paths are moving out of the - domain for which TCP was originally engineered. This memo and RFC- - 1072 [Jacobson88] propose modest extensions to TCP to extend the - - - -Jacobson, Braden & Zhang [Page 1] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - domain of its application to higher speeds. - - There is no one-line answer to the question: "How fast can TCP go?". - The issues are reliability and performance, and these depend upon the - round-trip delay and the maximum time that segments may be queued in - the Internet, as well as upon the transmission speed. 
We must think - through these relationships very carefully if we are to successfully - extend TCP's domain. - - TCP performance depends not upon the transfer rate itself, but rather - upon the product of the transfer rate and the round-trip delay. This - "bandwidth*delay product" measures the amount of data that would - "fill the pipe"; it is the buffer space required at sender and - receiver to obtain maximum throughput on the TCP connection over the - path. RFC-1072 proposed a set of TCP extensions to improve TCP - efficiency for "LFNs" (long fat networks), i.e., networks with large - bandwidth*delay products. - - On the other hand, high transfer rate can threaten TCP reliability by - violating the assumptions behind the TCP mechanism for duplicate - detection and sequencing. The present memo specifies a solution for - this problem, extending TCP reliability to transfer rates well beyond - the foreseeable upper limit of bandwidth. - - An especially serious kind of error may result from an accidental - reuse of TCP sequence numbers in data segments. Suppose that an "old - duplicate segment", e.g., a duplicate data segment that was delayed - in Internet queues, was delivered to the receiver at the wrong moment - so that its sequence numbers fell somewhere within the current - window. There would be no checksum failure to warn of the error, and - the result could be an undetected corruption of the data. Reception - of an old duplicate ACK segment at the transmitter could be only - slightly less serious: it is likely to lock up the connection so that - no further progress can be made and a RST is required to - resynchronize the two ends. - - Duplication of sequence numbers might happen in either of two ways: - - (1) Sequence number wrap-around on the current connection - - A TCP sequence number contains 32 bits. At a high enough - transfer rate, the 32-bit sequence space may be "wrapped" - (cycled) within the time that a segment may be delayed in - queues. 
Section 2 discusses this case and proposes a mechanism - to reject old duplicates on the current connection. - - (2) Segment from an earlier connection incarnation - - - - -Jacobson, Braden & Zhang [Page 2] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - Suppose a connection terminates, either by a proper close - sequence or due to a host crash, and the same connection (i.e., - using the same pair of sockets) is immediately reopened. A - delayed segment from the terminated connection could fall within - the current window for the new incarnation and be accepted as - valid. This case is discussed in Section 3. - - TCP reliability depends upon the existence of a bound on the lifetime - of a segment: the "Maximum Segment Lifetime" or MSL. An MSL is - generally required by any reliable transport protocol, since every - sequence number field must be finite, and therefore any sequence - number may eventually be reused. In the Internet protocol suite, the - MSL bound is enforced by an IP-layer mechanism, the "Time-to-Live" or - TTL field. - - Watson's Delta-T protocol [Watson81] includes network-layer - mechanisms for precise enforcement of an MSL. In contrast, the IP - mechanism for MSL enforcement is loosely defined and even more - loosely implemented in the Internet. Therefore, it is unwise to - depend upon active enforcement of MSL for TCP connections, and it is - unrealistic to imagine setting MSL's smaller than the current values - (e.g., 120 seconds specified for TCP). The timestamp algorithm - described in the following section gives a way out of this dilemma - for high-speed networks. - - -2. SEQUENCE NUMBER WRAP-AROUND - - 2.1 Background - - Avoiding reuse of sequence numbers within the same connection is - simple in principle: enforce a segment lifetime shorter than the - time it takes to cycle the sequence space, whose size is - effectively 2**31. 
- - More specifically, if the maximum effective bandwidth at which TCP - is able to transmit over a particular path is B bytes per second, - then the following constraint must be satisfied for error-free - operation: - - 2**31 / B > MSL (secs) [1] - - The following table shows the value for Twrap = 2**31/B in - seconds, for some important values of the bandwidth B: - - - - - - - -Jacobson, Braden & Zhang [Page 3] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - Network B*8 B Twrap - bits/sec bytes/sec secs - _______ _______ ______ ______ - - ARPANET 56kbps 7KBps 3*10**5 (~3.6 days) - - DS1 1.5Mbps 190KBps 10**4 (~3 hours) - - Ethernet 10Mbps 1.25MBps 1700 (~30 mins) - - DS3 45Mbps 5.6MBps 380 - - FDDI 100Mbps 12.5MBps 170 - - Gigabit 1Gbps 125MBps 17 - - - It is clear why wrap-around of the sequence space was not a - problem for 56kbps packet switching or even 10Mbps Ethernets. On - the other hand, at DS3 and FDDI speeds, Twrap is comparable to the - 2 minute MSL assumed by the TCP specification [Postel81]. Moving - towards gigabit speeds, Twrap becomes too small for reliable - enforcement by the Internet TTL mechanism. - - The 16-bit window field of TCP limits the effective bandwidth B to - 2**16/RTT, where RTT is the round-trip time in seconds - [McKenzie89]. If the RTT is large enough, this limits B to a - value that meets the constraint [1] for a large MSL value. For - example, consider a transcontinental backbone with an RTT of 60ms - (set by the laws of physics). With the bandwidth*delay product - limited to 64KB by the TCP window size, B is then limited to - 1.1MBps, no matter how high the theoretical transfer rate of the - path. This corresponds to cycling the sequence number space in - Twrap= 2000 secs, which is safe in today's Internet. - - Based on this reasoning, an earlier RFC [McKenzie89] has cautioned - that expanding the TCP window space as proposed in RFC-1072 will - lead to sequence wrap-around and hence to possible data - corruption. 
We believe that this is mis-identifying the culprit, - which is not the larger window but rather the high bandwidth. - - For example, consider a (very large) FDDI LAN with a diameter - of 10km. Using the speed of light, we can compute the RTT - across the ring as (2*10**4)/(3*10**8) = 67 microseconds, and - the delay*bandwidth product is then 833 bytes. A TCP - connection across this LAN using a window of only 833 bytes - will run at the full 100mbps and can wrap the sequence space - in about 3 minutes, very close to the MSL of TCP. Thus, high - - - -Jacobson, Braden & Zhang [Page 4] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - speed alone can cause a reliability problem with sequence - number wrap-around, even without extended windows. - - An "obvious" fix for the problem of cycling the sequence space is - to increase the size of the TCP sequence number field. For - example, the sequence number field (and also the acknowledgment - field) could be expanded to 64 bits. However, the proposals for - making such a change while maintaining compatibility with current - TCP have tended towards complexity and ugliness. - - This memo proposes a simple solution to the problem, using the TCP - echo options defined in RFC-1072. Section 2.2 which follows - describes the original use of these options to carry timestamps in - order to measure RTT accurately. Section 2.3 proposes a method of - using these same timestamps to reject old duplicate segments that - could corrupt an open TCP connection. Section 3 discusses the - application of this mechanism to avoiding old duplicates from - previous incarnations. - - 2.2 TCP Timestamps - - RFC-1072 defined two TCP options, Echo and Echo Reply. Echo - carries a 32-bit number, and the receiver of the option must - return this same value to the source host in an Echo Reply option. - - RFC-1072 furthermore describes the use of these options to contain - 32-bit timestamps, for measuring the RTT. 
A TCP sending data - would include Echo options containing the current clock value. - The receiver would echo these timestamps in returning segments - (generally, ACK segments). The difference between a timestamp - from an Echo Reply option and the current time would then measure - the RTT at the sender. - - This mechanism was designed to solve the following problem: almost - all TCP implementations base their RTT measurements on a sample of - only one packet per window. If we look at RTT estimation as a - signal processing problem (which it is), a data signal at some - frequency (the packet rate) is being sampled at a lower frequency - (the window rate). Unfortunately, this lower sampling frequency - violates Nyquist's criteria and may introduce "aliasing" artifacts - into the estimated RTT [Hamming77]. - - A good RTT estimator with a conservative retransmission timeout - calculation can tolerate the aliasing when the sampling frequency - is "close" to the data frequency. For example, with a window of - 8 packets, the sample rate is 1/8 the data frequency -- less than - an order of magnitude different. However, when the window is tens - or hundreds of packets, the RTT estimator may be seriously in - - - -Jacobson, Braden & Zhang [Page 5] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - error, resulting in spurious retransmissions. - - A solution to the aliasing problem that actually simplifies the - sender substantially (since the RTT code is typically the single - biggest protocol cost for TCP) is as follows: the sender will - place a timestamp in each segment and the receiver will reflect - these timestamps back in ACK segments. Then a single subtract - gives the sender an accurate RTT measurement for every ACK segment - (which will correspond to every other data segment, with a - sensible receiver). RFC-1072 defined a timestamp echo option for - this purpose. 
- - It is vitally important to use the timestamp echo option with big - windows; otherwise, the door is opened to some dangerous - instabilities due to aliasing. Furthermore, the option is - probably useful for all TCP's, since it simplifies the sender. - - 2.3 Avoiding Old Duplicate Segments - - Timestamps carried from sender to receiver in TCP Echo options can - also be used to prevent data corruption caused by sequence number - wrap-around, as this section describes. - - 2.3.1 Basic Algorithm - - Assume that every received TCP segment contains a timestamp. - The basic idea is that a segment received with a timestamp that - is earlier than the timestamp of the most recently accepted - segment can be discarded as an old duplicate. More - specifically, the following processing is to be performed on - normal incoming segments: - - R1) If the timestamp in the arriving segment is less - than the timestamp of the most recently received in- - sequence segment, treat the arriving segment as not - acceptable: - - If SEG.LEN > 0, send an acknowledgement in reply as - specified in RFC-793 page 69, and drop the segment; - otherwise, just silently drop the segment.* - -_________________________ -*Sending an ACK segment in reply is not strictly necessary, since the -case can only arise when a later in-order segment has already been -received. However, for consistency and simplicity, we suggest -treating a timestamp failure the same way TCP treats any other -unacceptable segment. - - - - -Jacobson, Braden & Zhang [Page 6] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - R2) If the segment is outside the window, reject it (normal - TCP processing) - - R3) If an arriving segment is in-sequence (i.e., at the left - window edge), accept it normally and record its timestamp. - - R4) Otherwise, treat the segment as a normal in-window, out- - of-sequence TCP segment (e.g., queue it for later delivery - to the user). 
- - - Steps R2-R4 are the normal TCP processing steps specified by - RFC-793, except that in R3 the latest timestamp is set from - each in-sequence segment that is accepted. Thus, the latest - timestamp recorded at the receiver corresponds to the left edge - of the window and only advances when the left edge moves - [Jacobson88]. - - It is important to note that the timestamp is checked only when - a segment first arrives at the receiver, regardless of whether - it is in-sequence or is queued. Consider the following - example. - - Suppose the segment sequence: A.1, B.1, C.1, ..., Z.1 has - been sent, where the letter indicates the sequence number - and the digit represents the timestamp. Suppose also that - segment B.1 has been lost. The highest in-sequence - timestamp is 1 (from A.1), so C.1, ..., Z.1 are considered - acceptable and are queued. When B is retransmitted as - segment B.2 (using the latest timestamp), it fills the - hole and causes all the segments through Z to be - acknowledged and passed to the user. The timestamps of - the queued segments are *not* inspected again at this - time, since they have already been accepted. When B.2 is - accepted, the receiver's current timestamp is set to 2. - - This rule is vital to allow reasonable performance under loss. - A full window of data is in transit at all times, and after a - loss a full window less one packet will show up out-of-sequence - to be queued at the receiver (e.g., up to ~2**30 bytes of - data); the timestamp option must not result in discarding this - data. - - In certain unlikely circumstances, the algorithm of rules R1-R4 - could lead to discarding some segments unnecessarily, as shown - in the following example: - - Suppose again that segments: A.1, B.1, C.1, ..., Z.1 have - - - -Jacobson, Braden & Zhang [Page 7] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - been sent in sequence and that segment B.1 has been lost. - Furthermore, suppose delivery of some of C.1, ... 
Z.1 is - delayed until AFTER the retransmission B.2 arrives at the - receiver. These delayed segments will be discarded - unnecessarily when they do arrive, since their timestamps - are now out of date. - - This case is very unlikely to occur. If the retransmission was - triggered by a timeout, some of the segments C.1, ... Z.1 must - have been delayed longer than the RTO time. This is presumably - an unlikely event, or there would be many spurious timeouts and - retransmissions. If B's retransmission was triggered by the - "fast retransmit" algorithm, i.e., by duplicate ACK's, then the - queued segments that caused these ACK's must have been received - already. - - Even if a segment was delayed past the RTO, the selective - acknowledgment (SACK) facility of RFC-1072 will cause the - delayed packets to be retransmitted at the same time as B.2, - avoiding an extra RTT and therefore causing a very small - performance penalty. - - We know of no case with a significant probability of occurrence - in which timestamps will cause performance degradation by - unnecessarily discarding segments. - - 2.3.2 Header Prediction - - "Header prediction" [Jacobson90] is a high-performance - transport protocol implementation technique that is most - important for high-speed links. This technique optimizes the - code for the most common case: receiving a segment correctly - and in order. Using header prediction, the receiver asks the - question, "Is this segment the next in sequence?" This - question can be answered in fewer machine instructions than the - question, "Is this segment within the window?" - - Adding header prediction to our timestamp procedure leads to - the following sequence for processing an arriving TCP segment: - - H1) Check timestamp (same as step R1 above) - - H2) Do header prediction: if segment is next in sequence and - if there are no special conditions requiring additional - processing, accept the segment, record its timestamp, and - skip H3. 
- - H3) Process the segment normally, as specified in RFC-793. - - - -Jacobson, Braden & Zhang [Page 8] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - This includes dropping segments that are outside the - window and possibly sending acknowledgments, and queueing - in-window, out-of-sequence segments. - - However, the timestamp check in step H1 is very unlikely to - fail, and it is a relatively expensive operation since it - requires interval arithmetic on a finite field. To perform - this check on every single segment seems like poor - implementation engineering, defeating the purpose of header - prediction. Therefore, we suggest that an implementor - interchange H1 and H2, i.e., perform header prediction FIRST, - performing H1 and H3 only if header prediction fails. We - believe that this change might gain 5-10% in performance on - high-speed networks. - - This reordering does raise a theoretical hazard: a segment from - 2**32 bytes in the past may arrive at exactly the wrong time - and be accepted mistakenly by the header-prediction step. We - make the following argument to show that the probability of - this failure is negligible. - - If all segments are equally likely to show up as old - duplicates, then the probability of an old duplicate - exactly matching the left window edge is the maximum - segment size (MSS) divided by the size of the sequence - space. This ratio must be less than 2**-16, since MSS - must be < 2**16; for example, it will be (2**12)/(2**32) = - 2**-20 for an FDDI link. However, the older a segment is, - the less likely it is to be retained in the Internet, and - under any reasonable model of segment lifetime the - probability of an old duplicate exactly at the left window - edge must be much smaller than 2**-16. - - The 16 bit TCP checksum also allows a basic unreliability - of one part in 2**16. 
A protocol mechanism whose - reliability exceeds the reliability of the TCP checksum - should be considered "good enough", i.e., it won't - contribute significantly to the overall error rate. We - therefore believe we can ignore the problem of an old - duplicate being accepted by doing header prediction before - checking the timestamp. - - 2.3.3 Timestamp Frequency - - It is important to understand that the receiver algorithm for - timestamps does not involve clock synchronization with the - sender. The sender's clock is used to stamp the segments, and - the sender uses this fact to measure RTT's. However, the - - - -Jacobson, Braden & Zhang [Page 9] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - receiver treats the timestamp as simply a monotone-increasing - serial number, without any necessary connection to its clock. - From the receiver's viewpoint, the timestamp is acting as a - logical extension of the high-order bits of the sequence - number. - - However, the receiver algorithm does place some requirements on - the frequency of the timestamp "clock": - - (a) Timestamp clock must not be "too slow". - - It must tick at least once for each 2**31 bytes sent. In - fact, in order to be useful to the sender for round trip - timing, the clock should tick at least once per window's - worth of data, and even with the RFC-1072 window - extension, 2**31 bytes must be at least two windows. - - To make this more quantitative, any clock faster than 1 - tick/sec will reject old duplicate segments for link - speeds of ~2 Gbps; a 1ms clock will work up to link - speeds of 2 Tbps (10**12 bps!). - - (b) Timestamp clock must not be "too fast". - - Its cycling time must be greater than MSL seconds. Since - the clock (timestamp) is 32 bits and the worst-case MSL is - 255 seconds, the maximum acceptable clock frequency is one - tick every 59 ns. 
- - However, since the sender is using the timestamp for RTT - calculations, the timestamp doesn't need to have much more - resolution than the granularity of the retransmit timer, - e.g., tens or hundreds of milliseconds. - - Thus, both limits are easily satisfied with a reasonable clock - rate in the range 1-100ms per tick. - - Using the timestamp option relaxes the requirements on MSL for - avoiding sequence number wrap-around. For example, with a 1 ms - timestamp clock, the 32-bit timestamp will wrap its sign bit in - 25 days. Thus, it will reject old duplicates on the same - connection as long as MSL is 25 days or less. This appears to - be a very safe figure. If the timestamp has 10 ms resolution, - the MSL requirement is boosted to 250 days. An MSL of 25 days - or longer can probably be assumed by the gateway system without - requiring precise MSL enforcement by the TTL value in the IP - layer. - - - - -Jacobson, Braden & Zhang [Page 10] - -RFC 1185 TCP over High-Speed Paths October 1990 - - -3. DUPLICATES FROM EARLIER INCARNATIONS OF CONNECTION - - We turn now to the second potential cause of old duplicate packet - errors: packets from an earlier incarnation of the same connection. - The appendix contains a review of the mechanisms currently included in - TCP to handle this problem. These mechanisms depend upon the - enforcement of a maximum segment lifetime (MSL) by the Internet - layer. - - The MSL required to prevent failures due to an earlier connection - incarnation does not depend (directly) upon the transfer rate. - However, the timestamp option used as described in Section 2 can - provide additional security against old duplicates from earlier - connections. Furthermore, we will see that with the universal use of - the timestamp option, enforcement of a maximum segment lifetime would - no longer be required for reliable TCP operation. 
- - There are two cases to be considered (see the appendix for more - explanation): (1) a system crashing (and losing connection state) - and restarting, and (2) the same connection being closed and reopened - without a loss of host state. These will be described in the - following two sections. - - 3.1 System Crash with Loss of State - - TCP's quiet time of one MSL upon system startup handles the loss - of connection state in a system crash/restart. For an - explanation, see for example "When to Keep Quiet" in the TCP - protocol specification [Postel81]. The MSL that is required here - does not depend upon the transfer speed. The current TCP MSL of 2 - minutes seems acceptable as an operational compromise, as many - host systems take this long to boot after a crash. - - However, the timestamp option may be used to ease the MSL - requirements (or to provide additional security against data - corruption). If timestamps are being used and if the timestamp - clock can be guaranteed to be monotonic over a system - crash/restart, i.e., if the first value of the sender's timestamp - clock after a crash/restart can be guaranteed to be greater than - the last value before the restart, then a quiet time will be - unnecessary. - - To dispense totally with the quiet time would seem to require that - the host clock be synchronized to a time source that is stable - over the crash/restart period, with an accuracy of one timestamp - clock tick or better. Fortunately, we can back off from this - strict requirement. Suppose that the clock is always re- - synchronized to within N timestamp clock ticks and that booting - - - -Jacobson, Braden & Zhang [Page 11] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - (extended with a quiet time, if necessary) takes more than N - ticks. This will guarantee monotonicity of the timestamps, which - can then be used to reject old duplicates even without an enforced - MSL. 
- - 3.2 Closing and Reopening a Connection - - When a TCP connection is closed, a delay of 2*MSL in TIME-WAIT - state ties up the socket pair for 4 minutes (see Section 3.5 of - [Postel81]). Applications built upon TCP that close one connection - and open a new one (e.g., an FTP data transfer connection using - Stream mode) must choose a new socket pair each time. This delay - serves two different purposes: - - (a) Implement the full-duplex reliable close handshake of TCP. - - The proper time to delay the final close step is not really - related to the MSL; it depends instead upon the RTO for the - FIN segments and therefore upon the RTT of the path.* - Although there is no formal upper-bound on RTT, common - network engineering practice makes an RTT greater than 1 - minute very unlikely. Thus, the 4 minute delay in TIME-WAIT - state works satisfactorily to provide a reliable full-duplex - TCP close. Note again that this is independent of MSL - enforcement and network speed. - - The TIME-WAIT state could cause an indirect performance - problem if an application needed to repeatedly close one - connection and open another at a very high frequency, since - the number of available TCP ports on a host is less than - 2**16. However, high network speeds are not the major - contributor to this problem; the RTT is the limiting factor - in how quickly connections can be opened and closed. - Therefore, this problem will be no worse at high transfer - speeds. - - (b) Allow old duplicate segments to expire. - - Suppose that a host keeps a cache of the last timestamp - received from each remote host. This can be used to reject - old duplicate segments from earlier incarnations of the -_________________________ -*Note: It could be argued that the side that is sending a FIN knows -what degree of reliability it needs, and therefore it should be able -to determine the length of the TIME-WAIT delay for the FIN's -recipient. 
This could be accomplished with an appropriate TCP option -in FIN segments. - - - - -Jacobson, Braden & Zhang [Page 12] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - connection, if the timestamp clock can be guaranteed to have - ticked at least once since the old connection was open. - This requires that the TIME-WAIT delay plus the RTT together - must be at least one tick of the sender's timestamp clock. - - Note that this is a variant on the mechanism proposed by - Garlick, Rom, and Postel (see the appendix), which required - each host to maintain connection records containing the - highest sequence numbers on every connection. Using - timestamps instead, it is only necessary to keep one quantity - per remote host, regardless of the number of simultaneous - connections to that host. - - We conclude that if all hosts used the TCP timestamp algorithm - described in Section 2, enforcement of a maximum segment lifetime - would be unnecessary and the quiet time at system startup could be - shortened or removed. In any case, the timestamp mechanism can - provide additional security against old duplicates from earlier - connection incarnations. However, a 4 minute TIME-WAIT delay - (unrelated to MSL enforcement or network speed) must be retained - to provide the reliable close handshake of TCP. - -4. CONCLUSIONS - - We have presented a mechanism, based upon the TCP timestamp echo - option of RFC-1072, that will allow very high TCP transfer rates - without reliability problems due to old duplicate segments on the - same connection. This mechanism also provides additional security - against intrusion of old duplicates from earlier incarnations of the - same connection. If the timestamp mechanism were used by all hosts, - the quiet time at system startup could be eliminated and enforcement - of a maximum segment lifetime (MSL) would no longer be necessary. 
- -REFERENCES - - [Cerf76] Cerf, V., "TCP Resynchronization", Tech Note #79, Digital - Systems Lab, Stanford, January 1976. - - [Dalal74] Dalal, Y., "More on Selecting Sequence Numbers", INWG - Protocol Note #4, October 1974. - - [Garlick77] Garlick, L., R. Rom, and J. Postel, "Issues in Reliable - Host-to-Host Protocols", Proc. Second Berkeley Workshop on - Distributed Data Management and Computer Networks, May 1977. - - [Hamming77] Hamming, R., "Digital Filters", ISBN 0-13-212571-4, - Prentice Hall, Englewood Cliffs, N.J., 1977. - - - - -Jacobson, Braden & Zhang [Page 13] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - [Jacobson88] Jacobson, V., and R. Braden, "TCP Extensions for - Long-Delay Paths", RFC 1072, LBL and USC/Information Sciences - Institute, October 1988. - - [Jacobson90] Jacobson, V., "4BSD Header Prediction", ACM Computer - Communication Review, April 1990. - - [McKenzie89] McKenzie, A., "A Problem with the TCP Big Window - Option", RFC 1110, BBN STC, August 1989. - - [Postel81] Postel, J., "Transmission Control Protocol", RFC 793, - DARPA, September 1981. - - [Tomlinson74] Tomlinson, R., "Selecting Sequence Numbers", INWG - Protocol Note #2, September 1974. - - [Watson81] Watson, R., "Timer-based Mechanisms in Reliable - Transport Protocol Connection Management", Computer Networks, - Vol. 5, 1981. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Jacobson, Braden & Zhang [Page 14] - -RFC 1185 TCP over High-Speed Paths October 1990 - - -APPENDIX -- Protection against Old Duplicates in TCP - - During the development of TCP, a great deal of effort was devoted to - the problem of protecting a TCP connection from segments left from - earlier incarnations of the same connection. Several different - mechanisms were proposed for this purpose [Tomlinson74] [Dalal74] - [Cerf76] [Garlick77]. - - The connection parameters that are required in this discussion are: - - Tc = Connection duration in seconds. 
- - Nc = Total number of bytes sent on connection. - - B = Effective bandwidth of connection = Nc/Tc. - - Tomlinson proposed a scheme with two parts: a clock-driven selection - of ISN (Initial Sequence Number) for a connection, and a - resynchronization procedure [Tomlinson74]. The clock-driven scheme - chooses: - - ISN = (integer(R*t)) mod 2**32 [2] - - where t is the current time relative to an arbitrary origin, and R is - a constant. R was intended to be chosen so that ISN will advance - faster than sequence numbers will be used up on the connection. - However, at high speeds this will not be true; the consequences of - this will be discussed below. - - The clock-driven choice of ISN in formula [2] guarantees freedom from - old duplicates matching a reopened connection if the original - connection was "short-lived" and "slow". By "short-lived", we mean a - connection that stayed open for a time Tc less than the time to cycle - the ISN, i.e., Tc < 2**32/R seconds. By "slow", we mean that the - effective transfer rate B is less than R. - - This is illustrated in Figure 1, where sequence numbers are plotted - against time. The asterisks show the ISN lines from formula [2], - while the circles represent the trajectories of several short-lived - incarnations of the same connection, each terminating at the "x". - - Note: allowing rapid reuse of connections was believed to be an - important goal during the early TCP development. This - requirement was driven by the hope that TCP would serve as a - basis for user-level transaction protocols as well as - connection-oriented protocols. The paradigm discussed was the - "Christmas Tree" or "Kamikazee" segment that contained SYN and - FIN bits as well as data. 
Enthusiasm for this was somewhat - - - -Jacobson, Braden & Zhang [Page 15] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - dampened when it was observed that the 3-way SYN handshake and - the FIN handshake mean that 5 packets are required for a minimum - exchange. Furthermore, the TIME-WAIT state delay implies that - the same connection really cannot be reopened immediately. No - further work has been done in this area, although existing - applications (especially SMTP) often generate very short TCP - sessions. The reuse problem is generally avoided by using a - different port pair for each connection. - - - |- 2**32 ISN ISN - | * * - | * * - | * * - | *x * - | o * - ^ | * * - | | * x * - | * o * - S | *o * - e | o * - q | * * - | * * - # | * x * - | *o * - |o_______________*____________ - ^ Time --> - 4.55hrs - - - Figure 1. Clock-Driven ISN avoiding duplication on - short-Lived, slow connections. - - - However, clock-driven ISN selection does not protect against old - duplicate packets for a long-lived or fast connection: the - connection may close (or crash) just as the ISN has cycled around and - reached the same value again. If the connection is then reopened, a - datagram still in transit from the old connection may fall into the - current window. This is illustrated by Figure 2 for a slow, long- - lived connection, and by Figures 3 and 4 for fast connections. In - each case, the point "x" marks the place at which the original - connection closes or crashes. The arrow in Figure 2 illustrates an - old duplicate segment. Figure 3 shows a connection whose total byte - count Nc < 2**32, while Figure 4 concerns Nc >= 2**32. - - To prevent the duplication illustrated in Figure 2, Tomlinson - proposed to "resynchronize" the connection sequence numbers if they - - - -Jacobson, Braden & Zhang [Page 16] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - came within an MSL of the ISN. 
Resynchronization might take the form - of a delay (point "y") or the choice of a new sequence number (point - "z"). - - |- 2**32 ISN ISN - | * * - | * * - | * * - | * * - | * * - ^ | * * - | | * * - | * * - S | * * - e | * x* y - q | * o * - | * o *z - # | *o * - | * * - |*_________________*____________ - ^ Time --> - 4.55hrs - - Figure 2. Resynchronization to Avoid Duplication - on Slow, Long-Lived Connection - - - - |- 2**32 ISN ISN - | * * - | x o * * - | * * - | o-->o* * - | * * - ^ | o o * - | | * * - | o * * - S | * * - e | o * * - q | * * - | o* * - # | * * - | o * - |*_________________*____________ - ^ Time --> - 4.55hrs - - Figure 3. Duplication on Fast Connection: Nc < 2**32 bytes - - - -Jacobson, Braden & Zhang [Page 17] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - |- 2**32 ISN ISN - | o * * - | x * * - | * * - | o * * - | o * - ^ | * * - | | o * * - | * o * - S | * * - e | o * * - q | * o * - | * * - # | o * - | * o * - |*_________________*____________ - ^ Time --> - 4.55hrs - - Figure 4. Duplication on Fast Connection: Nc > 2**32 bytes - - In summary, Figures 1-4 illustrated four possible failure modes for - old duplicate packets from an earlier incarnation. We will call - these four modes F1 , F2, F3, and F4: - - - F1: B < R, Tc < 4.55 hrs. (Figure 1) - - F2: B < R, Tc >= 4.55 hrs. (Figure 2) - - F3: B >= R, Nc < 2**32 (Figure 3) - - F4: B >= R, Nc >= 2**32 (Figure 4) - - - Another limitation of clock-driven ISN selection should be mentioned. - Tomlinson assumed that the current time t in formula [2] is obtained - from a clock that is persistent over a system crash. For his scheme - to work correctly, the clock must be restarted with an accuracy of - 1/R seconds (e.g, 4 microseconds in the case of TCP). While this may - be possible for some hosts and some crashes, in most cases there will - be an uncertainty in the clock after a crash that ranges from a - second to several minutes. 
- - As a result of this random clock offset after system - reinitialization, there is a possibility that old segments sent - before the crash may fall into the window of a new connection - incarnation. The solution to this problem that was adopted in the - - - -Jacobson, Braden & Zhang [Page 18] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - final TCP spec is a "quiet time" of MSL seconds when the system is - initialized [Postel81, p. 28]. No TCP connection can be opened until - the expiration of this quiet time. - - A different approach was suggested by Garlick, Rom, and Postel - [Garlick77]. Rather than using clock-driven ISN selection, they - proposed to maintain connection records containing the last ISN used - on every connection. To immediately open a new incarnation of a - connection, the ISN is taken to be greater than the last sequence - number of the previous incarnation, so that the new incarnation will - have unique sequence numbers. To handle a system crash, they - proposed a quiet time, i.e., a delay at system startup time to allow - old duplicates to expire. Note that the connection records need be - kept only for MSL seconds; after that, no collision is possible, and - a new connection can start with sequence number zero. - - The scheme finally adopted for TCP combines features of both these - proposals. TCP uses three mechanisms: - - (A) ISN selection is clock-driven to handle short-lived connections. - The parameter R = 250KBps, so that the ISN value cycles in - 2**32/R = 4.55 hours. - - (B) (One end of) a closed connection is left in a "busy" state, - known as "TIME-WAIT" state, for a time of 2*MSL. TIME-WAIT - state handles the proper close of a long-lived connection - without resynchronization. It also allows reliable completion - of the full-duplex close handshake. - - (C) There is a quiet time of one MSL at system startup. This - handles a crash of a long-lived connection and avoids time - resynchronization problems in (A). 
- - Notice that (B) and (C) together are logically sufficient to prevent - accidental reuse of sequence numbers from a different incarnation, - for any of the failure modes F1-F4. (A) is not logically necessary - since the close delay (B) makes it impossible to reopen the same TCP - connection immediately. However, the use of (A) does give additional - assurance in a common case, perhaps compensating for a host that has - set its TIME-WAIT state delay too short. - - Some TCP implementations have permitted a connection in the TIME-WAIT - state to be reopened immediately by the other side, thus short- - circuiting mechanism (B). Specifically, a new SYN for the same - socket pair is accepted when the earlier incarnation is still in - TIME-WAIT state. Old duplicates in one direction can be avoided by - choosing the ISN to be the next unused sequence number from the - preceding connection (i.e., FIN+1); this is essentially an - - - -Jacobson, Braden & Zhang [Page 19] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - application of the scheme of Garlick, Rom, and Postel, using the - connection block in TIME-WAIT state as the connection record. - - However, the connection is still vulnerable to old duplicates in the - other direction. Mechanism (A) prevents trouble in mode F1, but - failures can arise in F2, F3, or F4; of these, F2, on short, fast - connections, is the most dangerous. - - Finally, we note TCP will operate reliably without any MSL-based - mechanisms in the following restricted domain: - - * Total data sent is less then 2**32 octets, and - - * Effective sustained rate less than 250KBps, and - - * Connection duration less than 4.55 hours. - - At the present time, the great majority of current TCP usage falls - into this restricted domain. The third component, connection - duration, is the most commonly violated. - -Security Considerations - - Security issues are not discussed in this memo. 
- -Authors' Addresses - - Van Jacobson - University of California - Lawrence Berkeley Laboratory - Mail Stop 46A - Berkeley, CA 94720 - - Phone: (415) 486-6411 - EMail: van@CSAM.LBL.GOV - - - Bob Braden - University of Southern California - Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292 - - Phone: (213) 822-1511 - EMail: Braden@ISI.EDU - - - - - - -Jacobson, Braden & Zhang [Page 20] - -RFC 1185 TCP over High-Speed Paths October 1990 - - - Lixia Zhang - XEROX Palo Alto Research Center - 3333 Coyote Hill Road - Palo Alto, CA 94304 - - Phone: (415) 494-4415 - EMail: lixia@PARC.XEROX.COM - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Jacobson, Braden & Zhang [Page 21] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1213.txt b/kernel/picotcp/RFC/rfc1213.txt deleted file mode 100644 index 50a26f5..0000000 --- a/kernel/picotcp/RFC/rfc1213.txt +++ /dev/null @@ -1,3923 +0,0 @@ - - - - - - -Network Working Group K. McCloghrie -Request for Comments: 1213 Hughes LAN Systems, Inc. -Obsoletes: RFC 1158 M. Rose - Performance Systems International - Editors - March 1991 - - - Management Information Base for Network Management - of TCP/IP-based internets: - MIB-II - -Status of this Memo - - This memo defines the second version of the Management Information - Base (MIB-II) for use with network management protocols in TCP/IP- - based internets. This RFC specifies an IAB standards track protocol - for the Internet community, and requests discussion and suggestions - for improvements. Please refer to the current edition of the "IAB - Official Protocol Standards" for the standardization state and status - of this protocol. Distribution of this memo is unlimited. - -Table of Contents - - 1. Abstract............................................... 2 - 2. Introduction .......................................... 2 - 3. Changes from RFC 1156 ................................. 
3 - 3.1 Deprecated Objects ................................... 3 - 3.2 Display Strings ...................................... 4 - 3.3 Physical Addresses ................................... 4 - 3.4 The System Group ..................................... 5 - 3.5 The Interfaces Group ................................. 5 - 3.6 The Address Translation Group ........................ 6 - 3.7 The IP Group ......................................... 6 - 3.8 The ICMP Group ....................................... 7 - 3.9 The TCP Group ........................................ 7 - 3.10 The UDP Group ....................................... 7 - 3.11 The EGP Group ....................................... 7 - 3.12 The Transmission Group .............................. 8 - 3.13 The SNMP Group ...................................... 8 - 3.14 Changes from RFC 1158 ................. ............. 9 - 4. Objects ............................................... 10 - 4.1 Format of Definitions ................................ 10 - 5. Overview .............................................. 10 - 6. Definitions ........................................... 12 - 6.1 Textual Conventions .................................. 12 - 6.2 Groups in MIB-II ..................................... 13 - 6.3 The System Group ..................................... 13 - - - -SNMP Working Group [Page 1] - -RFC 1213 MIB-II March 1991 - - - 6.4 The Interfaces Group ................................. 16 - 6.5 The Address Translation Group ........................ 23 - 6.6 The IP Group ......................................... 26 - 6.7 The ICMP Group ....................................... 41 - 6.8 The TCP Group ........................................ 46 - 6.9 The UDP Group ........................................ 52 - 6.10 The EGP Group ....................................... 54 - 6.11 The Transmission Group .............................. 60 - 6.12 The SNMP Group ...................................... 60 - 7. 
Acknowledgements ...................................... 67 - 8. References ............................................ 69 - 9. Security Considerations ............................... 70 - 10. Authors' Addresses ................................... 70 - -1. Abstract - - This memo defines the second version of the Management Information - Base (MIB-II) for use with network management protocols in TCP/IP- - based internets. In particular, together with its companion memos - which describe the structure of management information (RFC 1155) - along with the network management protocol (RFC 1157) for TCP/IP- - based internets, these documents provide a simple, workable - architecture and system for managing TCP/IP-based internets and in - particular the Internet community. - -2. Introduction - - As reported in RFC 1052, IAB Recommendations for the Development of - Internet Network Management Standards [1], a two-prong strategy for - network management of TCP/IP-based internets was undertaken. In the - short-term, the Simple Network Management Protocol (SNMP) was to be - used to manage nodes in the Internet community. In the long-term, - the use of the OSI network management framework was to be examined. - Two documents were produced to define the management information: RFC - 1065, which defined the Structure of Management Information (SMI) - [2], and RFC 1066, which defined the Management Information Base - (MIB) [3]. Both of these documents were designed so as to be - compatible with both the SNMP and the OSI network management - framework. - - This strategy was quite successful in the short-term: Internet-based - network management technology was fielded, by both the research and - commercial communities, within a few months. As a result of this, - portions of the Internet community became network manageable in a - timely fashion. 
- - As reported in RFC 1109, Report of the Second Ad Hoc Network - Management Review Group [4], the requirements of the SNMP and the OSI - - - -SNMP Working Group [Page 2] - -RFC 1213 MIB-II March 1991 - - - network management frameworks were more different than anticipated. - As such, the requirement for compatibility between the SMI/MIB and - both frameworks was suspended. This action permitted the operational - network management framework, the SNMP, to respond to new operational - needs in the Internet community by producing this document. - - As such, the current network management framework for TCP/IP- based - internets consists of: Structure and Identification of Management - Information for TCP/IP-based internets, RFC 1155 [12], which - describes how managed objects contained in the MIB are defined; - Management Information Base for Network Management of TCP/IP-based - internets: MIB-II, this memo, which describes the managed objects - contained in the MIB (and supercedes RFC 1156 [13]); and, the Simple - Network Management Protocol, RFC 1098 [5], which defines the protocol - used to manage these objects. - -3. Changes from RFC 1156 - - Features of this MIB include: - - (1) incremental additions to reflect new operational - requirements; - - (2) upwards compatibility with the SMI/MIB and the SNMP; - - (3) improved support for multi-protocol entities; and, - - (4) textual clean-up of the MIB to improve clarity and - readability. - - The objects defined in MIB-II have the OBJECT IDENTIFIER prefix: - - mib-2 OBJECT IDENTIFIER ::= { mgmt 1 } - - which is identical to the prefix used in MIB-I. - -3.1. Deprecated Objects - - In order to better prepare implementors for future changes in the - MIB, a new term "deprecated" may be used when describing an object. - A deprecated object in the MIB is one which must be supported, but - one which will most likely be removed from the next version of the - MIB (e.g., MIB-III). 
- - MIB-II marks one object as being deprecated: - - atTable - - - - -SNMP Working Group [Page 3] - -RFC 1213 MIB-II March 1991 - - - As a result of deprecating the atTable object, the entire Address - Translation group is deprecated. - - Note that no functionality is lost with the deprecation of these - objects: new objects providing equivalent or superior functionality - are defined in MIB-II. - -3.2. Display Strings - - In the past, there have been misinterpretations of the MIB as to when - a string of octets should contain printable characters, meant to be - displayed to a human. As a textual convention in the MIB, the - datatype - - DisplayString ::= - OCTET STRING - - is introduced. A DisplayString is restricted to the NVT ASCII - character set, as defined in pages 10-11 of [6]. - - The following objects are now defined in terms of DisplayString: - - sysDescr - ifDescr - - It should be noted that this change has no effect on either the - syntax nor semantics of these objects. The use of the DisplayString - notation is merely an artifact of the explanatory method used in - MIB-II and future MIBs. - - Further it should be noted that any object defined in terms of OCTET - STRING may contain arbitrary binary data, in which each octet may - take any value from 0 to 255 (decimal). - -3.3. Physical Addresses - - As a further, textual convention in the MIB, the datatype - - PhysAddress ::= - OCTET STRING - - is introduced to represent media- or physical-level addresses. - - The following objects are now defined in terms of PhysAddress: - - ifPhysAddress - atPhysAddress - ipNetToMediaPhysAddress - - - -SNMP Working Group [Page 4] - -RFC 1213 MIB-II March 1991 - - - It should be noted that this change has no effect on either the - syntax nor semantics of these objects. The use of the PhysAddress - notation is merely an artifact of the explanatory method used in - MIB-II and future MIBs. - -3.4. 
The System Group - - Four new objects are added to this group: - - sysContact - sysName - sysLocation - sysServices - - These provide contact, administrative, location, and service - information regarding the managed node. - -3.5. The Interfaces Group - - The definition of the ifNumber object was incorrect, as it required - all interfaces to support IP. (For example, devices without IP, such - as MAC-layer bridges, could not be managed if this definition was - strictly followed.) The description of the ifNumber object is - changed accordingly. - - The ifTable object was mistaken marked as read-write, it has been - (correctly) re-designated as not-accessible. In addition, several - new values have been added to the ifType column in the ifTable - object: - - ppp(23) - softwareLoopback(24) - eon(25) - ethernet-3Mbit(26) - nsip(27) - slip(28) - ultra(29) - ds3(30) - sip(31) - frame-relay(32) - - Finally, a new column has been added to the ifTable object: - - ifSpecific - - which provides information about information specific to the media - being used to realize the interface. - - - - -SNMP Working Group [Page 5] - -RFC 1213 MIB-II March 1991 - - -3.6. The Address Translation Group - - In MIB-I this group contained a table which permitted mappings from - network addresses (e.g., IP addresses) to physical addresses (e.g., - MAC addresses). Experience has shown that efficient implementations - of this table make two assumptions: a single network protocol - environment, and mappings occur only from network address to physical - address. - - The need to support multi-protocol nodes (e.g., those with both the - IP and CLNP active), and the need to support the inverse mapping - (e.g., for ES-IS), have invalidated both of these assumptions. As - such, the atTable object is declared deprecated. 
- - In order to meet both the multi-protocol and inverse mapping - requirements, MIB-II and its successors will allocate up to two - address translation tables inside each network protocol group. That - is, the IP group will contain one address translation table, for - going from IP addresses to physical addresses. Similarly, when a - document defining MIB objects for the CLNP is produced (e.g., [7]), - it will contain two tables, for mappings in both directions, as this - is required for full functionality. - - It should be noted that the choice of two tables (one for each - direction of mapping) provides for ease of implementation in many - cases, and does not introduce undue burden on implementations which - realize the address translation abstraction through a single internal - table. - -3.7. The IP Group - - The access attribute of the variable ipForwarding has been changed - from read-only to read-write. - - In addition, there is a new column to the ipAddrTable object, - - ipAdEntReasmMaxSize - - which keeps track of the largest IP datagram that can be re-assembled - on a particular interface. - - The descriptor of the ipRoutingTable object has been changed to - ipRouteTable for consistency with the other IP routing objects. - There are also three new columns in the ipRouteTable object, - - ipRouteMask - ipRouteMetric5 - ipRouteInfo - - - -SNMP Working Group [Page 6] - -RFC 1213 MIB-II March 1991 - - - the first is used for IP routing subsystems that support arbitrary - subnet masks, and the latter two are IP routing protocol-specific. - - Two new objects are added to the IP group: - - ipNetToMediaTable - ipRoutingDiscards - - the first is the address translation table for the IP group - (providing identical functionality to the now deprecated atTable in - the address translation group), and the latter provides information - when routes are lost due to a lack of buffer space. - -3.8. The ICMP Group - - There are no changes to this group. - -3.9. 
The TCP Group - - Two new variables are added: - - tcpInErrs - tcpOutRsts - - which keep track of the number of incoming TCP segments in error and - the number of resets generated by a TCP. - -3.10. The UDP Group - - A new table: - - udpTable - - is added. - -3.11. The EGP Group - - Experience has indicated a need for additional objects that are - useful in EGP monitoring. In addition to making several additions to - the egpNeighborTable object, i.e., - - egpNeighAs - egpNeighInMsgs - egpNeighInErrs - egpNeighOutMsgs - egpNeighOutErrs - egpNeighInErrMsgs - egpNeighOutErrMsgs - - - -SNMP Working Group [Page 7] - -RFC 1213 MIB-II March 1991 - - - egpNeighStateUps - egpNeighStateDowns - egpNeighIntervalHello - egpNeighIntervalPoll - egpNeighMode - egpNeighEventTrigger - - a new variable is added: - - egpAs - - which gives the autonomous system associated with this EGP entity. - -3.12. The Transmission Group - - MIB-I was lacking in that it did not distinguish between different - types of transmission media. A new group, the Transmission group, is - allocated for this purpose: - - transmission OBJECT IDENTIFIER ::= { mib-2 10 } - - When Internet-standard definitions for managing transmission media - are defined, the transmission group is used to provide a prefix for - the names of those objects. - - Typically, such definitions reside in the experimental portion of the - MIB until they are "proven", then as a part of the Internet - standardization process, the definitions are accordingly elevated and - a new object identifier, under the transmission group is defined. By - convention, the name assigned is: - - type OBJECT IDENTIFIER ::= { transmission number } - - where "type" is the symbolic value used for the media in the ifType - column of the ifTable object, and "number" is the actual integer - value corresponding to the symbol. - -3.13. 
The SNMP Group - - The application-oriented working groups of the IETF have been tasked - to be receptive towards defining MIB variables specific to their - respective applications. - - For the SNMP, it is useful to have statistical information. A new - group, the SNMP group, is allocated for this purpose: - - snmp OBJECT IDENTIFIER ::= { mib-2 11 } - - - - -SNMP Working Group [Page 8] - -RFC 1213 MIB-II March 1991 - - -3.14. Changes from RFC 1158 - - Features of this MIB include: - - (1) The managed objects in this document have been defined - using the conventions defined in the Internet-standard - SMI, as amended by the extensions specified in [14]. It - must be emphasized that definitions made using these - extensions are semantically identically to those in RFC - 1158. - - (2) The PhysAddress textual convention has been introduced to - represent media addresses. - - (3) The ACCESS clause of sysLocation is now read-write. - - (4) The definition of sysServices has been clarified. - - (5) New ifType values (29-32) have been defined. In - addition, the textual-descriptor for the DS1 and E1 - interface types has been corrected. - - (6) The definition of ipForwarding has been clarified. - - (7) The definition of ipRouteType has been clarified. - - (8) The ipRouteMetric5 and ipRouteInfo objects have been - defined. - - (9) The ACCESS clause of tcpConnState is now read-write, to - support deletion of the TCB associated with a TCP - connection. The definition of this object has been - clarified to explain this usage. - - (10) The definition of egpNeighEventTrigger has been - clarified. - - (11) The definition of several of the variables in the new - snmp group have been clarified. In addition, the - snmpInBadTypes and snmpOutReadOnlys objects are no longer - present. (However, the object identifiers associated - with those objects are reserved to prevent future use.) - - (12) The definition of snmpInReadOnlys has been clarified. 
- - (13) The textual descriptor of the snmpEnableAuthTraps has - been changed to snmpEnableAuthenTraps, and the definition - has been clarified. - - - -SNMP Working Group [Page 9] - -RFC 1213 MIB-II March 1991 - - - (14) The ipRoutingDiscards object was added. - - (15) The optional use of an implementation-dependent, small - positive integer was disallowed when identifying - instances of the IP address and routing tables. - -4. Objects - - Managed objects are accessed via a virtual information store, termed - the Management Information Base or MIB. Objects in the MIB are - defined using the subset of Abstract Syntax Notation One (ASN.1) [8] - defined in the SMI. In particular, each object has a name, a syntax, - and an encoding. The name is an object identifier, an - administratively assigned name, which specifies an object type. The - object type together with an object instance serves to uniquely - identify a specific instantiation of the object. For human - convenience, we often use a textual string, termed the OBJECT - DESCRIPTOR, to also refer to the object type. - - The syntax of an object type defines the abstract data structure - corresponding to that object type. The ASN.1 language is used for - this purpose. However, the SMI [12] purposely restricts the ASN.1 - constructs which may be used. These restrictions are explicitly made - for simplicity. - - The encoding of an object type is simply how that object type is - represented using the object type's syntax. Implicitly tied to the - notion of an object type's syntax and encoding is how the object type - is represented when being transmitted on the network. - - The SMI specifies the use of the basic encoding rules of ASN.1 [9], - subject to the additional requirements imposed by the SNMP. - -4.1. Format of Definitions - - Section 6 contains contains the specification of all object types - contained in this MIB module. 
The object types are defined using the - conventions defined in the SMI, as amended by the extensions - specified in [14]. - -5. Overview - - Consistent with the IAB directive to produce simple, workable systems - in the short-term, the list of managed objects defined here, has been - derived by taking only those elements which are considered essential. - - This approach of taking only the essential objects is NOT - restrictive, since the SMI defined in the companion memo provides - - - -SNMP Working Group [Page 10] - -RFC 1213 MIB-II March 1991 - - - three extensibility mechanisms: one, the addition of new standard - objects through the definitions of new versions of the MIB; two, the - addition of widely-available but non-standard objects through the - experimental subtree; and three, the addition of private objects - through the enterprises subtree. Such additional objects can not - only be used for vendor-specific elements, but also for - experimentation as required to further the knowledge of which other - objects are essential. - - The design of MIB-II is heavily influenced by the first extensibility - mechanism. Several new variables have been added based on - operational experience and need. Based on this, the criteria for - including an object in MIB-II are remarkably similar to the MIB-I - criteria: - - (1) An object needed to be essential for either fault or - configuration management. - - (2) Only weak control objects were permitted (by weak, it is - meant that tampering with them can do only limited - damage). This criterion reflects the fact that the - current management protocols are not sufficiently secure - to do more powerful control operations. - - (3) Evidence of current use and utility was required. - - (4) In MIB-I, an attempt was made to limit the number of - objects to about 100 to make it easier for vendors to - fully instrument their software. In MIB-II, this limit - was raised given the wide technological base now - implementing MIB-I. 
- - (5) To avoid redundant variables, it was required that no - object be included that can be derived from others in the - MIB. - - (6) Implementation specific objects (e.g., for BSD UNIX) were - excluded. - - (7) It was agreed to avoid heavily instrumenting critical - sections of code. The general guideline was one counter - per critical section per layer. - - MIB-II, like its predecessor, the Internet-standard MIB, contains - only essential elements. There is no need to allow individual - objects to be optional. Rather, the objects are arranged into the - following groups: - - - - -SNMP Working Group [Page 11] - -RFC 1213 MIB-II March 1991 - - - - System - - Interfaces - - Address Translation (deprecated) - - IP - - ICMP - - TCP - - UDP - - EGP - - Transmission - - SNMP - - These groups are the basic unit of conformance: This method is as - follows: if the semantics of a group is applicable to an - implementation, then it must implement all objects in that group. - For example, an implementation must implement the EGP group if and - only if it implements the EGP. - - There are two reasons for defining these groups: to provide a means - of assigning object identifiers; and, to provide a method for - implementations of managed agents to know which objects they must - implement. - -6. Definitions - - RFC1213-MIB DEFINITIONS ::= BEGIN - - IMPORTS - mgmt, NetworkAddress, IpAddress, Counter, Gauge, - TimeTicks - FROM RFC1155-SMI - OBJECT-TYPE - FROM RFC-1212; - - -- This MIB module uses the extended OBJECT-TYPE macro as - -- defined in [14]; - - - -- MIB-II (same prefix as MIB-I) - - mib-2 OBJECT IDENTIFIER ::= { mgmt 1 } - - -- textual conventions - - DisplayString ::= - OCTET STRING - -- This data type is used to model textual information taken - -- from the NVT ASCII character set. 
By convention, objects - -- with this syntax are declared as having - - - -SNMP Working Group [Page 12] - -RFC 1213 MIB-II March 1991 - - - -- - -- SIZE (0..255) - - PhysAddress ::= - OCTET STRING - -- This data type is used to model media addresses. For many - -- types of media, this will be in a binary representation. - -- For example, an ethernet address would be represented as - -- a string of 6 octets. - - - -- groups in MIB-II - - system OBJECT IDENTIFIER ::= { mib-2 1 } - - interfaces OBJECT IDENTIFIER ::= { mib-2 2 } - - at OBJECT IDENTIFIER ::= { mib-2 3 } - - ip OBJECT IDENTIFIER ::= { mib-2 4 } - - icmp OBJECT IDENTIFIER ::= { mib-2 5 } - - tcp OBJECT IDENTIFIER ::= { mib-2 6 } - - udp OBJECT IDENTIFIER ::= { mib-2 7 } - - egp OBJECT IDENTIFIER ::= { mib-2 8 } - - -- historical (some say hysterical) - -- cmot OBJECT IDENTIFIER ::= { mib-2 9 } - - transmission OBJECT IDENTIFIER ::= { mib-2 10 } - - snmp OBJECT IDENTIFIER ::= { mib-2 11 } - - - -- the System group - - -- Implementation of the System group is mandatory for all - -- systems. If an agent is not configured to have a value - -- for any of these variables, a string of length 0 is - -- returned. - - sysDescr OBJECT-TYPE - SYNTAX DisplayString (SIZE (0..255)) - ACCESS read-only - STATUS mandatory - - - -SNMP Working Group [Page 13] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "A textual description of the entity. This value - should include the full name and version - identification of the system's hardware type, - software operating-system, and networking - software. It is mandatory that this only contain - printable ASCII characters." - ::= { system 1 } - - sysObjectID OBJECT-TYPE - SYNTAX OBJECT IDENTIFIER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The vendor's authoritative identification of the - network management subsystem contained in the - entity. 
This value is allocated within the SMI - enterprises subtree (1.3.6.1.4.1) and provides an - easy and unambiguous means for determining `what - kind of box' is being managed. For example, if - vendor `Flintstones, Inc.' was assigned the - subtree 1.3.6.1.4.1.4242, it could assign the - identifier 1.3.6.1.4.1.4242.1.1 to its `Fred - Router'." - ::= { system 2 } - - sysUpTime OBJECT-TYPE - SYNTAX TimeTicks - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The time (in hundredths of a second) since the - network management portion of the system was last - re-initialized." - ::= { system 3 } - - sysContact OBJECT-TYPE - SYNTAX DisplayString (SIZE (0..255)) - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The textual identification of the contact person - for this managed node, together with information - on how to contact this person." - ::= { system 4 } - - sysName OBJECT-TYPE - SYNTAX DisplayString (SIZE (0..255)) - - - -SNMP Working Group [Page 14] - -RFC 1213 MIB-II March 1991 - - - ACCESS read-write - STATUS mandatory - DESCRIPTION - "An administratively-assigned name for this - managed node. By convention, this is the node's - fully-qualified domain name." - ::= { system 5 } - - sysLocation OBJECT-TYPE - SYNTAX DisplayString (SIZE (0..255)) - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The physical location of this node (e.g., - `telephone closet, 3rd floor')." - ::= { system 6 } - - sysServices OBJECT-TYPE - SYNTAX INTEGER (0..127) - ACCESS read-only - STATUS mandatory - DESCRIPTION - "A value which indicates the set of services that - this entity primarily offers. - - The value is a sum. This sum initially takes the - value zero, Then, for each layer, L, in the range - 1 through 7, that this node performs transactions - for, 2 raised to (L - 1) is added to the sum. For - example, a node which performs primarily routing - functions would have a value of 4 (2^(3-1)). 
In - contrast, a node which is a host offering - application services would have a value of 72 - (2^(4-1) + 2^(7-1)). Note that in the context of - the Internet suite of protocols, values should be - calculated accordingly: - - layer functionality - 1 physical (e.g., repeaters) - 2 datalink/subnetwork (e.g., bridges) - 3 internet (e.g., IP gateways) - 4 end-to-end (e.g., IP hosts) - 7 applications (e.g., mail relays) - - For systems including OSI protocols, layers 5 and - 6 may also be counted." - ::= { system 7 } - - - - -SNMP Working Group [Page 15] - -RFC 1213 MIB-II March 1991 - - - -- the Interfaces group - - -- Implementation of the Interfaces group is mandatory for - -- all systems. - - ifNumber OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of network interfaces (regardless of - their current state) present on this system." - ::= { interfaces 1 } - - - -- the Interfaces table - - -- The Interfaces table contains information on the entity's - -- interfaces. Each interface is thought of as being - -- attached to a `subnetwork'. Note that this term should - -- not be confused with `subnet' which refers to an - -- addressing partitioning scheme used in the Internet suite - -- of protocols. - - ifTable OBJECT-TYPE - SYNTAX SEQUENCE OF IfEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "A list of interface entries. The number of - entries is given by the value of ifNumber." - ::= { interfaces 2 } - - ifEntry OBJECT-TYPE - SYNTAX IfEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "An interface entry containing objects at the - subnetwork layer and below for a particular - interface." 
- INDEX { ifIndex } - ::= { ifTable 1 } - - IfEntry ::= - SEQUENCE { - ifIndex - INTEGER, - - - -SNMP Working Group [Page 16] - -RFC 1213 MIB-II March 1991 - - - ifDescr - DisplayString, - ifType - INTEGER, - ifMtu - INTEGER, - ifSpeed - Gauge, - ifPhysAddress - PhysAddress, - ifAdminStatus - INTEGER, - ifOperStatus - INTEGER, - ifLastChange - TimeTicks, - ifInOctets - Counter, - ifInUcastPkts - Counter, - ifInNUcastPkts - Counter, - ifInDiscards - Counter, - ifInErrors - Counter, - ifInUnknownProtos - Counter, - ifOutOctets - Counter, - ifOutUcastPkts - Counter, - ifOutNUcastPkts - Counter, - ifOutDiscards - Counter, - ifOutErrors - Counter, - ifOutQLen - Gauge, - ifSpecific - OBJECT IDENTIFIER - } - - ifIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - - - -SNMP Working Group [Page 17] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "A unique value for each interface. Its value - ranges between 1 and the value of ifNumber. The - value for each interface must remain constant at - least from one re-initialization of the entity's - network management system to the next re- - initialization." - ::= { ifEntry 1 } - - ifDescr OBJECT-TYPE - SYNTAX DisplayString (SIZE (0..255)) - ACCESS read-only - STATUS mandatory - DESCRIPTION - "A textual string containing information about the - interface. This string should include the name of - the manufacturer, the product name and the version - of the hardware interface." - ::= { ifEntry 2 } - - ifType OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - regular1822(2), - hdh1822(3), - ddn-x25(4), - rfc877-x25(5), - ethernet-csmacd(6), - iso88023-csmacd(7), - iso88024-tokenBus(8), - iso88025-tokenRing(9), - iso88026-man(10), - starLan(11), - proteon-10Mbit(12), - proteon-80Mbit(13), - hyperchannel(14), - fddi(15), - lapb(16), - sdlc(17), - ds1(18), -- T-1 - e1(19), -- european equiv. 
of T-1 - basicISDN(20), - primaryISDN(21), -- proprietary serial - propPointToPointSerial(22), - ppp(23), - softwareLoopback(24), - eon(25), -- CLNP over IP [11] - ethernet-3Mbit(26), - - - -SNMP Working Group [Page 18] - -RFC 1213 MIB-II March 1991 - - - nsip(27), -- XNS over IP - slip(28), -- generic SLIP - ultra(29), -- ULTRA technologies - ds3(30), -- T-3 - sip(31), -- SMDS - frame-relay(32) - } - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The type of interface, distinguished according to - the physical/link protocol(s) immediately `below' - the network layer in the protocol stack." - ::= { ifEntry 3 } - - ifMtu OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The size of the largest datagram which can be - sent/received on the interface, specified in - octets. For interfaces that are used for - transmitting network datagrams, this is the size - of the largest network datagram that can be sent - on the interface." - ::= { ifEntry 4 } - - ifSpeed OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - DESCRIPTION - "An estimate of the interface's current bandwidth - in bits per second. For interfaces which do not - vary in bandwidth or for those where no accurate - estimation can be made, this object should contain - the nominal bandwidth." - ::= { ifEntry 5 } - - ifPhysAddress OBJECT-TYPE - SYNTAX PhysAddress - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The interface's address at the protocol layer - immediately `below' the network layer in the - protocol stack. For interfaces which do not have - - - -SNMP Working Group [Page 19] - -RFC 1213 MIB-II March 1991 - - - such an address (e.g., a serial line), this object - should contain an octet string of zero length." 
- ::= { ifEntry 6 } - - ifAdminStatus OBJECT-TYPE - SYNTAX INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The desired state of the interface. The - testing(3) state indicates that no operational - packets can be passed." - ::= { ifEntry 7 } - - ifOperStatus OBJECT-TYPE - SYNTAX INTEGER { - up(1), -- ready to pass packets - down(2), - testing(3) -- in some test mode - } - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The current operational state of the interface. - The testing(3) state indicates that no operational - packets can be passed." - ::= { ifEntry 8 } - - ifLastChange OBJECT-TYPE - SYNTAX TimeTicks - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The value of sysUpTime at the time the interface - entered its current operational state. If the - current state was entered prior to the last re- - initialization of the local network management - subsystem, then this object contains a zero - value." - ::= { ifEntry 9 } - - ifInOctets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - - - -SNMP Working Group [Page 20] - -RFC 1213 MIB-II March 1991 - - - STATUS mandatory - DESCRIPTION - "The total number of octets received on the - interface, including framing characters." - ::= { ifEntry 10 } - - ifInUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of subnetwork-unicast packets - delivered to a higher-layer protocol." - ::= { ifEntry 11 } - - ifInNUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of non-unicast (i.e., subnetwork- - broadcast or subnetwork-multicast) packets - delivered to a higher-layer protocol." 
- ::= { ifEntry 12 } - - ifInDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of inbound packets which were chosen - to be discarded even though no errors had been - detected to prevent their being deliverable to a - higher-layer protocol. One possible reason for - discarding such a packet could be to free up - buffer space." - ::= { ifEntry 13 } - - ifInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of inbound packets that contained - errors preventing them from being deliverable to a - higher-layer protocol." - ::= { ifEntry 14 } - - - - -SNMP Working Group [Page 21] - -RFC 1213 MIB-II March 1991 - - - ifInUnknownProtos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of packets received via the interface - which were discarded because of an unknown or - unsupported protocol." - ::= { ifEntry 15 } - - ifOutOctets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of octets transmitted out of the - interface, including framing characters." - ::= { ifEntry 16 } - - ifOutUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of packets that higher-level - protocols requested be transmitted to a - subnetwork-unicast address, including those that - were discarded or not sent." - ::= { ifEntry 17 } - - ifOutNUcastPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of packets that higher-level - protocols requested be transmitted to a non- - unicast (i.e., a subnetwork-broadcast or - subnetwork-multicast) address, including those - that were discarded or not sent." 
- ::= { ifEntry 18 } - - ifOutDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of outbound packets which were chosen - - - -SNMP Working Group [Page 22] - -RFC 1213 MIB-II March 1991 - - - to be discarded even though no errors had been - detected to prevent their being transmitted. One - possible reason for discarding such a packet could - be to free up buffer space." - ::= { ifEntry 19 } - - ifOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of outbound packets that could not be - transmitted because of errors." - ::= { ifEntry 20 } - - ifOutQLen OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The length of the output packet queue (in - packets)." - ::= { ifEntry 21 } - - ifSpecific OBJECT-TYPE - SYNTAX OBJECT IDENTIFIER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "A reference to MIB definitions specific to the - particular media being used to realize the - interface. For example, if the interface is - realized by an ethernet, then the value of this - object refers to a document defining objects - specific to ethernet. If this information is not - present, its value should be set to the OBJECT - IDENTIFIER { 0 0 }, which is a syntactically valid - object identifier, and any conformant - implementation of ASN.1 and BER must be able to - generate and recognize this value." - ::= { ifEntry 22 } - - - -- the Address Translation group - - -- Implementation of the Address Translation group is - -- mandatory for all systems. Note however that this group - -- is deprecated by MIB-II. That is, it is being included - - - -SNMP Working Group [Page 23] - -RFC 1213 MIB-II March 1991 - - - -- solely for compatibility with MIB-I nodes, and will most - -- likely be excluded from MIB-III nodes. From MIB-II and - -- onwards, each network protocol group contains its own - -- address translation tables.
- - -- The Address Translation group contains one table which is - -- the union across all interfaces of the translation tables - -- for converting a NetworkAddress (e.g., an IP address) into - -- a subnetwork-specific address. For lack of a better term, - -- this document refers to such a subnetwork-specific address - -- as a `physical' address. - - -- Examples of such translation tables are: for broadcast - -- media where ARP is in use, the translation table is - -- equivalent to the ARP cache; or, on an X.25 network where - -- non-algorithmic translation to X.121 addresses is - -- required, the translation table contains the - -- NetworkAddress to X.121 address equivalences. - - atTable OBJECT-TYPE - SYNTAX SEQUENCE OF AtEntry - ACCESS not-accessible - STATUS deprecated - DESCRIPTION - "The Address Translation tables contain the - NetworkAddress to `physical' address equivalences. - Some interfaces do not use translation tables for - determining address equivalences (e.g., DDN-X.25 - has an algorithmic method); if all interfaces are - of this type, then the Address Translation table - is empty, i.e., has zero entries." - ::= { at 1 } - - atEntry OBJECT-TYPE - SYNTAX AtEntry - ACCESS not-accessible - STATUS deprecated - DESCRIPTION - "Each entry contains one NetworkAddress to - `physical' address equivalence." - INDEX { atIfIndex, - atNetAddress } - ::= { atTable 1 } - - AtEntry ::= - SEQUENCE { - atIfIndex - INTEGER, - - - -SNMP Working Group [Page 24] - -RFC 1213 MIB-II March 1991 - - - atPhysAddress - PhysAddress, - atNetAddress - NetworkAddress - } - - atIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS deprecated - DESCRIPTION - "The interface on which this entry's equivalence - is effective. The interface identified by a - particular value of this index is the same - interface as identified by the same value of - ifIndex." 
- ::= { atEntry 1 } - - atPhysAddress OBJECT-TYPE - SYNTAX PhysAddress - ACCESS read-write - STATUS deprecated - DESCRIPTION - "The media-dependent `physical' address. - - Setting this object to a null string (one of zero - length) has the effect of invalidating the - corresponding entry in the atTable object. That - is, it effectively disassociates the interface - identified with said entry from the mapping - identified with said entry. It is an - implementation-specific matter as to whether the - agent removes an invalidated entry from the table. - Accordingly, management stations must be prepared - to receive tabular information from agents that - corresponds to entries not currently in use. - Proper interpretation of such entries requires - examination of the relevant atPhysAddress object." - ::= { atEntry 2 } - - atNetAddress OBJECT-TYPE - SYNTAX NetworkAddress - ACCESS read-write - STATUS deprecated - DESCRIPTION - "The NetworkAddress (e.g., the IP address) - corresponding to the media-dependent `physical' - address." - - - -SNMP Working Group [Page 25] - -RFC 1213 MIB-II March 1991 - - - ::= { atEntry 3 } - - - -- the IP group - - -- Implementation of the IP group is mandatory for all - -- systems. - - ipForwarding OBJECT-TYPE - SYNTAX INTEGER { - forwarding(1), -- acting as a gateway - not-forwarding(2) -- NOT acting as a gateway - } - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The indication of whether this entity is acting - as an IP gateway in respect to the forwarding of - datagrams received by, but not addressed to, this - entity. IP gateways forward datagrams. IP hosts - do not (except those source-routed via the host). - - Note that for some managed nodes, this object may - take on only a subset of the values possible. - Accordingly, it is appropriate for an agent to - return a `badValue' response if a management - station attempts to change this object to an - inappropriate value."
- ::= { ip 1 } - - ipDefaultTTL OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The default value inserted into the Time-To-Live - field of the IP header of datagrams originated at - this entity, whenever a TTL value is not supplied - by the transport layer protocol." - ::= { ip 2 } - - ipInReceives OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of input datagrams received from - interfaces, including those received in error." - - - -SNMP Working Group [Page 26] - -RFC 1213 MIB-II March 1991 - - - ::= { ip 3 } - - ipInHdrErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of input datagrams discarded due to - errors in their IP headers, including bad - checksums, version number mismatch, other format - errors, time-to-live exceeded, errors discovered - in processing their IP options, etc." - ::= { ip 4 } - - ipInAddrErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of input datagrams discarded because - the IP address in their IP header's destination - field was not a valid address to be received at - this entity. This count includes invalid - addresses (e.g., 0.0.0.0) and addresses of - unsupported Classes (e.g., Class E). For entities - which are not IP Gateways and therefore do not - forward datagrams, this counter includes datagrams - discarded because the destination address was not - a local address." - ::= { ip 5 } - - ipForwDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of input datagrams for which this - entity was not their final IP destination, as a - result of which an attempt was made to find a - route to forward them to that final destination. 
- In entities which do not act as IP Gateways, this - counter will include only those packets which were - Source-Routed via this entity, and the Source- - Route option processing was successful." - ::= { ip 6 } - - ipInUnknownProtos OBJECT-TYPE - SYNTAX Counter - - - -SNMP Working Group [Page 27] - -RFC 1213 MIB-II March 1991 - - - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of locally-addressed datagrams - received successfully but discarded because of an - unknown or unsupported protocol." - ::= { ip 7 } - - ipInDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of input IP datagrams for which no - problems were encountered to prevent their - continued processing, but which were discarded - (e.g., for lack of buffer space). Note that this - counter does not include any datagrams discarded - while awaiting re-assembly." - ::= { ip 8 } - - ipInDelivers OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of input datagrams successfully - delivered to IP user-protocols (including ICMP)." - ::= { ip 9 } - - ipOutRequests OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of IP datagrams which local IP - user-protocols (including ICMP) supplied to IP in - requests for transmission. Note that this counter - does not include any datagrams counted in - ipForwDatagrams." - ::= { ip 10 } - - ipOutDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of output IP datagrams for which no - - - -SNMP Working Group [Page 28] - -RFC 1213 MIB-II March 1991 - - - problem was encountered to prevent their - transmission to their destination, but which were - discarded (e.g., for lack of buffer space). Note - that this counter would include datagrams counted - in ipForwDatagrams if any such packets met this - (discretionary) discard criterion." 
- ::= { ip 11 } - - ipOutNoRoutes OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of IP datagrams discarded because no - route could be found to transmit them to their - destination. Note that this counter includes any - packets counted in ipForwDatagrams which meet this - `no-route' criterion. Note that this includes any - datagrams which a host cannot route because all of - its default gateways are down." - ::= { ip 12 } - - ipReasmTimeout OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The maximum number of seconds which received - fragments are held while they are awaiting - reassembly at this entity." - ::= { ip 13 } - - ipReasmReqds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of IP fragments received which needed - to be reassembled at this entity." - ::= { ip 14 } - - ipReasmOKs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of IP datagrams successfully re- - assembled." - - - -SNMP Working Group [Page 29] - -RFC 1213 MIB-II March 1991 - - - ::= { ip 15 } - - ipReasmFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of failures detected by the IP re- - assembly algorithm (for whatever reason: timed - out, errors, etc). Note that this is not - necessarily a count of discarded IP fragments - since some algorithms (notably the algorithm in - RFC 815) can lose track of the number of fragments - by combining them as they are received." - ::= { ip 16 } - - ipFragOKs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of IP datagrams that have been - successfully fragmented at this entity."
- ::= { ip 17 } - - ipFragFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of IP datagrams that have been - discarded because they needed to be fragmented at - this entity but could not be, e.g., because their - Don't Fragment flag was set." - ::= { ip 18 } - - ipFragCreates OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of IP datagram fragments that have - been generated as a result of fragmentation at - this entity." - ::= { ip 19 } - - - - - - -SNMP Working Group [Page 30] - -RFC 1213 MIB-II March 1991 - - - -- the IP address table - - -- The IP address table contains this entity's IP addressing - -- information. - - ipAddrTable OBJECT-TYPE - SYNTAX SEQUENCE OF IpAddrEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "The table of addressing information relevant to - this entity's IP addresses." - ::= { ip 20 } - - ipAddrEntry OBJECT-TYPE - SYNTAX IpAddrEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "The addressing information for one of this - entity's IP addresses." - INDEX { ipAdEntAddr } - ::= { ipAddrTable 1 } - - IpAddrEntry ::= - SEQUENCE { - ipAdEntAddr - IpAddress, - ipAdEntIfIndex - INTEGER, - ipAdEntNetMask - IpAddress, - ipAdEntBcastAddr - INTEGER, - ipAdEntReasmMaxSize - INTEGER (0..65535) - } - - ipAdEntAddr OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The IP address to which this entry's addressing - information pertains." - ::= { ipAddrEntry 1 } - - - - - -SNMP Working Group [Page 31] - -RFC 1213 MIB-II March 1991 - - - ipAdEntIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The index value which uniquely identifies the - interface to which this entry is applicable. The - interface identified by a particular value of this - index is the same interface as identified by the - same value of ifIndex." 
- ::= { ipAddrEntry 2 } - - ipAdEntNetMask OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The subnet mask associated with the IP address of - this entry. The value of the mask is an IP - address with all the network bits set to 1 and all - the hosts bits set to 0." - ::= { ipAddrEntry 3 } - - ipAdEntBcastAddr OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The value of the least-significant bit in the IP - broadcast address used for sending datagrams on - the (logical) interface associated with the IP - address of this entry. For example, when the - Internet standard all-ones broadcast address is - used, the value will be 1. This value applies to - both the subnet and network broadcasts addresses - used by the entity on this (logical) interface." - ::= { ipAddrEntry 4 } - - ipAdEntReasmMaxSize OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The size of the largest IP datagram which this - entity can re-assemble from incoming IP fragmented - datagrams received on this interface." - ::= { ipAddrEntry 5 } - - - - -SNMP Working Group [Page 32] - -RFC 1213 MIB-II March 1991 - - - -- the IP routing table - - -- The IP routing table contains an entry for each route - -- presently known to this entity. - - ipRouteTable OBJECT-TYPE - SYNTAX SEQUENCE OF IpRouteEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "This entity's IP Routing table." - ::= { ip 21 } - - ipRouteEntry OBJECT-TYPE - SYNTAX IpRouteEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "A route to a particular destination." 
- INDEX { ipRouteDest } - ::= { ipRouteTable 1 } - - IpRouteEntry ::= - SEQUENCE { - ipRouteDest - IpAddress, - ipRouteIfIndex - INTEGER, - ipRouteMetric1 - INTEGER, - ipRouteMetric2 - INTEGER, - ipRouteMetric3 - INTEGER, - ipRouteMetric4 - INTEGER, - ipRouteNextHop - IpAddress, - ipRouteType - INTEGER, - ipRouteProto - INTEGER, - ipRouteAge - INTEGER, - ipRouteMask - IpAddress, - ipRouteMetric5 - INTEGER, - - - -SNMP Working Group [Page 33] - -RFC 1213 MIB-II March 1991 - - - ipRouteInfo - OBJECT IDENTIFIER - } - - ipRouteDest OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The destination IP address of this route. An - entry with a value of 0.0.0.0 is considered a - default route. Multiple routes to a single - destination can appear in the table, but access to - such multiple entries is dependent on the table- - access mechanisms defined by the network - management protocol in use." - ::= { ipRouteEntry 1 } - - ipRouteIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The index value which uniquely identifies the - local interface through which the next hop of this - route should be reached. The interface identified - by a particular value of this index is the same - interface as identified by the same value of - ifIndex." - ::= { ipRouteEntry 2 } - - ipRouteMetric1 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The primary routing metric for this route. The - semantics of this metric are determined by the - routing-protocol specified in the route's - ipRouteProto value. If this metric is not used, - its value should be set to -1." - ::= { ipRouteEntry 3 } - - ipRouteMetric2 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - - - -SNMP Working Group [Page 34] - -RFC 1213 MIB-II March 1991 - - - "An alternate routing metric for this route. 
The - semantics of this metric are determined by the - routing-protocol specified in the route's - ipRouteProto value. If this metric is not used, - its value should be set to -1." - ::= { ipRouteEntry 4 } - - ipRouteMetric3 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "An alternate routing metric for this route. The - semantics of this metric are determined by the - routing-protocol specified in the route's - ipRouteProto value. If this metric is not used, - its value should be set to -1." - ::= { ipRouteEntry 5 } - - ipRouteMetric4 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "An alternate routing metric for this route. The - semantics of this metric are determined by the - routing-protocol specified in the route's - ipRouteProto value. If this metric is not used, - its value should be set to -1." - ::= { ipRouteEntry 6 } - - ipRouteNextHop OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The IP address of the next hop of this route. - (In the case of a route bound to an interface - which is realized via a broadcast media, the value - of this field is the agent's IP address on that - interface.)" - ::= { ipRouteEntry 7 } - - ipRouteType OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - - invalid(2), -- an invalidated route - - - -SNMP Working Group [Page 35] - -RFC 1213 MIB-II March 1991 - - - -- route to directly - direct(3), -- connected (sub-)network - - -- route to a non-local - indirect(4) -- host/network/sub-network - } - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The type of route. Note that the values - direct(3) and indirect(4) refer to the notion of - direct and indirect routing in the IP - architecture. - - Setting this object to the value invalid(2) has - the effect of invalidating the corresponding entry - in the ipRouteTable object. 
That is, it - effectively disassociates the destination - identified with said entry from the route - identified with said entry. It is an - implementation-specific matter as to whether the - agent removes an invalidated entry from the table. - Accordingly, management stations must be prepared - to receive tabular information from agents that - corresponds to entries not currently in use. - Proper interpretation of such entries requires - examination of the relevant ipRouteType object." - ::= { ipRouteEntry 8 } - - ipRouteProto OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - - -- non-protocol information, - -- e.g., manually configured - local(2), -- entries - - -- set via a network - netmgmt(3), -- management protocol - - -- obtained via ICMP, - icmp(4), -- e.g., Redirect - - -- the remaining values are - -- all gateway routing - -- protocols - egp(5), - ggp(6), - - - -SNMP Working Group [Page 36] - -RFC 1213 MIB-II March 1991 - - - hello(7), - rip(8), - is-is(9), - es-is(10), - ciscoIgrp(11), - bbnSpfIgp(12), - ospf(13), - bgp(14) - } - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The routing mechanism via which this route was - learned. Inclusion of values for gateway routing - protocols is not intended to imply that hosts - should support those protocols." - ::= { ipRouteEntry 9 } - - ipRouteAge OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The number of seconds since this route was last - updated or otherwise determined to be correct. - Note that no semantics of `too old' can be implied - except through knowledge of the routing protocol - by which the route was learned." - ::= { ipRouteEntry 10 } - - ipRouteMask OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - DESCRIPTION - "Indicate the mask to be logical-ANDed with the - destination address before being compared to the - value in the ipRouteDest field.
For those systems - that do not support arbitrary subnet masks, an - agent constructs the value of the ipRouteMask by - determining whether the value of the correspondent - ipRouteDest field belongs to a class-A, B, or C - network, and then using one of: - - mask network - 255.0.0.0 class-A - 255.255.0.0 class-B - 255.255.255.0 class-C - - - -SNMP Working Group [Page 37] - -RFC 1213 MIB-II March 1991 - - - If the value of the ipRouteDest is 0.0.0.0 (a - default route), then the mask value is also - 0.0.0.0. It should be noted that all IP routing - subsystems implicitly use this mechanism." - ::= { ipRouteEntry 11 } - - ipRouteMetric5 OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "An alternate routing metric for this route. The - semantics of this metric are determined by the - routing-protocol specified in the route's - ipRouteProto value. If this metric is not used, - its value should be set to -1." - ::= { ipRouteEntry 12 } - - ipRouteInfo OBJECT-TYPE - SYNTAX OBJECT IDENTIFIER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "A reference to MIB definitions specific to the - particular routing protocol which is responsible - for this route, as determined by the value - specified in the route's ipRouteProto value. If - this information is not present, its value should - be set to the OBJECT IDENTIFIER { 0 0 }, which is - a syntactically valid object identifier, and any - conformant implementation of ASN.1 and BER must be - able to generate and recognize this value." - ::= { ipRouteEntry 13 } - - - -- the IP Address Translation table - - -- The IP address translation table contains the IpAddress to - -- `physical' address equivalences. Some interfaces do not - -- use translation tables for determining address - -- equivalences (e.g., DDN-X.25 has an algorithmic method); - -- if all interfaces are of this type, then the Address - -- Translation table is empty, i.e., has zero entries.
- - ipNetToMediaTable OBJECT-TYPE - SYNTAX SEQUENCE OF IpNetToMediaEntry - ACCESS not-accessible - STATUS mandatory - - - -SNMP Working Group [Page 38] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "The IP Address Translation table used for mapping - from IP addresses to physical addresses." - ::= { ip 22 } - - ipNetToMediaEntry OBJECT-TYPE - SYNTAX IpNetToMediaEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "Each entry contains one IpAddress to `physical' - address equivalence." - INDEX { ipNetToMediaIfIndex, - ipNetToMediaNetAddress } - ::= { ipNetToMediaTable 1 } - - IpNetToMediaEntry ::= - SEQUENCE { - ipNetToMediaIfIndex - INTEGER, - ipNetToMediaPhysAddress - PhysAddress, - ipNetToMediaNetAddress - IpAddress, - ipNetToMediaType - INTEGER - } - - ipNetToMediaIfIndex OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The interface on which this entry's equivalence - is effective. The interface identified by a - particular value of this index is the same - interface as identified by the same value of - ifIndex." - ::= { ipNetToMediaEntry 1 } - - ipNetToMediaPhysAddress OBJECT-TYPE - SYNTAX PhysAddress - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The media-dependent `physical' address." - ::= { ipNetToMediaEntry 2 } - - - - -SNMP Working Group [Page 39] - -RFC 1213 MIB-II March 1991 - - - ipNetToMediaNetAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The IpAddress corresponding to the media- - dependent `physical' address." - ::= { ipNetToMediaEntry 3 } - - ipNetToMediaType OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - invalid(2), -- an invalidated mapping - dynamic(3), - static(4) - } - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The type of mapping. - - Setting this object to the value invalid(2) has - the effect of invalidating the corresponding entry - in the ipNetToMediaTable. 
That is, it effectively - disassociates the interface identified with said - entry from the mapping identified with said entry. - It is an implementation-specific matter as to - whether the agent removes an invalidated entry - from the table. Accordingly, management stations - must be prepared to receive tabular information - from agents that corresponds to entries not - currently in use. Proper interpretation of such - entries requires examination of the relevant - ipNetToMediaType object." - ::= { ipNetToMediaEntry 4 } - - - -- additional IP objects - - ipRoutingDiscards OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of routing entries which were chosen - to be discarded even though they are valid. One - possible reason for discarding such an entry could - be to free-up buffer space for other routing - - - -SNMP Working Group [Page 40] - -RFC 1213 MIB-II March 1991 - - - entries." - ::= { ip 23 } - - - -- the ICMP group - - -- Implementation of the ICMP group is mandatory for all - -- systems. - - icmpInMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of ICMP messages which the - entity received. Note that this counter includes - all those counted by icmpInErrors." - ::= { icmp 1 } - - icmpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP messages which the entity - received but determined as having ICMP-specific - errors (bad ICMP checksums, bad length, etc.)." - ::= { icmp 2 } - - icmpInDestUnreachs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Destination Unreachable - messages received." - ::= { icmp 3 } - - icmpInTimeExcds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Time Exceeded messages - received." 
- ::= { icmp 4 } - - - - - -SNMP Working Group [Page 41] - -RFC 1213 MIB-II March 1991 - - - icmpInParmProbs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Parameter Problem messages - received." - ::= { icmp 5 } - - icmpInSrcQuenchs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Source Quench messages - received." - ::= { icmp 6 } - - icmpInRedirects OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Redirect messages received." - ::= { icmp 7 } - - icmpInEchos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Echo (request) messages - received." - ::= { icmp 8 } - - icmpInEchoReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Echo Reply messages received." - ::= { icmp 9 } - - icmpInTimestamps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - - - -SNMP Working Group [Page 42] - -RFC 1213 MIB-II March 1991 - - - "The number of ICMP Timestamp (request) messages - received." - ::= { icmp 10 } - - icmpInTimestampReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Timestamp Reply messages - received." - ::= { icmp 11 } - - icmpInAddrMasks OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Address Mask Request messages - received." - ::= { icmp 12 } - - icmpInAddrMaskReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Address Mask Reply messages - received." - ::= { icmp 13 } - - icmpOutMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of ICMP messages which this - entity attempted to send. 
Note that this counter - includes all those counted by icmpOutErrors." - ::= { icmp 14 } - - icmpOutErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP messages which this entity did - not send due to problems discovered within ICMP - - - -SNMP Working Group [Page 43] - -RFC 1213 MIB-II March 1991 - - - such as a lack of buffers. This value should not - include errors discovered outside the ICMP layer - such as the inability of IP to route the resultant - datagram. In some implementations there may be no - types of error which contribute to this counter's - value." - ::= { icmp 15 } - - icmpOutDestUnreachs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Destination Unreachable - messages sent." - ::= { icmp 16 } - - icmpOutTimeExcds OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Time Exceeded messages sent." - ::= { icmp 17 } - - icmpOutParmProbs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Parameter Problem messages - sent." - ::= { icmp 18 } - - icmpOutSrcQuenchs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Source Quench messages sent." - ::= { icmp 19 } - - icmpOutRedirects OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Redirect messages sent. For a - - - -SNMP Working Group [Page 44] - -RFC 1213 MIB-II March 1991 - - - host, this object will always be zero, since hosts - do not send redirects." - ::= { icmp 20 } - - icmpOutEchos OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Echo (request) messages sent." - ::= { icmp 21 } - - icmpOutEchoReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Echo Reply messages sent." 
- ::= { icmp 22 } - - icmpOutTimestamps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Timestamp (request) messages - sent." - ::= { icmp 23 } - - icmpOutTimestampReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Timestamp Reply messages - sent." - ::= { icmp 24 } - - icmpOutAddrMasks OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Address Mask Request messages - sent." - ::= { icmp 25 } - - - - - -SNMP Working Group [Page 45] - -RFC 1213 MIB-II March 1991 - - - icmpOutAddrMaskReps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of ICMP Address Mask Reply messages - sent." - ::= { icmp 26 } - - - -- the TCP group - - -- Implementation of the TCP group is mandatory for all - -- systems that implement the TCP. - - -- Note that instances of object types that represent - -- information about a particular TCP connection are - -- transient; they persist only as long as the connection - -- in question. - - tcpRtoAlgorithm OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - - constant(2), -- a constant rto - rsre(3), -- MIL-STD-1778, Appendix B - vanj(4) -- Van Jacobson's algorithm [10] - } - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The algorithm used to determine the timeout value - used for retransmitting unacknowledged octets." - ::= { tcp 1 } - - tcpRtoMin OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The minimum value permitted by a TCP - implementation for the retransmission timeout, - measured in milliseconds. More refined semantics - for objects of this type depend upon the algorithm - used to determine the retransmission timeout. In - particular, when the timeout algorithm is rsre(3), - an object of this type has the semantics of the - LBOUND quantity described in RFC 793." 
- - - -SNMP Working Group [Page 46] - -RFC 1213 MIB-II March 1991 - - - ::= { tcp 2 } - - - tcpRtoMax OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The maximum value permitted by a TCP - implementation for the retransmission timeout, - measured in milliseconds. More refined semantics - for objects of this type depend upon the algorithm - used to determine the retransmission timeout. In - particular, when the timeout algorithm is rsre(3), - an object of this type has the semantics of the - UBOUND quantity described in RFC 793." - ::= { tcp 3 } - - tcpMaxConn OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The limit on the total number of TCP connections - the entity can support. In entities where the - maximum number of connections is dynamic, this - object should contain the value -1." - ::= { tcp 4 } - - tcpActiveOpens OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of times TCP connections have made a - direct transition to the SYN-SENT state from the - CLOSED state." - ::= { tcp 5 } - - tcpPassiveOpens OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of times TCP connections have made a - direct transition to the SYN-RCVD state from the - LISTEN state." - ::= { tcp 6 } - - - -SNMP Working Group [Page 47] - -RFC 1213 MIB-II March 1991 - - - tcpAttemptFails OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of times TCP connections have made a - direct transition to the CLOSED state from either - the SYN-SENT state or the SYN-RCVD state, plus the - number of times TCP connections have made a direct - transition to the LISTEN state from the SYN-RCVD - state." 
- ::= { tcp 7 } - - tcpEstabResets OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of times TCP connections have made a - direct transition to the CLOSED state from either - the ESTABLISHED state or the CLOSE-WAIT state." - ::= { tcp 8 } - - tcpCurrEstab OBJECT-TYPE - SYNTAX Gauge - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of TCP connections for which the - current state is either ESTABLISHED or CLOSE- - WAIT." - ::= { tcp 9 } - - tcpInSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of segments received, including - those received in error. This count includes - segments received on currently established - connections." - ::= { tcp 10 } - - tcpOutSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - - - -SNMP Working Group [Page 48] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "The total number of segments sent, including - those on current connections but excluding those - containing only retransmitted octets." - ::= { tcp 11 } - - tcpRetransSegs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of segments retransmitted - that - is, the number of TCP segments transmitted - containing one or more previously transmitted - octets." - ::= { tcp 12 } - - - -- the TCP Connection table - - -- The TCP connection table contains information about this - -- entity's existing TCP connections. - - tcpConnTable OBJECT-TYPE - SYNTAX SEQUENCE OF TcpConnEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "A table containing TCP connection-specific - information." - ::= { tcp 13 } - - tcpConnEntry OBJECT-TYPE - SYNTAX TcpConnEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "Information about a particular current TCP - connection. 
An object of this type is transient, - in that it ceases to exist when (or soon after) - the connection makes the transition to the CLOSED - state." - INDEX { tcpConnLocalAddress, - tcpConnLocalPort, - tcpConnRemAddress, - tcpConnRemPort } - ::= { tcpConnTable 1 } - - - - -SNMP Working Group [Page 49] - -RFC 1213 MIB-II March 1991 - - - TcpConnEntry ::= - SEQUENCE { - tcpConnState - INTEGER, - tcpConnLocalAddress - IpAddress, - tcpConnLocalPort - INTEGER (0..65535), - tcpConnRemAddress - IpAddress, - tcpConnRemPort - INTEGER (0..65535) - } - - tcpConnState OBJECT-TYPE - SYNTAX INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11), - deleteTCB(12) - } - ACCESS read-write - STATUS mandatory - DESCRIPTION - "The state of this TCP connection. - - The only value which may be set by a management - station is deleteTCB(12). Accordingly, it is - appropriate for an agent to return a `badValue' - response if a management station attempts to set - this object to any other value. - - If a management station sets this object to the - value deleteTCB(12), then this has the effect of - deleting the TCB (as defined in RFC 793) of the - corresponding connection on the managed node, - resulting in immediate termination of the - connection. - - As an implementation-specific option, a RST - - - -SNMP Working Group [Page 50] - -RFC 1213 MIB-II March 1991 - - - segment may be sent from the managed node to the - other TCP endpoint (note however that RST segments - are not sent reliably)." - ::= { tcpConnEntry 1 } - - tcpConnLocalAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The local IP address for this TCP connection. In - the case of a connection in the listen state which - is willing to accept connections for any IP - interface associated with the node, the value - 0.0.0.0 is used." 
- ::= { tcpConnEntry 2 } - - tcpConnLocalPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The local port number for this TCP connection." - ::= { tcpConnEntry 3 } - - tcpConnRemAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The remote IP address for this TCP connection." - ::= { tcpConnEntry 4 } - - tcpConnRemPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The remote port number for this TCP connection." - ::= { tcpConnEntry 5 } - - - -- additional TCP objects - - tcpInErrs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - - - -SNMP Working Group [Page 51] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "The total number of segments received in error - (e.g., bad TCP checksums)." - ::= { tcp 14 } - - tcpOutRsts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of TCP segments sent containing the - RST flag." - ::= { tcp 15 } - - - -- the UDP group - - -- Implementation of the UDP group is mandatory for all - -- systems which implement the UDP. - - udpInDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of UDP datagrams delivered to - UDP users." - ::= { udp 1 } - - udpNoPorts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of received UDP datagrams for - which there was no application at the destination - port." - ::= { udp 2 } - - udpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of received UDP datagrams that could - not be delivered for reasons other than the lack - of an application at the destination port." 
- ::= { udp 3 } - - - -SNMP Working Group [Page 52] - -RFC 1213 MIB-II March 1991 - - - udpOutDatagrams OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of UDP datagrams sent from this - entity." - ::= { udp 4 } - - - -- the UDP Listener table - - -- The UDP listener table contains information about this - -- entity's UDP end-points on which a local application is - -- currently accepting datagrams. - - udpTable OBJECT-TYPE - SYNTAX SEQUENCE OF UdpEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "A table containing UDP listener information." - ::= { udp 5 } - - udpEntry OBJECT-TYPE - SYNTAX UdpEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "Information about a particular current UDP - listener." - INDEX { udpLocalAddress, udpLocalPort } - ::= { udpTable 1 } - - UdpEntry ::= - SEQUENCE { - udpLocalAddress - IpAddress, - udpLocalPort - INTEGER (0..65535) - } - - udpLocalAddress OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The local IP address for this UDP listener. In - - - -SNMP Working Group [Page 53] - -RFC 1213 MIB-II March 1991 - - - the case of a UDP listener which is willing to - accept datagrams for any IP interface associated - with the node, the value 0.0.0.0 is used." - ::= { udpEntry 1 } - - udpLocalPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The local port number for this UDP listener." - ::= { udpEntry 2 } - - - -- the EGP group - - -- Implementation of the EGP group is mandatory for all - -- systems which implement the EGP. - - egpInMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of EGP messages received without - error." - ::= { egp 1 } - - egpInErrors OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of EGP messages received that proved - to be in error." 
- ::= { egp 2 } - - egpOutMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of locally generated EGP - messages." - ::= { egp 3 } - - egpOutErrors OBJECT-TYPE - SYNTAX Counter - - - -SNMP Working Group [Page 54] - -RFC 1213 MIB-II March 1991 - - - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of locally generated EGP messages not - sent due to resource limitations within an EGP - entity." - ::= { egp 4 } - - - -- the EGP Neighbor table - - -- The EGP neighbor table contains information about this - -- entity's EGP neighbors. - - egpNeighTable OBJECT-TYPE - SYNTAX SEQUENCE OF EgpNeighEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "The EGP neighbor table." - ::= { egp 5 } - - egpNeighEntry OBJECT-TYPE - SYNTAX EgpNeighEntry - ACCESS not-accessible - STATUS mandatory - DESCRIPTION - "Information about this entity's relationship with - a particular EGP neighbor." - INDEX { egpNeighAddr } - ::= { egpNeighTable 1 } - - EgpNeighEntry ::= - SEQUENCE { - egpNeighState - INTEGER, - egpNeighAddr - IpAddress, - egpNeighAs - INTEGER, - egpNeighInMsgs - Counter, - egpNeighInErrs - Counter, - egpNeighOutMsgs - Counter, - egpNeighOutErrs - Counter, - - - -SNMP Working Group [Page 55] - -RFC 1213 MIB-II March 1991 - - - egpNeighInErrMsgs - Counter, - egpNeighOutErrMsgs - Counter, - egpNeighStateUps - Counter, - egpNeighStateDowns - Counter, - egpNeighIntervalHello - INTEGER, - egpNeighIntervalPoll - INTEGER, - egpNeighMode - INTEGER, - egpNeighEventTrigger - INTEGER - } - - egpNeighState OBJECT-TYPE - SYNTAX INTEGER { - idle(1), - acquisition(2), - down(3), - up(4), - cease(5) - } - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The EGP state of the local system with respect to - this entry's EGP neighbor. Each EGP state is - represented by a value that is one greater than - the numerical value associated with said state in - RFC 904." 
- ::= { egpNeighEntry 1 } - - egpNeighAddr OBJECT-TYPE - SYNTAX IpAddress - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The IP address of this entry's EGP neighbor." - ::= { egpNeighEntry 2 } - - egpNeighAs OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - - - -SNMP Working Group [Page 56] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "The autonomous system of this EGP peer. Zero - should be specified if the autonomous system - number of the neighbor is not yet known." - ::= { egpNeighEntry 3 } - - egpNeighInMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of EGP messages received without error - from this EGP peer." - ::= { egpNeighEntry 4 } - - egpNeighInErrs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of EGP messages received from this EGP - peer that proved to be in error (e.g., bad EGP - checksum)." - ::= { egpNeighEntry 5 } - - egpNeighOutMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of locally generated EGP messages to - this EGP peer." - ::= { egpNeighEntry 6 } - - egpNeighOutErrs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of locally generated EGP messages not - sent to this EGP peer due to resource limitations - within an EGP entity." - ::= { egpNeighEntry 7 } - - egpNeighInErrMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - - - -SNMP Working Group [Page 57] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "The number of EGP-defined error messages received - from this EGP peer." - ::= { egpNeighEntry 8 } - - egpNeighOutErrMsgs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of EGP-defined error messages sent to - this EGP peer." 
- ::= { egpNeighEntry 9 } - - egpNeighStateUps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of EGP state transitions to the UP - state with this EGP peer." - ::= { egpNeighEntry 10 } - - egpNeighStateDowns OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The number of EGP state transitions from the UP - state to any other state with this EGP peer." - ::= { egpNeighEntry 11 } - - egpNeighIntervalHello OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The interval between EGP Hello command - retransmissions (in hundredths of a second). This - represents the t1 timer as defined in RFC 904." - ::= { egpNeighEntry 12 } - - egpNeighIntervalPoll OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The interval between EGP poll command - - - -SNMP Working Group [Page 58] - -RFC 1213 MIB-II March 1991 - - - retransmissions (in hundredths of a second). This - represents the t3 timer as defined in RFC 904." - ::= { egpNeighEntry 13 } - - egpNeighMode OBJECT-TYPE - SYNTAX INTEGER { active(1), passive(2) } - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The polling mode of this EGP entity, either - passive or active." - ::= { egpNeighEntry 14 } - - egpNeighEventTrigger OBJECT-TYPE - SYNTAX INTEGER { start(1), stop(2) } - ACCESS read-write - STATUS mandatory - DESCRIPTION - "A control variable used to trigger operator- - initiated Start and Stop events. When read, this - variable always returns the most recent value that - egpNeighEventTrigger was set to. If it has not - been set since the last initialization of the - network management subsystem on the node, it - returns a value of `stop'. - - When set, this variable causes a Start or Stop - event on the specified neighbor, as specified on - pages 8-10 of RFC 904. 
Briefly, a Start event - causes an Idle peer to begin neighbor acquisition - and a non-Idle peer to reinitiate neighbor - acquisition. A stop event causes a non-Idle peer - to return to the Idle state until a Start event - occurs, either via egpNeighEventTrigger or - otherwise." - ::= { egpNeighEntry 15 } - - - -- additional EGP objects - - egpAs OBJECT-TYPE - SYNTAX INTEGER - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The autonomous system number of this EGP entity." - ::= { egp 6 } - - - - -SNMP Working Group [Page 59] - -RFC 1213 MIB-II March 1991 - - - -- the Transmission group - - -- Based on the transmission media underlying each interface - -- on a system, the corresponding portion of the Transmission - -- group is mandatory for that system. - - -- When Internet-standard definitions for managing - -- transmission media are defined, the transmission group is - -- used to provide a prefix for the names of those objects. - - -- Typically, such definitions reside in the experimental - -- portion of the MIB until they are "proven", then as a - -- part of the Internet standardization process, the - -- definitions are accordingly elevated and a new object - -- identifier, under the transmission group is defined. By - -- convention, the name assigned is: - -- - -- type OBJECT IDENTIFIER ::= { transmission number } - -- - -- where "type" is the symbolic value used for the media in - -- the ifType column of the ifTable object, and "number" is - -- the actual integer value corresponding to the symbol. - - - -- the SNMP group - - -- Implementation of the SNMP group is mandatory for all - -- systems which support an SNMP protocol entity. Some of - -- the objects defined below will be zero-valued in those - -- SNMP implementations that are optimized to support only - -- those functions specific to either a management agent or - -- a management station. 
In particular, it should be - -- observed that the objects below refer to an SNMP entity, - -- and there may be several SNMP entities residing on a - -- managed node (e.g., if the node is acting as - -- a management station). - - snmpInPkts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of Messages delivered to the - SNMP entity from the transport service." - ::= { snmp 1 } - - snmpOutPkts OBJECT-TYPE - SYNTAX Counter - - - -SNMP Working Group [Page 60] - -RFC 1213 MIB-II March 1991 - - - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Messages which were - passed from the SNMP protocol entity to the - transport service." - ::= { snmp 2 } - - snmpInBadVersions OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Messages which were - delivered to the SNMP protocol entity and were for - an unsupported SNMP version." - ::= { snmp 3 } - - snmpInBadCommunityNames OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Messages delivered to - the SNMP protocol entity which used a SNMP - community name not known to said entity." - ::= { snmp 4 } - - snmpInBadCommunityUses OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Messages delivered to - the SNMP protocol entity which represented an SNMP - operation which was not allowed by the SNMP - community named in the Message." - ::= { snmp 5 } - - snmpInASNParseErrs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of ASN.1 or BER errors - encountered by the SNMP protocol entity when - decoding received SNMP Messages." 
- ::= { snmp 6 } - - - -SNMP Working Group [Page 61] - -RFC 1213 MIB-II March 1991 - - - -- { snmp 7 } is not used - - snmpInTooBigs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - delivered to the SNMP protocol entity and for - which the value of the error-status field is - `tooBig'." - ::= { snmp 8 } - - snmpInNoSuchNames OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - delivered to the SNMP protocol entity and for - which the value of the error-status field is - `noSuchName'." - ::= { snmp 9 } - - snmpInBadValues OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - delivered to the SNMP protocol entity and for - which the value of the error-status field is - `badValue'." - ::= { snmp 10 } - - snmpInReadOnlys OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of valid SNMP PDUs which were - delivered to the SNMP protocol entity and for - which the value of the error-status field is - `readOnly'. It should be noted that it is a - protocol error to generate an SNMP PDU which - contains the value `readOnly' in the error-status - field, as such this object is provided as a means - of detecting incorrect implementations of the - - - -SNMP Working Group [Page 62] - -RFC 1213 MIB-II March 1991 - - - SNMP." - ::= { snmp 11 } - - snmpInGenErrs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - delivered to the SNMP protocol entity and for - which the value of the error-status field is - `genErr'." 
- ::= { snmp 12 } - - snmpInTotalReqVars OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of MIB objects which have been - retrieved successfully by the SNMP protocol entity - as the result of receiving valid SNMP Get-Request - and Get-Next PDUs." - ::= { snmp 13 } - - snmpInTotalSetVars OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of MIB objects which have been - altered successfully by the SNMP protocol entity - as the result of receiving valid SNMP Set-Request - PDUs." - ::= { snmp 14 } - - snmpInGetRequests OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Get-Request PDUs which - have been accepted and processed by the SNMP - protocol entity." - ::= { snmp 15 } - - snmpInGetNexts OBJECT-TYPE - SYNTAX Counter - - - -SNMP Working Group [Page 63] - -RFC 1213 MIB-II March 1991 - - - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Get-Next PDUs which have - been accepted and processed by the SNMP protocol - entity." - ::= { snmp 16 } - - snmpInSetRequests OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Set-Request PDUs which - have been accepted and processed by the SNMP - protocol entity." - ::= { snmp 17 } - - snmpInGetResponses OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Get-Response PDUs which - have been accepted and processed by the SNMP - protocol entity." - ::= { snmp 18 } - - snmpInTraps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Trap PDUs which have - been accepted and processed by the SNMP protocol - entity." 
- ::= { snmp 19 } - - snmpOutTooBigs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - generated by the SNMP protocol entity and for - which the value of the error-status field is - `tooBig'." - ::= { snmp 20 } - - - -SNMP Working Group [Page 64] - -RFC 1213 MIB-II March 1991 - - - snmpOutNoSuchNames OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - generated by the SNMP protocol entity and for - which the value of the error-status field is - `noSuchName'." - ::= { snmp 21 } - - snmpOutBadValues OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - generated by the SNMP protocol entity and for - which the value of the error-status field is - `badValue'." - ::= { snmp 22 } - - -- { snmp 23 } is not used - - snmpOutGenErrs OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP PDUs which were - generated by the SNMP protocol entity and for - which the value of the error-status field is - `genErr'." - ::= { snmp 24 } - - snmpOutGetRequests OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Get-Request PDUs which - have been generated by the SNMP protocol entity." - ::= { snmp 25 } - - snmpOutGetNexts OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - - - -SNMP Working Group [Page 65] - -RFC 1213 MIB-II March 1991 - - - DESCRIPTION - "The total number of SNMP Get-Next PDUs which have - been generated by the SNMP protocol entity." - ::= { snmp 26 } - - snmpOutSetRequests OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Set-Request PDUs which - have been generated by the SNMP protocol entity." 
- ::= { snmp 27 } - - snmpOutGetResponses OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Get-Response PDUs which - have been generated by the SNMP protocol entity." - ::= { snmp 28 } - - snmpOutTraps OBJECT-TYPE - SYNTAX Counter - ACCESS read-only - STATUS mandatory - DESCRIPTION - "The total number of SNMP Trap PDUs which have - been generated by the SNMP protocol entity." - ::= { snmp 29 } - - snmpEnableAuthenTraps OBJECT-TYPE - SYNTAX INTEGER { enabled(1), disabled(2) } - ACCESS read-write - STATUS mandatory - DESCRIPTION - "Indicates whether the SNMP agent process is - permitted to generate authentication-failure - traps. The value of this object overrides any - configuration information; as such, it provides a - means whereby all authentication-failure traps may - be disabled. - - Note that it is strongly recommended that this - object be stored in non-volatile memory so that it - remains constant between re-initializations of the - network management system." - - - -SNMP Working Group [Page 66] - -RFC 1213 MIB-II March 1991 - - - ::= { snmp 30 } - - END - -7. Acknowledgements - - This document was produced by the SNMP Working Group: - - Anne Ambler, Spider - Karl Auerbach, Sun - Fred Baker, ACC - David Bridgham, Epilogue Technology - Ken Brinkerhoff - Ron Broersma, NOSC - Brian Brown, Synoptics - Jack Brown, US Army - Theodore Brunner, Bellcore - Jeff Buffum, HP - Jeffrey Buffum, HP - John Burress, Wellfleet - Jeffrey D. Case, University of Tennessee at Knoxville - Chris Chiptasso, Spartacus - Paul Ciarfella, DEC - Bob Collet - John Cook, Chipcom - Tracy Cox, Bellcore - James R. Davin, MIT-LCS - Eric Decker, cisco - Kurt Dobbins, Cabletron - Nadya El-Afandi, Network Systems - Gary Ellis, HP - Fred Engle - Mike Erlinger - Mark S. 
Fedor, PSI - Richard Fox, Synoptics - Karen Frisa, CMU - Stan Froyd, ACC - Chris Gunner, DEC - Fred Harris, University of Tennessee at Knoxville - Ken Hibbard, Xylogics - Ole Jacobsen, Interop - Ken Jones - Satish Joshi, Synoptics - Frank Kastenholz, Racal-Interlan - Shimshon Kaufman, Spartacus - Ken Key, University of Tennessee at Knoxville - Jim Kinder, Fibercom - Alex Koifman, BBN - - - -SNMP Working Group [Page 67] - -RFC 1213 MIB-II March 1991 - - - Christopher Kolb, PSI - Cheryl Krupczak, NCR - Paul Langille, DEC - Martin Lee Schoffstall, PSI - Peter Lin, Vitalink - John Lunny, TWG - Carl Malamud - Gary Malkin, FTP Software, Inc. - Randy Mayhew, University of Tennessee at Knoxville - Keith McCloghrie, Hughes LAN Systems - Donna McMaster, David Systems - Lynn Monsanto, Sun - Dave Perkins, 3COM - Jim Reinstedler, Ungerman Bass - Anil Rijsinghani, DEC - Kathy Rinehart, Arnold AFB - Kary Robertson - Marshall T. Rose, PSI (chair) - L. Michael Sabo, NCSC - Jon Saperia, DEC - Greg Satz, cisco - Martin Schoffstall, PSI - John Seligson - Steve Sherry, Xyplex - Fei Shu, NEC - Sam Sjogren, TGV - Mark Sleeper, Sparta - Lance Sprung - Mike St.Johns - Bob Stewart, Xyplex - Emil Sturniold - Kaj Tesink, Bellcore - Geoff Thompson, Synoptics - Dean Throop, Data General - Bill Townsend, Xylogics - Maurice Turcotte, Racal-Milgo - Kannan Varadhou - Sudhanshu Verma, HP - Bill Versteeg, Network Research Corporation - Warren Vik, Interactive Systems - David Waitzman, BBN - Steve Waldbusser, CMU - Dan Wintringhan - David Wood - Wengyik Yeong, PSI - Jeff Young, Cray Research - - - - - -SNMP Working Group [Page 68] - -RFC 1213 MIB-II March 1991 - - - In addition, the comments of the following individuals are also - acknolwedged: - - Craig A. Finseth, Minnesota Supercomputer Center, Inc. - Jeffrey C. Honig, Cornell University Theory Center - Philip R. Karn, Bellcore - -8. 
References - - [1] Cerf, V., "IAB Recommendations for the Development of Internet - Network Management Standards", RFC 1052, NRI, April 1988. - - [2] Rose M., and K. McCloghrie, "Structure and Identification of - Management Information for TCP/IP-based internets," RFC 1065, - TWG, August 1988. - - [3] McCloghrie, K., and M. Rose, "Management Information Base for - Network Management of TCP/IP-based internets, RFC 1066, TWG, - August 1988. - - [4] Cerf, V., "Report of the Second Ad Hoc Network Management Review - Group", RFC 1109, NRI, August 1989. - - [5] Case, J., Fedor, M., Schoffstall, M., and J. Davin, "Simple - Network Management Protocol (SNMP)", RFC 1098, University of - Tennessee at Knoxville, NYSERNet, Inc., Rensselaer Polytechnic - Institute, MIT Laboratory for Computer Science, April 1989. - - [6] Postel, J., and J. Reynolds, "TELNET Protocol Specification", RFC - 854, USC/Information Sciences Institute, May 1983. - - [7] Satz, G., "Connectionless Network Protocol (ISO 8473) and End - System to Intermediate System (ISO 9542) Management Information - Base", RFC 1162, cisco Systems, Inc., June 1990. - - [8] Information processing systems - Open Systems Interconnection - - Specification of Abstract Syntax Notation One (ASN.1), - International Organization for Standardization, International - Standard 8824, December 1987. - - [9] Information processing systems - Open Systems Interconnection - - Specification of Basic Encoding Rules for Abstract Notation One - (ASN.1), International Organization for Standardization, - International Standard 8825, December 1987. - - [10] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM 1988, - Stanford, California. - - - - -SNMP Working Group [Page 69] - -RFC 1213 MIB-II March 1991 - - - [11] Hagens, R., Hall, N., and M. Rose, "Use of the Internet as a - Subnetwork for Experimentation with the OSI Network Layer", RFC - 1070, U of Wiscsonsin - Madison, U of Wiscsonsin - Madison, The - Wollongong Group, February 1989. 
- - [12] Rose M., and K. McCloghrie, "Structure and Identification of - Management Information for TCP/IP-based internets", RFC 1155, - Performance Systems International, Hughes LAN Systems, May 1990. - - [13] Case, J., Fedor, M., Schoffstall, M., and J. Davin, "Simple - Network Management Protocol", RFC 1157, SNMP Research, - Performance Systems International, Performance Systems - International, MIT Laboratory for Computer Science, May 1990. - - [14] Rose, M., and K. McCloghrie, Editors, "Concise MIB Definitions", - RFC 1212, Performance Systems International, Hughes LAN Systems, - March 1991. - -9. Security Considerations - - Security issues are not discussed in this memo. - -10. Authors' Addresses - - Keith McCloghrie - Hughes LAN Systems - 1225 Charleston Road - Mountain View, CA 94043 - 1225 Charleston Road - Mountain View, CA 94043 - - Phone: (415) 966-7934 - - EMail: kzm@hls.com - - - Marshall T. Rose - Performance Systems International - 5201 Great America Parkway - Suite 3106 - Santa Clara, CA 95054 - - Phone: +1 408 562 6222 - - EMail: mrose@psi.com - X.500: rose, psi, us - - - - - -SNMP Working Group [Page 70] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1263.txt b/kernel/picotcp/RFC/rfc1263.txt deleted file mode 100644 index 55b6e39..0000000 --- a/kernel/picotcp/RFC/rfc1263.txt +++ /dev/null @@ -1,1067 +0,0 @@ - - - - - - -Network Working Group S. O'Malley -Request for Comments: 1263 L. Peterson - University of Arizona - October 1991 - - - TCP EXTENSIONS CONSIDERED HARMFUL - - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard. Distribution of this document is - unlimited. - -Abstract - - This RFC comments on recent proposals to extend TCP. It argues that - the backward compatible extensions proposed in RFC's 1072 and 1185 - should not be pursued, and proposes an alternative way to evolve the - Internet protocol suite. 
Its purpose is to stimulate discussion in - the Internet community. - -1. Introduction - - The rapid growth of the size, capacity, and complexity of the - Internet has led to the need to change the existing protocol suite. - For example, the maximum TCP window size is no longer sufficient to - efficiently support the high capacity links currently being planned - and constructed. One is then faced with the choice of either leaving - the protocol alone and accepting the fact that TCP will run no faster - on high capacity links than on low capacity links, or changing TCP. - This is not an isolated incident. We have counted at least eight - other proposed changes to TCP (some to be taken more seriously than - others), and the question is not whether to change the protocol - suite, but what is the most cost effective way to change it. - - This RFC compares the costs and benefits of three approaches to - making these changes: the creation of new protocols, backward - compatible protocol extensions, and protocol evolution. The next - section introduces these three approaches and enumerates the - strengths and weaknesses of each. The following section describes - how we believe these three approaches are best applied to the many - proposed changes to TCP. Note that we have not written this RFC as an - academic exercise. It is our intent to argue against acceptance of - the various TCP extensions, most notably RFC's 1072 and 1185 [4,5], - by describing a more palatable alternative. - - - - -O'Malley & Peterson [Page 1] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - -2. Creation vs. Extension vs. Evolution - -2.1. Protocol Creation - - Protocol creation involves the design, implementation, - standardization, and distribution of an entirely new protocol. In - this context, there are two basic reasons for creating a new - protocol. 
The first is to replace an old protocol that is so outdated - that it can no longer be effectively extended to perform its original - function. The second is to add a new protocol because users are - making demands upon the original protocol that were not envisioned by - the designer and cannot be efficiently handled in terms of the - original protocol. For example, TCP was designed as a reliable - byte-stream protocol but is commonly used as both a reliable record- - stream protocol and a reliable request-reply protocol due to the lack - of such protocols in the Internet protocol suite. The performance - demands placed upon a byte-stream protocol in the new Internet - environment makes it difficult to extend TCP to meet these new - application demands. - - The advantage of creating a new protocol is the ability to start with - a clean sheet of paper when attempting to solve a complex network - problem. The designer, free from the constraints of an existing - protocol, can take maximum advantage of modern network research in - the basic algorithms needed to solve the problem. Even more - importantly, the implementor is free to steal from a large number of - existing academic protocols that have been developed over the years. - In some cases, if truly new functionality is desired, creating a new - protocol is the only viable approach. - - The most obvious disadvantage of this approach is the high cost of - standardizing and distributing an entirely new protocol. Second, - there is the issue of making the new protocol reliable. Since new - protocols have not undergone years of network stress testing, they - often contain bugs which require backward compatible fixes, and - hence, the designer is back where he or she started. A third - disadvantage of introducing new protocols is that they generally have - new interfaces which require significant effort on the part of the - Internet community to use. This alone is often enough to kill a new - protocol. 
- - Finally, there is a subtle problem introduced by the very freedom - provided by this approach. Specifically, being able to introduce a - new protocol often results in protocols that go far beyond the basic - needs of the situation. New protocols resemble Senate appropriations - bills; they tend to accumulate many amendments that have nothing to - do with the original problem. A good example of this phenomena is the - attempt to standardize VMTP [1] as the Internet RPC protocol. While - - - -O'Malley & Peterson [Page 2] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - VMTP was a large protocol to begin with, the closer it got to - standardization the more features were added until it essentially - collapsed under its own weight. As we argue below, new protocols - should initially be minimal, and then evolve as the situation - dictates. - - -2.2. Backward Compatible Extensions - - In a backward compatible extension, the protocol is modified in such - a fashion that the new version of the protocol can transparently - inter-operate with existing versions of the protocol. This generally - implies no changes to the protocol's header. TCP slow start [3] is an - example of such a change. In a slightly more relaxed version of - backward compatibility, no changes are made to the fixed part of a - protocol's header. Instead, either some fields are added to the - variable length options field found at the end of the header, or - existing header fields are overloaded (i.e., used for multiple - purposes). However, we can find no real advantage to this technique - over simply changing the protocol. - - Backward compatible extensions are widely used to modify protocols - because there is no need to synchronize the distribution of the new - version of the protocol. The new version is essentially allowed to - diffuse through the Internet at its own pace, and at least in theory, - the Internet will continue to function as before. 
Thus, the explicit - distribution costs are limited. Backward compatible extensions also - avoid the bureaucratic costs of standardizing a new protocol. TCP is - still TCP and the approval cost of a modification to an existing - protocol is much less than that of a new protocol. Finally, the very - difficulty of making such changes tends to restrict the changes to - the minimal set needed to solve the current problem. Thus, it is rare - to see unneeded changes made when using this technique. - - Unfortunately, this approach has several drawbacks. First, the time - to distribute the new version of the protocol to all hosts can be - quite long (forever in fact). This leaves the network in a - heterogeneous state for long periods of time. If there is the - slightest incompatibly between old and new versions, chaos can - result. Thus, the implicit cost of this type of distribution can be - quite high. Second, designing a backward compatible change to a new - protocol is extremely difficult, and the implementations "tend toward - complexity and ugliness" [5]. The need for backward compatibility - ensures that no code can every really be eliminated from the - protocol, and since such vestigial code is rarely executed, it is - often wrong. Finally, most protocols have limits, based upon the - design decisions of it inventors, that simply cannot be side-stepped - in this fashion. - - - -O'Malley & Peterson [Page 3] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - -2.3. Protocol Evolution - - Protocol evolution is an approach to protocol change that attempts to - escape the limits of backward compatibility without incurring all of - the costs of creating new protocols. The basic idea is for the - protocol designer to take an existing protocol that requires - modification and make the desired changes without maintaining - backward compatibility. This drastically simplifies the job of the - protocol designer. 
For example, the limited TCP window size could be - fixed by changing the definition of the window size in the header - from 16-bits to 32-bits, and re-compiling the protocol. The effect of - backward compatibility would be ensured by simply keeping both the - new and old version of the protocol running until most machines use - the new version. Since the change is small and invisible to the user - interface, it is a trivial problem to dynamically select the correct - TCP version at runtime. How this is done is discussed in the next - section. - - Protocol evolution has several advantages. First, it is by far the - simplest type of modification to make to a protocol, and hence, the - modifications can be made faster and are less likely to contain bugs. - There is no need to worry about the effects of the change on all - previous versions of the protocol. Also, most of the protocol is - carried over into the new version unchanged, thus avoiding the design - and debugging cost of creating an entirely new protocol. Second, - there is no artificial limit to the amount of change that can be made - to a protocol, and as a consequence, its useful lifetime can be - extended indefinitely. In a series of evolutionary steps, it is - possible to make fairly radical changes to a protocol without - upsetting the Internet community greatly. Specifically, it is - possible to both add new features and remove features that are no - longer required for the current environment. Thus, the protocol is - not condemned to grow without bound. Finally, by keeping the old - version of the protocol around, backward compatibility is guaranteed. - The old code will work as well as it ever did. - - Assuming the infrastructure described in the following subsection, - the only real disadvantage of protocol evolution is the amount of - memory required to run several versions of the same protocol. 
- Fortunately, memory is not the scarcest resource in modern - workstations (it may, however, be at a premium in the BSD kernel and - its derivatives). Since old versions may rarely if ever be executed, - the old versions can be swapped out to disk with little performance - loss. Finally, since this cost is explicit, there is a huge incentive - to eliminate old protocol versions from the network. - - - - - - -O'Malley & Peterson [Page 4] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - -2.4. Infrastructure Support for Protocol Evolution - - The effective use of protocol evolution implies that each protocol is - considered a vector of implementations which share the same top level - interface, and perhaps not much else. TCP[0] is the current - implementation of TCP and exists to provide backward compatibility - with all existing machines. TCP[1] is a version of TCP that is - optimized for high-speed networks. TCP[0] is always present; TCP[1] - may or may not be. Treating TCP as a vector of protocols requires - only three changes to the way protocols are designed and implemented. - - First, each version of TCP is assigned a unique id, but this id is - not given as an IP protocol number. (This is because IP's protocol - number field is only 8 bits long and could easily be exhausted.) The - "obvious" solution to this limitation is to increase IP's protocol - number field to 32 bits. In this case, however, the obvious solution - is wrong, not because of the difficultly of changing IP, but simply - because there is a better approach. The best way to deal with this - problem is to increase the IP protocol number field to 32 bits and - move it to the very end of the IP header (i.e., the first four bytes - of the TCP header). A backward compatible modification would be made - to IP such that for all packets with a special protocol number, say - 77, IP would look into the four bytes following its header for its - de-multiplexing information. 
On systems which do not support a - modified IP, an actual protocol 77 would be used to perform the de- - multiplexing to the correct TCP version. - - Second, a version control protocol, called VTCP, is used to select - the appropriate version of TCP for a particular connection. VTCP is - an example of a virtual protocol as introduced in [2]. Application - programs access the various versions of TCP through VTCP. When a TCP - connection is opened to a specific machine, VTCP checks its local - cache to determine the highest common version shared by the two - machines. If the target machine is in the cache, it opens that - version of TCP and returns the connection to the protocol above and - does not effect performance. If the target machine is not found in - the cache, VTCP sends a UDP packet to the other machine asking what - versions of TCP that machine supports. If it receives a response, it - uses that information to select a version and puts the information in - the cache. If no reply is forthcoming, it assumes that the other - machine does not support VTCP and attempts to open a TCP[0] - connection. VTCP's cache is flushed occasionally to ensure that its - information is current. - - Note that this is only one possible way for VTCP to decide the right - version of TCP to use. Another possibility is for VTCP to learn the - right version for a particular host when it resolves the host's name. - That is, version information could be stored in the Domain Name - - - -O'Malley & Peterson [Page 5] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - System. It is also possible that VTCP might take the performance - characteristics of the network into consideration when selecting a - version; TCP[0] may in fact turn out to be the correct choice for a - low-bandwidth network. - - Third, because our proposal would lead to a more dynamically changing - network architecture, a mechanism for distributing new versions will - need to be developed. 
This is clearly the hardest requirement of the - infrastructure, but we believe that it can be addressed in stages. - More importantly, we believe this problem can be addressed after the - decision has been made to go the protocol evolution route. In the - short term, we are considering only a single new version of TCP--- - TCP[1]. This version can be distributed in the same ad hoc way, and - at exactly the same cost, as the backward compatible changes - suggested in RFC's 1072 and 1185. - - In the medium term, we envision the IAB approving new versions of TCP - every year or so. Given this scenario, a simple distribution - mechanism can be designed based on software distribution mechanisms - that have be developed for other environments; e.g., Unix RDIST and - Mach SUP. Such a mechanism need not be available on all hosts. - Instead, hosts will be divided into two sets, those that can quickly - be updated with new protocols and those that cannot. High - performance machines that can use high performance networks will need - the most current version of TCP as soon as it is available, thus they - have incentive to change. Old machines which are too slow to drive a - high capacity lines can be ignored, and probably should be ignored. - - In the long term, we envision protocols being designed on an - application by application basis, without the need for central - approval. In such a world, a common protocol implementation - environment---a protocol backplane---is the right way to go. Given - such a backplane, protocols can be automatically installed over the - network. While we claim to know how to build such an environment, - such a discussion is beyond the scope of this paper. - - -2.5. Remarks - - Each of these three methods has its advantages. When used in - combination, the result is better protocols at a lower overall cost. 
- Backward compatible changes are best reserved for changes that do not - affect the protocol's header, and do not require that the instance - running on the other end of the connection also be changed. Protocol - evolution should be the primary way of dealing with header fields - that are no longer large enough, or when one algorithm is substituted - directly for another. New protocols should be written to off load - unexpected user demands on existing protocols, or better yet, to - - - -O'Malley & Peterson [Page 6] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - catch them before they start. - - There are also synergistic effects. First, since we know it is - possible to evolve a newly created protocol once it has been put in - place, the pressure to add unnecessary features should be reduced. - Second, the ability to create new protocols removes the pressure to - overextend a given protocol. Finally, the ability to evolve a - protocol removes the pressure to maintain backward compatibility - where it is really not possible. - - -3. TCP Extensions: A Case Study - - This section examines the effects of using our proposed methodology - to implement changes to TCP. We will begin by analyzing the backward - compatible extensions defined in RFC's 1072 and 1185, and proposing a - set of much simpler evolutionary modifications. We also analyze - several more problematical extensions to TCP, such as Transactional - TCP. Finally, we point our some areas of TCP which may require - changes in the future. - - The evolutionary modification to TCP that we propose includes all of - the functionality described in RFC's 1072 and 1185, but does not - preserve the header format. At the risk of being misunderstood as - believing backward compatibility is a good idea, we also show how our - proposed changes to TCP can be folded into a backward compatible - implementation of TCP. 
We do this as a courtesy for those readers - that cannot accept the possibility of multiple versions of TCP. - - -3.1. RFC's 1072 and 1185 - - 3.1.1. Round Trip Timing - - In RFC 1072, a new ECHO option is proposed that allows each TCP - packet to carry a timestamp in its header. This timestamp is used to - keep a more accurate estimate of the RTT (round trip time) used to - decide when to re-transmit segments. In the original TCP algorithm, - the sender manually times a small number of sends. The resulting - algorithm was quite complex and does not produce an accurate enough - RTT for high capacity networks. The inclusion of a timestamp in every - header both simplifies the code needed to calculate the RTT and - improves the accuracy and robustness of the algorithm. - - The new algorithm as proposed in RFC 1072 does not appear to have any - serious problems. However, the authors of RFC 1072 go to great - lengths in an attempt to keep this modification backward compatible - with the previous version of TCP. They place an ECHO option in the - - - -O'Malley & Peterson [Page 7] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - SYN segment and state, "It is likely that most implementations will - properly ignore any options in the SYN segment that they do not - understand, so new initial options should not cause problems" [4]. - This statement does not exactly inspire confidence, and we consider - the addition of an optional field to any protocol to be a de-facto, - if not a de-jure, example of an evolutionary change. Optional fields - simply attempt to hide the basic incompatibility inside the protocol, - it does not eliminate it. Therefore, since we are making an - evolutionary change anyway, the only modification to the proposed - algorithm is to move the fields into the header proper. Thus, each - header will contain 32-bit echo and echo reply fields. Two fields are - needed to handle bi-directional data streams. - - - 3.1.2. 
Window Size and Sequence Number Space - - Long Fat Networks (LFN's), networks which contain very high capacity - lines with very high latency, introduce the possibility that the - number of bits in transit (the bandwidth-delay product) could exceed - the TCP window size, thus making TCP the limiting factor in network - performance. Worse yet, the time it takes the sequence numbers to - wrap around could be reduced to a point below the MSL (maximum - segment lifetime), introducing the possibility of old packets being - mistakenly accepted as new. - - RFC 1072 extends the window size through the use of an implicit - constant scaling factor. The window size in the TCP header is - multiplied by this factor to get the true window size. This - algorithm has three problems. First, one must prove that at all times - the implicit scaling factor used by the sender is the same as the - receiver. The proposed algorithm appears to do so, but the - complexity of the algorithm creates the opportunity for poor - implementations to affect the correctness of TCP. Second, the use of - a scaling factor complicates the TCP implementation in general, and - can have serious effects on other parts of the protocol. - - A final problem is what we characterize as the "quantum window - sizing" problem. Assuming that the scaling factors will be powers of - two, the algorithm right shifts the receiver's window before sending - it. This effectively rounds the window size down to the nearest - multiple of the scaling factor. For large scaling factors, say 64k, - this implies that window values are all multiples of 64k and the - minimum window size is 64k; advertising a smaller window is - impossible. While this is not necessarily a problem (and it seems to - be an extreme solution to the silly window syndrome) what effect this - will have on the performance of high-speed network links is anyone's - guess. 
We can imagine this extension leading to future papers - entitled "A Quantum Mechanical Approach to Network Performance". - - - -O'Malley & Peterson [Page 8] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - RFC 1185 is an attempt to get around the problem of the window - wrapping too quickly without explicitly increasing the sequence - number space. Instead, the RFC proposes to use the timestamp used in - the ECHO option to weed out old duplicate messages. The algorithm - presented in RFC 1185 is complex and has been shown to be seriously - flawed at a recent End-to-End Research Group meeting. Attempts are - currently underway to fix the algorithm presented in the RFC. We - believe that this is a serious mistake. - - We see two problems with this approach on a very fundamental level. - First, we believe that making TCP depend on accurate clocks for - correctness to be a mistake. The Internet community has NO experience - with transport protocols that depend on clocks for correctness. - Second, the proposal uses two distinct schemes to deal with old - duplicate packets: the sliding window algorithm takes care of "new" - old packets (packets from the current sequence number epoch) and the - timestamp algorithm deals with "old" old packets (packets from - previous sequence number epochs). It is hard enough getting one of - these schemes to work much less to get two to work and ensure that - they do not interfere with one another. - - In RFC 1185, the statement is made that "An obvious fix for the - problem of cycling the sequence number space is to increase the size - of the TCP sequence number field." Using protocol evolution, the - obvious fix is also the correct one. The window size can be increased - to 32 bits by simply changing a short to a long in the definition of - the TCP header. At the same time, the sequence number and - acknowledgment fields can be increased to 64 bits. 
This change is - the minimum complexity modification to get the job done and requires - little or no analysis to be shown to work correctly. - - On machines that do not support 64-bit integers, increasing the - sequence number size is not as trivial as increasing the window size. - However, it is identical in cost to the modification proposed in RFC - 1185; the high order bits can be thought of as an optimal clock that - ticks only when it has to. Also, because we are not dealing with - real time, the problems with unreliable system clocks is avoided. On - machines that support 64-bit integers, the original TCP code may be - reused. Since only very high performance machines can hope to drive - a communications network at the rates this modification is designed - to support, and the new generation of RISC microprocessors (e.g., - MIPS R4000 and PA-RISC) do support 64-bit integers, the assumption of - 64-bit arithmetic may be more of an advantage than a liability. - - - - - - - - -O'Malley & Peterson [Page 9] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - 3.1.3. Selective Retransmission - - Another problem with TCP's support for LFN's is that the sliding - window algorithm used by TCP does not support any form of selective - acknowledgment. Thus, if a segment is lost, the total amount of data - that must be re-transmitted is some constant times the bandwidth- - delay product, despite the fact that most of the segments have in - fact arrived at the receiver. RFC 1072 proposes to extend TCP to - allow the receiver to return partial acknowledgments to the sender in - the hope that the sender will use that information to avoid - unnecessary re-transmissions. - - It has been our experience on predictable local area networks that - the performance of partial re-transmission strategies is highly non- - obvious, and it generally requires more than one iteration to find a - decent algorithm. 
It is therefore not surprising that the algorithm - proposed in RFC 1072 has some problems. The proposed TCP extension - allows the receiver to include a short list of received fragments - with every ACK. The idea being that when the receiver sends back a - normal ACK, it checks its queue of segments that have been received - out of order and sends the relative sequence numbers of contiguous - blocks of segments back to the sender. The sender then uses this - information to re-transmit the segments transmitted but not listed in - the ACK. - - As specified, this algorithm has two related problems: (1) it ignores - the relative frequencies of delivered and dropped packets, and (2) - the list provided in the option field is probably too short to do - much good on networks with large bandwidth-delay products. In every - model of high bandwidth networks that we have seen, the packet loss - rate is very low, and thus, the ratio of dropped packets to delivered - packets is very low. An algorithm that returns ACKs as proposed is - simply going to have to send more information than one in which the - receiver returns NAKs. - - This problem is compounded by the short size of the TCP option field - (44 bytes). In theory, since we are only worried about high bandwidth - networks, returning ACKs instead of NAKs is not really a problem; the - bandwidth is available to send any information that's needed. The - problem comes when trying to compress the ACK information into the 44 - bytes allowed. The proposed extensions effectively compresses the - ACK information by allowing the receiver to ACK byte ranges rather - than segments, and scaling the relative sequence numbers of the re- - transmitted segments. This makes it much more difficult for the - sender to tell which segments should be re-transmitted, and - complicates the re-transmission code. 
More importantly, one should - never compress small amounts of data being sent over a high bandwidth - network; it trades a scarce resource for an abundant resource. On - - - -O'Malley & Peterson [Page 10] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - low bandwidth networks, selective retransmission is not needed and - the SACK option should be disabled. - - We propose two solutions to this problem. First, the receiver can - examine its list of out-of-order packets and guess which segments - have been dropped, and NAK those segments back to the sender. The - number of NAKs should be low enough that one per TCP packet should be - sufficient. Note that the receiver has just as much information as - the sender about what packets should be retransmitted, and in any - case, the NAKs are simply suggestions which have no effect on - correctness. - - Our second proposed modification is to increase the offset field in - the TCP header from 4 bits to 16 bits. This allows 64k-bytes of TCP - header, which allows us to radically simplify the selective re- - transmission algorithm proposed in RFC 1072. The receiver can now - simply send a list of 64-bit sequence numbers for the out-of-order - segments to the sender. The sender can then use this information to - do a partial retransmission without needing an ouji board to - translate ACKs into segments. With the new header size, it may be - faster for the receiver to send a large list than to attempt to - aggregate segments into larger blocks. - - - 3.1.4. Header Modifications - - The modifications proposed above drastically change the size and - structure of the TCP header. This makes it a good time to re-think - the structure of the proposed TCP header. The primary goal of the - current TCP header is to save bits in the output stream. When TCP was - developed, a high bandwidth network was 56kbps, and the key use for - TCP was terminal I/O. In both situations, minimal header size was - important. 
Unfortunately, while the network has drastically - increased in performance and the usage pattern of the network is now - vastly different, most protocol designers still consider saving a few - bits in the header to be worth almost any price. Our basic goal is - different: to improve performance by eliminating the need to extract - information packed into odd length bit fields in the header. Below - is our first cut at such a modification. - - The protocol id field is there to make further evolutionary - modifications to TCP easier. This field basically subsumes the - protocol number field contained in the IP header with a version - number. Each distinct TCP version has a different protocol id and - this field ensures that the right code is looking at the right - header. The offset field has been increased to 16 bits to support - the larger header size required, and to simplify header processing. - The code field has been extended to 16 bits to support more options. - - - -O'Malley & Peterson [Page 11] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - The source port and destination port are unchanged. The size of both - the sequence number and ACK fields have been increased to 64 bits. - The open window field has been increased to 32 bits. The checksum and - urgent data pointer fields are unchanged. The echo and echo reply - fields are added. The option field remains but can be much larger - than in the old TCP. All headers are padded out to 32 bit - boundaries. Note that these changes increase the minimum header size - from 24 bytes (actually 36 bytes if the ECHO and ECHO reply options - defined in RFC 1072 are included on every packet) to 48 bytes. The - maximum header size has been increased to the maximum segment size. - We do not believe that the the increased header size will have a - measurable effect on protocol performance. 
- - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Protocol ID | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Offset | Code | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Source | Dest | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Seq | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Ack | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Window | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Checksum | Urgent | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Echo | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Echo Reply | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Options | Pad | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - 3.1.5. Backward Compatibility - - The most likely objection to the proposed TCP extension is that it is - not backward compatible with the current version of TCP, and most - importantly, TCP's header. In this section we will present three - versions of the proposed extension with increasing degrees of - backward compatibility. The final version will combine the same - degree of backward compatibility found in the protocol described in - - - -O'Malley & Peterson [Page 12] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - RFC's 1072/1185, with the much simpler semantics described in this - RFC. - - We believe that the best way to preserve backward compatibility is to - leave all of TCP alone and support the transparent use of a new - protocol when and where it is needed. The basic scheme is the one - described in section 2.4. 
Those machines and operating systems that - need to support high speed connections should implement some general - protocol infrastructure that allows them to rapidly evolve protocols. - Machines that do not require such service simply keep using the - existing version of TCP. A virtual protocol is used to manage the use - of multiple TCP versions. - - This approach has several advantages. First, it guarantees backward - compatibility with ALL existing TCP versions because such - implementations will never see strange packets with new options. - Second, it supports further modification of TCP with little - additional costs. Finally, since our version of TCP will more closely - resemble the existing TCP protocol than that proposed in RFC's - 1072/1185, the cost of maintaining two simple protocols will probably - be lower than maintaining one complex protocol. (Note that with high - probability you still have to maintain two versions of TCP in any - case.) The only additional cost is the memory required for keeping - around two copies of TCP. - - For those that insist that the only efficient way to implement TCP - modifications is in a single monolithic protocol, or those that - believe that the space requirements of two protocols would be too - great, we simply migrate the virtual protocol into TCP. TCP is - modified so that when opening a connection, the sender uses the TCP - VERSION option attached to the SYN packet to request using the new - version. The receiver responds with a TCP VERSION ACK in the SYN ACK - packet, after which point, the new header format described in Section - 3.1.4 is used. Thus, there is only one version of TCP, but that - version supports multiple header formats. The complexity of such a - protocol would be no worse than the protocol described in RFC - 1072/1185. It does, however, make it more difficult to make - additional changes to TCP. 
- - Finally, for those that believe that the preservation of the TCP's - header format has any intrinsic value (e.g., for those that don't - want to re-program their ethernet monitors), a header compatible - version of our proposal is possible. One simply takes all of the - additional information contained in the header given in Section 3.1.4 - and places it into a single optional field. Thus, one could define a - new TCP option which consists of the top 32 bits of the sequence and - ack fields, the echo and echo_reply fields, and the top 16 bits of - the window field. This modification makes it more difficult to take - - - -O'Malley & Peterson [Page 13] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - advantage of machines with 64-bit address spaces, but at a minimum - will be just as easy to process as the protocol described in RFC - 1072/1185. The only restriction is that the size of the header - option field is still limited to 44 bytes, and thus, selective - retransmission using NAKs rather than ACKs will probably be required. - - The key observation is that one should make a protocol extension - correct and simple before trying to make it backward compatible. As - far as we can tell, the only advantages possessed by the protocol - described in RFC 1072/1185 is that its typical header, size including - options, is 8 to 10 bytes shorter. The price for this "advantage" is - a protocol of such complexity that it may prove impossible for normal - humans to implement. Trying to maintain backward compatibility at - every stage of the protocol design process is a serious mistake. - - -3.2. TCP Over Extension - - Another potential problem with TCP that has been discussed recently, - but has not yet resulted in the generation of an RFC, is the - potential for TCP to grab and hold all 2**16 port numbers on a given - machine. This problem is caused by short port numbers, long MSLs, - and the misuse of TCP as a request-reply protocol. 
TCP must hold onto - each port after a close until all possible messages to that port have - died, about 240 seconds. Even worse, this time is not decreasing with - increase network performance. With new fast hardware, it is possible - for an application to open a TCP connection, send data, get a reply, - and close the connection at a rate fast enough to use up all the - ports in less than 240 seconds. This usage pattern is generated by - people using TCP for something it was never intended to do--- - guaranteeing at-most-once semantics for remote procedure calls. - - The proposed solution is to embed an RPC protocol into TCP while - preserving backward compatibility. This is done by piggybacking the - request message on the SYN packet and the reply message on the SYN- - ACK packet. This approach suffers from one key problem: it reduces - the probability of a correct TCP implementation to near 0. The basic - problem has nothing to do with TCP, rather it is the lack of an - Internet request-reply protocol that guarantees at-most-once - semantics. - - We propose to solve this problem by the creation of a new protocol. - This has already been attempted with VMTP, but the size and - complexity of VMTP, coupled with the process currently required to - standardize a new protocol doomed it from the start. Instead of - solving the general problem, we propose to use Sprite RPC [7], a much - simpler protocol, as a means of off-loading inappropriate users from - TCP. - - - -O'Malley & Peterson [Page 14] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - The basic design would attempt to preserve as much of the TCP - interface as possible in order that current TCP (mis)users could be - switched to Sprite RPC without requiring code modification on their - part. A virtual protocol could be used to select the correct protocol - TCP or Sprite RPC if it exists on the other machine. 
A backward - compatible modification to TCP could be made which would simply - prevent it from grabbing all of the ports by refusing connections. - This would encourage TCP abusers to use the new protocol. - - Sprite RPC, which is designed for a local area network, has two - problems when extended into the Internet. First, it does not have a - usefully flow control algorithm. Second, it lacks the necessary - semantics to reliably tear down connections. The lack of a tear down - mechanism needs to be solved, but the flow control problem could be - dealt with in later iterations of the protocol as Internet blast - protocols are not yet well understood; for now, we could simple limit - the size of each message to 16k or 32k bytes. This might also be a - good place to use a decomposed version of Sprite RPC [2], which - exposes each of these features as separate protocols. This would - permit the quick change of algorithms, and once the protocol had - stabilized, a monolithic version could be constructed and distributed - to replace the decomposed version. - - In other words, the basic strategy is to introduce as simple of RPC - protocol as possible today, and later evolve this protocol to address - the known limitations. - - -3.3. Future Modifications - - The header prediction algorithm should be generalized so as to be - less sensitive to changes in the protocols header and algorithm. - There almost seems to be as much effort to make all modifications to - TCP backward compatible with header prediction as there is to make - them backward compatible with TCP. The question that needs to be - answered is: are there any changes we can made to TCP to make header - prediction easier, including the addition of information into the - header. In [6], the authors showed how one might generalize - optimistic blast from VMTP to almost any protocol that performs - fragmentation and reassembly. 
Generalizing header prediction so that - it scales with TCP modification would be step in the right direction. - - It is clear that an evolutionary change to increase the size of the - source and destination ports in the TCP header will eventually be - necessary. We also believe that TCP could be made significantly - simpler and more flexible through the elimination of the pseudo- - header. The solution to this problem is to simply add a length field - and the IP address of the destination to the TCP header. It has also - - - -O'Malley & Peterson [Page 15] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - been mentioned that better and simpler TCP connection establishment - algorithms would be useful. Some form of reliable record stream - protocol should be developed. Performing sliding window and flow - control over records rather than bytes would provide numerous - opportunities for optimizations and allow TCP to return to its - original purpose as a byte-stream protocol. Finally, it has become - clear to us that the current Internet congestion control strategy is - to use TCP for everything since it is the only protocol that supports - congestion control. One of the primary reasons many "new protocols" - are proposed as TCP options is that it is the only way to get at - TCP's congestion control. At some point, a TCP-independent congestion - control scheme must be implemented and one might then be able to - remove the existing congestion control from TCP and radically - simplify the protocol. - - -4. Discussion - - One obvious side effect of the changes we propose is to increase the - size of the TCP header. In some sense, this is inevitable; just about - every field in the header has been pushed to its limit by the radical - growth of the network. However, we have made very little effort to - make the minimal changes to solve the current problem. 
In fact, we - have tended to sacrifice header size in order to defer future changes - as long as possible. The problem with this is that one of TCP's - claims to fame is its efficiency at sending small one byte packets - over slow networks. Increasing the size of the TCP header will - inevitably result in some increase in overhead on small packets on - slow networks. Clark among others have stated that they see no - fundamental performance limitations that would prevent TCP from - supporting very high speed networks. This is true as far as it goes; - there seems to be a direct trade-off between TCP performance on high - speed networks and TCP performance on slow speed networks. The - dynamic range is simply too great to be optimally supported by one - protocol. Hence, in keeping around the old version of TCP we have - effectively split TCP into two protocols, one for high bandwidth - lines and the other for low bandwidth lines. - - Another potential argument is that all of the changes mentioned above - should be packaged together as a new version of TCP. This version - could be standardized and we could all go back to the status quo of - stable unchanging protocols. While to a certain extent this is - inevitable---there is a backlog of necessary TCP changes because of - the current logistical problems in modifying protocols---it is only - begs the question. The status quo is simply unacceptably static; - there will always be future changes to TCP. Evolutionary change will - also result in a better and more reliable TCP. Making small changes - and distributing them at regular intervals ensures that one change - - - -O'Malley & Peterson [Page 16] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - has actually been stabilized before the next has been made. It also - presents a more balanced workload to the protocol designer; rather - than designing one new protocol every 10 years he makes annual - protocol extensions. 
It will also eventually make protocol - distribution easier: the basic problem with protocol distribution now - is that it is done so rarely that no one knows how to do it and there - is no incentive to develop the infrastructure needed to perform the - task efficiently. While the first protocol distribution is almost - guaranteed to be a disaster, the problem will get easier with each - additional one. Finally, such a new TCP would have the same problems - as VMTP did; a radically new protocol presents a bigger target. - - The violation of backward compatibility in systems as complex as the - Internet is always a serious step. However, backward compatibility is - a technique, not a religion. Two facts are often overlooked when - backward compatibility gets out of hand. First, violating backward - compatibility is always a big win when you can get away with it. One - of the key advantages of RISC chips over CISC chips is simply that - they were not backward compatible with anything. Thus, they were not - bound by design decisions made when compilers were stupid and real - men programmed in assembler. Second, one is going to have to break - backward compatibility at some point anyway. Every system has some - headroom limitations which result in either stagnation (IBM mainframe - software) or even worse, accidental violations of backward - compatibility. - - Of course, the biggest problem with our approach is that it is not - compatible with the existing standardization process. We hope to be - able to design and distribute protocols in less time than it takes a - standards committee to agree on an acceptable meeting time. This is - inevitable because the basic problem with networking is the - standardization process. Over the last several years, there has been - a push in the research community for lightweight protocols, when in - fact what is needed are lightweight standards. 
Also note that we - have not proposed to implement some entirely new set of "superior" - communications protocols, we have simply proposed a system for making - necessary changes to the existing protocol suites fast enough to keep - up with the underlying change in the network. In fact, the first - standards organization that realizes that the primary impediment to - standardization is poor logistical support will probably win. - - -5. Conclusions - - The most important conclusion of this RFC is that protocol change - happens and is currently happening at a very respectable clip. While - all of the changes given as example in this document are from TCP, - there are many other protocols that require modification. In a more - - - -O'Malley & Peterson [Page 17] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - prosaic domain, the telephone company is running out of phone - numbers; they are being overrun by fax machines, modems, and cars. - The underlying cause of these problems seems to be an consistent - exponential increase almost all network metrics: number of hosts, - bandwidth, host performance, applications, and so on, combined with - an attempt to run the network with a static set of unchanging network - protocols. This has been shown to be impossible and one can almost - feel the pressure for protocol change building. We simply propose to - explicitly deal with the changes rather keep trying to hold back the - flood. - - Of almost equal importance is the observation that TCP is a protocol - and not a platform for implementing other protocols. Because of a - lack of any alternatives, TCP has become a de-facto platform for - implementing other protocols. It provides a vague standard interface - with the kernel, it runs on many machines, and has a well defined - distribution path. 
Otherwise sane people have proposed Bounded Time - TCP (an unreliable byte stream protocol), Simplex TCP (which supports - data in only one direction) and Multi-cast TCP (too horrible to even - consider). All of these protocols probably have their uses, but not - as TCP options. The fact that a large number of people are willing to - use TCP as a protocol implementation platform points to the desperate - need for a protocol independent platform. - - Finally, we point out that in our research we have found very little - difference in the actual technical work involved with the three - proposed methods of protocol modification. The amount of work - involved in a backward compatible change is often more than that - required for an evolutionary change or the creation of a new - protocol. Even the distribution costs seem to be identical. The - primary cost difference between the three approaches is the cost of - getting the modification approved. A protocol modification, no matter - how extensive or bizarre, seems to incur much less cost and risk. It - is time to stop changing the protocols to fit our current way of - thinking, and start changing our way of thinking to fit the - protocols. - - -6. References - - -[1] Cheriton D., "VMTP: Versatile Message Transaction Protocol", RFC - 1045, Stanford University, February 1988. - - -[2] Hutchinson, N., Peterson, L., Abbott, M., and S. O'Malley, "RPC in - the x-Kernel: Evaluating New Design Techniques", Proceedings of the - 12th Symposium on Operating System Principles, Pgs. 91-101, - - - -O'Malley & Peterson [Page 18] - -RFC 1263 TCP Extensions Considered Harmful October 1991 - - - December 1989. - - -[3] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM '88, - August 1988. - - -[4] Jacobson, V., and R. Braden, "TCP Extensions for Long-Delay Paths", - RFC 1072, LBL, ISI, October 1988. - - -[5] Jacobson, V., Braden, R., and L. Zhang, "TCP Extensions for High- - Speed Paths", RFC 1185, LBL, ISI, PARC, October 1990. 
- - -[6] O'Malley, S., Abbott, M., Hutchinson, N., and L. Peterson, "A Tran- - sparent Blast Facility", Journal of Internetworking, Vol. 1, No. - 2, Pgs. 57-75, December 1990. - - -[7] Welch, B., "The Sprite Remote Procedure Call System", UCB/CSD - 86/302, University of California at Berkeley, June 1988. - -7. Security Considerations - - Security issues are not discussed in this memo. - - -8. Authors' Addresses - - Larry L. Peterson - University of Arizona - Department of Computer Sciences - Tucson, AZ 85721 - - Phone: (602) 621-4231 - EMail: llp@cs.arizona.edu - - - Sean O'Malley - University of Arizona - Department of Computer Sciences - Tucson, AZ 85721 - - Phone: 602-621-8373 - EMail: sean@cs.arizona.edu - - - - - -O'Malley & Peterson [Page 19] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1323.txt b/kernel/picotcp/RFC/rfc1323.txt deleted file mode 100644 index 356eaa8..0000000 --- a/kernel/picotcp/RFC/rfc1323.txt +++ /dev/null @@ -1,2075 +0,0 @@ - - - - - - -Network Working Group V. Jacobson -Request for Comments: 1323 LBL -Obsoletes: RFC 1072, RFC 1185 R. Braden - ISI - D. Borman - Cray Research - May 1992 - - - TCP Extensions for High Performance - -Status of This Memo - - This RFC specifies an IAB standards track protocol for the Internet - community, and requests discussion and suggestions for improvements. - Please refer to the current edition of the "IAB Official Protocol - Standards" for the standardization state and status of this protocol. - Distribution of this memo is unlimited. - -Abstract - - This memo presents a set of TCP extensions to improve performance - over large bandwidth*delay product paths and to provide reliable - operation over very high-speed paths. It defines new TCP options for - scaled windows and timestamps, which are designed to provide - compatible interworking with TCP's that do not implement the - extensions. 
The timestamps are used for two distinct mechanisms: - RTTM (Round Trip Time Measurement) and PAWS (Protect Against Wrapped - Sequences). Selective acknowledgments are not included in this memo. - - This memo combines and supersedes RFC-1072 and RFC-1185, adding - additional clarification and more detailed specification. Appendix C - summarizes the changes from the earlier RFCs. - -TABLE OF CONTENTS - - 1. Introduction ................................................. 2 - 2. TCP Window Scale Option ...................................... 8 - 3. RTTM -- Round-Trip Time Measurement .......................... 11 - 4. PAWS -- Protect Against Wrapped Sequence Numbers ............. 17 - 5. Conclusions and Acknowledgments .............................. 25 - 6. References ................................................... 25 - APPENDIX A: Implementation Suggestions ........................... 27 - APPENDIX B: Duplicates from Earlier Connection Incarnations ...... 27 - APPENDIX C: Changes from RFC-1072, RFC-1185 ...................... 30 - APPENDIX D: Summary of Notation .................................. 31 - APPENDIX E: Event Processing ..................................... 32 - Security Considerations .......................................... 37 - - - -Jacobson, Braden, & Borman [Page 1] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - Authors' Addresses ............................................... 37 - -1. INTRODUCTION - - The TCP protocol [Postel81] was designed to operate reliably over - almost any transmission medium regardless of transmission rate, - delay, corruption, duplication, or reordering of segments. - Production TCP implementations currently adapt to transfer rates in - the range of 100 bps to 10**7 bps and round-trip delays in the range - 1 ms to 100 seconds. 
Recent work on TCP performance has shown that - TCP can work well over a variety of Internet paths, ranging from 800 - Mbit/sec I/O channels to 300 bit/sec dial-up modems [Jacobson88a]. - - The introduction of fiber optics is resulting in ever-higher - transmission speeds, and the fastest paths are moving out of the - domain for which TCP was originally engineered. This memo defines a - set of modest extensions to TCP to extend the domain of its - application to match this increasing network capability. It is based - upon and obsoletes RFC-1072 [Jacobson88b] and RFC-1185 [Jacobson90b]. - - There is no one-line answer to the question: "How fast can TCP go?". - There are two separate kinds of issues, performance and reliability, - and each depends upon different parameters. We discuss each in turn. - - 1.1 TCP Performance - - TCP performance depends not upon the transfer rate itself, but - rather upon the product of the transfer rate and the round-trip - delay. This "bandwidth*delay product" measures the amount of data - that would "fill the pipe"; it is the buffer space required at - sender and receiver to obtain maximum throughput on the TCP - connection over the path, i.e., the amount of unacknowledged data - that TCP must handle in order to keep the pipeline full. TCP - performance problems arise when the bandwidth*delay product is - large. We refer to an Internet path operating in this region as a - "long, fat pipe", and a network containing this path as an "LFN" - (pronounced "elephan(t)"). - - High-capacity packet satellite channels (e.g., DARPA's Wideband - Net) are LFN's. For example, a DS1-speed satellite channel has a - bandwidth*delay product of 10**6 bits or more; this corresponds to - 100 outstanding TCP segments of 1200 bytes each. Terrestrial - fiber-optical paths will also fall into the LFN class; for - example, a cross-country delay of 30 ms at a DS3 bandwidth - (45Mbps) also exceeds 10**6 bits. 
- - There are three fundamental performance problems with the current - TCP over LFN paths: - - - -Jacobson, Braden, & Borman [Page 2] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - (1) Window Size Limit - - The TCP header uses a 16 bit field to report the receive - window size to the sender. Therefore, the largest window - that can be used is 2**16 = 65K bytes. - - To circumvent this problem, Section 2 of this memo defines a - new TCP option, "Window Scale", to allow windows larger than - 2**16. This option defines an implicit scale factor, which - is used to multiply the window size value found in a TCP - header to obtain the true window size. - - (2) Recovery from Losses - - Packet losses in an LFN can have a catastrophic effect on - throughput. Until recently, properly-operating TCP - implementations would cause the data pipeline to drain with - every packet loss, and require a slow-start action to - recover. Recently, the Fast Retransmit and Fast Recovery - algorithms [Jacobson90c] have been introduced. Their - combined effect is to recover from one packet loss per - window, without draining the pipeline. However, more than - one packet loss per window typically results in a - retransmission timeout and the resulting pipeline drain and - slow start. - - Expanding the window size to match the capacity of an LFN - results in a corresponding increase of the probability of - more than one packet per window being dropped. This could - have a devastating effect upon the throughput of TCP over an - LFN. In addition, if a congestion control mechanism based - upon some form of random dropping were introduced into - gateways, randomly spaced packet drops would become common, - possible increasing the probability of dropping more than one - packet per window. - - To generalize the Fast Retransmit/Fast Recovery mechanism to - handle multiple packets dropped per window, selective - acknowledgments are required. 
Unlike the normal cumulative - acknowledgments of TCP, selective acknowledgments give the - sender a complete picture of which segments are queued at the - receiver and which have not yet arrived. Some evidence in - favor of selective acknowledgments has been published - [NBS85], and selective acknowledgments have been included in - a number of experimental Internet protocols -- VMTP - [Cheriton88], NETBLT [Clark87], and RDP [Velten84], and - proposed for OSI TP4 [NBS85]. However, in the non-LFN - regime, selective acknowledgments reduce the number of - - - -Jacobson, Braden, & Borman [Page 3] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - packets retransmitted but do not otherwise improve - performance, making their complexity of questionable value. - However, selective acknowledgments are expected to become - much more important in the LFN regime. - - RFC-1072 defined a new TCP "SACK" option to send a selective - acknowledgment. However, there are important technical - issues to be worked out concerning both the format and - semantics of the SACK option. Therefore, SACK has been - omitted from this package of extensions. It is hoped that - SACK can "catch up" during the standardization process. - - (3) Round-Trip Measurement - - TCP implements reliable data delivery by retransmitting - segments that are not acknowledged within some retransmission - timeout (RTO) interval. Accurate dynamic determination of an - appropriate RTO is essential to TCP performance. RTO is - determined by estimating the mean and variance of the - measured round-trip time (RTT), i.e., the time interval - between sending a segment and receiving an acknowledgment for - it [Jacobson88a]. - - Section 3 introduces a new TCP option, "Timestamps", and then - defines a mechanism using this option that allows nearly - every segment, including retransmissions, to be timed at - negligible computational cost.
We use the mnemonic RTTM - (Round Trip Time Measurement) for this mechanism, to - distinguish it from other uses of the Timestamps option. - - - 1.2 TCP Reliability - - Now we turn from performance to reliability. High transfer rate - enters TCP performance through the bandwidth*delay product. - However, high transfer rate alone can threaten TCP reliability by - violating the assumptions behind the TCP mechanism for duplicate - detection and sequencing. - - An especially serious kind of error may result from an accidental - reuse of TCP sequence numbers in data segments. Suppose that an - "old duplicate segment", e.g., a duplicate data segment that was - delayed in Internet queues, is delivered to the receiver at the - wrong moment, so that its sequence numbers falls somewhere within - the current window. There would be no checksum failure to warn of - the error, and the result could be an undetected corruption of the - data. Reception of an old duplicate ACK segment at the - transmitter could be only slightly less serious: it is likely to - - - -Jacobson, Braden, & Borman [Page 4] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - lock up the connection so that no further progress can be made, - forcing an RST on the connection. - - TCP reliability depends upon the existence of a bound on the - lifetime of a segment: the "Maximum Segment Lifetime" or MSL. An - MSL is generally required by any reliable transport protocol, - since every sequence number field must be finite, and therefore - any sequence number may eventually be reused. In the Internet - protocol suite, the MSL bound is enforced by an IP-layer - mechanism, the "Time-to-Live" or TTL field. - - Duplication of sequence numbers might happen in either of two - ways: - - (1) Sequence number wrap-around on the current connection - - A TCP sequence number contains 32 bits. 
At a high enough - transfer rate, the 32-bit sequence space may be "wrapped" - (cycled) within the time that a segment is delayed in queues. - - (2) Earlier incarnation of the connection - - Suppose that a connection terminates, either by a proper - close sequence or due to a host crash, and the same - connection (i.e., using the same pair of sockets) is - immediately reopened. A delayed segment from the terminated - connection could fall within the current window for the new - incarnation and be accepted as valid. - - Duplicates from earlier incarnations, Case (2), are avoided by - enforcing the current fixed MSL of the TCP spec, as explained in - Section 5.3 and Appendix B. However, case (1), avoiding the - reuse of sequence numbers within the same connection, requires an - MSL bound that depends upon the transfer rate, and at high enough - rates, a new mechanism is required. - - More specifically, if the maximum effective bandwidth at which TCP - is able to transmit over a particular path is B bytes per second, - then the following constraint must be satisfied for error-free - operation: - - 2**31 / B > MSL (secs) [1] - - The following table shows the value for Twrap = 2**31/B in - seconds, for some important values of the bandwidth B: - - - - - - -Jacobson, Braden, & Borman [Page 5] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - Network B*8 B Twrap - bits/sec bytes/sec secs - _______ _______ ______ ______ - - ARPANET 56kbps 7KBps 3*10**5 (~3.6 days) - - DS1 1.5Mbps 190KBps 10**4 (~3 hours) - - Ethernet 10Mbps 1.25MBps 1700 (~30 mins) - - DS3 45Mbps 5.6MBps 380 - - FDDI 100Mbps 12.5MBps 170 - - Gigabit 1Gbps 125MBps 17 - - - It is clear that wrap-around of the sequence space is not a - problem for 56kbps packet switching or even 10Mbps Ethernets. On - the other hand, at DS3 and FDDI speeds, Twrap is comparable to the - 2 minute MSL assumed by the TCP specification [Postel81]. 
Moving - towards gigabit speeds, Twrap becomes too small for reliable - enforcement by the Internet TTL mechanism. - - The 16-bit window field of TCP limits the effective bandwidth B to - 2**16/RTT, where RTT is the round-trip time in seconds - [McKenzie89]. If the RTT is large enough, this limits B to a - value that meets the constraint [1] for a large MSL value. For - example, consider a transcontinental backbone with an RTT of 60ms - (set by the laws of physics). With the bandwidth*delay product - limited to 64KB by the TCP window size, B is then limited to - 1.1MBps, no matter how high the theoretical transfer rate of the - path. This corresponds to cycling the sequence number space in - Twrap= 2000 secs, which is safe in today's Internet. - - It is important to understand that the culprit is not the larger - window but rather the high bandwidth. For example, consider a - (very large) FDDI LAN with a diameter of 10km. Using the speed of - light, we can compute the RTT across the ring as - (2*10**4)/(3*10**8) = 67 microseconds, and the delay*bandwidth - product is then 833 bytes. A TCP connection across this LAN using - a window of only 833 bytes will run at the full 100mbps and can - wrap the sequence space in about 3 minutes, very close to the MSL - of TCP. Thus, high speed alone can cause a reliability problem - with sequence number wrap-around, even without extended windows. - - Watson's Delta-T protocol [Watson81] includes network-layer - mechanisms for precise enforcement of an MSL. In contrast, the IP - - - -Jacobson, Braden, & Borman [Page 6] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - mechanism for MSL enforcement is loosely defined and even more - loosely implemented in the Internet. Therefore, it is unwise to - depend upon active enforcement of MSL for TCP connections, and it - is unrealistic to imagine setting MSL's smaller than the current - values (e.g., 120 seconds specified for TCP). 
- - A possible fix for the problem of cycling the sequence space would - be to increase the size of the TCP sequence number field. For - example, the sequence number field (and also the acknowledgment - field) could be expanded to 64 bits. This could be done either by - changing the TCP header or by means of an additional option. - - Section 4 presents a different mechanism, which we call PAWS - (Protect Against Wrapped Sequence numbers), to extend TCP - reliability to transfer rates well beyond the foreseeable upper - limit of network bandwidths. PAWS uses the TCP Timestamps option - defined in Section 3 to protect against old duplicates from the - same connection. - - 1.3 Using TCP options - - The extensions defined in this memo all use new TCP options. We - must address two possible issues concerning the use of TCP - options: (1) compatibility and (2) overhead. - - We must pay careful attention to compatibility, i.e., to - interoperation with existing implementations. The only TCP option - defined previously, MSS, may appear only on a SYN segment. Every - implementation should (and we expect that most will) ignore - unknown options on SYN segments. However, some buggy TCP - implementation might be crashed by the first appearance of an - option on a non-SYN segment. Therefore, for each of the - extensions defined below, TCP options will be sent on non-SYN - segments only when an exchange of options on the SYN segments has - indicated that both sides understand the extension. Furthermore, - an extension option will be sent in a <SYN,ACK> segment only if - the corresponding option was received in the initial <SYN> - segment. - - A question may be raised about the bandwidth and processing - overhead for TCP options. Those options that occur on SYN - segments are not likely to cause a performance concern. Opening a - TCP connection requires execution of significant special-case - code, and the processing of options is unlikely to increase that - cost significantly.
- - On the other hand, a Timestamps option may appear in any data or - ACK segment, adding 12 bytes to the 20-byte TCP header. We - - - -Jacobson, Braden, & Borman [Page 7] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - believe that the bandwidth saved by reducing unnecessary - retransmissions will more than pay for the extra header bandwidth. - - There is also an issue about the processing overhead for parsing - the variable byte-aligned format of options, particularly with a - RISC-architecture CPU. To meet this concern, Appendix A contains - a recommended layout of the options in TCP headers to achieve - reasonable data field alignment. In the spirit of Header - Prediction, a TCP can quickly test for this layout and if it is - verified then use a fast path. Hosts that use this canonical - layout will effectively use the options as a set of fixed-format - fields appended to the TCP header. However, to retain the - philosophical and protocol framework of TCP options, a TCP must be - prepared to parse an arbitrary options field, albeit with less - efficiency. - - Finally, we observe that most of the mechanisms defined in this - memo are important for LFN's and/or very high-speed networks. For - low-speed networks, it might be a performance optimization to NOT - use these mechanisms. A TCP vendor concerned about optimal - performance over low-speed paths might consider turning these - extensions off for low-speed paths, or allow a user or - installation manager to disable them. - - -2. TCP WINDOW SCALE OPTION - - 2.1 Introduction - - The window scale extension expands the definition of the TCP - window to 32 bits and then uses a scale factor to carry this 32- - bit value in the 16-bit Window field of the TCP header (SEG.WND in - RFC-793). The scale factor is carried in a new TCP option, Window - Scale. 
This option is sent only in a SYN segment (a segment with - the SYN bit on), hence the window scale is fixed in each direction - when a connection is opened. (Another design choice would be to - specify the window scale in every TCP segment. It would be - incorrect to send a window scale option only when the scale factor - changed, since a TCP option in an acknowledgement segment will not - be delivered reliably (unless the ACK happens to be piggy-backed - on data in the other direction). Fixing the scale when the - connection is opened has the advantage of lower overhead but the - disadvantage that the scale factor cannot be changed during the - connection.) - - The maximum receive window, and therefore the scale factor, is - determined by the maximum receive buffer space. In a typical - modern implementation, this maximum buffer space is set by default - - - -Jacobson, Braden, & Borman [Page 8] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - but can be overridden by a user program before a TCP connection is - opened. This determines the scale factor, and therefore no new - user interface is needed for window scaling. - - 2.2 Window Scale Option - - The three-byte Window Scale option may be sent in a SYN segment by - a TCP. It has two purposes: (1) indicate that the TCP is prepared - to do both send and receive window scaling, and (2) communicate a - scale factor to be applied to its receive window. Thus, a TCP - that is prepared to scale windows should send the option, even if - its own scale factor is 1. The scale factor is limited to a power - of two and encoded logarithmically, so it may be implemented by - binary shift operations. 
- - - TCP Window Scale Option (WSopt): - - Kind: 3 Length: 3 bytes - - +---------+---------+---------+ - | Kind=3 |Length=3 |shift.cnt| - +---------+---------+---------+ - - - This option is an offer, not a promise; both sides must send - Window Scale options in their SYN segments to enable window - scaling in either direction. If window scaling is enabled, - then the TCP that sent this option will right-shift its true - receive-window values by 'shift.cnt' bits for transmission in - SEG.WND. The value 'shift.cnt' may be zero (offering to scale, - while applying a scale factor of 1 to the receive window). - - This option may be sent in an initial <SYN> segment (i.e., a - segment with the SYN bit on and the ACK bit off). It may also - be sent in a <SYN,ACK> segment, but only if a Window Scale op- - tion was received in the initial <SYN> segment. A Window Scale - option in a segment without a SYN bit should be ignored. - - The Window field in a SYN (i.e., a <SYN> or <SYN,ACK>) segment - itself is never scaled. - - 2.3 Using the Window Scale Option - - A model implementation of window scaling is as follows, using the - notation of RFC-793 [Postel81]: - - * All windows are treated as 32-bit quantities for storage in - - - -Jacobson, Braden, & Borman [Page 9] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - the connection control block and for local calculations. - This includes the send-window (SND.WND) and the receive- - window (RCV.WND) values, as well as the congestion window. - - * The connection state is augmented by two window shift counts, - Snd.Wind.Scale and Rcv.Wind.Scale, to be applied to the - incoming and outgoing window fields, respectively. - - * If a TCP receives a <SYN> segment containing a Window Scale - option, it sends its own Window Scale option in the - <SYN,ACK> segment. - - * The Window Scale option is sent with shift.cnt = R, where R - is the value that the TCP would like to use for its receive - window.
- - * Upon receiving a SYN segment with a Window Scale option - containing shift.cnt = S, a TCP sets Snd.Wind.Scale to S and - sets Rcv.Wind.Scale to R; otherwise, it sets both - Snd.Wind.Scale and Rcv.Wind.Scale to zero. - - * The window field (SEG.WND) in the header of every incoming - segment, with the exception of SYN segments, is left-shifted - by Snd.Wind.Scale bits before updating SND.WND: - - SND.WND = SEG.WND << Snd.Wind.Scale - - (assuming the other conditions of RFC793 are met, and using - the "C" notation "<<" for left-shift). - - * The window field (SEG.WND) of every outgoing segment, with - the exception of SYN segments, is right-shifted by - Rcv.Wind.Scale bits: - - SEG.WND = RCV.WND >> Rcv.Wind.Scale. - - - TCP determines if a data segment is "old" or "new" by testing - whether its sequence number is within 2**31 bytes of the left edge - of the window, and if it is not, discarding the data as "old". To - insure that new data is never mistakenly considered old and vice- - versa, the left edge of the sender's window has to be at most - 2**31 away from the right edge of the receiver's window. - Similarly with the sender's right edge and receiver's left edge. - Since the right and left edges of either the sender's or - receiver's window differ by the window size, and since the sender - and receiver windows can be out of phase by at most the window - size, the above constraints imply that 2 * the max window size - - - -Jacobson, Braden, & Borman [Page 10] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - must be less than 2**31, or - - max window < 2**30 - - Since the max window is 2**S (where S is the scaling shift count) - times at most 2**16 - 1 (the maximum unscaled window), the maximum - window is guaranteed to be < 2**30 if S <= 14. Thus, the shift - count must be limited to 14 (which allows windows of 2**30 = 1 - Gbyte).
If a Window Scale option is received with a shift.cnt - value exceeding 14, the TCP should log the error but use 14 - instead of the specified value. - - The scale factor applies only to the Window field as transmitted - in the TCP header; each TCP using extended windows will maintain - the window values locally as 32-bit numbers. For example, the - "congestion window" computed by Slow Start and Congestion - Avoidance is not affected by the scale factor, so window scaling - will not introduce quantization into the congestion window. - -3. RTTM: ROUND-TRIP TIME MEASUREMENT - - 3.1 Introduction - - Accurate and current RTT estimates are necessary to adapt to - changing traffic conditions and to avoid an instability known as - "congestion collapse" [Nagle84] in a busy network. However, - accurate measurement of RTT may be difficult both in theory and in - implementation. - - Many TCP implementations base their RTT measurements upon a sample - of only one packet per window. While this yields an adequate - approximation to the RTT for small windows, it results in an - unacceptably poor RTT estimate for an LFN. If we look at RTT - estimation as a signal processing problem (which it is), a data - signal at some frequency, the packet rate, is being sampled at a - lower frequency, the window rate. This lower sampling frequency - violates Nyquist's criteria and may therefore introduce "aliasing" - artifacts into the estimated RTT [Hamming77]. - - A good RTT estimator with a conservative retransmission timeout - calculation can tolerate aliasing when the sampling frequency is - "close" to the data frequency. For example, with a window of 8 - packets, the sample rate is 1/8 the data frequency -- less than an - order of magnitude different. However, when the window is tens or - hundreds of packets, the RTT estimator may be seriously in error, - resulting in spurious retransmissions. - - If there are dropped packets, the problem becomes worse. 
Zhang - - - -Jacobson, Braden, & Borman [Page 11] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - [Zhang86], Jain [Jain86] and Karn [Karn87] have shown that it is - not possible to accumulate reliable RTT estimates if retransmitted - segments are included in the estimate. Since a full window of - data will have been transmitted prior to a retransmission, all of - the segments in that window will have to be ACKed before the next - RTT sample can be taken. This means at least an additional - window's worth of time between RTT measurements and, as the error - rate approaches one per window of data (e.g., 10**-6 errors per - bit for the Wideband satellite network), it becomes effectively - impossible to obtain a valid RTT measurement. - - A solution to these problems, which actually simplifies the sender - substantially, is as follows: using TCP options, the sender places - a timestamp in each data segment, and the receiver reflects these - timestamps back in ACK segments. Then a single subtract gives the - sender an accurate RTT measurement for every ACK segment (which - will correspond to every other data segment, with a sensible - receiver). We call this the RTTM (Round-Trip Time Measurement) - mechanism. - - It is vitally important to use the RTTM mechanism with big - windows; otherwise, the door is opened to some dangerous - instabilities due to aliasing. Furthermore, the option is - probably useful for all TCP's, since it simplifies the sender. - - 3.2 TCP Timestamps Option - - TCP is a symmetric protocol, allowing data to be sent at any time - in either direction, and therefore timestamp echoing may occur in - either direction. For simplicity and symmetry, we specify that - timestamps always be sent and echoed in both directions. For - efficiency, we combine the timestamp and timestamp reply fields - into a single TCP Timestamps Option. 
- - - - - - - - - - - - - - - - - - -Jacobson, Braden, & Borman [Page 12] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - TCP Timestamps Option (TSopt): - - Kind: 8 - - Length: 10 bytes - - +-------+-------+---------------------+---------------------+ - |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)| - +-------+-------+---------------------+---------------------+ - 1 1 4 4 - - The Timestamps option carries two four-byte timestamp fields. - The Timestamp Value field (TSval) contains the current value of - the timestamp clock of the TCP sending the option. - - The Timestamp Echo Reply field (TSecr) is only valid if the ACK - bit is set in the TCP header; if it is valid, it echos a times- - tamp value that was sent by the remote TCP in the TSval field - of a Timestamps option. When TSecr is not valid, its value - must be zero. The TSecr value will generally be from the most - recent Timestamp option that was received; however, there are - exceptions that are explained below. - - A TCP may send the Timestamps option (TSopt) in an initial - <SYN> segment (i.e., segment containing a SYN bit and no ACK - bit), and may send a TSopt in other segments only if it re- - ceived a TSopt in the initial <SYN> segment for the connection. - - 3.3 The RTTM Mechanism - - The timestamp value to be sent in TSval is to be obtained from a - (virtual) clock that we call the "timestamp clock". Its values - must be at least approximately proportional to real time, in order - to measure actual RTT. - - The following example illustrates a one-way data flow with - segments arriving in sequence without loss. Here A, B, C... - represent data blocks occupying successive blocks of sequence - numbers, and ACK(A),... represent the corresponding cumulative - acknowledgments. The two timestamp fields of the Timestamps - option are shown symbolically as <TSval=x,TSecr=y>. Each TSecr - field contains the value most recently received in a TSval field.
- - - - - - - - - -Jacobson, Braden, & Borman [Page 13] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - - TCP A TCP B - - <A,TSval=1,TSecr=120> ------> - - <---- <ACK(A),TSval=127,TSecr=1> - - <B,TSval=5,TSecr=127> ------> - - <---- <ACK(B),TSval=131,TSecr=5> - - . . . . . . . . . . . . . . . . . . . . . . - - <C,TSval=65,TSecr=131> ------> - - <---- <ACK(C),TSval=191,TSecr=65> - - (etc) - - - The dotted line marks a pause (60 time units long) in which A had - nothing to send. Note that this pause inflates the RTT which B - could infer from receiving TSecr=131 in data segment C. Thus, in - one-way data flows, RTTM in the reverse direction measures a value - that is inflated by gaps in sending data. However, the following - rule prevents a resulting inflation of the measured RTT: - - A TSecr value received in a segment is used to update the - averaged RTT measurement only if the segment acknowledges - some new data, i.e., only if it advances the left edge of the - send window. - - Since TCP B is not sending data, the data segment C does not - acknowledge any new data when it arrives at B. Thus, the inflated - RTTM measurement is not used to update B's RTTM measurement. - - 3.4 Which Timestamp to Echo - - If more than one Timestamps option is received before a reply - segment is sent, the TCP must choose only one of the TSvals to - echo, ignoring the others. To minimize the state kept in the - receiver (i.e., the number of unprocessed TSvals), the receiver - should be required to retain at most one timestamp in the - connection control block. - - - - - - - -Jacobson, Braden, & Borman [Page 14] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - There are three situations to consider: - - (A) Delayed ACKs. - - Many TCP's acknowledge only every Kth segment out of a group - of segments arriving within a short time interval; this - policy is known generally as "delayed ACKs". The data-sender - TCP must measure the effective RTT, including the additional - time due to delayed ACKs, or else it will retransmit - unnecessarily.
Thus, when delayed ACKs are in use, the - receiver should reply with the TSval field from the earliest - unacknowledged segment. - - (B) A hole in the sequence space (segment(s) have been lost). - - The sender will continue sending until the window is filled, - and the receiver may be generating ACKs as these out-of-order - segments arrive (e.g., to aid "fast retransmit"). - - The lost segment is probably a sign of congestion, and in - that situation the sender should be conservative about - retransmission. Furthermore, it is better to overestimate - than underestimate the RTT. An ACK for an out-of-order - segment should therefore contain the timestamp from the most - recent segment that advanced the window. - - The same situation occurs if segments are re-ordered by the - network. - - (C) A filled hole in the sequence space. - - The segment that fills the hole represents the most recent - measurement of the network characteristics. On the other - hand, an RTT computed from an earlier segment would probably - include the sender's retransmit time-out, badly biasing the - sender's average RTT estimate. Thus, the timestamp from the - latest segment (which filled the hole) must be echoed. - - An algorithm that covers all three cases is described in the - following rules for Timestamps option processing on a synchronized - connection: - - (1) The connection state is augmented with two 32-bit slots: - TS.Recent holds a timestamp to be echoed in TSecr whenever a - segment is sent, and Last.ACK.sent holds the ACK field from - the last segment sent. Last.ACK.sent will equal RCV.NXT - except when ACKs have been delayed. - - - - -Jacobson, Braden, & Borman [Page 15] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - (2) If Last.ACK.sent falls within the range of sequence numbers - of an incoming segment: - - SEG.SEQ <= Last.ACK.sent < SEG.SEQ + SEG.LEN - - then the TSval from the segment is copied to TS.Recent; - otherwise, the TSval is ignored. 
- - (3) When a TSopt is sent, its TSecr field is set to the current - TS.Recent value. - - The following examples illustrate these rules. Here A, B, C... - represent data segments occupying successive blocks of sequence - numbers, and ACK(A),... represent the corresponding - acknowledgment segments. Note that ACK(A) has the same sequence - number as B. We show only one direction of timestamp echoing, for - clarity. - - - o Packets arrive in sequence, and some of the ACKs are delayed. - - By Case (A), the timestamp from the oldest unacknowledged - segment is echoed. - - TS.Recent - <A, TSval=1> -------------------> - 1 - <B, TSval=2> -------------------> - 1 - <C, TSval=3> -------------------> - 1 - <---- <ACK(C), TSecr=1> - (etc) - - o Packets arrive out of order, and every packet is - acknowledged. - - By Case (B), the timestamp from the last segment that - advanced the left window edge is echoed, until the missing - segment arrives; it is echoed according to Case (C). The - same sequence would occur if segments B and D were lost and - retransmitted. - - - - - - - - - -Jacobson, Braden, & Borman [Page 16] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - TS.Recent - <A, TSval=1> -------------------> - 1 - <---- <ACK(A), TSecr=1> - 1 - <C, TSval=3> -------------------> - 1 - <---- <ACK(A), TSecr=1> - 1 - <B, TSval=2> -------------------> - 2 - <---- <ACK(C), TSecr=2> - 2 - <E, TSval=5> -------------------> - 2 - <---- <ACK(C), TSecr=2> - 2 - <D, TSval=4> -------------------> - 4 - <---- <ACK(E), TSecr=4> - (etc) - - - - -4. PAWS: PROTECT AGAINST WRAPPED SEQUENCE NUMBERS - - 4.1 Introduction - - Section 4.2 describes a simple mechanism to reject old duplicate - segments that might corrupt an open TCP connection; we call this - mechanism PAWS (Protect Against Wrapped Sequence numbers). PAWS - operates within a single TCP connection, using state that is saved - in the connection control block. Section 4.3 and Appendix C - discuss the implications of the PAWS mechanism for avoiding old - duplicates from previous incarnations of the same connection.
- - 4.2 The PAWS Mechanism - - PAWS uses the same TCP Timestamps option as the RTTM mechanism - described earlier, and assumes that every received TCP segment - (including data and ACK segments) contains a timestamp SEG.TSval - whose values are monotone non-decreasing in time. The basic idea - is that a segment can be discarded as an old duplicate if it is - received with a timestamp SEG.TSval less than some timestamp - recently received on this connection. - - In both the PAWS and the RTTM mechanism, the "timestamps" are 32- - - - -Jacobson, Braden, & Borman [Page 17] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - bit unsigned integers in a modular 32-bit space. Thus, "less - than" is defined the same way it is for TCP sequence numbers, and - the same implementation techniques apply. If s and t are - timestamp values, s < t if 0 < (t - s) < 2**31, computed in - unsigned 32-bit arithmetic. - - The choice of incoming timestamps to be saved for this comparison - must guarantee a value that is monotone increasing. For example, - we might save the timestamp from the segment that last advanced - the left edge of the receive window, i.e., the most recent in- - sequence segment. Instead, we choose the value TS.Recent - introduced in Section 3.4 for the RTTM mechanism, since using a - common value for both PAWS and RTTM simplifies the implementation - of both. As Section 3.4 explained, TS.Recent differs from the - timestamp from the last in-sequence segment only in the case of - delayed ACKs, and therefore by less than one window. Either - choice will therefore protect against sequence number wrap-around. - - RTTM was specified in a symmetrical manner, so that TSval - timestamps are carried in both data and ACK segments and are - echoed in TSecr fields carried in returning ACK or data segments. - PAWS submits all incoming segments to the same test, and therefore - protects against duplicate ACK segments as well as data segments. 
- (An alternative un-symmetric algorithm would protect against old - duplicate ACKs: the sender of data would reject incoming ACK - segments whose TSecr values were less than the TSecr saved from - the last segment whose ACK field advanced the left edge of the - send window. This algorithm was deemed to lack economy of - mechanism and symmetry.) - - TSval timestamps sent on {SYN} and {SYN,ACK} segments are used to - initialize PAWS. PAWS protects against old duplicate non-SYN - segments, and duplicate SYN segments received while there is a - synchronized connection. Duplicate {SYN} and {SYN,ACK} segments - received when there is no connection will be discarded by the - normal 3-way handshake and sequence number checks of TCP. - - It is recommended that RST segments NOT carry timestamps, and that - RST segments be acceptable regardless of their timestamp. Old - duplicate RST segments should be exceedingly unlikely, and their - cleanup function should take precedence over timestamps. - - 4.2.1 Basic PAWS Algorithm - - The PAWS algorithm requires the following processing to be - performed on all incoming segments for a synchronized - connection: - - - - -Jacobson, Braden, & Borman [Page 18] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - R1) If there is a Timestamps option in the arriving segment - and SEG.TSval < TS.Recent and if TS.Recent is valid (see - later discussion), then treat the arriving segment as not - acceptable: - - Send an acknowledgement in reply as specified in - RFC-793 page 69 and drop the segment. - - Note: it is necessary to send an ACK segment in order - to retain TCP's mechanisms for detecting and - recovering from half-open connections. For example, - see Figure 10 of RFC-793. - - R2) If the segment is outside the window, reject it (normal - TCP processing) - - R3) If an arriving segment satisfies: SEG.SEQ <= Last.ACK.sent - (see Section 3.4), then record its timestamp in TS.Recent. 
- - R4) If an arriving segment is in-sequence (i.e., at the left - window edge), then accept it normally. - - R5) Otherwise, treat the segment as a normal in-window, out- - of-sequence TCP segment (e.g., queue it for later delivery - to the user). - - Steps R2, R4, and R5 are the normal TCP processing steps - specified by RFC-793. - - It is important to note that the timestamp is checked only when - a segment first arrives at the receiver, regardless of whether - it is in-sequence or it must be queued for later delivery. - Consider the following example. - - Suppose the segment sequence: A.1, B.1, C.1, ..., Z.1 has - been sent, where the letter indicates the sequence number - and the digit represents the timestamp. Suppose also that - segment B.1 has been lost. The timestamp in TS.Recent is - 1 (from A.1), so C.1, ..., Z.1 are considered acceptable - and are queued. When B is retransmitted as segment B.2 - (using the latest timestamp), it fills the hole and causes - all the segments through Z to be acknowledged and passed - to the user. The timestamps of the queued segments are - *not* inspected again at this time, since they have - already been accepted. When B.2 is accepted, TS.Recent is - set to 2. - - This rule allows reasonable performance under loss. A full - - - -Jacobson, Braden, & Borman [Page 19] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - window of data is in transit at all times, and after a loss a - full window less one packet will show up out-of-sequence to be - queued at the receiver (e.g., up to ~2**30 bytes of data); the - timestamp option must not result in discarding this data. - - In certain unlikely circumstances, the algorithm of rules R1-R4 - could lead to discarding some segments unnecessarily, as shown - in the following example: - - Suppose again that segments: A.1, B.1, C.1, ..., Z.1 have - been sent in sequence and that segment B.1 has been lost. - Furthermore, suppose delivery of some of C.1, ...
Z.1 is - delayed until AFTER the retransmission B.2 arrives at the - receiver. These delayed segments will be discarded - unnecessarily when they do arrive, since their timestamps - are now out of date. - - This case is very unlikely to occur. If the retransmission was - triggered by a timeout, some of the segments C.1, ... Z.1 must - have been delayed longer than the RTO time. This is presumably - an unlikely event, or there would be many spurious timeouts and - retransmissions. If B's retransmission was triggered by the - "fast retransmit" algorithm, i.e., by duplicate ACKs, then the - queued segments that caused these ACKs must have been received - already. - - Even if a segment were delayed past the RTO, the Fast - Retransmit mechanism [Jacobson90c] will cause the delayed - packets to be retransmitted at the same time as B.2, avoiding - an extra RTT and therefore causing a very small performance - penalty. - - We know of no case with a significant probability of occurrence - in which timestamps will cause performance degradation by - unnecessarily discarding segments. - - 4.2.2 Timestamp Clock - - It is important to understand that the PAWS algorithm does not - require clock synchronization between sender and receiver. The - sender's timestamp clock is used to stamp the segments, and the - sender uses the echoed timestamp to measure RTT's. However, - the receiver treats the timestamp as simply a monotone- - increasing serial number, without any necessary connection to - its clock. From the receiver's viewpoint, the timestamp is - acting as a logical extension of the high-order bits of the - sequence number. - - - - -Jacobson, Braden, & Borman [Page 20] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - The receiver algorithm does place some requirements on the - frequency of the timestamp clock. - - (a) The timestamp clock must not be "too slow". - - It must tick at least once for each 2**31 bytes sent. 
In - fact, in order to be useful to the sender for round trip - timing, the clock should tick at least once per window's - worth of data, and even with the RFC-1072 window - extension, 2**31 bytes must be at least two windows. - - To make this more quantitative, any clock faster than 1 - tick/sec will reject old duplicate segments for link - speeds of ~8 Gbps. A 1ms timestamp clock will work at - link speeds up to 8 Tbps (8*10**12) bps! - - (b) The timestamp clock must not be "too fast". - - Its recycling time must be greater than MSL seconds. - Since the clock (timestamp) is 32 bits and the worst-case - MSL is 255 seconds, the maximum acceptable clock frequency - is one tick every 59 ns. - - However, it is desirable to establish a much longer - recycle period, in order to handle outdated timestamps on - idle connections (see Section 4.2.3), and to relax the MSL - requirement for preventing sequence number wrap-around. - With a 1 ms timestamp clock, the 32-bit timestamp will - wrap its sign bit in 24.8 days. Thus, it will reject old - duplicates on the same connection if MSL is 24.8 days or - less. This appears to be a very safe figure; an MSL of - 24.8 days or longer can probably be assumed by the gateway - system without requiring precise MSL enforcement by the - TTL value in the IP layer. - - Based upon these considerations, we choose a timestamp clock - frequency in the range 1 ms to 1 sec per tick. This range also - matches the requirements of the RTTM mechanism, which does not - need much more resolution than the granularity of the - retransmit timer, e.g., tens or hundreds of milliseconds. - - The PAWS mechanism also puts a strong monotonicity requirement - on the sender's timestamp clock. The method of implementation - of the timestamp clock to meet this requirement depends upon - the system hardware and software. - - * Some hosts have a hardware clock that is guaranteed to be - monotonic between hardware resets. 
- - - -Jacobson, Braden, & Borman [Page 21] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - * A clock interrupt may be used to simply increment a binary - integer by 1 periodically. - - * The timestamp clock may be derived from a system clock - that is subject to being abruptly changed, by adding a - variable offset value. This offset is initialized to - zero. When a new timestamp clock value is needed, the - offset can be adjusted as necessary to make the new value - equal to or larger than the previous value (which was - saved for this purpose). - - - 4.2.3 Outdated Timestamps - - If a connection remains idle long enough for the timestamp - clock of the other TCP to wrap its sign bit, then the value - saved in TS.Recent will become too old; as a result, the PAWS - mechanism will cause all subsequent segments to be rejected, - freezing the connection (until the timestamp clock wraps its - sign bit again). - - With the chosen range of timestamp clock frequencies (1 sec to - 1 ms), the time to wrap the sign bit will be between 24.8 days - and 24800 days. A TCP connection that is idle for more than 24 - days and then comes to life is exceedingly unusual. However, - it is undesirable in principle to place any limitation on TCP - connection lifetimes. - - We therefore require that an implementation of PAWS include a - mechanism to "invalidate" the TS.Recent value when a connection - is idle for more than 24 days. (An alternative solution to the - problem of outdated timestamps would be to send keepalive - segments at a very low rate, but still more often than the - wrap-around time for timestamps, e.g., once a day. This would - impose negligible overhead. However, the TCP specification has - never included keepalives, so the solution based upon - invalidation was chosen.) - - Note that a TCP does not know the frequency, and therefore, the - wraparound time, of the other TCP, so it must assume the worst. 
- The validity of TS.Recent needs to be checked only if the basic - PAWS timestamp check fails, i.e., only if SEG.TSval < - TS.Recent. If TS.Recent is found to be invalid, then the - segment is accepted, regardless of the failure of the timestamp - check, and rule R3 updates TS.Recent with the TSval from the - new segment. - - To detect how long the connection has been idle, the TCP may - - - -Jacobson, Braden, & Borman [Page 22] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - update a clock or timestamp value associated with the - connection whenever TS.Recent is updated, for example. The - details will be implementation-dependent. - - 4.2.4 Header Prediction - - "Header prediction" [Jacobson90a] is a high-performance - transport protocol implementation technique that is most - important for high-speed links. This technique optimizes the - code for the most common case, receiving a segment correctly - and in order. Using header prediction, the receiver asks the - question, "Is this segment the next in sequence?" This - question can be answered in fewer machine instructions than the - question, "Is this segment within the window?" - - Adding header prediction to our timestamp procedure leads to - the following recommended sequence for processing an arriving - TCP segment: - - H1) Check timestamp (same as step R1 above) - - H2) Do header prediction: if segment is next in sequence and - if there are no special conditions requiring additional - processing, accept the segment, record its timestamp, and - skip H3. - - H3) Process the segment normally, as specified in RFC-793. - This includes dropping segments that are outside the win- - dow and possibly sending acknowledgments, and queueing - in-window, out-of-sequence segments. - - Another possibility would be to interchange steps H1 and H2, - i.e., to perform the header prediction step H2 FIRST, and - perform H1 and H3 only when header prediction fails. 
This - could be a performance improvement, since the timestamp check - in step H1 is very unlikely to fail, and it requires interval - arithmetic on a finite field, a relatively expensive operation. - To perform this check on every single segment is contrary to - the philosophy of header prediction. We believe that this - change might reduce CPU time for TCP protocol processing by up - to 5-10% on high-speed networks. - - However, putting H2 first would create a hazard: a segment from - 2**32 bytes in the past might arrive at exactly the wrong time - and be accepted mistakenly by the header-prediction step. The - following reasoning has been introduced [Jacobson90b] to show - that the probability of this failure is negligible. - - - - -Jacobson, Braden, & Borman [Page 23] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - If all segments are equally likely to show up as old - duplicates, then the probability of an old duplicate - exactly matching the left window edge is the maximum - segment size (MSS) divided by the size of the sequence - space. This ratio must be less than 2**-16, since MSS - must be < 2**16; for example, it will be (2**12)/(2**32) = - 2**-20 for an FDDI link. However, the older a segment is, - the less likely it is to be retained in the Internet, and - under any reasonable model of segment lifetime the - probability of an old duplicate exactly at the left window - edge must be much smaller than 2**-16. - - The 16 bit TCP checksum also allows a basic unreliability - of one part in 2**16. A protocol mechanism whose - reliability exceeds the reliability of the TCP checksum - should be considered "good enough", i.e., it won't - contribute significantly to the overall error rate. We - therefore believe we can ignore the problem of an old - duplicate being accepted by doing header prediction before - checking the timestamp. 
- - However, this probabilistic argument is not universally - accepted, and the consensus at present is that the performance - gain does not justify the hazard in the general case. It is - therefore recommended that H2 follow H1. - - 4.3. Duplicates from Earlier Incarnations of Connection - - The PAWS mechanism protects against errors due to sequence number - wrap-around on high-speed connection. Segments from an earlier - incarnation of the same connection are also a potential cause of - old duplicate errors. In both cases, the TCP mechanisms to - prevent such errors depend upon the enforcement of a maximum - segment lifetime (MSL) by the Internet (IP) layer (see Appendix of - RFC-1185 for a detailed discussion). Unlike the case of sequence - space wrap-around, the MSL required to prevent old duplicate - errors from earlier incarnations does not depend upon the transfer - rate. If the IP layer enforces the recommended 2 minute MSL of - TCP, and if the TCP rules are followed, TCP connections will be - safe from earlier incarnations, no matter how high the network - speed. Thus, the PAWS mechanism is not required for this case. - - We may still ask whether the PAWS mechanism can provide additional - security against old duplicates from earlier connections, allowing - us to relax the enforcement of MSL by the IP layer. Appendix B - explores this question, showing that further assumptions and/or - mechanisms are required, beyond those of PAWS. This is not part - of the current extension. - - - -Jacobson, Braden, & Borman [Page 24] - -RFC 1323 TCP Extensions for High Performance May 1992 - - -5. CONCLUSIONS AND ACKNOWLEDGMENTS - - This memo presented a set of extensions to TCP to provide efficient - operation over large-bandwidth*delay-product paths and reliable - operation over very high-speed paths. These extensions are designed - to provide compatible interworking with TCP's that do not implement - the extensions. 
- - These mechanisms are implemented using new TCP options for scaled - windows and timestamps. The timestamps are used for two distinct - mechanisms: RTTM (Round Trip Time Measurement) and PAWS (Protect - Against Wrapped Sequences). - - The Window Scale option was originally suggested by Mike St. Johns of - USAF/DCA. The present form of the option was suggested by Mike - Karels of UC Berkeley in response to a more cumbersome scheme defined - by Van Jacobson. Lixia Zhang helped formulate the PAWS mechanism - description in RFC-1185. - - Finally, much of this work originated as the result of discussions - within the End-to-End Task Force on the theoretical limitations of - transport protocols in general and TCP in particular. More recently, - task force members and others on the end2end-interest list have made - valuable contributions by pointing out flaws in the algorithms and - the documentation. The authors are grateful for all these - contributions. - -6. REFERENCES - - [Clark87] Clark, D., Lambert, M., and L. Zhang, "NETBLT: A Bulk - Data Transfer Protocol", RFC 998, MIT, March 1987. - - [Garlick77] Garlick, L., R. Rom, and J. Postel, "Issues in - Reliable Host-to-Host Protocols", Proc. Second Berkeley Workshop - on Distributed Data Management and Computer Networks, May 1977. - - [Hamming77] Hamming, R., "Digital Filters", ISBN 0-13-212571-4, - Prentice Hall, Englewood Cliffs, N.J., 1977. - - [Cheriton88] Cheriton, D., "VMTP: Versatile Message Transaction - Protocol", RFC 1045, Stanford University, February 1988. - - [Jacobson88a] Jacobson, V., "Congestion Avoidance and Control", - SIGCOMM '88, Stanford, CA., August 1988. - - [Jacobson88b] Jacobson, V., and R. Braden, "TCP Extensions for - Long-Delay Paths", RFC-1072, LBL and USC/Information Sciences - Institute, October 1988.
- - - -Jacobson, Braden, & Borman [Page 25] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - [Jacobson90a] Jacobson, V., "4BSD Header Prediction", ACM - Computer Communication Review, April 1990. - - [Jacobson90b] Jacobson, V., Braden, R., and Zhang, L., "TCP - Extension for High-Speed Paths", RFC-1185, LBL and USC/Information - Sciences Institute, October 1990. - - [Jacobson90c] Jacobson, V., "Modified TCP congestion avoidance - algorithm", Message to end2end-interest mailing list, April 1990. - - [Jain86] Jain, R., "Divergence of Timeout Algorithms for Packet - Retransmissions", Proc. Fifth Phoenix Conf. on Comp. and Comm., - Scottsdale, Arizona, March 1986. - - [Karn87] Karn, P. and C. Partridge, "Estimating Round-Trip Times - in Reliable Transport Protocols", Proc. SIGCOMM '87, Stowe, VT, - August 1987. - - [McKenzie89] McKenzie, A., "A Problem with the TCP Big Window - Option", RFC 1110, BBN STC, August 1989. - - [Nagle84] Nagle, J., "Congestion Control in IP/TCP - Internetworks", RFC 896, FACC, January 1984. - - [NBS85] Colella, R., Aronoff, R., and K. Mills, "Performance - Improvements for ISO Transport", Ninth Data Comm Symposium, - published in ACM SIGCOMM Comp Comm Review, vol. 15, no. 5, - September 1985. - - [Postel81] Postel, J., "Transmission Control Protocol - DARPA - Internet Program Protocol Specification", RFC 793, DARPA, - September 1981. - - [Velten84] Velten, D., Hinden, R., and J. Sax, "Reliable Data - Protocol", RFC 908, BBN, July 1984. - - [Watson81] Watson, R., "Timer-based Mechanisms in Reliable - Transport Protocol Connection Management", Computer Networks, Vol. - 5, 1981. - - [Zhang86] Zhang, L., "Why TCP Timers Don't Work Well", Proc. - SIGCOMM '86, Stowe, Vt., August 1986. 
- - - - - - - - - -Jacobson, Braden, & Borman [Page 26] - -RFC 1323 TCP Extensions for High Performance May 1992 - - -APPENDIX A: IMPLEMENTATION SUGGESTIONS - - The following layouts are recommended for sending options on non-SYN - segments, to achieve maximum feasible alignment of 32-bit and 64-bit - machines. - - - +--------+--------+--------+--------+ - | NOP | NOP | TSopt | 10 | - +--------+--------+--------+--------+ - | TSval timestamp | - +--------+--------+--------+--------+ - | TSecr timestamp | - +--------+--------+--------+--------+ - - -APPENDIX B: DUPLICATES FROM EARLIER CONNECTION INCARNATIONS - - There are two cases to be considered: (1) a system crashing (and - losing connection state) and restarting, and (2) the same connection - being closed and reopened without a loss of host state. These will - be described in the following two sections. - - B.1 System Crash with Loss of State - - TCP's quiet time of one MSL upon system startup handles the loss - of connection state in a system crash/restart. For an - explanation, see for example "When to Keep Quiet" in the TCP - protocol specification [Postel81]. The MSL that is required here - does not depend upon the transfer speed. The current TCP MSL of 2 - minutes seems acceptable as an operational compromise, as many - host systems take this long to boot after a crash. - - However, the timestamp option may be used to ease the MSL - requirements (or to provide additional security against data - corruption). If timestamps are being used and if the timestamp - clock can be guaranteed to be monotonic over a system - crash/restart, i.e., if the first value of the sender's timestamp - clock after a crash/restart can be guaranteed to be greater than - the last value before the restart, then a quiet time will be - unnecessary. 
- - To dispense totally with the quiet time would require that the - host clock be synchronized to a time source that is stable over - the crash/restart period, with an accuracy of one timestamp clock - tick or better. We can back off from this strict requirement to - take advantage of approximate clock synchronization. Suppose that - the clock is always re-synchronized to within N timestamp clock - - - -Jacobson, Braden, & Borman [Page 27] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - ticks and that booting (extended with a quiet time, if necessary) - takes more than N ticks. This will guarantee monotonicity of the - timestamps, which can then be used to reject old duplicates even - without an enforced MSL. - - B.2 Closing and Reopening a Connection - - When a TCP connection is closed, a delay of 2*MSL in TIME-WAIT - state ties up the socket pair for 4 minutes (see Section 3.5 of - [Postel81]). Applications built upon TCP that close one connection - and open a new one (e.g., an FTP data transfer connection using - Stream mode) must choose a new socket pair each time. The TIME- - WAIT delay serves two different purposes: - - (a) Implement the full-duplex reliable close handshake of TCP. - - The proper time to delay the final close step is not really - related to the MSL; it depends instead upon the RTO for the - FIN segments and therefore upon the RTT of the path. (It - could be argued that the side that is sending a FIN knows - what degree of reliability it needs, and therefore it should - be able to determine the length of the TIME-WAIT delay for - the FIN's recipient. This could be accomplished with an - appropriate TCP option in FIN segments.) - - Although there is no formal upper-bound on RTT, common - network engineering practice makes an RTT greater than 1 - minute very unlikely. Thus, the 4 minute delay in TIME-WAIT - state works satisfactorily to provide a reliable full-duplex - TCP close.
Note again that this is independent of MSL - enforcement and network speed. - - The TIME-WAIT state could cause an indirect performance - problem if an application needed to repeatedly close one - connection and open another at a very high frequency, since - the number of available TCP ports on a host is less than - 2**16. However, high network speeds are not the major - contributor to this problem; the RTT is the limiting factor - in how quickly connections can be opened and closed. - Therefore, this problem will be no worse at high transfer - speeds. - - (b) Allow old duplicate segments to expire. - - To replace this function of TIME-WAIT state, a mechanism - would have to operate across connections. PAWS is defined - strictly within a single connection; the last timestamp in - TS.Recent is kept in the connection control block, and - - - -Jacobson, Braden, & Borman [Page 28] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - discarded when a connection is closed. - - An additional mechanism could be added to the TCP, a per-host - cache of the last timestamp received from any connection. - This value could then be used in the PAWS mechanism to reject - old duplicate segments from earlier incarnations of the - connection, if the timestamp clock can be guaranteed to have - ticked at least once since the old connection was open. This - would require that the TIME-WAIT delay plus the RTT together - must be at least one tick of the sender's timestamp clock. - Such an extension is not part of the proposal of this RFC. - - Note that this is a variant on the mechanism proposed by - Garlick, Rom, and Postel [Garlick77], which required each - host to maintain connection records containing the highest - sequence numbers on every connection. Using timestamps - instead, it is only necessary to keep one quantity per remote - host, regardless of the number of simultaneous connections to - that host.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Jacobson, Braden, & Borman [Page 29] - -RFC 1323 TCP Extensions for High Performance May 1992 - - -APPENDIX C: CHANGES FROM RFC-1072, RFC-1185 - - The protocol extensions defined in this document differ in several - important ways from those defined in RFC-1072 and RFC-1185. - - (a) SACK has been deferred to a later memo. - - (b) The detailed rules for sending timestamp replies (see Section - 3.4) differ in important ways. The earlier rules could result - in an under-estimate of the RTT in certain cases (packets - dropped or out of order). - - (c) The same value TS.Recent is now shared by the two distinct - mechanisms RTTM and PAWS. This simplification became possible - because of change (b). - - (d) An ambiguity in RFC-1185 was resolved in favor of putting - timestamps on ACK as well as data segments. This supports the - symmetry of the underlying TCP protocol. - - (e) The echo and echo reply options of RFC-1072 were combined into a - single Timestamps option, to reflect the symmetry and to - simplify processing. - - (f) The problem of outdated timestamps on long-idle connections, - discussed in Section 4.2.3, was realized and resolved. - - (g) RFC-1185 recommended that header prediction take precedence over - the timestamp check. Based upon some scepticism about the - probabilistic arguments given in Section 4.2.4, it was decided - to recommend that the timestamp check be performed first. - - (h) The spec was modified so that the extended options will be sent - on <SYN,ACK> segments only when they are received in the - corresponding <SYN> segments. This provides the most - conservative possible conditions for interoperation with - implementations without the extensions. - - In addition to these substantive changes, the present RFC attempts to - specify the algorithms unambiguously by presenting modifications to - the Event Processing rules of RFC-793; see Appendix E.
- - - - - - - - - - -Jacobson, Braden, & Borman [Page 30] - -RFC 1323 TCP Extensions for High Performance May 1992 - - -APPENDIX D: SUMMARY OF NOTATION - - The following notation has been used in this document. - - Options - - WSopt: TCP Window Scale Option - TSopt: TCP Timestamps Option - - Option Fields - - shift.cnt: Window scale byte in WSopt. - TSval: 32-bit Timestamp Value field in TSopt. - TSecr: 32-bit Timestamp Reply field in TSopt. - - Option Fields in Current Segment - - SEG.TSval: TSval field from TSopt in current segment. - SEG.TSecr: TSecr field from TSopt in current segment. - SEG.WSopt: 8-bit value in WSopt - - Clock Values - - my.TSclock: Local source of 32-bit timestamp values - my.TSclock.rate: Period of my.TSclock (1 ms to 1 sec). - - Per-Connection State Variables - - TS.Recent: Latest received Timestamp - Last.ACK.sent: Last ACK field sent - - Snd.TS.OK: 1-bit flag - Snd.WS.OK: 1-bit flag - - Rcv.Wind.Scale: Receive window scale power - Snd.Wind.Scale: Send window scale power - - - - - - - - - - - - - - - -Jacobson, Braden, & Borman [Page 31] - -RFC 1323 TCP Extensions for High Performance May 1992 - - -APPENDIX E: EVENT PROCESSING - - -Event Processing - - OPEN Call - - ... - An initial send sequence number (ISS) is selected. Send a SYN - segment of the form: - - - - ... - - SEND Call - - CLOSED STATE (i.e., TCB does not exist) - - ... - - LISTEN STATE - - If the foreign socket is specified, then change the connection - from passive to active, select an ISS. Send a SYN segment - containing the options: and - . Set SND.UNA to ISS, SND.NXT to ISS+1. - Enter SYN-SENT state. ... - - SYN-SENT STATE - SYN-RECEIVED STATE - - ... - - ESTABLISHED STATE - CLOSE-WAIT STATE - - Segmentize the buffer and send it with a piggybacked - acknowledgment (acknowledgment value = RCV.NXT). ... - - If the urgent flag is set ... - - If the Snd.TS.OK flag is set, then include the TCP Timestamps - option in each data segment. 
- - Scale the receive window for transmission in the segment header: - - SEG.WND = (SND.WND >> Rcv.Wind.Scale). - - - -Jacobson, Braden, & Borman [Page 32] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - SEGMENT ARRIVES - - ... - - If the state is LISTEN then - - first check for an RST - - ... - - second check for an ACK - - ... - - third check for a SYN - - if the SYN bit is set, check the security. If the ... - - ... - - If the SEG.PRC is less than the TCB.PRC then continue. - - Check for a Window Scale option (WSopt); if one is found, save - SEG.WSopt in Snd.Wind.Scale and set Snd.WS.OK flag on. - Otherwise, set both Snd.Wind.Scale and Rcv.Wind.Scale to zero - and clear Snd.WS.OK flag. - - Check for a TSopt option; if one is found, save SEG.TSval in the - variable TS.Recent and turn on the Snd.TS.OK bit. - - Set RCV.NXT to SEG.SEQ+1, IRS is set to SEG.SEQ and any other - control or text should be queued for processing later. ISS - should be selected and a SYN segment sent of the form: - - - - If the Snd.WS.OK bit is on, include a WSopt option - in this segment. If the Snd.TS.OK bit is - on, include a TSopt in this - segment. Last.ACK.sent is set to RCV.NXT. - - SND.NXT is set to ISS+1 and SND.UNA to ISS. The connection - state should be changed to SYN-RECEIVED. Note that any other - incoming control or data (combined with SYN) will be processed - in the SYN-RECEIVED state, but processing of SYN and ACK should - not be repeated. If the listen was not fully specified (i.e., - the foreign socket was not fully specified), then the - unspecified fields should be filled in now. - - - -Jacobson, Braden, & Borman [Page 33] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - fourth other text or control - - ... - - If the state is SYN-SENT then - - first check the ACK bit - - ... - - fourth check the SYN bit - - ... 
- - If the SYN bit is on and the security/compartment and precedence - are acceptable then, RCV.NXT is set to SEG.SEQ+1, IRS is set to - SEG.SEQ, and any acknowledgements on the retransmission queue - which are thereby acknowledged should be removed. - - Check for a Window Scale option (WSopt); if one is found, save - SEG.WSopt in Snd.Wind.Scale; otherwise, set both Snd.Wind.Scale - and Rcv.Wind.Scale to zero. - - Check for a TSopt option; if one is found, save SEG.TSval in - variable TS.Recent and turn on the Snd.TS.OK bit in the - connection control block. If the ACK bit is set, use my.TSclock - - SEG.TSecr as the initial RTT estimate. - - If SND.UNA > ISS (our SYN has been ACKed), change the connection - state to ESTABLISHED, form an ACK segment: - - <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK> - - and send it. If the Snd.TS.OK bit is on, include a TSopt - option <TSval=my.TSclock,TSecr=TS.Recent> in this ACK segment. - Last.ACK.sent is set to RCV.NXT. - - Data or controls which were queued for transmission may be - included. If there are other controls or text in the segment - then continue processing at the sixth step below where the URG - bit is checked, otherwise return. - - Otherwise enter SYN-RECEIVED, form a SYN,ACK segment: - - <SEQ=ISS><ACK=RCV.NXT><CTL=SYN,ACK> - - and send it. If the Snd.TS.OK bit is on, include a TSopt - option <TSval=my.TSclock,TSecr=TS.Recent> in this segment. If - - - -Jacobson, Braden, & Borman [Page 34] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - the Snd.WS.OK bit is on, include a WSopt option - <WSopt=Rcv.Wind.Scale> in this segment. Last.ACK.sent is set to - RCV.NXT. - - If there are other controls or text in the segment, queue them - for processing after the ESTABLISHED state has been reached, - return. - - fifth, if neither of the SYN or RST bits is set then drop the - segment and return. - - - Otherwise, - - First, check sequence number - - SYN-RECEIVED STATE - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - CLOSE-WAIT STATE - CLOSING STATE - LAST-ACK STATE - TIME-WAIT STATE - - Segments are processed in sequence.
Initial tests on arrival - are used to discard old duplicates, but further processing is - done in SEG.SEQ order. If a segment's contents straddle the - boundary between old and new, only the new parts should be - processed. - - Rescale the received window field: - - TrueWindow = SEG.WND << Snd.Wind.Scale, - - and use "TrueWindow" in place of SEG.WND in the following steps. - - Check whether the segment contains a Timestamps option and bit - Snd.TS.OK is on. If so: - - If SEG.TSval < TS.Recent, then test whether connection has - been idle less than 24 days; if both are true, then the - segment is not acceptable; follow steps below for an - unacceptable segment. - - If SEG.SEQ is equal to Last.ACK.sent, then save SEG.TSval in - variable TS.Recent. - - - - -Jacobson, Braden, & Borman [Page 35] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - There are four cases for the acceptability test for an incoming - segment: - - ... - - If an incoming segment is not acceptable, an acknowledgment - should be sent in reply (unless the RST bit is set, if so drop - the segment and return): - - <SEQ=SND.NXT><ACK=RCV.NXT><CTL=ACK> - - Last.ACK.sent is set to SEG.ACK of the acknowledgment. If the - Snd.TS.OK bit is on, include the Timestamps option - <TSval=my.TSclock,TSecr=TS.Recent> in this ACK segment. Set - Last.ACK.sent to SEG.ACK and send the ACK segment. After - sending the acknowledgment, drop the unacceptable segment and - return. - - ... - - fifth check the ACK field. - - if the ACK bit is off drop the segment and return. - - if the ACK bit is on - - ... - - ESTABLISHED STATE - - If SND.UNA < SEG.ACK =< SND.NXT then, set SND.UNA <- SEG.ACK. - Also compute a new estimate of round-trip time. If Snd.TS.OK - bit is on, use my.TSclock - SEG.TSecr; otherwise use the - elapsed time since the first segment in the retransmission - queue was sent. Any segments on the retransmission queue - which are thereby entirely acknowledged... - - ... - - Seventh, process the segment text.
- - ESTABLISHED STATE - FIN-WAIT-1 STATE - FIN-WAIT-2 STATE - - ... - - Send an acknowledgment of the form: - - - -Jacobson, Braden, & Borman [Page 36] - -RFC 1323 TCP Extensions for High Performance May 1992 - - - - - If the Snd.TS.OK bit is on, include Timestamps option - in this ACK segment. Set - Last.ACK.sent to SEG.ACK of the acknowledgment, and send it. - This acknowledgment should be piggy-backed on a segment being - transmitted if possible without incurring undue delay. - - - ... - - -Security Considerations - - Security issues are not discussed in this memo. - -Authors' Addresses - - Van Jacobson - University of California - Lawrence Berkeley Laboratory - Mail Stop 46A - Berkeley, CA 94720 - - Phone: (415) 486-6411 - EMail: van@CSAM.LBL.GOV - - - Bob Braden - University of Southern California - Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292 - - Phone: (310) 822-1511 - EMail: Braden@ISI.EDU - - - Dave Borman - Cray Research - 655-E Lone Oak Drive - Eagan, MN 55121 - - Phone: (612) 683-5571 - Email: dab@cray.com - - - - - - -Jacobson, Braden, & Borman [Page 37] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1332.txt b/kernel/picotcp/RFC/rfc1332.txt deleted file mode 100644 index 3e12042..0000000 --- a/kernel/picotcp/RFC/rfc1332.txt +++ /dev/null @@ -1,787 +0,0 @@ - - - - - - -Network Working Group G. McGregor -Request for Comments: 1332 Merit -Obsoletes: RFC 1172 May 1992 - - - - The PPP Internet Protocol Control Protocol (IPCP) - - - -Status of this Memo - - This RFC specifies an IAB standards track protocol for the Internet - community, and requests discussion and suggestions for improvements. - Please refer to the current edition of the "IAB Official Protocol - Standards" for the standardization state and status of this protocol. - Distribution of this memo is unlimited. 
- -Abstract - - The Point-to-Point Protocol (PPP) [1] provides a standard method of - encapsulating Network Layer protocol information over point-to-point - links. PPP also defines an extensible Link Control Protocol, and - proposes a family of Network Control Protocols (NCPs) for - establishing and configuring different network-layer protocols. - - This document defines the NCP for establishing and configuring the - Internet Protocol [2] over PPP, and a method to negotiate and use Van - Jacobson TCP/IP header compression [3] with PPP. - - This RFC is a product of the Point-to-Point Protocol Working Group of - the Internet Engineering Task Force (IETF). - - - - - - - - - - - - - - - - - - - -McGregor [Page i] - -RFC 1332 PPP IPCP May 1992 - - -Table of Contents - - - 1. Introduction .......................................... 1 - - 2. A PPP Network Control Protocol (NCP) for IP ........... 2 - 2.1 Sending IP Datagrams ............................ 2 - - 3. IPCP Configuration Options ............................ 4 - 3.1 IP-Addresses .................................... 5 - 3.2 IP-Compression-Protocol ......................... 6 - 3.3 IP-Address ...................................... 8 - - 4. Van Jacobson TCP/IP header compression ................ 9 - 4.1 Configuration Option Format ..................... 9 - - APPENDICES ................................................... 11 - - A. IPCP Recommended Options .............................. 11 - - SECURITY CONSIDERATIONS ...................................... 11 - - REFERENCES ................................................... 11 - - ACKNOWLEDGEMENTS ............................................. 11 - - CHAIR'S ADDRESS .............................................. 12 - - AUTHOR'S ADDRESS ............................................. 12 - - - - - - - - - - - - - - - - - - - - - - -McGregor [Page ii] - -RFC 1332 PPP IPCP May 1992 - - -1. Introduction - - PPP has three main components: - - 1. 
A method for encapsulating datagrams over serial links. - - 2. A Link Control Protocol (LCP) for establishing, configuring, - and testing the data-link connection. - - 3. A family of Network Control Protocols (NCPs) for establishing - and configuring different network-layer protocols. - - In order to establish communications over a point-to-point link, each - end of the PPP link must first send LCP packets to configure and test - the data link. After the link has been established and optional - facilities have been negotiated as needed by the LCP, PPP must send - NCP packets to choose and configure one or more network-layer - protocols. Once each of the chosen network-layer protocols has been - configured, datagrams from each network-layer protocol can be sent - over the link. - - The link will remain configured for communications until explicit LCP - or NCP packets close the link down, or until some external event - occurs (an inactivity timer expires or network administrator - intervention). - - - - - - - - - - - - - - - - - - - - - - - - - - -McGregor [Page 1] - -RFC 1332 PPP IPCP May 1992 - - -2. A PPP Network Control Protocol (NCP) for IP - - The IP Control Protocol (IPCP) is responsible for configuring, - enabling, and disabling the IP protocol modules on both ends of the - point-to-point link. IPCP uses the same packet exchange mechanism as - the Link Control Protocol (LCP). IPCP packets may not be exchanged - until PPP has reached the Network-Layer Protocol phase. IPCP packets - received before this phase is reached should be silently discarded. - - The IP Control Protocol is exactly the same as the Link Control - Protocol [1] with the following exceptions: - - Data Link Layer Protocol Field - - Exactly one IPCP packet is encapsulated in the Information field - of PPP Data Link Layer frames where the Protocol field indicates - type hex 8021 (IP Control Protocol).
- - Code field - - Only Codes 1 through 7 (Configure-Request, Configure-Ack, - Configure-Nak, Configure-Reject, Terminate-Request, Terminate-Ack - and Code-Reject) are used. Other Codes should be treated as - unrecognized and should result in Code-Rejects. - - Timeouts - - IPCP packets may not be exchanged until PPP has reached the - Network-Layer Protocol phase. An implementation should be - prepared to wait for Authentication and Link Quality Determination - to finish before timing out waiting for a Configure-Ack or other - response. It is suggested that an implementation give up only - after user intervention or a configurable amount of time. - - Configuration Option Types - - IPCP has a distinct set of Configuration Options, which are - defined below. - -2.1. Sending IP Datagrams - - Before any IP packets may be communicated, PPP must reach the - Network-Layer Protocol phase, and the IP Control Protocol must reach - the Opened state. - - Exactly one IP packet is encapsulated in the Information field of PPP - Data Link Layer frames where the Protocol field indicates type hex - 0021 (Internet Protocol). - - - -McGregor [Page 2] - -RFC 1332 PPP IPCP May 1992 - - - The maximum length of an IP packet transmitted over a PPP link is the - same as the maximum length of the Information field of a PPP data - link layer frame. Larger IP datagrams must be fragmented as - necessary. If a system wishes to avoid fragmentation and reassembly, - it should use the TCP Maximum Segment Size option [4], and MTU - discovery [5]. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McGregor [Page 3] - -RFC 1332 PPP IPCP May 1992 - - -3. IPCP Configuration Options - -IPCP Configuration Options allow negotiation of desirable Internet -Protocol parameters. IPCP uses the same Configuration Option format -defined for LCP [1], with a separate set of Options.
- -The most up-to-date values of the IPCP Option Type field are specified -in the most recent "Assigned Numbers" RFC [6]. Current values are -assigned as follows: - - 1 IP-Addresses - 2 IP-Compression-Protocol - 3 IP-Address - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McGregor [Page 4] - -RFC 1332 PPP IPCP May 1992 - - -3.1. IP-Addresses - - Description - - The use of the Configuration Option IP-Addresses has been - deprecated. It has been determined through implementation - experience that it is difficult to ensure negotiation convergence - in all cases using this option. RFC 1172 [7] provides information - for implementations requiring backwards compatibility. The IP- - Address Configuration Option replaces this option, and its use is - preferred. - - This option SHOULD NOT be sent in a Configure-Request if a - Configure-Request has been received which includes either an IP- - Addresses or IP-Address option. This option MAY be sent if a - Configure-Reject is received for the IP-Address option, or a - Configure-Nak is received with an IP-Addresses option as an - appended option. - - Support for this option MAY be removed after the IPCP protocol - status advances to Internet Draft Standard. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McGregor [Page 5] - -RFC 1332 PPP IPCP May 1992 - - -3.2. IP-Compression-Protocol - - Description - - This Configuration Option provides a way to negotiate the use of a - specific compression protocol. By default, compression is not - enabled. - - A summary of the IP-Compression-Protocol Configuration Option format - is shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | IP-Compression-Protocol | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ...
- +-+-+-+-+ - - Type - - 2 - - Length - - >= 4 - - IP-Compression-Protocol - - The IP-Compression-Protocol field is two octets and indicates the - compression protocol desired. Values for this field are always - the same as the PPP Data Link Layer Protocol field values for that - same compression protocol. - - The most up-to-date values of the IP-Compression-Protocol field - are specified in the most recent "Assigned Numbers" RFC [6]. - Current values are assigned as follows: - - Value (in hex) Protocol - - 002d Van Jacobson Compressed TCP/IP - - Data - - The Data field is zero or more octets and contains additional data - as determined by the particular compression protocol. - - - - - -McGregor [Page 6] - -RFC 1332 PPP IPCP May 1992 - - - Default - - No compression protocol enabled. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McGregor [Page 7] - -RFC 1332 PPP IPCP May 1992 - - -3.3. IP-Address - - Description - - This Configuration Option provides a way to negotiate the IP - address to be used on the local end of the link. It allows the - sender of the Configure-Request to state which IP-address is - desired, or to request that the peer provide the information. The - peer can provide this information by NAKing the option, and - returning a valid IP-address. - - If negotiation about the remote IP-address is required, and the - peer did not provide the option in its Configure-Request, the - option SHOULD be appended to a Configure-Nak. The value of the - IP-address given must be acceptable as the remote IP-address, or - indicate a request that the peer provide the information. - - By default, no IP address is assigned. - - A summary of the IP-Address Configuration Option format is shown - below. The fields are transmitted from left to right. 
- - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | IP-Address - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - IP-Address (cont) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Type - - 3 - - Length - - 6 - - IP-Address - - The four octet IP-Address is the desired local address of the - sender of a Configure-Request. If all four octets are set to - zero, it indicates a request that the peer provide the IP-Address - information. - - Default - - No IP address is assigned. - - - -McGregor [Page 8] - -RFC 1332 PPP IPCP May 1992 - - -4. Van Jacobson TCP/IP header compression - -Van Jacobson TCP/IP header compression reduces the size of the TCP/IP -headers to as few as three bytes. This can be a significant improvement -on slow serial lines, particularly for interactive traffic. - -The IP-Compression-Protocol Configuration Option is used to indicate the -ability to receive compressed packets. Each end of the link must -separately request this option if bi-directional compression is desired. - -The PPP Protocol field is set to the following values when transmitting -IP packets: - - Value (in hex) - - 0021 Type IP. The IP protocol is not TCP, or the packet is a - fragment, or cannot be compressed. - - 002d Compressed TCP. The TCP/IP headers are replaced by the - compressed header. - - 002f Uncompressed TCP. The IP protocol field is replaced by - the slot identifier. - -4.1. Configuration Option Format - - A summary of the IP-Compression-Protocol Configuration Option format - to negotiate Van Jacobson TCP/IP header compression is shown below. - The fields are transmitted from left to right. 
- - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | IP-Compression-Protocol | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Max-Slot-Id | Comp-Slot-Id | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Type - - 2 - - Length - - 6 - - - - - - -McGregor [Page 9] - -RFC 1332 PPP IPCP May 1992 - - - IP-Compression-Protocol - - 002d (hex) for Van Jacobson Compressed TCP/IP headers. - - Max-Slot-Id - - The Max-Slot-Id field is one octet and indicates the maximum slot - identifier. This is one less than the actual number of slots; the - slot identifier has values from zero to Max-Slot-Id. - - Note: There may be implementations that have problems with only - one slot (Max-Slot-Id = 0). See the discussion in reference - [3]. The example implementation in [3] will only work with 3 - through 254 slots. - - Comp-Slot-Id - - The Comp-Slot-Id field is one octet and indicates whether the slot - identifier field may be compressed. - - 0 The slot identifier must not be compressed. All compressed - TCP packets must set the C bit in every change mask, and - must include the slot identifier. - - 1 The slot identifier may be compressed. - - The slot identifier must not be compressed if there is no ability - for the PPP link level to indicate an error in reception to the - decompression module. Synchronization after errors depends on - receiving a packet with the slot identifier. See the discussion - in reference [3]. - - - - - - - - - - - - - - - - - - - - -McGregor [Page 10] - -RFC 1332 PPP IPCP May 1992 - - -A. IPCP Recommended Options - - The following Configuration Options are recommended: - - IP-Compression-Protocol -- with at least 4 slots, usually 16 - slots. - - IP-Address -- only on dial-up lines. - - -Security Considerations - - Security issues are not discussed in this memo.
- - -References - - [1] Simpson, W., "The Point-to-Point Protocol", RFC 1331, May 1992. - - [2] Postel, J., "Internet Protocol", RFC 791, USC/Information - Sciences Institute, September 1981. - - [3] Jacobson, V., "Compressing TCP/IP Headers", RFC 1144, January - 1990. - - [4] Postel, J., "The TCP Maximum Segment Size Option and Related - Topics", RFC 879, USC/Information Sciences Institute, November - 1983. - - [5] Mogul, J., and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - [6] Reynolds, J., and J. Postel, "Assigned Numbers", RFC 1060, - USC/Information Sciences Institute, March 1990. - - [7] Perkins, D., and R. Hobby, "Point-to-Point Protocol (PPP) - initial configuration options", RFC 1172, August 1990. - - -Acknowledgments - - Some of the text in this document is taken from RFCs 1171 & 1172, by - Drew Perkins of Carnegie Mellon University, and by Russ Hobby of the - University of California at Davis. - - Information leading to the expanded IP-Compression option provided by - Van Jacobson at SIGCOMM '90. - - - - -McGregor [Page 11] - -RFC 1332 PPP IPCP May 1992 - - - Bill Simpson helped with the document formatting. - - -Chair's Address - - The working group can be contacted via the current chair: - - Brian Lloyd - Lloyd & Associates - 3420 Sudbury Road - Cameron Park, California 95682 - - Phone: (916) 676-1147 - - EMail: brian@ray.lloyd.com - - - -Author's Address - - Questions about this memo can also be directed to: - - Glenn McGregor - Merit Network, Inc. - 1071 Beal Avenue - Ann Arbor, MI 48109-2103 - - Phone: (313) 763-1203 - - EMail: Glenn.McGregor@Merit.edu - - - - - - - - - - - - - - - - - - - - - -McGregor [Page 12] - diff --git a/kernel/picotcp/RFC/rfc1334.txt b/kernel/picotcp/RFC/rfc1334.txt deleted file mode 100644 index 6051f48..0000000 --- a/kernel/picotcp/RFC/rfc1334.txt +++ /dev/null @@ -1,899 +0,0 @@ - - - - - - -Network Working Group B. Lloyd -Request for Comments: 1334 L&A - W. 
Simpson - Daydreamer - October 1992 - - - PPP Authentication Protocols - -Status of this Memo - - This RFC specifies an IAB standards track protocol for the Internet - community, and requests discussion and suggestions for improvements. - Please refer to the current edition of the "IAB Official Protocol - Standards" for the standardization state and status of this protocol. - Distribution of this memo is unlimited. - -Abstract - - The Point-to-Point Protocol (PPP) [1] provides a standard method of - encapsulating Network Layer protocol information over point-to-point - links. PPP also defines an extensible Link Control Protocol, which - allows negotiation of an Authentication Protocol for authenticating - its peer before allowing Network Layer protocols to transmit over the - link. - - This document defines two protocols for Authentication: the Password - Authentication Protocol and the Challenge-Handshake Authentication - Protocol. This memo is the product of the Point-to-Point Protocol - Working Group of the Internet Engineering Task Force (IETF). - Comments on this memo should be submitted to the ietf-ppp@ucdavis.edu - mailing list. - -Table of Contents - - 1. Introduction ............................................... 2 - 1.1 Specification Requirements ................................. 2 - 1.2 Terminology ................................................ 3 - 2. Password Authentication Protocol ............................ 3 - 2.1 Configuration Option Format ................................ 4 - 2.2 Packet Format .............................................. 5 - 2.2.1 Authenticate-Request ..................................... 5 - 2.2.2 Authenticate-Ack and Authenticate-Nak .................... 7 - 3. Challenge-Handshake Authentication Protocol.................. 8 - 3.1 Configuration Option Format ................................ 9 - 3.2 Packet Format .............................................. 
10 - 3.2.1 Challenge and Response ................................... 11 - 3.2.2 Success and Failure ...................................... 13 - - - -Lloyd & Simpson [Page 1] - -RFC 1334 PPP Authentication October 1992 - - - SECURITY CONSIDERATIONS ........................................ 14 - REFERENCES ..................................................... 15 - ACKNOWLEDGEMENTS ............................................... 16 - CHAIR'S ADDRESS ................................................ 16 - AUTHOR'S ADDRESS ............................................... 16 - -1. Introduction - - PPP has three main components: - - 1. A method for encapsulating datagrams over serial links. - - 2. A Link Control Protocol (LCP) for establishing, configuring, - and testing the data-link connection. - - 3. A family of Network Control Protocols (NCPs) for establishing - and configuring different network-layer protocols. - - In order to establish communications over a point-to-point link, each - end of the PPP link must first send LCP packets to configure the data - link during Link Establishment phase. After the link has been - established, PPP provides for an optional Authentication phase before - proceeding to the Network-Layer Protocol phase. - - By default, authentication is not mandatory. If authentication of - the link is desired, an implementation MUST specify the - Authentication-Protocol Configuration Option during Link - Establishment phase. - - These authentication protocols are intended for use primarily by - hosts and routers that connect to a PPP network server via switched - circuits or dial-up lines, but might be applied to dedicated links as - well. The server can use the identification of the connecting host - or router in the selection of options for network layer negotiations. - - This document defines the PPP authentication protocols. 
The Link - Establishment and Authentication phases, and the Authentication- - Protocol Configuration Option, are defined in The Point-to-Point - Protocol (PPP) [1]. - -1.1. Specification Requirements - - In this document, several words are used to signify the requirements - of the specification. These words are often capitalized. - - MUST - This word, or the adjective "required", means that the definition - is an absolute requirement of the specification. - - - -Lloyd & Simpson [Page 2] - -RFC 1334 PPP Authentication October 1992 - - - MUST NOT - This phrase means that the definition is an absolute prohibition - of the specification. - - SHOULD - This word, or the adjective "recommended", means that there may - exist valid reasons in particular circumstances to ignore this - item, but the full implications should be understood and carefully - weighed before choosing a different course. - - MAY - This word, or the adjective "optional", means that this item is - one of an allowed set of alternatives. An implementation which - does not include this option MUST be prepared to interoperate with - another implementation which does include the option. - -1.2. Terminology - - This document frequently uses the following terms: - - authenticator - The end of the link requiring the authentication. The - authenticator specifies the authentication protocol to be used in - the Configure-Request during Link Establishment phase. - - peer - The other end of the point-to-point link; the end which is being - authenticated by the authenticator. - - silently discard - This means the implementation discards the packet without further - processing. The implementation SHOULD provide the capability of - logging the error, including the contents of the silently - discarded packet, and SHOULD record the event in a statistics - counter. - -2. 
Password Authentication Protocol - - The Password Authentication Protocol (PAP) provides a simple method - for the peer to establish its identity using a 2-way handshake. This - is done only upon initial link establishment. - - After the Link Establishment phase is complete, an Id/Password pair - is repeatedly sent by the peer to the authenticator until - authentication is acknowledged or the connection is terminated. - - PAP is not a strong authentication method. Passwords are sent over - the circuit "in the clear", and there is no protection from playback - - - -Lloyd & Simpson [Page 3] - -RFC 1334 PPP Authentication October 1992 - - - or repeated trial and error attacks. The peer is in control of the - frequency and timing of the attempts. - - Any implementations which include a stronger authentication method - (such as CHAP, described below) MUST offer to negotiate that method - prior to PAP. - - This authentication method is most appropriately used where a - plaintext password must be available to simulate a login at a remote - host. In such use, this method provides a similar level of security - to the usual user login at the remote host. - - Implementation Note: It is possible to limit the exposure of the - plaintext password to transmission over the PPP link, and avoid - sending the plaintext password over the entire network. When the - remote host password is kept as a one-way transformed value, and - the algorithm for the transform function is implemented in the - local server, the plaintext password SHOULD be locally transformed - before comparison with the transformed password from the remote - host. - -2.1. Configuration Option Format - - A summary of the Authentication-Protocol Configuration Option format - to negotiate the Password Authentication Protocol is shown below. - The fields are transmitted from left to right. 
- - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Authentication-Protocol | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Type - - 3 - - Length - - 4 - - Authentication-Protocol - - c023 (hex) for Password Authentication Protocol. - - Data - - There is no Data field. - - - -Lloyd & Simpson [Page 4] - -RFC 1334 PPP Authentication October 1992 - - -2.2. Packet Format - - Exactly one Password Authentication Protocol packet is encapsulated - in the Information field of a PPP Data Link Layer frame where the - protocol field indicates type hex c023 (Password Authentication - Protocol). A summary of the PAP packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - Code - - The Code field is one octet and identifies the type of PAP packet. - PAP Codes are assigned as follows: - - 1 Authenticate-Request - 2 Authenticate-Ack - 3 Authenticate-Nak - - Identifier - - The Identifier field is one octet and aids in matching requests - and replies. - - Length - - The Length field is two octets and indicates the length of the PAP - packet including the Code, Identifier, Length and Data fields. - Octets outside the range of the Length field should be treated as - Data Link Layer padding and should be ignored on reception. - - Data - - The Data field is zero or more octets. The format of the Data - field is determined by the Code field. - -2.2.1. Authenticate-Request - - Description - - The Authenticate-Request packet is used to begin the Password - Authentication Protocol. 
The link peer MUST transmit a PAP packet - - - -Lloyd & Simpson [Page 5] - -RFC 1334 PPP Authentication October 1992 - - - with the Code field set to 1 (Authenticate-Request) during the - Authentication phase. The Authenticate-Request packet MUST be - repeated until a valid reply packet is received, or an optional - retry counter expires. - - The authenticator SHOULD expect the peer to send an Authenticate- - Request packet. Upon reception of an Authenticate-Request packet, - some type of Authenticate reply (described below) MUST be - returned. - - Implementation Note: Because the Authenticate-Ack might be - lost, the authenticator MUST allow repeated Authenticate- - Request packets after completing the Authentication phase. - Protocol phase MUST return the same reply Code returned when - the Authentication phase completed (the message portion MAY be - different). Any Authenticate-Request packets received during - any other phase MUST be silently discarded. - - When the Authenticate-Nak is lost, and the authenticator - terminates the link, the LCP Terminate-Request and Terminate- - Ack provide an alternative indication that authentication - failed. - - A summary of the Authenticate-Request packet format is shown below. - The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Peer-ID Length| Peer-Id ... - +-+-+-+-+-+-+-+-+-+-+-+-+ - | Passwd-Length | Password ... - +-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Code - - 1 for Authenticate-Request. - - Identifier - - The Identifier field is one octet and aids in matching requests - and replies. The Identifier field MUST be changed each time an - Authenticate-Request packet is issued. 
- - - - - - -Lloyd & Simpson [Page 6] - -RFC 1334 PPP Authentication October 1992 - - - Peer-ID-Length - - The Peer-ID-Length field is one octet and indicates the length of - the Peer-ID field. - - Peer-ID - - The Peer-ID field is zero or more octets and indicates the name of - the peer to be authenticated. - - Passwd-Length - - The Passwd-Length field is one octet and indicates the length of - the Password field. - - Password - - The Password field is zero or more octets and indicates the - password to be used for authentication. - -2.2.2. Authenticate-Ack and Authenticate-Nak - - Description - - If the Peer-ID/Password pair received in an Authenticate-Request - is both recognizable and acceptable, then the authenticator MUST - transmit a PAP packet with the Code field set to 2 (Authenticate- - Ack). - - If the Peer-ID/Password pair received in a Authenticate-Request is - not recognizable or acceptable, then the authenticator MUST - transmit a PAP packet with the Code field set to 3 (Authenticate- - Nak), and SHOULD take action to terminate the link. - - A summary of the Authenticate-Ack and Authenticate-Nak packet format - is shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Msg-Length | Message ... - +-+-+-+-+-+-+-+-+-+-+-+-+- - - Code - - 2 for Authenticate-Ack; - - - -Lloyd & Simpson [Page 7] - -RFC 1334 PPP Authentication October 1992 - - - 3 for Authenticate-Nak. - - Identifier - - The Identifier field is one octet and aids in matching requests - and replies. The Identifier field MUST be copied from the - Identifier field of the Authenticate-Request which caused this - reply. - - Msg-Length - - The Msg-Length field is one octet and indicates the length of the - Message field. 
- - Message - - The Message field is zero or more octets, and its contents are - implementation dependent. It is intended to be human readable, - and MUST NOT affect operation of the protocol. It is recommended - that the message contain displayable ASCII characters 32 through - 126 decimal. Mechanisms for extension to other character sets are - the topic of future research. - -3. Challenge-Handshake Authentication Protocol - - The Challenge-Handshake Authentication Protocol (CHAP) is used to - periodically verify the identity of the peer using a 3-way handshake. - This is done upon initial link establishment, and MAY be repeated - anytime after the link has been established. - - After the Link Establishment phase is complete, the authenticator - sends a "challenge" message to the peer. The peer responds with a - value calculated using a "one-way hash" function. The authenticator - checks the response against its own calculation of the expected hash - value. If the values match, the authentication is acknowledged; - otherwise the connection SHOULD be terminated. - - CHAP provides protection against playback attack through the use of - an incrementally changing identifier and a variable challenge value. - The use of repeated challenges is intended to limit the time of - exposure to any single attack. The authenticator is in control of - the frequency and timing of the challenges. - - This authentication method depends upon a "secret" known only to the - authenticator and that peer. The secret is not sent over the link. - This method is most likely used where the same secret is easily - accessed from both ends of the link. - - - - -Lloyd & Simpson [Page 8] - -RFC 1334 PPP Authentication October 1992 - - - Implementation Note: CHAP requires that the secret be available in - plaintext form. 
To avoid sending the secret over other links in - the network, it is recommended that the challenge and response - values be examined at a central server, rather than each network - access server. Otherwise, the secret SHOULD be sent to such - servers in a reversably encrypted form. - - The CHAP algorithm requires that the length of the secret MUST be at - least 1 octet. The secret SHOULD be at least as large and - unguessable as a well-chosen password. It is preferred that the - secret be at least the length of the hash value for the hashing - algorithm chosen (16 octets for MD5). This is to ensure a - sufficiently large range for the secret to provide protection against - exhaustive search attacks. - - The one-way hash algorithm is chosen such that it is computationally - infeasible to determine the secret from the known challenge and - response values. - - The challenge value SHOULD satisfy two criteria: uniqueness and - unpredictability. Each challenge value SHOULD be unique, since - repetition of a challenge value in conjunction with the same secret - would permit an attacker to reply with a previously intercepted - response. Since it is expected that the same secret MAY be used to - authenticate with servers in disparate geographic regions, the - challenge SHOULD exhibit global and temporal uniqueness. Each - challenge value SHOULD also be unpredictable, least an attacker trick - a peer into responding to a predicted future challenge, and then use - the response to masquerade as that peer to an authenticator. - Although protocols such as CHAP are incapable of protecting against - realtime active wiretapping attacks, generation of unique - unpredictable challenges can protect against a wide range of active - attacks. - - A discussion of sources of uniqueness and probability of divergence - is included in the Magic-Number Configuration Option [1]. - -3.1. 
Configuration Option Format - - A summary of the Authentication-Protocol Configuration Option format - to negotiate the Challenge-Handshake Authentication Protocol is shown - below. The fields are transmitted from left to right. - - - - - - - - - -Lloyd & Simpson [Page 9] - -RFC 1334 PPP Authentication October 1992 - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Authentication-Protocol | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Algorithm | - +-+-+-+-+-+-+-+-+ - - Type - - 3 - - Length - - 5 - - Authentication-Protocol - - c223 (hex) for Challenge-Handshake Authentication Protocol. - - Algorithm - - The Algorithm field is one octet and indicates the one-way hash - method to be used. The most up-to-date values of the CHAP - Algorithm field are specified in the most recent "Assigned - Numbers" RFC [2]. Current values are assigned as follows: - - 0-4 unused (reserved) - 5 MD5 [3] - -3.2. Packet Format - - Exactly one Challenge-Handshake Authentication Protocol packet is - encapsulated in the Information field of a PPP Data Link Layer frame - where the protocol field indicates type hex c223 (Challenge-Handshake - Authentication Protocol). A summary of the CHAP packet format is - shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - - - - - -Lloyd & Simpson [Page 10] - -RFC 1334 PPP Authentication October 1992 - - - Code - - The Code field is one octet and identifies the type of CHAP - packet. 
CHAP Codes are assigned as follows: - - 1 Challenge - 2 Response - 3 Success - 4 Failure - - Identifier - - The Identifier field is one octet and aids in matching challenges, - responses and replies. - - Length - - The Length field is two octets and indicates the length of the - CHAP packet including the Code, Identifier, Length and Data - fields. Octets outside the range of the Length field should be - treated as Data Link Layer padding and should be ignored on - reception. - - Data - - The Data field is zero or more octets. The format of the Data - field is determined by the Code field. - -3.2.1. Challenge and Response - - Description - - The Challenge packet is used to begin the Challenge-Handshake - Authentication Protocol. The authenticator MUST transmit a CHAP - packet with the Code field set to 1 (Challenge). Additional - Challenge packets MUST be sent until a valid Response packet is - received, or an optional retry counter expires. - - A Challenge packet MAY also be transmitted at any time during the - Network-Layer Protocol phase to ensure that the connection has not - been altered. - - The peer SHOULD expect Challenge packets during the Authentication - phase and the Network-Layer Protocol phase. Whenever a Challenge - packet is received, the peer MUST transmit a CHAP packet with the - Code field set to 2 (Response). - - Whenever a Response packet is received, the authenticator compares - - - -Lloyd & Simpson [Page 11] - -RFC 1334 PPP Authentication October 1992 - - - the Response Value with its own calculation of the expected value. - Based on this comparison, the authenticator MUST send a Success or - Failure packet (described below). - - Implementation Note: Because the Success might be lost, the - authenticator MUST allow repeated Response packets after - completing the Authentication phase. 
To prevent discovery of - alternative Names and Secrets, any Response packets received - having the current Challenge Identifier MUST return the same - reply Code returned when the Authentication phase completed - (the message portion MAY be different). Any Response packets - received during any other phase MUST be silently discarded. - - When the Failure is lost, and the authenticator terminates the - link, the LCP Terminate-Request and Terminate-Ack provide an - alternative indication that authentication failed. - - A summary of the Challenge and Response packet format is shown below. - The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Value-Size | Value ... - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Name ... - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Code - - 1 for Challenge; - - 2 for Response. - - Identifier - - The Identifier field is one octet. The Identifier field MUST be - changed each time a Challenge is sent. - - The Response Identifier MUST be copied from the Identifier field - of the Challenge which caused the Response. - - Value-Size - - This field is one octet and indicates the length of the Value - field. - - - -Lloyd & Simpson [Page 12] - -RFC 1334 PPP Authentication October 1992 - - - Value - - The Value field is one or more octets. The most significant octet - is transmitted first. - - The Challenge Value is a variable stream of octets. The - importance of the uniqueness of the Challenge Value and its - relationship to the secret is described above. The Challenge - Value MUST be changed each time a Challenge is sent. The length - of the Challenge Value depends upon the method used to generate - the octets, and is independent of the hash algorithm used. 
- - The Response Value is the one-way hash calculated over a stream of - octets consisting of the Identifier, followed by (concatenated - with) the "secret", followed by (concatenated with) the Challenge - Value. The length of the Response Value depends upon the hash - algorithm used (16 octets for MD5). - - Name - - The Name field is one or more octets representing the - identification of the system transmitting the packet. There are - no limitations on the content of this field. For example, it MAY - contain ASCII character strings or globally unique identifiers in - ASN.1 syntax. The Name should not be NUL or CR/LF terminated. - The size is determined from the Length field. - - Since CHAP may be used to authenticate many different systems, the - content of the name field(s) may be used as a key to locate the - proper secret in a database of secrets. This also makes it - possible to support more than one name/secret pair per system. - -3.2.2. Success and Failure - - Description - - If the Value received in a Response is equal to the expected - value, then the implementation MUST transmit a CHAP packet with - the Code field set to 3 (Success). - - If the Value received in a Response is not equal to the expected - value, then the implementation MUST transmit a CHAP packet with - the Code field set to 4 (Failure), and SHOULD take action to - terminate the link. - - A summary of the Success and Failure packet format is shown below. - The fields are transmitted from left to right. - - - - -Lloyd & Simpson [Page 13] - -RFC 1334 PPP Authentication October 1992 - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Message ... - +-+-+-+-+-+-+-+-+-+-+-+-+- - - Code - - 3 for Success; - - 4 for Failure. 
- - Identifier - - The Identifier field is one octet and aids in matching requests - and replies. The Identifier field MUST be copied from the - Identifier field of the Response which caused this reply. - - Message - - The Message field is zero or more octets, and its contents are - implementation dependent. It is intended to be human readable, - and MUST NOT affect operation of the protocol. It is recommended - that the message contain displayable ASCII characters 32 through - 126 decimal. Mechanisms for extension to other character sets are - the topic of future research. The size is determined from the - Length field. - -Security Considerations - - Security issues are the primary topic of this RFC. - - The interaction of the authentication protocols within PPP are - highly implementation dependent. This is indicated by the use of - SHOULD throughout the document. - - For example, upon failure of authentication, some implementations - do not terminate the link. Instead, the implementation limits the - kind of traffic in the Network-Layer Protocols to a filtered - subset, which in turn allows the user opportunity to update - secrets or send mail to the network administrator indicating a - problem. - - There is no provision for re-tries of failed authentication. - However, the LCP state machine can renegotiate the authentication - protocol at any time, thus allowing a new attempt. It is - - - -Lloyd & Simpson [Page 14] - -RFC 1334 PPP Authentication October 1992 - - - recommended that any counters used for authentication failure not - be reset until after successful authentication, or subsequent - termination of the failed link. - - There is no requirement that authentication be full duplex or that - the same protocol be used in both directions. It is perfectly - acceptable for different protocols to be used in each direction. - This will, of course, depend on the specific protocols negotiated. 
- - In practice, within or associated with each PPP server, there is a - database which associates "user" names with authentication - information ("secrets"). It is not anticipated that a particular - named user would be authenticated by multiple methods. This would - make the user vulnerable to attacks which negotiate the least - secure method from among a set (such as PAP rather than CHAP). - Instead, for each named user there should be an indication of - exactly one method used to authenticate that user name. If a user - needs to make use of different authentication method under - different circumstances, then distinct user names SHOULD be - employed, each of which identifies exactly one authentication - method. - - Passwords and other secrets should be stored at the respective - ends such that access to them is as limited as possible. Ideally, - the secrets should only be accessible to the process requiring - access in order to perform the authentication. - - The secrets should be distributed with a mechanism that limits the - number of entities that handle (and thus gain knowledge of) the - secret. Ideally, no unauthorized person should ever gain - knowledge of the secrets. It is possible to achieve this with - SNMP Security Protocols [4], but such a mechanism is outside the - scope of this specification. - - Other distribution methods are currently undergoing research and - experimentation. The SNMP Security document also has an excellent - overview of threats to network protocols. - -References - - [1] Simpson, W., "The Point-to-Point Protocol (PPP)", RFC 1331, - Daydreamer, May 1992. - - [2] Reynolds, J., and J. Postel, "Assigned Numbers", RFC 1340, - USC/Information Sciences Institute, July 1992. - - - - - - -Lloyd & Simpson [Page 15] - -RFC 1334 PPP Authentication October 1992 - - - [3] Rivest, R., and S. Dusse, "The MD5 Message-Digest Algorithm", MIT - Laboratory for Computer Science and RSA Data Security, Inc. RFC - 1321, April 1992. 
- - [4] Galvin, J., McCloghrie, K., and J. Davin, "SNMP Security - Protocols", Trusted Information Systems, Inc., Hughes LAN - Systems, Inc., MIT Laboratory for Computer Science, RFC 1352, - July 1992. - -Acknowledgments - - Some of the text in this document is taken from RFC 1172, by Drew - Perkins of Carnegie Mellon University, and by Russ Hobby of the - University of California at Davis. - - Special thanks to Dave Balenson, Steve Crocker, James Galvin, and - Steve Kent, for their extensive explanations and suggestions. Now, - if only we could get them to agree with each other. - -Chair's Address - - The working group can be contacted via the current chair: - - Brian Lloyd - Lloyd & Associates - 3420 Sudbury Road - Cameron Park, California 95682 - - Phone: (916) 676-1147 - - EMail: brian@lloyd.com - -Author's Address - - Questions about this memo can also be directed to: - - William Allen Simpson - Daydreamer - Computer Systems Consulting Services - P O Box 6205 - East Lansing, MI 48826-6205 - - EMail: Bill.Simpson@um.cc.umich.edu - - - - - - - - -Lloyd & Simpson [Page 16] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1337.txt b/kernel/picotcp/RFC/rfc1337.txt deleted file mode 100644 index ab09c2f..0000000 --- a/kernel/picotcp/RFC/rfc1337.txt +++ /dev/null @@ -1,619 +0,0 @@ - - - - - - -Network Working Group R. Braden -Request for Comments: 1337 ISI - May 1992 - - - TIME-WAIT Assassination Hazards in TCP - -Status of This Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard. Distribution of this memo is - unlimited. - -Abstract - - This note describes some theoretically-possible failure modes for TCP - connections and discusses possible remedies. In particular, one very - simple fix is identified. - -1. 
INTRODUCTION - - Experiments to validate the recently-proposed TCP extensions [RFC- - 1323] have led to the discovery of a new class of TCP failures, which - have been dubbed the "TIME-WAIT Assassination hazards". This note - describes these hazards, gives examples, and discusses possible - prevention measures. - - The failures in question all result from old duplicate segments. In - brief, the TCP mechanisms to protect against old duplicate segments - are [RFC-793]: - - (1) The 3-way handshake rejects old duplicate initial - segments, avoiding the hazard of replaying a connection. - - (2) Sequence numbers are used to reject old duplicate data and ACK - segments from the current incarnation of a given connection - (defined by a particular host and port pair). Sequence numbers - are also used to reject old duplicate segments. - - For very high-speed connections, Jacobson's PAWS ("Protect - Against Wrapped Sequences") mechanism [RFC-1323] effectively - extends the sequence numbers so wrap-around will not introduce a - hazard within the same incarnation. - - (3) There are two mechanisms to avoid hazards due to old duplicate - segments from an earlier instance of the same connection; see - the Appendix to [RFC-1185] for details. - - - - -Braden [Page 1] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - For "short and slow" connections [RFC-1185], the clock-driven - ISN (initial sequence number) selection prevents the overlap of - the sequence spaces of the old and new incarnations [RFC-793]. - (The algorithm used by Berkeley BSD TCP for stepping ISN - complicates the analysis slightly but does not change the - conclusions.) - - (4) TIME-WAIT state removes the hazard of old duplicates for "fast" - or "long" connections, in which clock-driven ISN selection is - unable to prevent overlap of the old and new sequence spaces. - The TIME-WAIT delay allows all old duplicate segments time - enough to die in the Internet before the connection is reopened. 
- - (5) After a system crash, the Quiet Time at system startup allows - old duplicates to disappear before any connections are opened. - - Our new observation is that (4) is unreliable: TIME-WAIT state can be - prematurely terminated ("assassinated") by an old duplicate data or - ACK segment from the current or an earlier incarnation of the same - connection. We refer to this as "TIME-WAIT Assassination" (TWA). - - Figure 1 shows an example of TIME-WAIT assassination. Segments 1-5 - are copied exactly from Figure 13 of RFC-793, showing a normal close - handshake. Packets 5.1, 5.2, and 5.3 are an extension to this - sequence, illustrating TWA. Here 5.1 is *any* old segment that is - unacceptable to TCP A. It might be unacceptable because of its - sequence number or because of an old PAWS timestamp. In either case, - TCP A sends an ACK segment 5.2 for its current SND.NXT and RCV.NXT. - Since it has no state for this connection, TCP B reflects this as RST - segment 5.3, which assassinates the TIME-WAIT state at A! - - - - - - - - - - - - - - - - - - - - - -Braden [Page 2] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - - TCP A TCP B - - 1. ESTABLISHED ESTABLISHED - - (Close) - 2. FIN-WAIT-1 --> --> CLOSE-WAIT - - 3. FIN-WAIT-2 <-- <-- CLOSE-WAIT - - (Close) - 4. TIME-WAIT <-- <-- LAST-ACK - - 5. TIME-WAIT --> --> CLOSED - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 5.1. TIME-WAIT <-- ... old duplicate - - 5.2 TIME-WAIT --> --> ???? - - 5.3 CLOSED <-- <-- ???? - (prematurely) - - Figure 1. TWA Example - - - Note that TWA is not at all an unlikely event if there are any - duplicate segments that may be delayed in the network. Furthermore, - TWA cannot be prevented by PAWS timestamps; the event may happen - within the same tick of the timestamp clock. TWA is a consequence of - TCP's half-open connection discovery mechanism (see pp 33-34 of - [RFC-793]), which is designed to clean up after a system crash. - -2. 
The TWA Hazards - - 2.1 Introduction - - If the connection is immediately reopened after a TWA event, the - new incarnation will be exposed to old duplicate segments (except - for the initial segment, which is handled by the 3-way - handshake). There are three possible hazards that result: - - H1. Old duplicate data may be accepted erroneously. - - H2. The new connection may be de-synchronized, with the two ends - in permanent disagreement on the state. Following the spec - of RFC-793, this desynchronization results in an infinite ACK - - - -Braden [Page 3] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - loop. (It might be reasonable to change this aspect of RFC- - 793 and kill the connection instead.) - - This hazard results from acknowledging something that was not - sent. This may result from an old duplicate ACK or as a - side-effect of hazard H1. - - H3. The new connection may die. - - A duplicate segment (data or ACK) arriving in SYN-SENT state - may kill the new connection after it has apparently opened - successfully. - - Each of these hazards requires that the seqence space of the new - connection overlap to some extent with the sequence space of the - previous incarnation. As noted above, this is only possible for - "fast" or "long" connections. Since these hazards all require the - coincidence of an old duplicate falling into a particular range of - new sequence numbers, they are much less probable than TWA itself. - - TWA and the three hazards H1, H2, and H3 have been demonstrated on - a stock Sun OS 4.1.1 TCP running in an simulated environment that - massively duplicates segments. This environment is far more - hazardous than most real TCP's must cope with, and the conditions - were carefully tuned to create the necessary conditions for the - failures. However, these demonstrations are in effect an - existence proof for the hazards. - - We now present example scenarios for each of these hazards. 
Each - scenario is assumed to follow immediately after a TWA event - terminated the previous incarnation of the same connection. - - 2.2 HAZARD H1: Acceptance of erroneous old duplicate data. - - Without the protection of the TIME-WAIT delay, it is possible for - erroneous old duplicate data from the earlier incarnation to be - accepted. Figure 2 shows precisely how this might happen. - - - - - - - - - - - - - - -Braden [Page 4] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - - TCP A TCP B - - 1. ESTABL. --> --> ESTABL. - - 2. ESTABL. <-- <-- ESTABL. - - 3. (old dupl)... --> ESTABL. - - 4. ESTABL. <-- <-- ESTABL. - - 5. ESTABL. --> --> ESTABL. - - 6. ... <-- ESTABL. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 7a. ESTABL. --> --> ESTABL. - - 8a. ESTABL. <-- ... - - 9a. ESTABL. --> --> ESTABL. - - Figure 2: Accepting Erroneous Data - - The connection has already been successfully reopened after the - assumed TWA event. Segment 1 is a normal data segment and segment - 2 is the corresponding ACK segment. Old duplicate data segment 3 - from the earlier incarnation happens to fall within the current - receive window, resulting in a duplicate ACK segment #4. The - erroneous data is queued and "lurks" in the TCP reassembly queue - until data segment 5 overlaps it. At that point, either 80 or 40 - bytes of erroneous data is delivered to the user B; the choice - depends upon the particulars of the reassembly algorithm, which - may accept the first or the last duplicate data. - - As a result, B sends segment 6, an ACK for sequence = 640, which - is 40 beyond any data sent by A. Assume for the present that this - ACK arrives at A *after* A has sent segment 7a, the next full data - segment. In that case, the ACK segment 8a acknowledges data that - has been sent, and the error goes undetected. Another possible - continuation after segment 6 leads to hazard H3, shown below. 
- - 2.3 HAZARD H2: De-synchronized Connection - - This hazard may result either as a side effect of H1 or directly - from an old duplicate ACK that happens to be acceptable but - acknowledges something that has not been sent. - - - -Braden [Page 5] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - Referring to Figure 2 above, suppose that the ACK generated by the - old duplicate data segment arrived before the next data segment - had been sent. The result is an infinite ACK loop, as shown by - the following alternate continuation of Figure 2. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 7b. ESTABL. <-- ... - (ACK something not yet - sent => send ACK) - - 8b. ESTABL. --> --> ESTABL. - (Below window => - send ACK) - - 9b. ESTABL. <-- <-- ESTABL. - - (etc.!) - - Figure 3: Infinite ACK loop - - - 2.4 HAZARD H3: Connection Failure - - An old duplicate ACK segment may lead to an apparent refusal of - TCP A's next connection attempt, as illustrated in Figure 4. Here - indicates the TCP window field SEG.WIND.* - - TCP A TCP B - - 1. CLOSED LISTEN - - 2. SYN-SENT --> --> SYN-RCVD - - 3. ... <-- SYN-RCVD - - 4. SYN-SENT <-- ... (old duplicate) - - 5. SYN-SENT --> --> LISTEN - - 6. ESTABLISHED <-- ... - - 7. ESTABLISHED --> --> LISTEN - - 8. CLOSED <-- <-- LISTEN - - - Figure 4: Connection Failure from Old Duplicate - - - - -Braden [Page 6] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - The key to the failure in Figure 4 is that the RST segment 5 is - acceptable to TCP B in SYN-RECEIVED state, because the sequence - space of the earlier connection that produced this old duplicate - overlaps the new connection space. Thus, in segment #5 - falls within TCP B's receive window [101,900). In experiments, - this failure mode was very easy to demonstrate. 
(Kurt Matthys has - pointed out that this scenario is time-dependent: if TCP A should - timeout and retransmit the initial SYN after segment 5 arrives and - before segment 6, then the open will complete successfully.) - -3. Fixes for TWA Hazards - - We discuss three possible fixes to TCP to avoid these hazards. - - (F1) Ignore RST segments in TIME-WAIT state. - - If the 2 minute MSL is enforced, this fix avoids all three - hazards. - - This is the simplest fix. One could also argue that it is - formally the correct thing to do; since allowing time for old - duplicate segments to die is one of TIME-WAIT state's functions, - the state should not be truncated by a RST segment. - - (F2) Use PAWS to avoid the hazards. - - Suppose that the TCP ignores RST segments in TIME-WAIT state, - but only long enough to guarantee that the timestamp clocks on - both ends have ticked. Then the PAWS mechanism [RFC-1323] will - prevent old duplicate data segments from interfering with the - new incarnation, eliminating hazard H1. For reasons explained - below, however, it may not eliminate all old duplicate ACK - segments, so hazards H2 and H3 will still exist. - - In the language of the TCP Extensions RFC [RFC-1323]: - - When processing a RST bit in TIME-WAIT state: - - If (Snd.TS.OK is off) or (Time.in.TW.state() >= W) - then enter the CLOSED state, delete the TCB, - drop the RST segment, and return. - - else simply drop the RST segment and return. - - Here "Time.in.TW.state()" is a function returning the elapsed - time since TIME-WAIT state was entered, and W is a constant that - is at least twice the longest possible period for timestamp - clocks, i.e., W = 2 secs [RFC-1323]. - - - -Braden [Page 7] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - This assumes that the timestamp clock at each end continues to - advance at a constant rate whether or not there are any open - connections. 
We do not have to consider what happens across a - system crash (e.g., the timestamp clock may jump randomly), - because of the assumed Quiet Time at system startup. - - Once this change is in place, the initial timestamps that occur - on the SYN and {SYN,ACK} segments reopening the connection will - be larger than any timestamp on a segment from earlier - incarnations. As a result, the PAWS mechanism operating in the - new connection incarnation will avoid the H1 hazard, ie. - acceptance of old duplicate data. - - The effectiveness of fix (F2) in preventing acceptance of old - duplicate data segments, i.e., hazard H1, has been demonstrated - in the Sun OS TCP mentioned earlier. Unfortunately, these tests - revealed a somewhat surprising fact: old duplicate ACKs from - the earlier incarnation can still slip past PAWS, so that (F2) - will not prevent failures H2 or H3. What happens is that TIME- - WAIT state effectively regenerates the timestamp of an old - duplicate ACK. That is, when an old duplicate arrives in TIME- - WAIT state, an extended TCP will send out its own ACK with a - timestamp option containing its CURRENT timestamp clock value. - If this happens immediately before the TWA mechanism kills - TIME-WAIT state, the result will be a "new old duplicate" - segment with a current timestamp that may pass the PAWS test on - the reopened connection. - - Whether H2 and H3 are critical depends upon how often they - happen and what assumptions the applications make about TCP - semantics. In the case of the H3 hazard, merely trying the open - again is likely to succeed. Furthermore, many production TCPs - have (despite the advice of the researchers who developed TCP) - incorporated a "keep-alive" mechanism, which may kill - connections unnecessarily. The frequency of occurrence of H2 - and H3 may well be much lower than keep-alive failures or - transient internet routing failures. 
- - (F3) Use 64-bit Sequence Numbers - - O'Malley and Peterson [RFC-1264] have suggested expansion of the - TCP sequence space to 64 bits as an alternative to PAWS for - avoiding the hazard of wrapped sequence numbers within the same - incarnation. It is worthwhile to inquire whether 64-bit - sequence numbers could be used to avoid the TWA hazards as well. - - Using 64 bit sequence numbers would not prevent TWA - the early - termination of TIME-WAIT state. However, it appears that a - - - -Braden [Page 8] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - combination of 64-bit sequence numbers with an appropriate - modification of the TCP parameters could defeat all of the TWA - hazards H1, H2, and H3. The basis for this is explained in an - appendix to this memo. In summary, it could be arranged that - the same sequence space would be reused only after a very long - period of time, so every connection would be "slow" and "short". - -4. Conclusions - - Of the three fixes described in the previous section, fix (F1), - ignoring RST segments in TIME-WAIT state, seems like the best short- - term solution. It is certainly the simplest. It would be very - desirable to do an extended test of this change in a production - environment, to ensure there is no unexpected bad effect of ignoring - RSTs in TIME-WAIT state. - - Fix (F2) is more complex and is at best a partial fix. (F3), using - 64-bit sequence numbers, would be a significant change in the - protocol, and its implications need to be thoroughly understood. - (F3) may turn out to be a long-term fix for the hazards discussed in - this note. - -APPENDIX: Using 64-bit Sequence Numbers - - This appendix provides a justification of our statement that 64-bit - sequence numbers could prevent the TWA hazards. - - The theoretical ISN calculation used by TCP is: - - ISN = (R*T) mod 2**n. 
- - where T is the real time in seconds (from an arbitrary origin, fixed - when the system is started), R is a constant, currently 250 KBps, and - n = 32 is the size of the sequence number field. - - The limitations of current TCP are established by n, R, and the - maximum segment lifetime MSL = 4 minutes. The shortest time Twrap to - wrap the sequence space is: - - Twrap = (2**n)/r - - where r is the maximum transfer rate. To avoid old duplicate - segments in the same connection, we require that Twrap > MSL (in - practice, we need Twrap >> MSL). - - - - - - - -Braden [Page 9] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - The clock-driven ISN numbers wrap in time TwrapISN: - - TwrapISN = (2**n)/R - - For current TCP, TwrapISN = 4.55 hours. - - The cases for old duplicates from previous connections can be divided - into four regions along two dimensions: - - * Slow vs. fast connections, corresponding to r < R or r >= R. - - * Short vs. long connections, corresponding to duration E < - TwrapISN or E >= TwrapISN. - - On short slow connections, the clock-driven ISN selection rejects old - duplicates. For all other cases, the TIME-WAIT delay of 2*MSL is - required so old duplicates can expire before they infect a new - incarnation. This is discussed in detail in the Appendix to [RFC- - 1185]. - - With this background, we can consider the effect of increasing n to - 64. We would like to increase both R and TwrapISN far enough that - all connections will be short and slow, i.e., so that the clock- - driven ISN selection will reject all old duplicates. Put another - way, we want to every connection to have a unique chunk of the - seqence space. For this purpose, we need R larger than the maximum - foreseeable rate r, and TwrapISN greater than the longest foreseeable - connection duration E. - - In fact, this appears feasible with n = 64 bits. 
Suppose that we use - R = 2**33 Bps; this is approximately 8 gigabytes per second, a - reasonable upper limit on throughput of a single TCP connection. - Then TwrapISN = 68 years, a reasonable upper limit on TCP connection - duration. Note that this particular choice of R corresponds to - incrementing the ISN by 2**32 every 0.5 seconds, as would happen with - the Berkeley BSD implementation of TCP. Then the low-order 32 bits - of a 64-bit ISN would always be exactly zero. - - REFERENCES - - [RFC-793] Postel, J., "Transmission Control Protocol", RFC-793, - USC/Information Sciences Institute, September 1981. - - [RFC-1185] Jacobson, V., Braden, R., and Zhang, L., "TCP - Extension for High-Speed Paths", RFC-1185, Lawrence Berkeley Labs, - USC/Information Sciences Institute, and Xerox Palo Alto Research - Center, October 1990. - - - - -Braden [Page 10] - -RFC 1337 TCP TIME-WAIT Hazards May 1992 - - - [RFC-1263] O'Malley, S. and L. Peterson, "TCP Extensions - Considered Harmful", RFC-1263, University of Arizona, October - 1991. - - [RFC-1323] Jacobson, V., Braden, R. and D. Borman "TCP Extensions - for High Performance", RFC-1323, Lawrence Berkeley Labs, - USC/Information Sciences Institute, and Cray Research, May 1992. - -Security Considerations - - Security issues are not discussed in this memo. - -Author's Address: - - Bob Braden - University of Southern California - Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292 - - Phone: (213) 822-1511 - EMail: Braden@ISI.EDU - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Braden [Page 11] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1350.txt b/kernel/picotcp/RFC/rfc1350.txt deleted file mode 100644 index e00113b..0000000 --- a/kernel/picotcp/RFC/rfc1350.txt +++ /dev/null @@ -1,619 +0,0 @@ - - - - - - -Network Working Group K. 
Sollins -Request For Comments: 1350 MIT -STD: 33 July 1992 -Obsoletes: RFC 783 - - - THE TFTP PROTOCOL (REVISION 2) - -Status of this Memo - - This RFC specifies an IAB standards track protocol for the Internet - community, and requests discussion and suggestions for improvements. - Please refer to the current edition of the "IAB Official Protocol - Standards" for the standardization state and status of this protocol. - Distribution of this memo is unlimited. - -Summary - - TFTP is a very simple protocol used to transfer files. It is from - this that its name comes, Trivial File Transfer Protocol or TFTP. - Each nonterminal packet is acknowledged separately. This document - describes the protocol and its types of packets. The document also - explains the reasons behind some of the design decisions. - -Acknowlegements - - The protocol was originally designed by Noel Chiappa, and was - redesigned by him, Bob Baldwin and Dave Clark, with comments from - Steve Szymanski. The current revision of the document includes - modifications stemming from discussions with and suggestions from - Larry Allen, Noel Chiappa, Dave Clark, Geoff Cooper, Mike Greenwald, - Liza Martin, David Reed, Craig Milo Rogers (of USC-ISI), Kathy - Yellick, and the author. The acknowledgement and retransmission - scheme was inspired by TCP, and the error mechanism was suggested by - PARC's EFTP abort message. - - The May, 1992 revision to fix the "Sorcerer's Apprentice" protocol - bug [4] and other minor document problems was done by Noel Chiappa. - - This research was supported by the Advanced Research Projects Agency - of the Department of Defense and was monitored by the Office of Naval - Research under contract number N00014-75-C-0661. - -1. Purpose - - TFTP is a simple protocol to transfer files, and therefore was named - the Trivial File Transfer Protocol or TFTP. 
It has been implemented - on top of the Internet User Datagram protocol (UDP or Datagram) [2] - - - -Sollins [Page 1] - -RFC 1350 TFTP Revision 2 July 1992 - - - so it may be used to move files between machines on different - networks implementing UDP. (This should not exclude the possibility - of implementing TFTP on top of other datagram protocols.) It is - designed to be small and easy to implement. Therefore, it lacks most - of the features of a regular FTP. The only thing it can do is read - and write files (or mail) from/to a remote server. It cannot list - directories, and currently has no provisions for user authentication. - In common with other Internet protocols, it passes 8 bit bytes of - data. - - Three modes of transfer are currently supported: netascii (This is - ascii as defined in "USA Standard Code for Information Interchange" - [1] with the modifications specified in "Telnet Protocol - Specification" [3].) Note that it is 8 bit ascii. The term - "netascii" will be used throughout this document to mean this - particular version of ascii.); octet (This replaces the "binary" mode - of previous versions of this document.) raw 8 bit bytes; mail, - netascii characters sent to a user rather than a file. (The mail - mode is obsolete and should not be implemented or used.) Additional - modes can be defined by pairs of cooperating hosts. - - Reference [4] (section 4.2) should be consulted for further valuable - directives and suggestions on TFTP. - -2. Overview of the Protocol - - Any transfer begins with a request to read or write a file, which - also serves to request a connection. If the server grants the - request, the connection is opened and the file is sent in fixed - length blocks of 512 bytes. Each data packet contains one block of - data, and must be acknowledged by an acknowledgment packet before the - next packet can be sent. A data packet of less than 512 bytes - signals termination of a transfer. 
If a packet gets lost in the - network, the intended recipient will timeout and may retransmit his - last packet (which may be data or an acknowledgment), thus causing - the sender of the lost packet to retransmit that lost packet. The - sender has to keep just one packet on hand for retransmission, since - the lock step acknowledgment guarantees that all older packets have - been received. Notice that both machines involved in a transfer are - considered senders and receivers. One sends data and receives - acknowledgments, the other sends acknowledgments and receives data. - - Most errors cause termination of the connection. An error is - signalled by sending an error packet. This packet is not - acknowledged, and not retransmitted (i.e., a TFTP server or user may - terminate after sending an error message), so the other end of the - connection may not get it. Therefore timeouts are used to detect - such a termination when the error packet has been lost. Errors are - - - -Sollins [Page 2] - -RFC 1350 TFTP Revision 2 July 1992 - - - caused by three types of events: not being able to satisfy the - request (e.g., file not found, access violation, or no such user), - receiving a packet which cannot be explained by a delay or - duplication in the network (e.g., an incorrectly formed packet), and - losing access to a necessary resource (e.g., disk full or access - denied during a transfer). - - TFTP recognizes only one error condition that does not cause - termination, the source port of a received packet being incorrect. - In this case, an error packet is sent to the originating host. - - This protocol is very restrictive, in order to simplify - implementation. For example, the fixed length blocks make allocation - straight forward, and the lock step acknowledgement provides flow - control and eliminates the need to reorder incoming data packets. - -3. 
Relation to other Protocols - - As mentioned TFTP is designed to be implemented on top of the - Datagram protocol (UDP). Since Datagram is implemented on the - Internet protocol, packets will have an Internet header, a Datagram - header, and a TFTP header. Additionally, the packets may have a - header (LNI, ARPA header, etc.) to allow them through the local - transport medium. As shown in Figure 3-1, the order of the contents - of a packet will be: local medium header, if used, Internet header, - Datagram header, TFTP header, followed by the remainder of the TFTP - packet. (This may or may not be data depending on the type of packet - as specified in the TFTP header.) TFTP does not specify any of the - values in the Internet header. On the other hand, the source and - destination port fields of the Datagram header (its format is given - in the appendix) are used by TFTP and the length field reflects the - size of the TFTP packet. The transfer identifiers (TID's) used by - TFTP are passed to the Datagram layer to be used as ports; therefore - they must be between 0 and 65,535. The initialization of TID's is - discussed in the section on initial connection protocol. - - The TFTP header consists of a 2 byte opcode field which indicates - the packet's type (e.g., DATA, ERROR, etc.) These opcodes and the - formats of the various types of packets are discussed further in the - section on TFTP packets. - - - - - - - - - - - -Sollins [Page 3] - -RFC 1350 TFTP Revision 2 July 1992 - - - --------------------------------------------------- - | Local Medium | Internet | Datagram | TFTP | - --------------------------------------------------- - - Figure 3-1: Order of Headers - - -4. Initial Connection Protocol - - A transfer is established by sending a request (WRQ to write onto a - foreign file system, or RRQ to read from it), and receiving a - positive reply, an acknowledgment packet for write, or the first data - packet for read. 
In general an acknowledgment packet will contain - the block number of the data packet being acknowledged. Each data - packet has associated with it a block number; block numbers are - consecutive and begin with one. Since the positive response to a - write request is an acknowledgment packet, in this special case the - block number will be zero. (Normally, since an acknowledgment packet - is acknowledging a data packet, the acknowledgment packet will - contain the block number of the data packet being acknowledged.) If - the reply is an error packet, then the request has been denied. - - In order to create a connection, each end of the connection chooses a - TID for itself, to be used for the duration of that connection. The - TID's chosen for a connection should be randomly chosen, so that the - probability that the same number is chosen twice in immediate - succession is very low. Every packet has associated with it the two - TID's of the ends of the connection, the source TID and the - destination TID. These TID's are handed to the supporting UDP (or - other datagram protocol) as the source and destination ports. A - requesting host chooses its source TID as described above, and sends - its initial request to the known TID 69 decimal (105 octal) on the - serving host. The response to the request, under normal operation, - uses a TID chosen by the server as its source TID and the TID chosen - for the previous message by the requestor as its destination TID. - The two chosen TID's are then used for the remainder of the transfer. - - As an example, the following shows the steps used to establish a - connection to write a file. Note that WRQ, ACK, and DATA are the - names of the write request, acknowledgment, and data types of packets - respectively. The appendix contains a similar example for reading a - file. - - - - - - - - - -Sollins [Page 4] - -RFC 1350 TFTP Revision 2 July 1992 - - - 1. Host A sends a "WRQ" to host B with source= A's TID, - destination= 69. 
- - 2. Host B sends a "ACK" (with block number= 0) to host A with - source= B's TID, destination= A's TID. - - At this point the connection has been established and the first data - packet can be sent by Host A with a sequence number of 1. In the - next step, and in all succeeding steps, the hosts should make sure - that the source TID matches the value that was agreed on in steps 1 - and 2. If a source TID does not match, the packet should be - discarded as erroneously sent from somewhere else. An error packet - should be sent to the source of the incorrect packet, while not - disturbing the transfer. This can be done only if the TFTP in fact - receives a packet with an incorrect TID. If the supporting protocols - do not allow it, this particular error condition will not arise. - - The following example demonstrates a correct operation of the - protocol in which the above situation can occur. Host A sends a - request to host B. Somewhere in the network, the request packet is - duplicated, and as a result two acknowledgments are returned to host - A, with different TID's chosen on host B in response to the two - requests. When the first response arrives, host A continues the - connection. When the second response to the request arrives, it - should be rejected, but there is no reason to terminate the first - connection. Therefore, if different TID's are chosen for the two - connections on host B and host A checks the source TID's of the - messages it receives, the first connection can be maintained while - the second is rejected by returning an error packet. - -5. TFTP Packets - - TFTP supports five types of packets, all of which have been mentioned - above: - - opcode operation - 1 Read request (RRQ) - 2 Write request (WRQ) - 3 Data (DATA) - 4 Acknowledgment (ACK) - 5 Error (ERROR) - - The TFTP header of a packet contains the opcode associated with - that packet. 
- - - - - - - -Sollins [Page 5] - -RFC 1350 TFTP Revision 2 July 1992 - - - 2 bytes string 1 byte string 1 byte - ------------------------------------------------ - | Opcode | Filename | 0 | Mode | 0 | - ------------------------------------------------ - - Figure 5-1: RRQ/WRQ packet - - - RRQ and WRQ packets (opcodes 1 and 2 respectively) have the format - shown in Figure 5-1. The file name is a sequence of bytes in - netascii terminated by a zero byte. The mode field contains the - string "netascii", "octet", or "mail" (or any combination of upper - and lower case, such as "NETASCII", NetAscii", etc.) in netascii - indicating the three modes defined in the protocol. A host which - receives netascii mode data must translate the data to its own - format. Octet mode is used to transfer a file that is in the 8-bit - format of the machine from which the file is being transferred. It - is assumed that each type of machine has a single 8-bit format that - is more common, and that that format is chosen. For example, on a - DEC-20, a 36 bit machine, this is four 8-bit bytes to a word with - four bits of breakage. If a host receives a octet file and then - returns it, the returned file must be identical to the original. - Mail mode uses the name of a mail recipient in place of a file and - must begin with a WRQ. Otherwise it is identical to netascii mode. - The mail recipient string should be of the form "username" or - "username@hostname". If the second form is used, it allows the - option of mail forwarding by a relay computer. - - The discussion above assumes that both the sender and recipient are - operating in the same mode, but there is no reason that this has to - be the case. For example, one might build a storage server. There - is no reason that such a machine needs to translate netascii into its - own form of text. Rather, the sender might send files in netascii, - but the storage server might simply store them without translation in - 8-bit format. 
Another such situation is a problem that currently - exists on DEC-20 systems. Neither netascii nor octet accesses all - the bits in a word. One might create a special mode for such a - machine which read all the bits in a word, but in which the receiver - stored the information in 8-bit format. When such a file is - retrieved from the storage site, it must be restored to its original - form to be useful, so the reverse mode must also be implemented. The - user site will have to remember some information to achieve this. In - both of these examples, the request packets would specify octet mode - to the foreign host, but the local host would be in some other mode. - No such machine or application specific modes have been specified in - TFTP, but one would be compatible with this specification. - - It is also possible to define other modes for cooperating pairs of - - - -Sollins [Page 6] - -RFC 1350 TFTP Revision 2 July 1992 - - - hosts, although this must be done with care. There is no requirement - that any other hosts implement these. There is no central authority - that will define these modes or assign them names. - - - 2 bytes 2 bytes n bytes - ---------------------------------- - | Opcode | Block # | Data | - ---------------------------------- - - Figure 5-2: DATA packet - - - Data is actually transferred in DATA packets depicted in Figure 5-2. - DATA packets (opcode = 3) have a block number and data field. The - block numbers on data packets begin with one and increase by one for - each new block of data. This restriction allows the program to use a - single number to discriminate between new packets and duplicates. - The data field is from zero to 512 bytes long. If it is 512 bytes - long, the block is not the last block of data; if it is from zero to - 511 bytes long, it signals the end of the transfer. (See the section - on Normal Termination for details.) 
- - All packets other than duplicate ACK's and those used for - termination are acknowledged unless a timeout occurs [4]. Sending a - DATA packet is an acknowledgment for the first ACK packet of the - previous DATA packet. The WRQ and DATA packets are acknowledged by - ACK or ERROR packets, while RRQ - - - 2 bytes 2 bytes - --------------------- - | Opcode | Block # | - --------------------- - - Figure 5-3: ACK packet - - - and ACK packets are acknowledged by DATA or ERROR packets. Figure - 5-3 depicts an ACK packet; the opcode is 4. The block number in - an ACK echoes the block number of the DATA packet being - acknowledged. A WRQ is acknowledged with an ACK packet having a - block number of zero. - - - - - - - - -Sollins [Page 7] - -RFC 1350 TFTP Revision 2 July 1992 - - - 2 bytes 2 bytes string 1 byte - ----------------------------------------- - | Opcode | ErrorCode | ErrMsg | 0 | - ----------------------------------------- - - Figure 5-4: ERROR packet - - - An ERROR packet (opcode 5) takes the form depicted in Figure 5-4. An - ERROR packet can be the acknowledgment of any other type of packet. - The error code is an integer indicating the nature of the error. A - table of values and meanings is given in the appendix. (Note that - several error codes have been added to this version of this - document.) The error message is intended for human consumption, and - should be in netascii. Like all other strings, it is terminated with - a zero byte. - -6. Normal Termination - - The end of a transfer is marked by a DATA packet that contains - between 0 and 511 bytes of data (i.e., Datagram length < 516). This - packet is acknowledged by an ACK packet like all other DATA packets. - The host acknowledging the final DATA packet may terminate its side - of the connection on sending the final ACK. On the other hand, - dallying is encouraged. 
This means that the host sending the final - ACK will wait for a while before terminating in order to retransmit - the final ACK if it has been lost. The acknowledger will know that - the ACK has been lost if it receives the final DATA packet again. - The host sending the last DATA must retransmit it until the packet is - acknowledged or the sending host times out. If the response is an - ACK, the transmission was completed successfully. If the sender of - the data times out and is not prepared to retransmit any more, the - transfer may still have been completed successfully, after which the - acknowledger or network may have experienced a problem. It is also - possible in this case that the transfer was unsuccessful. In any - case, the connection has been closed. - -7. Premature Termination - - If a request can not be granted, or some error occurs during the - transfer, then an ERROR packet (opcode 5) is sent. This is only a - courtesy since it will not be retransmitted or acknowledged, so it - may never be received. Timeouts must also be used to detect errors. - - - - - - - - -Sollins [Page 8] - -RFC 1350 TFTP Revision 2 July 1992 - - -I. 
Appendix - -Order of Headers - - 2 bytes - ---------------------------------------------------------- - | Local Medium | Internet | Datagram | TFTP Opcode | - ---------------------------------------------------------- - -TFTP Formats - - Type Op # Format without header - - 2 bytes string 1 byte string 1 byte - ----------------------------------------------- - RRQ/ | 01/02 | Filename | 0 | Mode | 0 | - WRQ ----------------------------------------------- - 2 bytes 2 bytes n bytes - --------------------------------- - DATA | 03 | Block # | Data | - --------------------------------- - 2 bytes 2 bytes - ------------------- - ACK | 04 | Block # | - -------------------- - 2 bytes 2 bytes string 1 byte - ---------------------------------------- - ERROR | 05 | ErrorCode | ErrMsg | 0 | - ---------------------------------------- - -Initial Connection Protocol for reading a file - - 1. Host A sends a "RRQ" to host B with source= A's TID, - destination= 69. - - 2. Host B sends a "DATA" (with block number= 1) to host A with - source= B's TID, destination= A's TID. - - - - - - - - - - - - - - -Sollins [Page 9] - -RFC 1350 TFTP Revision 2 July 1992 - - -Error Codes - - Value Meaning - - 0 Not defined, see error message (if any). - 1 File not found. - 2 Access violation. - 3 Disk full or allocation exceeded. - 4 Illegal TFTP operation. - 5 Unknown transfer ID. - 6 File already exists. - 7 No such user. - -Internet User Datagram Header [2] - - (This has been included only for convenience. TFTP need not be - implemented on top of the Internet User Datagram Protocol.) 
- - Format - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Source Port | Destination Port | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Length | Checksum | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Values of Fields - - - Source Port Picked by originator of packet. - - Dest. Port Picked by destination machine (69 for RRQ or WRQ). - - Length Number of bytes in UDP packet, including UDP header. - - Checksum Reference 2 describes rules for computing checksum. - (The implementor of this should be sure that the - correct algorithm is used here.) - Field contains zero if unused. - - Note: TFTP passes transfer identifiers (TID's) to the Internet User - Datagram protocol to be used as the source and destination ports. - - - - - - -Sollins [Page 10] - -RFC 1350 TFTP Revision 2 July 1992 - - -References - - [1] USA Standard Code for Information Interchange, USASI X3.4-1968. - - [2] Postel, J., "User Datagram Protocol," RFC 768, USC/Information - Sciences Institute, 28 August 1980. - - [3] Postel, J., "Telnet Protocol Specification," RFC 764, - USC/Information Sciences Institute, June, 1980. - - [4] Braden, R., Editor, "Requirements for Internet Hosts -- - Application and Support", RFC 1123, USC/Information Sciences - Institute, October 1989. - -Security Considerations - - Since TFTP includes no login or access control mechanisms, care must - be taken in the rights granted to a TFTP server process so as not to - violate the security of the server hosts file system. TFTP is often - installed with controls such that only files that have public read - access are available via TFTP and writing files via TFTP is - disallowed. - -Author's Address - - Karen R. 
Sollins - Massachusetts Institute of Technology - Laboratory for Computer Science - 545 Technology Square - Cambridge, MA 02139-1986 - - Phone: (617) 253-6006 - - EMail: SOLLINS@LCS.MIT.EDU - - - - - - - - - - - - - - - - - -Sollins [Page 11] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1379.txt b/kernel/picotcp/RFC/rfc1379.txt deleted file mode 100644 index b5f2bdc..0000000 --- a/kernel/picotcp/RFC/rfc1379.txt +++ /dev/null @@ -1,2131 +0,0 @@ - - - - - - -Network Working Group R. Braden -Request for Comments: 1379 ISI - November 1992 - - - Extending TCP for Transactions -- Concepts - -Status of This Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard. Distribution of this memo is - unlimited. - -Abstract - - This memo discusses extension of TCP to provide transaction-oriented - service, without altering its virtual-circuit operation. This - extension would fill the large gap between connection-oriented TCP - and datagram-based UDP, allowing TCP to efficiently perform many - applications for which UDP is currently used. A separate memo - contains a detailed functional specification for this proposed - extension. - - This work was supported in part by the National Science Foundation - under Grant Number NCR-8922231. - -TABLE OF CONTENTS - - 1. INTRODUCTION .................................................. 2 - 2. TRANSACTIONS USING STANDARD TCP ............................... 3 - 3. BYPASSING THE 3-WAY HANDSHAKE ................................. 6 - 3.1 Concept of TAO ........................................... 6 - 3.2 Cache Initialization ..................................... 10 - 3.3 Accepting Segments ............................. 11 - 4. SHORTENING TIME-WAIT STATE .................................... 13 - 5. CHOOSING A MONOTONIC SEQUENCE ................................. 15 - 5.1 Cached Timestamps ........................................ 
16 - 5.2 Current TCP Sequence Numbers ............................. 18 - 5.3 64-bit Sequence Numbers .................................. 20 - 5.4 Connection Counts ........................................ 20 - 5.5 Conclusions .............................................. 21 - 6. CONNECTION STATES ............................................. 24 - 7. CONCLUSIONS AND ACKNOWLEDGMENTS ............................... 32 - APPENDIX A: TIME-WAIT STATE AND THE 2-PACKET EXCHANGE ............ 34 - REFERENCES ....................................................... 37 - Security Considerations .......................................... 38 - Author's Address ................................................. 38 - - - - -Braden [Page 1] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - -1. INTRODUCTION - - The TCP protocol [STD-007] implements a virtual-circuit transport - service that provides reliable and ordered data delivery over a - full-duplex connection. Under the virtual circuit model, the life of - a connection is divided into three distinct phases: (1) opening the - connection to create a full-duplex byte stream; (2) transferring data - in one or both directions over this stream; and (3) closing the - connection. Remote login and file transfer are examples of - applications that are well suited to virtual-circuit service. - - Distributed applications, which are becoming increasingly numerous - and sophisticated in the Internet, tend to use a transaction-oriented - rather than a virtual circuit style of communication. Currently, a - transaction-oriented Internet application must choose to suffer the - overhead of opening and closing TCP connections or else build an - application-specific transport mechanism on top of the connectionless - transport protocol UDP. Greater convenience, uniformity, and - efficiency would result from widely-available kernel implementations - of a transport protocol supporting a transaction service model [RFC- - 955]. 
- - The transaction service model has the following features: - - * The fundamental interaction is a request followed by a response. - - * An explicit open or close phase would impose excessive overhead. - - * At-most-once semantics is required; that is, a transaction must - not be "replayed" by a duplicate request packet. - - * In favorable circumstances, a reliable request/response - handshake can be performed with exactly one packet in each - direction. - - * The minimum transaction latency for a client is RTT + SPT, where - RTT is the round-trip time and SPT is the server processing - time. - - We use the term "transaction transport protocol" for a transport- - layer protocol that follows this model [RFC-955]. - - The Internet architecture allows an arbitrary collection of transport - protocols to be defined on top of the minimal end-to-end datagram - service provided by IP [Clark88]. In practice, however, production - systems implement only TCP and UDP at the transport layer. It has - proven difficult to leverage a new transport protocol into place, to - be widely enough available to be useful for application builders. - - - -Braden [Page 2] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - This memo explores an alternative approach to providing a transaction - transport protocol: extending TCP to implement the transaction - service model, while continuing to support the virtual circuit model. - Each transaction will then be a single instance of a TCP connection. - The proposed transaction extension is effectively implementable - within current TCPs and operating systems, and it should also scale - to the much faster networks, interfaces, and CPUs of the future. - - The present memo explains the theory behind the extension, in - somewhat exquisite detail. Despite the length and complexity of this - memo, the TCP extensions required for transactions are in fact quite - limited and simple. 
Another memo [TTCP-FS] provides a self-contained - functional specification of the extensions. - - Section 2 of this memo describes the limitations of standard TCP for - transaction processing, to motivate the extensions. Sections 3, 4, - and 5 explore the fundamental extensions that are required for - transactions. Section 6 discusses the changes required in the TCP - connection state diagram. Finally, Section 7 presents conclusions - and acknowledgments. Familiarity with the standard TCP protocol - [STD-007] is assumed. - -2. TRANSACTIONS USING STANDARD TCP - - Reliable transfer of data depends upon sequence numbers. Before data - transfer can begin, both parties must "synchronize" the connection, - i.e, agree on common sequence numbers. The synchronization procedure - must preserve at-most-once semantics, i.e., be free from replay - hazards due to duplicate packets. The TCP developers adopted a - synchronization mechanism known as the 3-way handshake. - - Consider a simple transaction in which client host A sends a single- - segment request to server host B, and B returns a single-segment - response. Many current TCP implementations use at least ten segments - (i.e., packets) for this sequence: three for the 3-way handshake - opening the connection, four to send and acknowledge the request and - response data, and three for TCP's full-duplex data-conserving close - sequence. These ten segments represent a high relative overhead for - two data-bearing segments. However, a more important consideration - is the transaction latency seen by the client: 2*RTT + SPT, larger - than the minimum by one RTT. As CPU and network speeds increase, the - relative significance of this extra transaction latency also - increases. - - Proposed transaction transport protocols have typically used a - "timer-based" approach to connection synchronization [Birrell84]. 
In - this approach, once end-to-end connection state is established in the - client and server hosts, a subset of this state is maintained for - - - -Braden [Page 3] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - some period of time. A new request before the expiration of this - timeout period can then reestablish the full state without an - explicit handshake. Watson pointed out that the timer-based approach - of his Delta-T protocol [Watson81] would encompass both virtual - circuits and transactions. However, the TCP group adopted the 3-way - handshake (because of uncertainty about the robustness of enforcing - the packet lifetime bounds required by Delta-T, within a general - Internet environment). More recently, Liskov, Shrira, and Wroclawski - [Liskov90] have proposed a different timer-based approach to - connection synchronization, requiring loosely-synchronized clocks in - the hosts. - - The technique proposed in this memo, suggested by Clark [Clark89], - depends upon cacheing of connection state but not upon clocks or - timers; it is described in Section 3 below. Garlick, Rom, and Postel - also proposed a connection synchronization mechanism using cached - state [Garlick77]. Their scheme required each host to maintain - connection records containing the highest sequence number on each - connection. The technique suggested here retains only per-host - state, not per-connection state. - - During TCP development, it was suggested that TCP could support - transactions with data segments containing both SYN and FIN bits. - (These "Kamikaze" segments were not supported as a service; they were - used mainly to crash other experimental TCPs!) To illustrate this - idea, Figure 1 shows a plausible application of the current TCP rules - to create a minimal transaction. (In fact, some minor adjustments in - the standard TCP spec would be required to make Figure 1 fully legal - [STD-007]). 
- - Figure 1, like many of the examples shown in this memo, uses an - abbreviated form to illustrate segment sequences. For clarity and - brevity, it omits explicit sequence and acknowledgment numbers, - assuming that these will follow the well-known TCP rules. The - notation "ACK(x)" implies a cumulative acknowledgment for the control - bit or data "x" and everything preceding "x" in the sequence space. - The referent of "x" should be clear from the context. Also, host A - will always be the client and host B will be the server in these - diagrams. - - The first three segments in Figure 1 implement the standard TCP - three-way handshake. If segment #1 had been an old duplicate, the - client side would have sent an RST (Reset) bit in segment #3, - terminating the sequence. The request data included on the initial - SYN segment cannot be delivered to user B until segment #3 completes - the 3-way handshake. Loading control bits onto the segments has - reduced the total number of segments to 5, but the client still - observes a transaction latency of 2*RTT + SPT. The 3-way handshake - - - -Braden [Page 4] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - thus precludes high-performance transaction processing. - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - (Client sends request) - 1. SYN-SENT --> <SYN,data1,FIN> --> SYN-RCVD - (data1 queued) - - 2. ESTABLISHED <-- <SYN,ACK(SYN)> <-- SYN-RCVD - - - 3. FIN-WAIT-1 --> <ACK(SYN),FIN> --> CLOSE-WAIT - (data1 to server) - - (Server sends reply) - 4. TIME-WAIT <-- <ACK(FIN),data2,FIN> <-- LAST-ACK - (data2 to client) - - 5. TIME-WAIT --> <ACK(FIN)> --> CLOSED - - (timeout) - CLOSED - - Figure 1: Transaction Sequence: RFC-793 TCP - - - The TCP close sequence also poses a performance problem for - transactions: one or both end(s) of a closed connection must remain - in "TIME-WAIT" state until a 4 minute timeout has expired [STD-007].
- The same connection (defined by the host and port numbers at both - ends) cannot be reopened until this delay has expired. Because of - TIME-WAIT state, a client program should choose a new local port - number (i.e., a different connection) for each successive - transaction. However, the TCP port field of 16 bits (less the - "well-known" port space) provides only 64512 available user ports. - This limits the total rate of transactions between any pair of hosts - to a maximum of 64512/240 = 268 per second. This is much too low a - rate for low-delay paths, e.g., high-speed LANs. A high rate of - short connections (i.e., transactions) could also lead to excessive - consumption of kernel memory by connection control blocks in TIME- - WAIT state. - - In summary, to perform efficient transaction processing in TCP, we - need to suppress the 3-way handshake and to shorten TIME-WAIT state. - - - -Braden [Page 5] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - Protocol mechanisms to accomplish these two goals are discussed in - Sections 3 and 4, respectively. Both require the choice of a - monotonic sequence-like space; Section 5 analyzes the choices and - makes a selection for this space. Finally, the TCP connection state - machine must be extended as described in Section 6. - - Transaction processing in TCP raises some other protocol issues, - which are discussed in the functional specification memo [TTCP-FS]. - These include: - - (1) augmenting the user interface for transactions, - - (2) delaying acknowledgment segments to allow maximum piggy-backing - of control bits with data, - - (3) measuring the retransmission timeout time (RTO) on very short - connections, and - - (4) providing an initial server window. - - A recently proposed set of enhancements [RFC-1323] defines a TCP - Timestamps option that carries two 32-bit timestamp values. The - Timestamps option is used to accurately measure round-trip time - (RTT). 
The same option is also used in a procedure known as "PAWS" - (Protect Against Wrapped Sequences) to prevent erroneous data - delivery due to a combination of old duplicate segments and sequence - number reuse at very high bandwidths. The particular approach to - transactions chosen in this memo does not require the RFC-1323 - enhancements; however, they are important and should be implemented - in every TCP, with or without the transaction extensions described - here. - -3. BYPASSING THE 3-WAY HANDSHAKE - - To avoid 3-way handshakes for transactions, we introduce a new - mechanism for validating initial SYN segments, i.e., for enforcing - at-most-once semantics without a 3-way handshake. We refer to this - as the TCP Accelerated Open, or TAO, mechanism. - - 3.1 Concept of TAO - - The basis of TAO is this: a TCP uses cached per-host information - to immediately validate new SYNs [Clark89]. If this validation - fails, e.g., because there is no current cached state or the - segment is an old duplicate, the procedure falls back to a normal - 3-way handshake to validate the SYN. Thus, bypassing a 3-way - handshake is considered to be an optional optimization. - - - - -Braden [Page 6] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - The proposed TAO mechanism uses a finite sequence-like space of - values that increase monotonically with successive transactions - (connections) between a given (client, server) host pair. Call - this monotonic space M, and let each initial SYN segment carry an - M value SEG.M. If M is not the existing sequence (SEG.SEQ) field, - SEG.M may be carried in a TCP option. - - When host B receives from host A an initial SYN segment containing - a new value SEG.M, host B compares this against cache.M[A], the - latest M value that B has cached for host A. This comparison is - the "TAO test". Because the M values are monotonically - increasing, SEG.M > cache.M[A] implies that the SYN must be new - and can be accepted immediately.
If not, a normal 3-way handshake - is performed to validate the initial SYN segment. Figure 2 - illustrates the TAO mechanism; cached M values are shown enclosed - in square brackets. The M values generated by host A satisfy - x0 < x1, and the M values generated by host B satisfy y0 < y1. - - An appropriate choice for the M value space is discussed in - Section 5. M values are drawn from a finite number space, so - inequalities must be defined in the usual way for sequence numbers - [STD-007]. The M space must not wrap so quickly that an old - duplicate SYN will be erroneously accepted. We assume that some - maximum segment lifetime (MSL) is enforced by the IP layer. - - ____T_C_P__A_____ ____T_C_P__B_____ - - cache.M[B] cache.M[A] - V V - - [ y0 ] [ x0 ] - - 1. --> <SYN,data1,M=x1> --> ( (x1 > x0) => - data1 -> user_B; - cache.M[A]= x1) - - [ y0 ] [ x1 ] - 2. <-- <SYN,ACK(SYN),data2,M=y1> <-- - - (data2 -> user_A, - cache.M[B]= y1) - - [ y1 ] [ x1 ] - ... (etc.) ... - - - Figure 2. TAO: Three-Way Handshake is Bypassed - - - - -Braden [Page 7] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - Figure 2 shows the simplest case: each side has cached the latest - M value of the other, and the SEG.M value in the client's SYN - segment is greater than the value in the cache at the server host. - As a result, B can accept the client A's request data1 immediately - and pass it to the server application. B's reply data2 is shown - piggybacked on the <SYN,ACK> segment. As a result of this 2-way - exchange, the cached M values are updated at both sites; the - client side becomes relevant only if the client/server roles - reverse. Validation of the <SYN,ACK> segment at host A is - discussed later. - - Figure 3 shows the TAO test failing but the consequent 3-way - handshake succeeding. B updates its cache with the value x2 >= x1 - when the initial SYN is known to be valid. - - - _T_C_P__A _T_C_P__B - - cache.M[B] cache.M[A] - V V - - [ y0 ] [ x0 ] - 1.
--> --> ( (x1 <= x0) => - data1 queued; - 3-way handshake) - - [ y0 ] [ x0 ] - 2. <-- <-- - (cache.M[B]= y1) - - [ y1 ] [ x0 ] - 3. --> --> (Handshake OK => - data1->user_B, - cache.M[A]= x2) - - [ y1 ] [ x2 ] - ... (etc.) ... - - Figure 3. TAO Test Fails but 3-Way Handshake Succeeds. - - There are several possible causes for a TAO test failure on a - legitimate new SYN segment (not an old duplicate). - - (1) There may be no cached M value for this particular client - host. - - (2) The SYN may be the one of a set of nearly-simultaneous SYNs - for different connections but from the same host, which - - - -Braden [Page 8] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - arrived out of order. - - (3) The finite M space may have wrapped around between successive - transactions from the same client. - - (4) The M values may advance too slowly for closely-spaced - transactions. - - None of these TAO failures will cause a lockout, because the - resulting 3-way handshake will succeed. Note that the first - transaction between a given host pair will always require a 3-way - handshake; subsequent transactions can take advantage of TAO. - - The per-host cache required by TAO is highly desirable for other - reasons, e.g., to retain the measured round trip time and MTU for - a given remote host. Furthermore, a host should already have a - per-host routing cache [HR-COMM] that should be easily extensible - for this purpose. - - Figure 4 illustrates a complete TCP transaction sequence using the - TAO mechanism. Bypassing the 3-way handshake leads to new - connection states; Figure 4 shows three of them, "SYN-SENT*", - "CLOSE-WAIT*", and "LAST-ACK*". Explanation of these states is - deferred to Section 6. - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - 1. SYN-SENT* --> --> CLOSE-WAIT* - (TAO test OK=> - data1->user_B) - - <-- <-- LAST-ACK* - 2. TIME-WAIT - (data2->user_A) - - - 3. 
TIME-WAIT --> <ACK(FIN)> --> CLOSED - - (timeout) - CLOSED - - - Figure 4: Minimal Transaction Sequence Using TAO - - - - -Braden [Page 9] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - 3.2 Cache Initialization - - The first connection between hosts A and B will find no cached - state at one or both ends, so both M caches must be initialized. - This requires that the first transaction carry a specially marked - SEG.M value, which we call SEG.M.NEW. Receiving a SEG.M.NEW value - in an initial SYN segment, B will cache this value and send its - own M back to initialize A's cache. When a host crashes and - restarts, all its cached M values cache.M[*] must be invalidated - in order to force a re-synchronization of the caches at both ends. - - This cache synchronization procedure is illustrated in Figure 5, - where client host A has crashed and restarted with its cache - entries undefined, as indicated by "??". Since cache.M[B] is - undefined, A sends a SEG.M.NEW value instead of SEG.M in the - <SYN> segment of its first transaction request to B. Receiving this - SEG.M.NEW, the server host B invalidates cache.M[A] and performs - a 3-way handshake. SEG.M in segment #2 updates A's cache, and - when the handshake completes successfully, B updates its cached M - value to x2 >= x1. - - - _T_C_P__A _T_C_P__B - - cache.M[B] cache.M[A] - V V - [ ?? ] [ x0 ] - - 1. --> <SYN,data1,M.NEW=x1> --> (invalidate cache; - queue data1; - [ ?? ] 3-way handshake) - - [ ?? ] - 2. <-- <SYN,ACK(SYN),M=y1> <-- - (cache.M[B]= y1) - - [ y1 ] [ ?? ] - - 3. --> <ACK(SYN),M=x2> --> (data1->user_B, - cache.M[A]= x2) - - [ y1 ] [ x2 ] - ... (etc.) ... - - Figure 5. Client Host Crashed - - - Suppose that the 3-way handshake failed, presumably because - - - -Braden [Page 10] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - segment #1 was an old duplicate. Then segment #3 from host A - would be an RST segment, with the result that both sides' caches - would be left undefined. - - Figure 6 shows the procedure when the server crashes and restarts.
- Upon receiving a <SYN> segment from a host for which it has no - cached M value, B initiates a 3-way handshake to validate the - request and sends its own M value to A. Again the result is to - update cached M values on both sides. - - - _T_C_P__A _T_C_P__B - - cache.M[B] cache.M[A] - V V - [ y0 ] [ ?? ] - - 1. --> <SYN,data1,M=x1> --> (data1 queued; - 3-way handshake) - - [ y0 ] [ ?? ] - 2. <-- <SYN,ACK(SYN),M=y1> <-- - (cache.M[B]= y1) - - [ y1 ] [ ?? ] - 3. --> <ACK(SYN),M=x2> --> (data1->user_B, - cache.M[A]= x2) - - [ y1 ] [ x2 ] - ... (etc.) ... - - - Figure 6. Server Host Crashed - - - 3.3 Accepting <SYN,ACK> Segments - - Transactions introduce a new hazard of erroneously accepting an - old duplicate <SYN,ACK> segment. To be acceptable, a <SYN,ACK> - segment must arrive in SYN-SENT state, and its ACK field must - acknowledge something that was sent. In current TCPs the - effective send window in SYN-SENT state is exactly one octet, and - an acceptable <SYN,ACK> must exactly ACK this one octet. The - clock-driven selection of Initial Sequence Number (ISN) makes an - erroneous acceptance exceedingly unlikely. An old duplicate SYN - could be accepted erroneously only if successive connection - attempts occurred more often than once every 4 microseconds, or if - the segment lifetime exceeded the 4 hour wraparound time for ISN - - - -Braden [Page 11] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - selection. - - However, when TCP is used for transactions, data sent with the - initial SYN increases the range of sequence numbers that have been - sent. This increases the danger of accepting an old duplicate - <SYN,ACK> segment, and the consequences are more serious. In the - example in Figure 7, segments 1-3 form a normal transaction - sequence, and segment 4 begins a new transaction (incarnation) for - the same connection. Segment #5 is a duplicate of segment #2 from - the preceding transaction.
Although the new transaction has a - larger ISN, the previous ACK value 402 falls into the new range - [200,700) of sequence numbers that have been sent, so segment #5 - could be erroneously accepted and passed to the client as the - response to the new request. - - _T_C_P__A _T_C_P__B - - CLOSED LISTEN - - 1. --> --> (TAO test OK) - - - 2. <-- <-- - - - 3. TIME-WAIT --> --> CLOSED - (short timeout) - CLOSED - - (New Request) - 4. --> --> ... - - (Duplicate of segment #2) - 5. <-- <--... - (Acceptable!!) - - - Figure 7: Old Duplicate Causing Error - - - Unfortunately, we cannot simply use TAO on the client side to - detect and reject old duplicate <SYN,ACK> segments. A TAO test at - the client might fail for a valid <SYN,ACK> segment, due to out- - of-order delivery, and this could result in permanent non-delivery - of a valid transaction reply. - - Instead, we include a second M value, an echo of the client's M - value from the initial <SYN> segment, in the <SYN,ACK> segment. A - - - -Braden [Page 12] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - specially-marked M value, SEG.M.ECHO, is used for this purpose. - The client knows the value it sent in the initial <SYN> and can - therefore positively validate the <SYN,ACK> using the echoed - value. This is illustrated in Figure 12, which is the same as - Figure 4 with the addition of the echoed value on the <SYN,ACK> - segment #2. - - It should be noted that TCP allows a simultaneous open sequence in - which both sides send and receive an initial <SYN> (see Figure 8 - of [STD-007]). In this case, the TAO test must be performed on - both sides to preserve the symmetry. See [TTCP-FS] for an - example. - -4.
SHORTENING TIME-WAIT STATE - - Once a transaction has been initiated for a particular connection - (pair of ports) between a given host pair, a new transaction for the - same connection cannot take place for a time that is at least: - - RTT + SPT + TIME-WAIT_delay - - Since the client host can cycle among the 64512 available port - numbers, an upper bound on the transaction rate between a particular - host pair is: - - [1] TRmax = 64512 /(RTT + TIME-WAIT_Delay) - - in transactions per second (Tps), where we assumed SPT is negligible. - We must reduce TIME-WAIT_Delay to support high-rate TCP transaction - processing. - - TIME-WAIT state performs two functions: (1) supporting the full- - duplex reliable close of TCP, and (2) allowing old duplicate segments - from an earlier connection incarnation to expire before they can - cause an error (see Appendix to [RFC-1185]). The first function - impacts the application model of a TCP connection, which we would not - want to change. The second is part of the fundamental machinery of - TCP reliable delivery; to safely truncate TIME-WAIT state, we must - provide another means to exclude duplicate packets from earlier - incarnations of the connection. - - To minimize the delay in TIME-WAIT state while performing both - functions, we propose to set the TIME-WAIT delay to: - - [2] TIME-WAIT_Delay = max( K*RTO, U ) - - where U and K are constants and RTO is the dynamically-determined - retransmission timeout, the measured RTT plus an allowance for the - - - -Braden [Page 13] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - RTT variance [Jacobson88]. We choose K large enough so that there is - high probability of the close completing successfully if at all - possible; K = 8 seems reasonable. This takes care of the first - function of TIME-WAIT state. - - In a real implementation, there may be a minimum RTO value Tr, - corresponding to the precision of RTO calculation. 
For example, in - the popular BSD implementation of TCP, the minimum RTO is Tr = 0.5 - second. Assuming K = 8 and U = 0, Eqns [1] and [2] impose an upper - limit of TRmax = 16K Tps on the transaction rate of these - implementations. - - It is possible to have many short connections only if RTO is very - small, in which case the TIME-WAIT delay [2] reduces to U. To - accelerate the close sequence, we need to reduce U below the MSL - enforced by the IP layer, without introducing a hazard from old - duplicate segments. For this purpose, we introduce another monotonic - number sequence; call it X. X values are required to be monotonic - between successive connection incarnations; depending upon the choice - of the X space (see Section 5), X values may also increase during a - connection. A value from the X space is to be carried in every - segment, and a segment is rejected if it is received with an X value - smaller than the largest X value received. This mechanism does not - use a cache; the largest X value is maintained in the TCP connection - control block (TCB) for each connection. - - The value of U depends upon the choice for the X space, discussed in - the next section. If X is time-like, U can be set to twice the time - granularity (i.e, twice the minimum "tick" time) of X. The TIME-WAIT - delay will then ensure that current X values do not overlap the X - values of earlier incarnations of the same connection. Another - consequence of time-like X values is the possibility that an open but - idle connection might allow the X value to wrap its sign bit, - resulting in a lockup of the connection. To prevent this, a 24-day - idle timer on each open connection could bypass the X check on the - first segment following the idle period, for example. In practice, - many implementations have keep-alive mechanisms that prevent such - long idle periods [RFC-1323]. 
- - Referring back to Figure 4, our proposed transaction extension - results in a minimum exchange of 3 packets. Segment #3, the final - ACK segment, does not increase transaction latency, but in - combination with the TIME-WAIT delay of K*RTO it ensures that the - server side of the connection will be closed before a new transaction - is issued for this same pair of ports. It also provides an RTT - measurement for the server. - - We may ask whether it would be possible to further reduce the TIME- - - - -Braden [Page 14] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - WAIT delay. We might set K to zero; alternatively, we might allow - the client TCP to start a new transaction request while the - connection was still in TIME-WAIT state, with the new initial SYN - acting as an implied acknowledgment of the previous FIN. Appendix A - summarizes the issues raised by these alternatives, which we call - "truncating" TIME-WAIT state, and suggests some possible solutions. - Further study would be required, but these solutions appear to bend - the theory and/or implementations of the TCP protocol farther than we - wish to bend them. - - We therefore propose using formula [2] with K=8 and retaining the - final ACK(FIN) transmission. To raise the transaction rate, - therefore, we require small values of RTO and U. - -5. CHOOSING A MONOTONIC SEQUENCE - - For simplicity, we want the monotonic sequence X used for shortening - TIME-WAIT state to be identical to the monotonic sequence M for - bypassing the 3-way handshake. Calling the common space M, we will - send an M value SEG.M in each TCP segment. Upon receipt of an - initial SYN segment, SEG.M will be compared with a per-host cached - value to authenticate the SYN without a 3-way handshake; this is the - TAO mechanism. Upon receipt of a non-SYN segment, SEG.M will be - compared with the current value in the connection control block and - used to discard old duplicates. 
- - Note that the situation with TIME-WAIT state differs from that of - bypassing 3-way handshakes in two ways: (a) TIME-WAIT requires - duplicate detection on every segment vs. only on SYN segments, and - (b) TIME-WAIT applies to a single connection vs. being global across - all connections. This section discusses possible choices for the - common monotonic sequence. - - The SEG.M values must satisfy the following requirements. - - * The values must be monotonic; this requirement is defined more - precisely below. - - * Their granularity must be fine-grained enough to support a high - rate of transaction processing; the M clock must "tick" at least - once between successive transactions. - - * Their range (wrap-around time) must be great enough to allow a - realistic MSL to be enforced by the network. - - The TCP spec calls for an MSL of 120 secs. Since much of the - Internet does not carefully enforce this limit, it would be safer to - have an MSL at least an order of magnitude larger. We set as an - - - -Braden [Page 15] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - objective an MSL of at least 2000 seconds. If there were no TIME- - WAIT delay, the ultimate limit on transaction rate would be set by - speed-of-light delays in the network and by the latency of host - operating systems. As the bottleneck problems with interfacing CPUs - to gigabit LANs are solved, we can imagine transaction durations as - short as 1 microsecond. Therefore, we set an ultimate performance - goal of TRmax at least 10**6 Tps. - - A particular connection between hosts A and B is identified by the - local and remote TCP "sockets", i.e., by the quadruplet: {A, B, - Port.A, Port.B}. Imagine that each host keeps a count CC of the - number of TCP connections it has initiated. We can use this CC - number to distinguish different incarnations of the same connection. 
- Then a particular SEG.M value may be labeled implicitly by 6 - quantities: {A, B, Port.A, Port.B, CC, n}, where n is the byte offset - of that segment within the connection incarnation. - - To bypass the 3-way handshake, we require that SEG.M values on - successive SYN segments from a host A to a host B be monotone - increasing. If CC' > CC, then we require that: - - SEG.M(A,B,Port.A,Port.B,CC',0) > SEG.M(A,B,Port.A,Port.B,CC,0) - - for any legal values of Port.A and Port.B. - - To delete old duplicates (allowing TIME-WAIT state to be shortened), - we require that SEG.M values be disjoint across different - incarnations of the same connection. If CC' > CC then - - SEG.M(A,B,Port.A,Port.B,CC',n') > SEG.M(A,B,Port.A,Port.B,CC,n), - - for any non-negative integers n and n'. - - We now consider four different choices for the common monotonic - space: RFC-1323 timestamps, TCP sequence numbers, the connection - count, and 64-bit TCP sequence numbers. The results are summarized - in Table I. - - 5.1 Cached Timestamps - - The PAWS mechanism [RFC-1323] uses TCP "timestamps" as - monotonically increasing integers in order to throw out old - duplicate segments within the same incarnation. Jacobson - suggested the caching of these timestamps for bypassing 3-way - handshakes [Jacobson90], i.e., that TCP timestamps be used for our - common monotonic space M. This idea is attractive since it would - allow the same timestamp options to be used for RTTM, PAWS, and - transactions. - - - -Braden [Page 16] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - To obtain at-most-once service, the criterion for immediate - acceptance of a SYN must be that SEG.M is strictly greater than - the cached M value. That is, to be useful for bypassing 3-way - handshakes, the timestamp clock must tick at least once between - any two successive transactions between the same pair of hosts - (even if different ports are used).
Hence, the timestamp clock - rate would determine TRmax, the maximum possible transaction rate. - - Unfortunately, the timestamp clock frequency called for by RFC- - 1323, in the range 1 sec to 1 ms, is much too slow for - transactions. The TCP timestamp period was chosen to be - comparable to the fundamental interval for computing and - scheduling retransmission timeouts; this is generally in the range - of 1 sec. to 1 ms., and in many operating systems, much closer to - 1 second. Although it would be possible to increase the timestamp - clock frequency by several orders of magnitude, to do so would - make implementation more difficult, and on some systems - excessively expensive. - - The wraparound time for TCP timestamps, at least 24 days, causes - no problem for transactions. - - The PAWS mechanism uses TCP timestamps to protect against old - duplicate non-SYN segments from the same incarnation [RFC-1323]. - It can also be used to protect against old duplicate data segments - from earlier incarnations (and therefore allow shortening of - TIME-WAIT state) if we can ensure that the timestamp clock ticks - at least once between the end of one incarnation and the beginning - of the next. This can be achieved by setting U = 2 seconds, i.e., - to twice the maximum timestamp clock period. This value in - formula [2] leads to an upper bound TRmax = 32K Tps between a host - pair. However, as pointed out above, old duplicate SYN detection - using timestamps leads to a smaller transaction rate bound, 1 Tps, - which is unacceptable. In addition, the timestamp approach is - imperfect; it allows old ACK segments to enter the new connection - where they can cause a disconnect. This happens because old - duplicate ACKs that arrive during TIME-WAIT state generate new - ACKs with the current timestamp [RFC-1337]. - - We therefore conclude that timestamps are not adequate as the - monotonic space M; see Table I. 
However, they may still be useful - to effectively extend some other monotonic number space, just as - they are used in PAWS to extend the TCP sequence number space. - This is discussed below. - - - - - - - -Braden [Page 17] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - 5.2 Current TCP Sequence Numbers - - It is useful to understand why the existing 32-bit TCP sequence - numbers do not form an appropriate monotonic space for - transactions. - - The sequence number sent in an initial SYN is called the Initial - Sequence Number or ISN. According to the TCP specification, an - ISN is to be selected using: - - [3] ISN = (R*T) mod 2**32 - - where T is the real time in seconds (from an arbitrary origin, - fixed when the system is started) and R is a constant, currently - 250 KBps. These ISN values form a monotonic time sequence that - wraps in 4.55 hours = 16380 seconds and has a granularity of 4 - usecs. For transaction rates up to roughly 250K Tps, the ISN - value calculated by formula [3] will be monotonic and could be - used for bypassing the 3-way handshake. - - However, TCP sequence numbers (alone) could not be used to shorten - TIME-WAIT state, because there are several ways that overlap of - the sequence space of successive incarnations can occur (as - described in Appendix to [RFC-1185]). One way is a "fast - connection", with a transfer rate greater than R; another is a - "long" connection, with a duration of approximately 4.55 hours. - TIME-WAIT delay is necessary to protect against these cases. With - the official delay of 240 seconds, formula [1] implies an upper - bound (as RTT -> 0) of TRmax = 268 Tps; with our target MSL of - 2000 sec, TRmax = 32 Tps. These values are unacceptably low. - - To improve this transaction rate, we could use TCP timestamps to - effectively extend the range of the TCP sequence numbers.
- Timestamps would guard against sequence number wrap-around and - thereby allow us to increase R in [3] to exceed the maximum - possible transfer rate. Then sequence numbers for successive - incarnations could not overlap. Timestamps would also provide - safety with an MSL as large as 24 days. We could then set U = 0 - in the TIME-WAIT delay calculation [2]. For example, R = 10**9 - Bps leads to TRmax <= 10**9 Tps. See 2(b) in Table I. These - values would more than satisfy our objectives. - - We should make clear how this proposal, sequence numbers plus - timestamps, differs from the timestamps alone discussed (and - rejected) in the previous section. The difference lies in what is - cached and tested for TAO; the proposal here is to cache and test - BOTH the latest TCP sequence number and the latest TCP timestamp. - In effect, we are proposing to use timestamps to logically extend - - - -Braden [Page 18] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - the sequence space to 64 bits. Another alternative, presented in - the next section, is to directly expand the TCP sequence space to - 64 bits. - - Unfortunately, the proposed solution (TCP sequence numbers plus - timestamps) based on equation [3] would be difficult or impossible - to implement on many systems, which base their TCP implementation - upon a very low granularity software clock, typically O(1 sec). - To adapt the procedure to a system with a low granularity software - clock, suppose that we calculate the ISN as: - - [4] ISN = ( R*Ts*floor(T/Ts) + q*CC) mod 2**32 - - where Ts is the time per tick of the software clock, CC is the - connection count, and q is a constant. That is, the ISN is - incremented by the constant R*Ts once every clock tick and by the - constant q for every new connection. We need to choose q to - obtain the required monotonicity. - - For monotonicity of the ISN's themselves, q=1 suffices. However, - monotonicity during the entire connection requires q = R*Ts. 
This - value of q can be deduced as follows. Let S(T, CC, n) be the - sequence number for byte offset n in a connection with number CC - at time T: - - S(T, CC, n) = (R*Ts*floor(T/Ts) + q*CC + n) mod 2**32. - - For any T1 < T2, we require that: S(T2, CC+1, 0) - S(T1, CC, n) > - 0 for all n. Since R is assumed to be an upper bound on the - transfer rate, we can write down: - - R > n/(T2 - T1), or T2/Ts - T1/Ts > n/(R*Ts) - - Using the relationship: floor(x)-floor(y) > x-y-1 and a little - algebra leads to the conclusion that using q = R*Ts creates the - required monotonic number sequence. Therefore, we consider: - - [5] ISN = R*Ts*(floor(T/Ts) + CC) mod 2**32 - - (which is the algorithm used for ISN selection by BSD TCP). - - For error-free operation, the sequence numbers generated by [5] - must not wrap the sign bit in less than MSL seconds. Since CC - cannot increase faster than TRmax, the safe condition is: - - R* (1 + Ts*TRmax) * MSL < 2**31. - - We are interested in the case: Ts*TRmax >> 1, so this relationship - - - -Braden [Page 19] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - reduces to: - - [6] R * Ts * TRmax * MSL < 2**31. - - This shows a direct trade-off among the maximum effective - bandwidth R, the maximum transaction rate TRmax, and the maximum - segment lifetime MSL. For reasonable limiting values of R, Ts, - and MSL, formula [6] leads to a very low value of TRmax. For - example, with MSL= 2000 secs, R=10**9 Bps, and Ts = 0.5 sec, TRmax - < 2*10**-3 Tps. - - To ease the situation, we could supplement sequence numbers with - timestamps. This would allow an effective MSL of 2 seconds in - [6], since longer times would be protected by differing - timestamps. Then TRmax < 2**30/(R*Ts). The actual enforced MSL - would be increased to 24 days. Unfortunately, TRmax would still - be too small, since we want to support transfer rates up to R ~ - 10**9 Bps. Ts = 0.5 sec would imply TRmax ~ 2 Tps.
On many - systems, it appears infeasible to decrease Ts enough to obtain an - acceptable TRmax using this approach. - - 5.3 64-bit TCP Sequence Numbers - - Another possibility would be to simply increase the TCP sequence - space to 64 bits as suggested in [RFC-1263]. We would also - increase the R value for clock-driven ISN selection, beyond the - fastest transfer rate of which the host is capable. A reasonable - upper limit might be R = 10**9 Bps. As noted above, in a - practical implementation we would use: - - ISN = R*Ts*( floor(T/Ts) + CC) mod 2**64 - - leading to: - - R*(1 + Ts * TRmax) * MSL < 2**63 - - For example, suppose that R = 10**9 Bps, Ts = 0.5, and MSL = 16K - secs (4.4 hrs); then this result implies that TRmax < 10**6 Tps. - We see that adding 32 bits to the sequence space has provided - feasible values for transaction processing. - - 5.4 Connection Counts - - The Connection Count CC is well suited to be the monotonic - sequence M, since it "ticks" exactly once for each new connection - incarnation and is constant within a single incarnation. Thus, it - perfectly separates segments from different incarnations of the - same connection and would allow U = 0 in the TIME-WAIT state delay - - - -Braden [Page 20] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - formula [2]. (Strictly, U cannot be reduced below 1/R = 4 usec, - as noted in Section 4. However, this is of little practical - consequence until the ultimate limits on TRmax are approached). - - Assume that CC is a 32-bit number. To prevent wrap-around in the - sign bit of CC in less than MSL seconds requires that: - - TRmax * MSL < 2**31 - - For example, if MSL = 2000 seconds then TRmax < 10**6 Tp. These - are acceptable limits for transaction processing. However, if - they are not, we could augment CC with TCP timestamps to obtain - very far-out limits, as discussed below. 
- - It would be an implementation choice at the client whether CC is - global for all destinations or private to each destination host - (and maintained in the per-host cache). In the latter case, the - last CC value assigned for each remote host could also be - maintained in the per-host cache. Since there is not typically a - large amount of parallelism in the network connection of a host, - there should be little difference in the performance of these two - different approaches, and the single global CC value is certainly - simpler. - - To augment CC with TCP timestamps, we would bypass a 3-way - handshake if both SEG.CC > cache.CC[A] and SEG.TSval >= - cache.TS[A]. The timestamp check would detect a SYN older than 2 - seconds, so that the effective wrap-around requirement would be: - - TRmax * 2 < 2**31 - - i.e., TRmax < 10**9 Tps. The required MSL would be raised to 24 - days. Using timestamps in this way, we could reduce the size of - CC. For example, suppose CC were 16 bits. Then the wrap-around - condition TRmax * 2 < 2**15 implies that TRmax is 16K. - - Finally, note that using CC to delete old duplicates from earlier - incarnations would not obviate the need for the time-stamp-based - PAWS mechanism to prevent errors within a single incarnation due - to wrapping the 32-bit TCP sequence space at very high transfer - rates. - - 5.5 Conclusions - - The alternatives for monotonic sequence are summarized in Table I. - We see that there are two feasible choices for the monotonic - space: the connection count and 64-bit sequence numbers. Of these - two, we believe that the simpler is the connection count. - - - -Braden [Page 21] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - Implementation of 64-bit sequence numbers would require - negotiation of a new header format and expansion of all variables - and calculations on the sequence space. CC can be carried in an - option and need be examined only once per packet. 
- - We propose to use a simple 32-bit connection count CC, without - augmentation with timestamps, for the transaction extension. This - choice has the advantages of simplicity and directness. Its - drawback is that it adds a third sequence-like space (in addition - to the TCP sequence number and the TCP timestamp) to each TCP - header and to the main line of packet processing. However, the - additional code is in fact very modest. - - We now have a general outline of the proposed TCP extensions for - transactions. - - o A host maintains a 32-bit global connection counter variable CC. - - o The sender's current CC value is carried in an option in every - TCP segment. - - o CC values are cached per host, and the TAO mechanism is used to - bypass the 3-way handshake when possible. - - o In non-SYN segments, the CC value is used to reject duplicates - from earlier incarnations. This allows TIME-WAIT state delay to - be reduced to K*RTO (i.e., U=0 in Eq. [2]). - - - - - - - - - - - - - - - - - - - - - - - - -Braden [Page 22] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - TABLE I: Summary of Monotonic Sequences - - APPROACH TRmax (Tps) Required MSL COMMENTS - __________________________________________________________________ - - 1. Timestamp & PAWS 1 24 days TRmax is - too small - __________________________________________________________________ - - 2. Current TCP Sequence Numbers - - (a) clock-driven - ISN: eq. [3] 268 240 secs TRmax & MSL - too small - - (b) Timestamps& clock- - driven ISN [3] & 10**9 24 days Hard to - R=10**9 implement - - (c) Timestamps & c-dr - ISN: eq. [4] 2**30/(R*Ts) 24 days TRmax too - small. - __________________________________________________________________ - - 3. 64-bit TCP Sequence Numbers - - 2**63/(MSL*R*Ts) MSL Significant - TCP change - e.g., R=10**9 Bps, - MSL = 4.4 hrs, - Ts = 0.5 sec=> - TRmax = 10**6 - __________________________________________________________________ - - 4. 
Connection Counts - - (a) no timestamps 2**31/MSL MSL 3rd sequence - e.g., MSL=2000 sec space - TRmax = 10**6 - - (b) with timestamps 2**30 24 days (ditto) - and PAWS - __________________________________________________________________ - - - - - - - - -Braden [Page 23] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - -6. CONNECTION STATES - - TCP has always allowed a connection to be half-closed. TAO makes a - significant addition to TCP semantics by allowing a connection to be - half-synchronized, i.e., to be open for data transfer in one - direction before the other direction has been opened. Thus, the - passive end of a connection (which receives an initial SYN) can - accept data and even a FIN bit before its own SYN has been - acknowledged. This SYN, data, and FIN may arrive on a single segment - (as in Figure 4), or on multiple segments; packetization makes no - difference to the logic of the finite-state machine (FSM) defining - transitions among connection states. - - Half-synchronized connections have several consequences. - - (a) The passive end must provide an implied initial data window in - order to accept data. The minimum size of this implied window - is a parameter in the specification; we suggest 4K bytes. - - (b) New connection states and transitions are introduced into the - TCP FSM at both ends of the connection. At the active end, new - states are required to piggy-back the FIN on the initial SYN - segment. At the passive end, new states are required for a - half-synchronized connection. - - This section develops the resulting FSM description of a TCP - connection as a conventional state/transition diagram. To develop a - complete FSM, we take a constructive approach, as follows: (1) write - down all possible events; (2) write down the precedence rules that - govern the order in which events may occur; (3) construct the - resulting FSM; and (4) augment it to support TAO. 
In principle, we - do this separately for the active and passive ends; however, the - symmetry of TCP results in the two FSMs being almost entirely - coincident. - - Figure 8 lists all possible state transitions for a TCP connection in - the absence of TAO, as elementary events and corresponding actions. - Each transition is labeled with a letter. Transitions a-g are used - by the active side, and c-i are used by the passive side. Without - TAO, transition "c" (event "rcv ACK(SYN)") synchronizes the - connection, allowing data to be accepted for the user. - - By definition, the first transition for an active (or passive) side - must be "a" (or "i", respectively). During a single instance of a - connection, the active side will progress through some permutation of - the complete sequence of transitions {a b c d e f } or the sequence - {a b c d e f g}. The set of possible permutations is determined by - precedence rules governing the order in which transitions can occur. - - - -Braden [Page 24] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - Label Event / Action - _____ ________________________ - a OPEN / snd SYN - - b rcv SYN [No TAO]/ snd ACK(SYN) - - c rcv ACK(SYN) / - - d CLOSE / snd FIN - - e rcv FIN / snd ACK(FIN) - - f rcv ACK(FIN) / - - g timeout=2MSL / delete TCB - ___________________________________________________ - h passive OPEN / create TCB - - i rcv SYN [No TAO]/ snd SYN, ACK(SYN) - ___________________________________________________ - - Figure 8. Basic TCP Connection Transitions - - - Using the notation "<." to mean "must precede", the precedence rules - are: - - (1) Logical ordering: must open connection before closing it: - - b <. e - - (2) Causality -- cannot receive ACK(x) before x has been sent: - - a <. c and i <. c and d <. f - - (3) Acknowledgments are cumulative - - c <. f - - (4) First packet in each direction must contain a SYN. - - b <. c and b <. 
f - - (5) TIME-WAIT state - - Whenever d precedes e in the sequence, g must be the last - transition. - - - - -Braden [Page 25] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - Applying these rules, we can enumerate all possible permutations of - the events and summarize them in a state transition diagram. Figure - 9 shows the result, with boxes representing the states and directed - arcs representing the transitions. - - ________ ________ - | | h | | - | CLOSED |--------->| LISTEN | - |________| |________| - | | - | a | i - ____V____ ____V___ ________ - | | b | | e | | - | |--------->| |-------------->| | - |________| |________| |________| - / / | / | - / / | c d / | c - / / __V_____ | ____V___ - / / | | e | | | - d | d / | |------------>| | - | | |________| | |________| - | | | | | - | | | ___V____ | - | | | | | | - | | | | | | - | | | |________| | - | | | | | - ____V___ ______V_ | ________ | | - | | b | | e | | | | | - | |------->| |--------->| | | | - |________| |________| | |________| | | - | / | | | - c | / d c | c | d | - | / | | | - _V___V__ ____V___ V_____V_ - | | e | | | | - | |---->| | | | - |________| |________| |________| - | | | - | f | f | f - ____V___ ____V___ ___V____ - | | e | TIME- | g | | - | |---->| WAIT |-->| CLOSED | - |________| |________| |________| - - - Figure 9: Basic State Diagram - - - - -Braden [Page 26] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - Although Figure 9 gives a correct representation of the possible - event sequences, it is not quite correct for the actions, which do - not compose as shown. In particular, once a control bit X has been - sent, it must continue to be sent until ACK(X) is received. This - requires new transitions with modified actions, shown in the - following list. We use the labeling convention that transitions with - the same event part all have the same letter, with different numbers - of primes to indicate different actions. 
- - Label Event / Action - _____ _______________________________________ - b' (=i) rcv SYN [No TAO] / snd SYN,ACK(SYN) - b'' rcv SYN [No TAO] / snd SYN,FIN,ACK(SYN) - d' CLOSE / snd SYN,FIN - e' rcv FIN / snd FIN,ACK(FIN) - e'' rcv FIN / snd SYN,FIN,ACK(FIN) - - - Figure 10 shows the state diagram of Figure 9, with the modified - transitions and with the states used by standard TCP [STD-007] - identified. Those states that do not occur in standard TCP are - numbered 1-5. - - Standard TCP has another implied restriction: a FIN bit cannot be - recognized before the connection has been synchronized, i.e., c <. e. - This eliminates from standard TCP the states 1, 2, and 5 shown in - Figure 10. States 3 and 4 are needed if a FIN is to be piggy-backed - on a SYN segment (note that the states shown in Figure 1 are actually - wrong; the states shown as SYN-SENT and ESTABLISHED are really states - 3 and 4). In the absence of piggybacking the FIN bit, Figure 10 - reduces to the standard TCP state diagram [STD-007]. - - The FSM described in Figure 10 is intended to be applied - cumulatively; that is, parsing a single packet header may lead to - more than one transition. For example, the standard TCP state - diagram includes a direct transition from SYN-SENT to ESTABLISHED: - - rcv SYN,ACK(SYN) / snd ACK(SYN). - - This is transition b followed immediately by c. 
- - - - - - - - - - - -Braden [Page 27] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - ________ ________ - | | h | | - | CLOSED |--------->| LISTEN | - |________| |________| - | | - | a | i - ____V____ ____V___ ________ - | SYN- | b' | SYN- | e' | | - | SENT |--------->|RECEIVED|-------------->| 1 | - |________| |________| |________| - / / | | | - d'/ d'/ | c d' | c | - / / __V_____ | _V______ - / / |ESTAB- | e | | CLOSE- | - | / | LISHED|------------|-->| WAIT | - | | |________| | |________| - | | | | | - | | | _____V__ | - | | | | | | - | | | | 2 | | - | | | |________| | - | | | | | - ____V___ ______V_ | ________ | | - | | b'' | |e''' | | | | | - | 3 |------->| 4 |--------->| 5 | | | - |________| |________| | |________| | | - | / | | | - c | / d c | c | d | - | / | | | - _V___V__ ____V___ V_____V_ - | FIN- | e'' | | | LAST- | - | WAIT-1|---->|CLOSING | | ACK | - |________| |________| |________| - | | | - | f | f | f - ____V___ ____V___ ___V____ - | FIN- | e | TIME- | g | | - | WAIT-2|---->| WAIT |-->| CLOSED | - |________| |________| |________| - - - Figure 10: Basic State Diagram -- Correct Actions - - - Next we introduce TAO. If the TAO test succeeds, the connection - becomes half-synchronized. This requires a new set of states, - mirroring the states of Figure 10, beginning with acceptance of a SYN - (transition "b" or "i"), and ending when ACK(SYN) arrives (transition - - - -Braden [Page 28] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - "c"). Figure 11 shows the result of augmenting Figure 10 with the - additional states for TAO. 
The transitions are defined in the - following table: - - Key for Figure 11: Complete State Diagram with TAO - - - Label Event / Action - _____ ________________________ - - a OPEN / create TCB, snd SYN - b' rcv SYN [no TAO]/ snd SYN,ACK(SYN) - b'' rcv SYN [no TAO]/ snd SYN,FIN,ACK(SYN) - c rcv ACK(SYN) / - d CLOSE / snd FIN - d' CLOSE / snd SYN,FIN - e rcv FIN / snd ACK(FIN) - e' rcv FIN / snd SYN,ACK(FIN) - e'' rcv FIN / snd FIN,ACK(FIN) - e''' rcv FIN / snd SYN,FIN,ACK(FIN) - f rcv ACK(FIN) / - g timeout=2MSL / delete TCB - h passive OPEN / create TCB - i (= b') rcv SYN [no TAO]/ snd SYN,ACK(SYN) - j rcv SYN [TAO OK] / snd SYN,ACK(SYN) - k rcv SYN [TAO OK] / snd SYN,FIN,ACK(SYN) - - - - Each new state in Figure 11 bears a very simple relationship to a - standard TCP state. We indicate this by naming the new state with - the standard state name followed by a star. States SYN-SENT* and - SYN-RECEIVED* differ from the corresponding unstarred states in - recording the fact that a FIN has been sent. The other new states - with starred names differ from the corresponding unstarred states in - being half-synchronized (hence, a SYN bit needs to be transmitted). - - The state diagram of Figure 11 is more general than required for - transaction processing. In particular, it handles simultaneous - connection synchronization from both sides, allowing one or both - sides to bypass the 3-way handshake. It includes other transitions - that are unlikely in normal transaction processing, for example, the - server sending a FIN before it receives a FIN from the client - (ESTABLISHED* -> FIN-WAIT-1* in Figure 11). 
- - - - - - - -Braden [Page 29] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - ________ ________ - | | h | | - | CLOSED |--------------->| LISTEN | - |________| |________| - | / | - a| / i | j - | / | - | / _V______ ________ - | j | |ESTAB- | e' | CLOSE- | - | /---------|----->| LISHED*|------------>| WAIT*| - | / | |________| |________| - | / | | | | | - | / | |d' | c d' | | c - ____V___ / ______V_ | _V______ | _V______ - | SYN- | b' | SYN- | c | |ESTAB- | e | | CLOSE- | - | SENT |------>|RECEIVED|-----|-->| LISHED|----------|->| WAIT | - |________| |________| | |________| | |________| - | | | | | | - | | | | ___V____ | - | | | | | LAST- | | - | d' | d' | d' | d | ACK* | | - | | | | |________| | - | | | | | | - | | ______V_ | ________ |c |d - | k | | FIN- | | e''' | | | | - | /------|-->| WAIT-1*|---|------>|CLOSING*| | | - | / | |________| | |________| | | - | / | | | | | | - | / | | c | | c | | - ____V___ / ____V___ V_____V_ ____V___ V____V__ - | SYN- | b'' | SYN- | c | FIN- | e'' | | | LAST- | - | SENT* |----->|RECEIVD*|---->| WAIT-1 |---->|CLOSING | | ACK | - |________| |________| |________| |________| |________| - | | | - | f | f | f - ___V____ ____V___ ___V____ - | FIN- | e |TIME- | g | | - | WAIT-2 |---->| WAIT |-->| CLOSED | - |________| |________| |________| - - Figure 11: Complete State Diagram with TAO - - - - The relationship between starred and unstarred states is very - regular. As a result, the state extensions can be implemented very - simply using the standard TCP FSM with the addition of two "hidden" - boolean flags, as described in the functional specification memo - - - -Braden [Page 30] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - [TTCP-FS]. - - As an example of the application of Figure 11, consider the minimal - transaction shown in Figure 12. - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - 1. 
SYN-SENT* --> --> CLOSE-WAIT* - (TAO test OK=> - data1->user_B) - - LAST-ACK* - <-- <-- - 2. TIME-WAIT - (TAO test OK, - data2->user_A) - - - 3. TIME-WAIT --> --> CLOSED - - (timeout) - CLOSED - - - Figure 12: Minimal Transaction Sequence - - Sending segment #1 leaves the client end in SYN-SENT* state, which - differs from SYN-SENT state in recording the fact that a FIN has been - sent. At the server end, passing the TAO test enters ESTABLISHED* - state, which passes the data to the user as in ESTABLISHED state and - also records the fact that the connection is half synchronized. Then - the server processes the FIN bit of segment #1, moving to CLOSE-WAIT* - state. - - Moving to CLOSE-WAIT* state should cause the server to send a segment - containing SYN and ACK(FIN). However, transmission of this segment - is deferred so the server can piggyback the response data and FIN on - the same segment, unless a timeout occurs first. When the server - does send segment #2 containing the response data2 and a FIN, the - connection advances from CLOSE-WAIT* to LAST-ACK* state; the - connection is still half-synchronized from B's viewpoint. - - Processing segment #2 at the client again results in multiple - transitions: - - - -Braden [Page 31] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - SYN-SENT* -> FIN-WAIT-1* -> CLOSING* -> CLOSING -> TIME-WAIT - - These correspond respectively to receiving a SYN, a FIN, an ACK for - A's SYN, and an ACK for A's FIN. - - Figure 13 shows a slightly more complex example, a transaction - sequence in which request and response data each require two - segments. This figure assumes that both client and server TCP are - well-behaved, so that e.g., the client sends the single segment #5 to - acknowledge both data segments #3 and #4. SEG.CC values are omitted - for clarity. - - - _T_C_P__A _T_C_P__B - - - 1. SYN-SENT* --> --> ESTABLISHED* - (TAO OK, - data1-> user) - - 2. SYN-SENT* --> --> CLOSE-WAIT* - (data2-> user) - - 3. 
FIN-WAIT-2 <-- <-- CLOSE-WAIT* - (data3->user) - - 4. TIME_WAIT <-- <-- LAST-ACK* - (data4->user) - - 5. TIME-WAIT --> --> CLOSED - - - Figure 13. Multi-Packet Request/Response Transaction - - -7. CONCLUSIONS AND ACKNOWLEDGMENTS - - TCP was designed to be a highly symmetric protocol. This symmetry is - evident in the piggy-backing of acknowledgments on data and in the - common header format for data segments and acknowledgments. On the - other hand, the examples and discussion in this memo are in general - highly unsymmetrical; the actions of a "client" are clearly - distinguished from those of a "server". To explain this apparent - discrepancy, we note the following. Even when TCP is used for - virtual circuit service, the data transfer phase is symmetrical but - the open and close phases are not. A minimal transaction, consisting - of one segment in each direction, compresses the open, data transfer, - and close phases together, and making the asymmetry of the open and - - - -Braden [Page 32] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - close phases dominant. As request and response messages increase in - size, the virtual circuit model becomes increasingly relevant, and - symmetry again dominates. - - TCP's 3-way handshake precludes any performance gain from including - data on a SYN segment, while TCP's full-duplex data-conserving close - sequence ties up communication resources to the detriment of high- - speed transactions. Merely loading more control bits onto TCP data - segments does not provide efficient transaction service. To use TCP - as an effective transaction transport protocol requires bypassing the - 3-way handshake and shortening the TIME-WAIT delay. This memo has - proposed a backwards-compatible TCP extension to accomplish both - goals. It is our hope that by building upon the current version of - TCP, we can give a boost to community acceptance of the new - facilities. 
Furthermore, the resulting protocol implementations will - retain the algorithms that have been developed for flow and - congestion control in TCP [Jacobson88]. - - O'Malley and Peterson have recently recommended against backwards- - compatible extensions to TCP, and suggested instead a mechanism to - allow easy installation of alternative versions of a protocol [RFC- - 1263]. While this is an interesting long-term approach, in the - shorter term we suggest that incremental extension of the current TCP - may be a more effective route. - - Besides the backward-compatible extension proposed here, there are - two other possible approaches to making efficient transaction - processing widely available in the Internet: (1) a new version of TCP - or (2) a new protocol specifically adapted to transactions. Since - current TCP "almost" supports transactions, we favor (1) over (2). A - new version of TCP that retained the semantics of STD-007 but used 64 - bit sequence numbers with the procedures and states described in - Sections 3, 4, and 6 of this memo would support transactions as well - as virtual circuits in a clean, coherent manner. - - A potential application of transaction-mode TCP might be SMTP. If - commands and responses are batched, in favorable cases complete SMTP - delivery operations on short messages could be performed with a - single minimal transaction; on the other hand, the body of a message - may be arbitrarily large. Using a TCP extended as in this memo could - significantly reduce the load on large mail hosts. - - This work began as an elaboration of the concept of TAO, due to Dave - Clark. I am grateful to him and to Van Jacobson, John Wroclawski, - Dave Borman, and other members of the End-to-End Research group for - helpful ideas and critiques during the long development of this work. - I also thank Liming Wei, who tested the initial implementation in Sun - OS. 
- - - -Braden [Page 33] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - -APPENDIX A -- TIME-WAIT STATE AND THE 2-PACKET EXCHANGE - - This appendix considers the implications of reducing TIME-WAIT state - delay below that given in formula [2]. - - An immediate consequence of this would be the requirement for the - server host to accept an initial SYN for a connection in LAST-ACK - state. Without the transaction extensions, the arrival of a new SYN - in LAST-ACK state looks to TCP like a half-open connection, and - TCP's rules are designed to restore correspondence by destroying the - state (through sending a RST segment) at one end or the other. We - would need to thwart this action in the case of transactions. - - There are two different possible ways to further reduce TIME-WAIT - delay. - - (1) Explicit Truncation of TIME-WAIT state - - TIME-WAIT state could be explicitly truncated by accepting a new - sendto() request for a connection in TIME-WAIT state. - - This would allow the ACK(FIN) segment to be delayed and sent - only if a timeout occurs before a new request arrives. This - allows an ideal 2-segment exchange for closely-spaced - transactions, which would restore some symmetry to the - transaction exchange. However, explicit truncation would - represent a significant change in many implementations. - - It might be supposed that even greater symmetry would result if - the new request segment were a SYN,ACK that explicitly - acknowledges the previous reply, rather than a SYN that is - only an implicit acknowledgment. However, the new request - segment might arrive at B to find the server side in either - LAST-ACK or CLOSED state, depending upon whether the ACK(FIN) - had arrived. In CLOSED state, a SYN,ACK would not be - acceptable. Hence, if the client sent an initial SYN,ACK - instead of a SYN segment, there would be a race condition at - the server. - - (2) No TIME-WAIT delay - - TIME-WAIT delay could be removed entirely.
This would imply - that the ACK(FIN) would always be sent (which does not of course - guarantee that it will be received). As a result, the arrival - of a new SYN in LAST-ACK state would be rare. - - This choice is much simpler to implement. Its drawback is that - the server will get a false failure report if the ACK(FIN) is - - - -Braden [Page 34] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - lost. This may not matter in practice, but it does represent a - significant change of TCP semantics. It should be noted that - reliable delivery of the reply is not an issue. The client - enters TIME-WAIT state only after the entire reply, including the - FIN bit, has been received successfully. - - The server host B must be certain that a new request received in - LAST-ACK state is indeed a new SYN and not an old duplicate; - otherwise, B could falsely acknowledge a previous response that has - not in fact been delivered to A. If the TAO comparison succeeds, the - SYN must be new; however, the server has a dilemma if the TAO test - fails. - - In Figure A.1, for example, the reply segment from the first - transaction has been lost; since it has not been acknowledged, it is - still in B's retransmission queue. An old duplicate request, segment - #3, arrives at B and its TAO test fails. B is in the position of - having old state it cannot discard (the retransmission queue) and - needing to build new state to pursue a 3-way handshake to validate - the new SYN. If the 3-way handshake failed, it would need to restore - the earlier LAST-ACK* state. (Compare with Figure 15 "Old Duplicate - SYN Initiates a Reset on Two Passive Sockets" in STD-007). This - would be complex and difficult to accomplish in many implementations. - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - - 1. SYN-SENT* --> --> CLOSE-WAIT* - (TAO test OK; - data1->server) - - 2. (lost) X<-- <-- LAST-ACK* - - (old duplicate) - 3. ...
--> LAST-ACK* - (TAO test fail; - 3-way handshake?) - - Figure A.1: The Server's Dilemma - - - The only practical action B can take when the TAO test fails on a - new SYN received in LAST-ACK state is to ignore the SYN, assuming it - is really an old duplicate. We must pursue the possible consequences - - - -Braden [Page 35] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - of this action. - - Section 3.1 listed four possible reasons for failure of the TAO test - on a legitimate SYN segment: (1) no cached state, (2) out-of-order - delivery of SYNs, (3) wraparound of CCgen relative to the cached - value, or (4) the M values advance too slowly. We are assuming that - there is a cached CC value at B (otherwise, the SYN cannot be - acceptable in LAST-ACK state). Wrapping the CC space is very - unlikely and probably impossible; it is difficult to imagine - circumstances which would allow the new SYN to be delivered but not - the ACK(FIN), especially given the long wraparound time of CCgen. - - This leaves the problem of out-of-order delivery of two nearly- - concurrent SYNs for different ports. The second to be delivered may - have a lower CC option and thus be locked out. This can be solved by - using a new CCgen value for every retransmission of an initial SYN. - - Truncation of TIME-WAIT state and acceptance of a SYN in LAST-ACK - state should take place only if there is a cached CC value for the - remote host. Otherwise, a SYN arriving in LAST-ACK state is to be - processed by normal TCP rules, which will result in a RST segment - from either A or B. - - This discussion leads to a paradigm for rejecting old duplicate - segments that is different from TAO. This alternative scheme is - based upon the following: - - (a) Each retransmission of an initial SYN will have a new value of - CC, as described above. - - This provision takes care of reordered SYNs. - - (b) A host maintains a distinct CCgen value for each remote host.
- This value could easily be maintained in the same cache used for - the received CC values, e.g., as cache.CCgen[]. - - Once the caches are primed, it should always be true that - cache.CCgen[B] on host A is equal to cache.CC[A] on host B, and - the next transaction from A will carry a CC value exactly 1 - greater. Thus, there is no problem of wraparound of the CC - value. - - (c) A new SYN is acceptable if its SEG.CC > cache.CC[client], - otherwise the SYN is ignored as an old duplicate. - - This alternative paradigm was not adopted because it would be a - somewhat greater perturbation of TCP rules, because it may not have - the robustness of TAO, and because all of its consequences may not be - - - -Braden [Page 36] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - understood. - - -REFERENCES - - [Birrell84] Birrell, A. and B. Nelson, "Implementing Remote - Procedure Calls", ACM TOCS, Vo. 2, No. 1, February 1984. - - [Clark88] Clark, D., "The Design Philosophy of the Internet - Protocols", ACM SIGCOMM '88, Stanford, CA, August 1988. - - [Clark89] Clark, D., Private communication, 1989. - - [Garlick77] Garlick, L., R. Rom, and J. Postel, "Issues in Reliable - Host-to-Host Protocols", Proc. Second Berkeley Workshop on - Distributed Data Management and Computer Networks, May 1977. - - [HR-COMM] Braden, R., Ed., "Requirements for Internet Hosts -- - Communication Layers", STD-003, RFC-1122, October 1989. - - [Jacobson88] Jacobson, V., "Congestion Avoidance and Control", - SIGCOMM '88, Stanford, CA., August 1988. - - [Jacobson90] Jacobson, V., private communication, 1990. - - [Liskov90] Liskov, B., Shrira, L., and J. Wroclawski, "Efficient - At-Most-Once Messages Based on Synchronized Clocks", ACM SIGCOMM - '90, Philadelphia, PA, September 1990. - - [RFC-955] Braden, R., "Towards a Transport Service Transaction - Protocol", RFC-955, September 1985. 
- - [RFC-1185] Jacobson, V., Braden, R., and Zhang, L., "TCP Extension - for High-Speed Paths", RFC-1185, October 1990. - - [RFC-1263] O'Malley, S. and L. Peterson, "TCP Extensions Considered - Harmful", RFC-1263, University of Arizona, October 1991. - - [RFC-1323] Jacobson, V., Braden, R., and Borman, D., "TCP - Extensions for High Performance, RFC-1323, February 1991. - - [RFC-1337] Braden, R., "TIME-WAIT Assassination Hazards in TCP", - RFC-1337, May 1992. - - [STD-007] Postel, J., "Transmission Control Protocol - DARPA - Internet Program Protocol Specification", STD-007, RFC-793, - September 1981. - - - - -Braden [Page 37] - -RFC 1379 Transaction TCP -- Concepts November 1992 - - - [TTCP-FS] Braden, R., "Transaction TCP -- Functional - Specification", Work in Progress, September 1992. - - [Watson81] Watson, R., "Timer-based Mechanisms in Reliable - Transport Protocol Connection Management", Computer Networks, Vol. - 5, 1981. - -Security Considerations - - Security issues are not discussed in this memo. - -Author's Address - - Bob Braden - University of Southern California - Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292 - - Phone: (310) 822-1511 - EMail: Braden@ISI.EDU - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Braden [Page 38] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1470.txt b/kernel/picotcp/RFC/rfc1470.txt deleted file mode 100644 index 5ccb856..0000000 --- a/kernel/picotcp/RFC/rfc1470.txt +++ /dev/null @@ -1,10755 +0,0 @@ - - - - - - -Network Working Group R. Enger -Request for Comments: 1470 ANS -FYI: 2 J. Reynolds -Obsoletes: 1147 ISI - Editors - June 1993 - - - FYI on a Network Management Tool Catalog: - Tools for Monitoring and Debugging TCP/IP Internets - and Interconnected Devices - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard. Distribution of this memo is - unlimited. 
- -Abstract - - The goal of this FYI memo is to provide an update to FYI 2, RFC 1147 - [1], which provided practical information to site administrators and - network managers. New and/or updated tools are listed in this RFC. - Additional descriptions are welcome, and should be sent to: noctools- - entries@merit.edu. - -Introduction - - A static document cannot incorporate references to the latest tools - nor recent revisions to the older catalog entries. To provide a more - timely and responsive information source, the NOCtools catalog is - available on-line via the Internet and Usenet. - - news comp.networks.noctools - ftp wuarchive.wustl.edu:/doc/noctools - - Because of publication delays and other factors, some of the entries - in this catalog may be out of date. The reader is urged to consult - the on-line service to obtain the most up-to-date information. - - The index provided in this document reflects the current contents of - the on-line documentation. - - The NOCtools2 Working Group of the Internet Engineering Task Force - (IETF) has compiled this revised catalog. Future revisions will be - incorporated into the on-line NOCtools catalog. The reader is - encouraged to submit new or revised entries for (near-immediate) - electronic publication. - - - -NOCTools2 Working Group [Page 1] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - The tools described in this catalog are in no way endorsed by the - IETF. For the most part, we have neither evaluated the tools in this - catalog, nor validated their descriptions. Most of the descriptions - of commercial tools have been provided by vendors. Caveat Emptor. - -Acknowledgements - - This catalog is the result of work on the part of the NOCTools2 - Working Group of the User Services Area of the IETF. The following - individuals made especially notable contributions: Chris Myers, - Darren Kinley, Gary Malkin, Mohamed Ellozy, and Mike Patton. 
- -Current Postings - - The current contents of the NOCtools catalog may be retrieved via - anonymous FTP from wuarchive.wustl.edu. The entries are stored as - individual files in the directory /doc/noctools. - -"No-Writeups" Appendix - - This section contains references to tools which are known to exist, - but which have not been fully cataloged. If anyone wishes to author - an entry for one of these tools please contact us at: - - noctools-request@merit.edu - - Keep in mind that if these or other tools are included in the future, - they will be available in the on-line version of the catalog. - - Each mention is separated by a form feed (FF) for improved readability. - If you intend to actually print-out this section of the catalog, then - you should probably strip-out the FF characters. - -How to Submit/Update an Entry - - 1) review the template included below to determine what - information you will need to collect, - 2) review the keywords to see what your indexing options are, - 3) assemble (update) catalog entry to include results of - 1) and 2). - 4) Submit your entry using either of the following two methods: - - a) Post your submission to: comp.internet.noctools.submissions - b) Email your submission to: noctools-entries@merit.edu - - New entries will be circulated automatically upon reception. As time - permits, the NOCtools editors will review recent submissions and - incorporate them into the master indexes. Enquiries regarding the - - - -NOCTools2 Working Group [Page 2] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - status of a submission should be E-Mailed to: - - noctools-request@merit.edu - - Those submitting an entry to the catalog should ensure that any E- - mail addresses provided are correct and functional. Either the - catalog editors or prospective users of your tool may wish to reach - you. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 3] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -TEMPLATE - - NAME - - - - KEYWORDS - [[,[,...,]]]; - [[,[,...,]]]; - [[,[,...,]]]; - [[,[,...,]]]; - [[,[,...,]]]. - - - ABSTRACT - - - - - - MECHANISM - - - - - - CAVEATS - - - - - - BUGS - - - - - - LIMITATIONS - - - - - - HARDWARE REQUIRED - - - - - - -NOCTools2 Working Group [Page 4] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SOFTWARE REQUIRED - - - - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - - - - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - - - - DATE OF MOST RECENT UPDATE TO THIS CATALOG ENTRY - - -Keywords - - This catalog uses "keywords" for terse characterizations of the - tools. Keywords are abbreviated attributes of a tool or its use. To - allow cross-comparison of tools, uniform keyword definitions have - been developed, and are given below. Following the definitions, - there is an index of catalog entries by keyword. - -Keyword Definitions - - The keywords are always listed in a prefined order, sorted first by - the general category into which they fall, and then alphabetically. - The categories that have been defined for management tool keywords - are: - - o the general management area to which a tool - relates or a tool's functional role; - - o the network resources or components that are - managed; - - o the mechanisms or methods a tool uses to - perform its functions; - - o the operating system and hardware environment - of a tool; and - - o the characteristics of a tool as a hardware - product or software release. - - - - -NOCTools2 Working Group [Page 5] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - The keywords used to describe the general management area or - functional role of a tool are: - - Alarm - a reporting/logging tool that can trigger on specific - events within a network. 
- - Analyzer - a traffic monitor that reconstructs and interprets pro- - tocol messages that span several packets. - - Benchmark - a tool used to evaluate the performance of network com- - ponents. - - Control - a tool that can change the state or status of a remote - network resource. - - Debugger - a tool that by generating arbitrary packets and moni- - toring traffic, can drive a remote network component to - various states and record its responses. - - Generator - a traffic generation tool. - - Manager - a distributed network management system or system com- - ponent. - - Map - a tool that can discover and report a system's topology - or configuration. - - Reference - a tool for documenting MIB structure or system confi- - guration. - - Routing - a packet route discovery tool. - - Security - a tool for analyzing or reducing threats to security. - - Status - a tool that remotely tracks the status of network com- - ponents. - - - -NOCTools2 Working Group [Page 6] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Traffic - a tool that monitors packet flow. - - The keywords used to identify the network resources or components - that a tool manages are: - - Bridge - a tool for controlling or monitoring LAN bridges. - - CHAOS - a tool for controlling or monitoring implementations of - the CHAOS protocol suite or network components that use - it. - - DECnet - a tool for controlling or monitoring implementations of - the DECnet protocol suite or network components that - use it. - - DNS - a Domain Name System debugging tool. - - Ethernet - a tool for controlling or monitoring network components - on ethernet LANs. - - FDDI - a tool for controlling or monitoring network components - on FDDI LANs or WANs. - - IP - a tool for controlling or monitoring implementations of - the TCP/IP protocol suite or network components that - use it. 
- - OSI - a tool for controlling or monitoring implementations of - the OSI protocol suite or network components that use - it. - - NFS - a Network File System debugging tool. - - Ring - a tool for controlling or monitoring network components - on Token Ring LANs. - - - - - -NOCTools2 Working Group [Page 7] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SMTP - an SMTP debugging tool. - - Star - a tool for controlling or monitoring network components - on StarLANs. - - The keywords used to describe a tool's mechanism are: - - CMIS - a network management system or component based on - CMIS/CMIP, the Common Management Information System and - Protocol. - - Curses - a tool that uses the "curses" tty interface package. - - Eavesdrop - a tool that silently monitors communications media - (e.g., by putting an ethernet interface into "promiscu- - ous" mode). - - NMS - the tool is a component of or queries a Network Manage- - ment System. - - Ping - a tool that sends packet probes such as ICMP echo mes- - sages; to help distinguish tools, we do not consider - NMS queries or protocol spoofing (see below) as probes. - - Proprietary - a distributed tool that uses proprietary communications - techniques to link its components. - - RMON - a tool which employs the RMON extensions to SNMP. - - SNMP - a network management system or component based on SNMP, - the Simple Network Management Protocol. - - Spoof - a tool that tests operation of remote protocol modules - by peer-level message exchange. - - X - a tool that uses X-Windows. - - - -NOCTools2 Working Group [Page 8] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - The keywords used to describe a tool's operating environment are: - - DOS - a tool that runs under MS-DOS. - - HP - a tool that runs on Hewlett-Packard systems. - - Macintosh - a tool that runs on Macintosh personal computers. - - OS/2 - a tool that runs under the OS/2 operating system. 
- - Standalone - an integrated hardware/software tool that requires only - a network interface for operation. - Sun - a tool that runs on Sun Microsystems platforms. - (binary distribution built for use on a Sun.) - - UNIX - a tool that runs under 4.xBSD UNIX or related OS. - - VMS - a tool that runs under DEC's VMS operating system. - - The keywords used to describe a tool's characteristics as a hardware - or software acquisition are: - - Free - a tool is available at no charge, though other restric- - tions may apply (tools that are part of an OS distribu- - tion but not otherwise available are not listed as - "free"). - - Library - a tool packaged with either an Application Programming - Interface (API) or object-level subroutines that may be - loaded with programs. - - Sourcelib - a collection of source code (subroutines) upon which - developers may construct other tools. - - - - - - - -NOCTools2 Working Group [Page 9] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -Tools Indexed by Keywords - - Following is an index of the most up-to-date catalog entries sorted - by keyword, which is available via: - - news comp.networks.noctools.tools - ftp wuarchive.wustl.edu:/doc/noctool - - This index can be used to locate the tools with a particular - attribute: tools are listed under each keyword that characterizes - them. The keywords and the subordinate lists of tools under them are - in alphabetical order. 
- - Alarm - ----- - CMIP Library - Dual Manager - Eagle - EMANATE - EtherMeter - LanProbe - LANWatch - MONET - NetMetrix Load Monitor - NetMetrix Protocol Analyzer - NETMON for Windows - NETscout - NOCOL - SNMP Libraries and Utilities from Empire Technologies - SNMP Libraries and Utilities from SNMP Research - snmpd from Empire Technologies - SpiderMonitor - XNETMON from SNMP Research - xnetmon from Wellfleet - - Analyzer - -------- - LANVista - LANWatch - NetMetrix Protocol Analyzer - NETscout - PacketView - Sniffer - SpiderMonitor - - - - - - - -NOCTools2 Working Group [Page 10] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Benchmark - --------- - hammer & anvil - iozone - LADDIS - LANVista - nhfsstone - SPIMS - spray - ttcp - XNETMON from SNMP Research - - CMIS - ---- - CMIP library - Generic Managed System - MIB Browser - - Control - ------- - CMIP Library - Dual Manager - Eagle - MIB Manager from Empire Technologies - MONET - NETMON for Windows - proxyd - SNMP Libraries and Utilities from Empire Technologies - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System - snmpd from Empire Technologies - TokenVIEW - XNETMON from SNMP Research - - Debugger - -------- - Ethernet Box II - LANVista - NetMetrix Traffic Generator - ping from UCB - SPIMS - XNETMON from SNMP Research - - Generator - --------- - hammer & anvil - LADDIS - LANVista - - - -NOCTools2 Working Group [Page 11] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - NetMetrix Traffic Generator - nhfsstone - ping - ping from UCB - Sniffer - SpiderMonitor - spray - TTCP - - Manager - ------- - Beholder - CMIP Library - CMU SNMP Distribution - decaddrs by Wellfleet - Dual Manager - EMANATE - Ethernet Box II - getone by Wellfleet - Interactive Network Map - LanProbe - LANVista - MIB Manager from Empire Technologies - MONET - NetLabs CMOT Agent - NetLabs SNMP Agent - NETMON for Windows - NETscout - NNStat - NOCOL - OverVIEW - SAS/CPE for Open Systems 
Software - SNMP Development Kit - SNMP Libraries and Utilities from Empire Technologies - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System - snmpd from Empire Technologies - tokenview - Tricklet - Wollongong-Manager - XNETMON from SNMP Research - XNETMON from Wellfleet - xnetperfmon - - Map - --- - decaddrs by Wellfleet - Dual Manager - - - -NOCTools2 Working Group [Page 12] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - etherhostprobe - EtherMeter - Interactive Network Map - LanProbe - NETMON for Windows - Network Integrator I - NPRV - SNMP Libraries and Utilities from SNMP Research - XNETMON by SNMP Research - XNETMON by Wellfleet - - Reference - --------- - EMANATE - ethernet-codes - HyperMIB - MIB Manager from Empire Technologies - XNETMON - - Routing - ------- - arp - decaddrs by Wellfleet - etherhostprobe - getone by Wellfleet - hopcheck - MONET - net_monitor - NETMON for Windows - netstat - NPRV - ping from UCB - query - traceroute - - Security - -------- - Computer Security Checklist - Dual Manager - Eagle - EMANATE - LAN Patrol - SNMP Libraries and Utilities from SNMP Research - XNETMON by SNMP Research - xnetperfmon - - - - - - -NOCTools2 Working Group [Page 13] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Status - ------ - Beholder - CMIP Library - CMU SNMP - DiG - dnsstats - doc - Dual Manager - EMANATE - fping - getone by Wellfleet - host - Internet Rover - lamers - LanProbe - mconnect - MONET - net_monitor - Netlabs CMOT Agent - Netlabs SNMP Agent - NETscout - NNStat - NOCOL - NPRV - OverVIEW - ping - ping from UCB - proxyd from SNMP Research - SAS/CPE - SNMP Development Kit - SNMP Libraries and Utilities from Empire Technologies - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System - PSI SNMP - snmpd from Empire Technologies - snmpd from SNMP Research - TokenVIEW - Tricklet - vrfy - XNETMON by SNMP Research - xnetmon by Wellfleet - xnetperfmon - xup - - - - - - 
- -NOCTools2 Working Group [Page 14] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Traffic - ------- - etherfind - EtherMeter - Ethernet Box II - EtherView - getethers - LAN Patrol - LanProbe - LANVista - LANWatch - ENTM - MONET - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - NETMON by Mitre - NETscout - netwatch - Network Integrator I - nfswatch - nhfsstone - NNStat - ositrace - PacketView - Sniffer - SpiderMonitor - spray - tcpdump - tcplogger - trpt - ttcp - XNETMON by SNMP Research - - Bridge - ------ - decaddrs by Wellfleet - EMANATE - MIB Manager from Empire Technologies - MONET - proxyd by SNMP Research - SAS/CPE - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System - snmpd from SNMP Research - XNETMON from SNMP Research - - - - -NOCTools2 Working Group [Page 15] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CHAOS - ----- - Interactive Network Map - LANWatch - - DECnet - ------ - decaddrs by Wellfleet - LANVista - LANWatch - MONET - net_monitor - NetMetrix Protocol Analyzer - NETMON for Windows - NETscout - Sniffer - SNMP Libraries and Utilities from SNMP Research - SpiderMonitor - XNETMON from SNMP Research - xnetperfmon from SNMP Research - - DNS - --- - DiG - dnsstats - doc - lamers - LANWatch - NetMetrix Protocol Analyzer - NOCOL - - Ethernet - -------- - arp - Beholder - Eagle - EMANATE - etherfind - etherhostprobe - EtherMeter - Ethernet Box II - ethernet-codes - EtherView - getethers - LAN Patrol - LanProbe - LANVista - LANWatch - - - -NOCTools2 Working Group [Page 16] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - ENTM - Interactive Network Map - MONET - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - NETMON for Windows - NETscout - netwatch - Network Integrator I - nfswatch - NNStat - PacketView - proxyd from SNMP Research - SAS/CPE - Sniffer - 
SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - snmpd from SNMP Research - SpiderMonitor - tcpdump - XNETMON from SNMP Research - xnetperfmon from SNMP Research - - FDDI - ---- - EMANATE - ethernet-codes - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - nfswatch - SAS/CPE - SNMP Libraries and utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - snmpd from SNMP Research - XNETMON from SNMP Research - - IP - -- - arp - CMU SNMP - Dual Manager - Eagle - EMANATE - etherfind - - - -NOCTools2 Working Group [Page 17] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - etherhostprobe - EtherView - fping - getone from Wellfleet - hammer & anvil - hopcheck - Internet Rover - LanProbe - LANVista - LANWatch - ENTM - Interactive Network Map - MIB Manager from Empire Technologies - MONET - net_monitor - Netlabs CMOT Agent - Netlabs SNMP Agent - NetMetrix Load Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - NETMON by Mitre - NETMON for Windows - NETscout - netstat - netwatch - nfswatch - nhfsstone - NNStat - NOCOL - NPRV - OverVIEW - PacketView - ping - ping from UCB - proxyd from SNMP Research - query - SAS/CPE - SNMP Development Kit - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - PSI SNMP - snmpd from Empire Technologies - snmpd from SNMP Research - PSI SNMP - SpiderMonitor - SPIMS - spray - tcpdump - - - -NOCTools2 Working Group [Page 18] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - tcplogger - traceroute - trpt - ttcp - XNETMON from SNMP Research - xnetmon from Wellfleet - xnetperfmon from SNMP Research - - OSI - --- - CMIP Library - Dual Manager - EMANATE - LANVista - LANWatch - Netlabs CMOT Agent - NetMetrix Protocol Analyzer - NETMON for Windows - NETscout - NOCOL - ositrace - proxyd from SNMP Research - SAS/CPE - Sniffer - SNMP 
Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - snmpd from SNMP Research - SpiderMonitor - SPIMS - XNETMON from SNMP Research - xnetperfmon from SNMP Research - - NFS - --- - etherfind - EtherView - iozone - LADDIS - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NETscout - nfswatch - nhfsstone - Sniffer - tcpdump - - - - - - -NOCTools2 Working Group [Page 19] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Ring - ---- - Eagle - EMANATE - Interactive Network Map - LANVista - LANWatch - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - NETMON by Mitre - NETMON for Windows - NETscout - netwatch - PacketView - proxyd from SNMP Research - Sniffer - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - snmpd from SNMP Research - TokenVIEW - XNETMON from SNMP Research - xnetperfmon from SNMP Research - - SMTP - ---- - host - Internet Rover - LANWatch - mconnect - NetMetrix Protocol Analyzer - Sniffer - vrfy - - Star - ---- - EMANATE - Interactive Network Map - LAN Patrol - LANWatch - NETMON for Windows - NETscout - proxyd from SNMP Research - Sniffer - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - snmpd from SNMP Research - - - -NOCTools2 Working Group [Page 20] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - XNETMON from SNMP Research - xnetperfmon from SNMP Research - - Curses - ------ - Eagle - Internet Rover - net_monitor - nfswatch - NOCOL - PSI SNMP - - Eavesdrop - --------- - etherfind - Ethernet Box II - EtherView - LAN Patrol - LANVista - LANWatch - ENTM - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetNetrix Traffic Generator - NETMON from Mitre - NETscout - netwatch - nfswatch - NNStat - OSITRACE - PacketView - Sniffer - SpiderMonitor - tcplogger - trpt - - NMS - --- - CMU SNMP - decaddrs 
from Wellfleet - Dual Manager - EMANATE - EtherMeter - Ethernet Box II - getone from Wellfleet - Interactive Network Map - MONET - - - -NOCTools2 Working Group [Page 21] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Netlabs CMOT Agent - Netlabs SNMP Agent - NETMON for Windows - NETscout - NNStat - NOCOL - OverVIEW - proxyd from SNMP Research - SNMP Development Kit - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - PSI SNMP - snmpd from Empire Technologies - snmpd from SNMP Research - TokenVIEW - XNETMON from SNMP Research - xnetmon from Wellfleet - xnetperfmon from SNMP Research - - Ping - ---- - etherhostprobe - fping - getethers - hopcheck - Interactive Network Map - Internet Rover - LANWatch - net_monitor - NOCOL - NPRV - ping - ping from UCB - spray - traceroute - ttcp - XNETMON from SNMP Research - xup - - Proprietary - ----------- - Eagle - EtherMeter - Ethernet Box II - LanProbe - LANVista - TokenVIEW - - - - -NOCTools2 Working Group [Page 22] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - RMON - ---- - Beholder - - SNMP - ---- - Beholder - CMU SNMP - decaddrs from Wellfleet - Dual Manager - EMANATE - getone from Wellfleet - Interactive Network Map - MIB Manager from Empire Technologies - MONET - Netlabs SNMP Agent - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - NETMON for Windows - NETscout - NOCOL - OverVIEW - proxyd from SNMP Research - SNMP Development Kit - SNMP Libraries and utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - PSI SNMP - snmpd from Empire Technologies - snmpd from SNMP Research - Wollongong-Manager - XNETMON from SNMP Research - xnetmon from Wellfleet - xnetperfmon from SNMP Research - - Spoof - ----- - DiG - doc - Internet Rover - host - LADDIS - mconnect - nhfsstone - NOCOL - query - SPIMS - - - -NOCTools2 Working Group [Page 23] - -RFC 1470 FYI: Network 
Management Tool Catalog June 1993 - - - vrfy - - X - - - Dual Manager - Interactive Network Map - MIB Manager from Empire Technologies - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - SAS/CPE - PSI SNMP - XNETMON from SNMP Research - xnetperfmon from SNMP Research - xup - - DEC - --- - Wollongong-Manager - - DOS - --- - Computer Security Checklist - Ethernet Box II - hammer & anvil - hopcheck - iozone - LAN Patrol - LANVista - netmon - NETMON for Windows - netwatch - OverVIEW - PacketView - ping - SAS/CPE - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - snmpd from SNMP Research - TokenVIEW - Wollongong-Manager - xnetperfmon from SNMP Research - - - - - - - - -NOCTools2 Working Group [Page 24] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HP - -- - iozone - SAS/CPE - xup - - Macintosh - --------- - HyperMIB - - OS/2 - ---- - Beholder - Tricklet - - Standalone - ---------- - LANVista - Sniffer - SNMP Packaged Agent System from SNMP Research - SpiderMonitor - - Sun - --- - Avatar SunSNMPD - Wollongong Manager - - UNIX - ---- - arp - CMIP Library - CMU SNMP - decaddrs from Wellfleet - DiG - doc - dnsstats - Eagle - etherfind - etherhostprobe - EtherView - fping - getethers - getone from Wellfleet - host - Interactive Network Map - Internet Rover - iozone - LADDIS - - - -NOCTools2 Working Group [Page 25] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - lamers - mconnect - MIB Manager from Empire Technologies - MONET - net_monitor - Dual Manager - NetMetrix Load Monitor - NetMetrix NFS Monitor - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - NETMON from Mitre - NETscout - netstat - Network Integrator I - nfswatch - nhfsstone - NNStat - NOCOL - OSITRACE - ping - ping from UCB - proxyd from SNMP Research - query - SAS/CPE - SNMP Development Kit - SNMP Libraries and Utilities from Empire Technologies - SNMP 
Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - PSI SNMP - snmpd from Empire Technologies - snmpd from SNMP Research - SPIMS - spray - tcpdump - tcplogger - traceroute - Tricklet - trpt - ttcp - vrfy - XNETMON from SNMP Research - xnetmon from Wellfleet - xnetperfmon from SNMP Research - - VMS - --- - arp - ENTM - - - -NOCTools2 Working Group [Page 26] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - fping - net_monitor - netstat - NPRV - ping - SNMP Libraries and Utilities from SNMP Research - tcpdump - traceroute - ttcp - xnetperfmon from SNMP Research - - Free - ---- - arp - Beholder - CMIP Library - CMU SNMP Distribution - DiG - dnsstats - doc - ENTM - fping - getethers - hammer & anvil - hopcheck - host - Interactive Network Map - Internet Rover - iozone - lamers - net_monitor - netmon from Mitre - netstat - netwatch - nfswatch - nhfsstone - NNStat - NOCOL - NPRV - OSITRACE - PING - ping from UCB - query - SNMP Development Kit - tcpdump - tcplogger - traceroute - Tricklet - - - -NOCTools2 Working Group [Page 27] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - trpt - ttcp - vrfy - - Library - ------- - CMIP Library - CMU SNMP - Dual Manager - NetMetrix Protocol Analyzer - NetMetrix Traffic Generator - proxyd from SNMP Research - SAS/CPE - - Sourcelib - --------- - Beholder - CMIP Library - CMU SNMP - EMANATE - HyperMIB - Interactive Network Map - Internet Rover - LANWatch - MIB Manager from Empire Technologies - net_monitor - NETMON for Windows - NOCOL - proxyd from SNMP Research - SNMP Development Kit - SNMP Libraries and Utilities from Empire Technologies - SNMP Libraries and Utilities from SNMP Research - SNMP Packaged Agent System from SNMP Research - snmpd from SNMP Research - SpiderMonitor - Tricklet - XNETMON from SNMP Research - xnetperfmon from SNMP Research - -Tool Descriptions - - This section is an updated collection of brief descriptions of tools - for managing TCP/IP 
internets. These entries are in alphabetical - order, by tool name. - - The entries all follow a standard format. Immediately after the NAME - of a tool are its associated KEYWORDS. Keywords are terse - descriptions of the purposes or attributes of a tool. A more - - - -NOCTools2 Working Group [Page 28] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - detailed description of a tool's purpose and characteristics is given - in the ABSTRACT section. The MECHANISM section describes how a tool - works. In CAVEATS, warnings about tool use are given. In BUGS, - known bugs or bug-report procedures are given. LIMITATIONS describes - the boundaries of a tool's capabilities. HARDWARE REQUIRED and - SOFTWARE REQUIRED relate the operational environment a tool needs. - Finally, in AVAILABILITY, pointers to vendors, online repositories, - or other sources for a tool are given. - - Where tool names conflict, the vendor name is used as well. For - example, MITRE, and SNMP Research each submitted an updated - description of a tool called, "NETMON". These tools were - independently developed, are functionally different, and run in - different environments. MITRE's tool is listed as "NETMON_MITRE," - and the tool from SNMP Research as "NETMON_WINDOWS_SNMP_RESEARCH". - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 29] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog ARP - - NAME - arp - - KEYWORDS - routing; ethernet, IP;; UNIX, VMS; free. - - ABSTRACT - Arp displays and can modify the internet-to-ethernet - address translations tables used by ARP, the address - resolution protocol. - - MECHANISM - The arp program accesses operating system memory to - read the ARP data structures. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - Only the super user can modify ARP entries. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX or related OS, or VMS. 
- - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - - Available via anonymous FTP from uunet.uu.net, in - directory bsd-sources/src/etc. Available with 4.xBSD - UNIX and related operating systems. For VMS, available - as part of TGV MultiNet IP software package, as well as - Wollongong's WIN/TCP and Process Software Corporation's - TCPware for VMS. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - This entry maintained by the NOCtools editors. - Send email to noctools-request@merit.edu. - - - - - - - -NOCTools2 Working Group [Page 30] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog AVATAR-SNMP-TOOLKIT - - NAME - SNMP Application Development Toolkit - - KEYWORDS - manager;;SNMP;;sourcelib. - - ABSTRACT - snmpapi is an api toolkit for developing SNMP - applications and agents. The toolkit is simple and - very fast that can be used for any type of - application. It is very well suited for embedded - systems such as bridges or routers. An example MIB II - agent for Sun Sparcstations is provided. snmpapi is - distributed in source form only. - - MECHANISM - snmpapi is a library of C functions. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None. - - HARDWARE REQUIRED - No restrictions. - - AVAILABILITY - Available now. For more information, send e-mail to - info@avatar.com. - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 31] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog AVATAR-SUNSNMPD - - NAME - sunsnmpd - - KEYWORDS - manager;;snmp;sun;. - - ABSTRACT - sunsnmpd is a fully supported SNMP agent with MIB II - support for Sun Sparscations running SunOS 4.1 or - higher. sunsnmpd supports both SNMP GET and SET - operations. - - MECHANISM - sundnmpd is a daemon process which starts up at boot - time from the rc.local file. It uses /dev/kmem to access - kernel structures. - - CAVEATS - None. - - BUGS - None known. 
- - LIMITATIONS - Must be started by a super user. - - HARDWARE REQUIRED - Sun Sparcstations. - - AVAILABILITY - Available now. Site licensing only. For more information, - send e-mail to info@avatar.com. - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 32] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog ChameLAN-100 - - NAME - ChameLAN 100 - - KEYWORDS - analyzer, benchmark, debugger, generator, map, - reference, status, traffic; bridge, DECnet, ethernet, - FDDI, IP, OSI, NFS, ring; eavesdrop, SNMP, X; - standalone, UNIX. - - ABSTRACT - - Tekelec's ChameLAN 100 is a portable diagnostic system - for monitoring and simulation of FDDI, Ethernet and - Token Ring networks -- simultaneously. Protocol - analysis of multiple topologies, as well as mixed - topoloies simultaneously, is a key feature of - the product family. Tekelec's proprietary FDDI - hardware guarantees complete real-time analysis of - networks and network components at the full ring - bandwidth of 125 Mbps. It passively connects to the - network and captures 100 percent of the data, measures - performance and isolates real-time problems. - - The simulation option offers full bandwidth load - generation that allows you to create and simulate any - network condition. It gives you the ability to inject - errors and misformed frames. A set of - confidence tests allow simple evaluation of new - equipment. A ring map feature displays network - topology and status of all nodes via the SMT - process. - - Monitoring of FDDI, Ethernet and Token Ring allows the - user to: view network status in real time; view - network, node, or node pair statistics; capture - frames; control capture using trigger and filter - capabilities; view real-time statistics; view captured - frames in decoded format; and view the last frame - transmitted by each station. 
- - The following Real-Time Network Statistics of FDDI, - Ethernet and Token Ring networks is displayed: frame - rate, runts, byte rate, jabbers, CRC/align errors, and - collisions. - - Product developers can use the ChameLAN 100 to observe - - - -NOCTools2 Working Group [Page 33] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - and control various events to help debug their FDDI, - Ethernet and Token Ring products. End users can - perform real-time monitoring to test and - diagnose problems that may occur when developing, - installing or managing FDDI, Ethernet and Token Ring - networks and network products. End users can use the - ChameLAN 100 to aid in the installation and - maintenance of Ethernet and Token Ring networks. To - isolate specific network trouble spots the ChameLAN - 100 uses filtering and triggering techniques for data - capture. Higher level protocol decode includes - TCP/IP, OSI and DECnet protocol suites. Protocol - decode of IPX, SNMP, XTP, and AppleTalk are also - supported. Development of additional protocol decodes - is also under development. The ChameLAN 100 family - also offers a Protocol Management Development System - (PMDS) that enables users to develop custom protocol - decode suites. - - The FDDI, Ethernet and Token Ring hardware interfaces - feature independent processing power. Real-time data - is monitored unobtrusively at full bandwidth without - affecting network activity. Real-time data may also - be saved to a 120MB or optional 200MB hard disk drive - for later analysis. FDDI data is captured at 125 megabits - per second (Mbps), Ethernet at 10 Mbps and Token Ring - at 4 or 16 Mbps. - - MECHANISM - This portable, standalone unit incorporates the power - of UNIX, X-Windows and Motif. Its UNIX-based - programming interface facilitates development of - customized monitoring and simulation applications. The - ChameLAN 100 may connect to the network at any - location using standard equipment. 
Standard graphical - Motif/X-Windows and TCP/IP allow remote control - through Ethernet and 10Base T interfaces. Tekelec - also offers a rackmounted model -- ChameLAN 100-X. - Both models can be controlled via a Sun Workstation - remotely. - - CAVEATS - none. - - BUGS - none known. - - - - - -NOCTools2 Working Group [Page 34] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LIMITATIONS - none reported. - - HARDWARE REQUIRED - None. The ChameLAN 100 is a self-contained unit, and - includes its own interface cards. It installs - into a network with standard interface - connectors. - - SOFTWARE REQUIRED - None. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - The ChameLAN 100 product family is available - commercially. For more information or a free demo, - call or write: - - 1.800.tek.elec - Tekelec - 26580 West Agoura Road - Calabasas, CA 91302 - Phone: 818.880.5656 - Fax: 818.880.6993 - - The ChameLAN 100 is listed on the GSA schedule. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Todd Koch - Public Relations Specialist - 818.880.7718 - Internet: todd.koch@tekelec.com - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 35] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog CMU_SNMP - - NAME - The CMU SNMP Distribution - - KEYWORDS - manager, status; IP; NMS, SNMP; UNIX; free, sourcelib. - - ABSTRACT - The CMU SNMP Distribution includes source code for an - SNMP agent, several SNMP client applications, an ASN.1 - library, and supporting documentation. - - The agent compiles into about 10 KB of 68000 code. The - distribution includes a full agent that runs on a - Kinetics FastPath2/3/4, and is built into the KIP - appletalk/ethernet gateway. The machine independent - portions of this agent also run on CMU's IBM PC/AT - based router. - - The applications are designed to be useful in the real - world.
Information is collected and presented in a - useful format and is suitable for everyday status - monitoring. Input and output are interpreted - symbolically. The tools can be used without - referencing the RFCs. - - MECHANISM - SNMP. - - CAVEATS - None. - - BUGS - None reported. Send bug reports to - sw0l+snmp@andrew.cmu.edu. ("sw0l" is "ess double-you - zero ell.") - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - The KIP gateway agent runs on a Kinetics FastPath2/3/4. - Otherwise, no restrictions. - - SOFTWARE REQUIRED - The code was written with efficiency and portability in - mind. The applications compile and run on the follow- - - - -NOCTools2 Working Group [Page 36] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - ing systems: IBM PC/RT running ACIS Release 3, Sun3/50 - running SUNOS 3.5, and the DEC microVax running Ultrix - 2.2. They are expected to run on any system with a - Berkeley socket interface. - - AVAILABILITY - This distribution is copyrighted by CMU, but may be - used and sold without permission. Consult the copy- - right notices for further information. The distribu- - tion is available by anonymous FTP from the host - lancaster.andrew.cmu.edu (128.2.13.21) as the files - pub/cmu-snmp.9.tar, and pub/kip-snmp.9.tar. The former - includes the libraries and the applications, and the - latter is the KIP SNMP agent. - - Please direct questions, comments, and bug reports to - sw0l+snmp@andrew.cmu.edu. ("sw0l" is "ess double-you - zero ell.") If you pick up this package, please send a - note to the above address, so that you may be notified - of future enhancements/changes and additions to the set - of applications (several are planned). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 37] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog COMPUTER-SECURITY-CHECKLIST - - NAME - Computer Security Checklist - - KEYWORDS - security; DOS. 
- - ABSTRACT - This program consists of 858 computer security ques- - tions divided up in thirteen sections. The program - presents the questions to the user and records their - responses. After answering the questions in one of the - thirteen sections, the user can generate a report from - the questions and the user's answers. The thirteen - sections are: telecommunications security, physical - access security, personnel security, systems develop- - ment security, security awareness and training prac- - tices, organizational and management security, data and - program security, processing and operations security, - ergonomics and error prevention, environmental secu- - rity, and backup and recovery security. - - The questions are weighted as to their importance, and - the report generator can sort the questions by weight. - This way the most important issues can be tackled - first. - - MECHANISM - The questions are displayed on the screen and the user - is prompted for a single keystroke reply. When the end - of one of the thirteen sections is reached, the answers - are written to a disk file. The question file and the - answer file are merged to create the report file. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - No restrictions. - - - - - -NOCTools2 Working Group [Page 38] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SOFTWARE REQUIRED - DOS operating system. - - AVAILABILITY - A commercial product available from: - C.D., Ltd. - P.O. Box 58363 - Seattle, WA 98138 - (206) 243-8700 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 39] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog CMIP-LIBRARY - - NAME - CMIP Library - - KEYWORDS - manager; osi; cmis; unix; free, sourcelib. 
- - ABSTRACT - - The CMIP Library implements the functionality of the - Common Management Information Service/Protocol as in - the full international standards (ISO 9595, ISO 9596) - published in 1990. It is designed to work with the - ISODE package and can act as a building block for the - construction of CMIP-based agent and manager - applications. - - MECHANISM - The CMIP library uses ISO ROS, ACSE and ASN.1 - presentation, as implemented in ISODE, to provide its - service. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None known. - - HARDWARE REQUIRED - Has been tested on SUN 3 and SUN 4 architectures. - - SOFTWARE REQUIRED - The ISODE protocol suite, BSD UNIX. - - AVAILABILITY - The CMIP library and related management tools built - upon it, known as OSIMIS (OSI Management Information - Service), are publicly available from University - College London, England via FTP and FTAM. To obtain - information regarding a copy send email to - osimis-request@cs.ucl.ac.uk or call +44 71 380 7366. - - - - - - -NOCTools2 Working Group [Page 40] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog DECADDRS - - NAME - decaddrs, decaroute, decnroute, xnsroutes, bridgetab - - KEYWORDS - manager, map, routing; bridge, DECnet; NMS, SNMP; UNIX. - - ABSTRACT - These commands display private MIB information from - Wellfleet systems. They retrieve and format for - display values of one or several MIB variables from the - Wellfleet Communications private enterprise MIB, using - the SNMP (RFC1098). In particular these tools are used - to examine the non-IP modules (DECnet, XNS, and Bridg- - ing) of a Wellfleet system. - - Decaddrs displays the DECnet configuration of a - Wellfleet system acting as a DECnet router, showing the - static parameters associated with each DECnet inter- - face. Decaroute and decnroute display the DECnet - inter-area and intra-area routing tables (that is area - routes and node routes). 
Xnsroutes displays routes - known to a Wellfleet system acting as an XNS router. - Bridgetab displays the bridge forwarding table with the - disposition of traffic arriving from or directed to - each station known to the Wellfleet bridge module. All - these commands take an IP address as the argument and - can specify an SNMP community for the retrieval. One - SNMP query is performed for each row of the table. - Note that the Wellfleet system must be operating as an - IP router for the SNMP to be accessible. - - MECHANISM - Management information is exchanged by use of SNMP. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Distributed and supported for Sun 3 systems. - - - - -NOCTools2 Working Group [Page 41] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SOFTWARE REQUIRED - Distributed and supported for SunOS 3.5 and 4.x. - - AVAILABILITY - Commercial product of: - Wellfleet Communications, Inc. - 12 DeAngelo Drive - Bedford, MA 01730-2204 - (617) 275-2400 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 42] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog DIG - - NAME - DiG - - KEYWORDS - status; DNS; spoof; UNIX; free. - - ABSTRACT - DiG (domain information groper), is a command line tool - which queries DNS servers in either an interactive or a - batch mode. It was developed to be more - convenient/flexible than nslookup for gathering perfor- - mance data and testing DNS servers. - - MECHANISM - Dig is built on a slightly modified version of the bind - resolver (release 4.8). - - CAVEATS - none. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX. - - AVAILABILITY - DiG is available via anonymous FTP from venera.isi.edu - in pub/dig.2.0.tar.Z. 
- - - - - - - - - - - - - - -NOCTools2 Working Group [Page 43] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog EMANATE_SNMP_RESEARCH - - NAME - EMANATE: Enhanced MANagement Agent Through Extensions - from SNMP Research. - - KEYWORDS - alarm, control, manager, reference, security, status; - bridge, Ethernet, FDDI, IP, OSI, ring, star; - NMS, SNMP; - sourcelib. - - ABSTRACT - The EMANATE system provides a run-time extensible SNMP - agent that dynamically reconfigures an agent's MIB - without having to recompile, relink, or restart the - agent. An EMANATE capable SNMP agent can support zero, - one, or many subagents and dynamically reconfigure to - connect or disconnect those subagents' MIBs. - - The EMANATE system consists of several logically - independent components and subsystems: - - o Master SNMP agent which contains an API to communicate - with subagents. - o Subagents which implement various MIBS. - o Subagent Developer's Kit which contains tools to assist - in the implementation of subagents. - o EMANATE libraries which provide the API for the - subagent. - - MECHANISM - A concise API allows a standard means of communication - between the master and subagents. System dependent - mechanisms are employed for transfer of information - between the master and subagents. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - - - - - -NOCTools2 Working Group [Page 44] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HARDWARE REQUIRED - Multiple platforms including PC's, workstations, hosts, - and servers are supported. Contact SNMP Research for - more details. - - SOFTWARE REQUIRED - C compiler. 
- - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 45] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog ETHERFIND_SUN - - NAME - etherfind - - KEYWORDS - traffic; ethernet, IP, NFS; eavesdrop; UNIX. - - ABSTRACT - Etherfind examines the packets that traverse a network - interface, and outputs a text file describing the - traffic. In the file, a single line of text describes - a single packet: it contains values such as protocol - type, length, source, and destination. Etherfind can - print out all packet traffic on the ethernet, or - traffic for the local host. Further packet filtering - can be done on the basis of protocol: IP, ARP, RARP, - ICMP, UDP, ND, TCP, and filtering can also be done - based on the source, destination addresses as well as - TCP and UDP port numbers. - - MECHANISM - In usual operations, and by default, etherfind puts the - interface in promiscuous mode. In 4.3BSD UNIX and - related OSs, it uses a Network Interface Tap (NIT) to - obtain a copy of traffic on an ethernet interface. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - Minimal protocol information is printed. Can only be - run by the super user. The syntax is painful. - - HARDWARE REQUIRED - Ethernet. - - SOFTWARE REQUIRED - SunOS. - - AVAILABILITY - Executable included in Sun OS "Networking Tools and - Programs" software installation option. 
- - - - - -NOCTools2 Working Group [Page 46] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog ETHERNET-CODES - - NAME - ethernet-codes - - KEYWORDS - reference; - ethernet, fddi; - ; - ; - ; - - ABSTRACT - Mike Patton of MIT LCS has compiled a very - comprehensive list of the IEEE numbers used on - Ethernet and FDDI (with some permutation). - This file contains collected information on the - various codes used on IEEE 802.3 and EtherNet. - There are three "pages": type codes, vendor - codes, and the uses of multicast (including - broadcast) addresses. - - MECHANISM - FTP the file and use it like a secret decoder ring. - - CAVEATS - Since this information is from collected wisdom, - there are certainly omissions. - - BUGS - Mike welcomes any further additions. - They can be sent to a special mailbox that he has set up: - - MAP=EtherNet-codes@LCS.MIT.Edu - - LIMITATIONS - See caveats. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - No restrictions. - - - - - - - - -NOCTools2 Working Group [Page 47] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - The file is stored as flat, non-compressed ASCII text. - It can be FTP'ed from: - ftp.lcs.mit.edu - - Retrieve the file: - /pub/map/EtherNet-codes - - To submit additions or obtain further assistance, send email to: - MAP=EtherNet-codes@LCS.MIT.Edu - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - This entry maintained by the NOCtools editors.
- Send email to noctools-request@merit.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 48] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog GENERIC-MANAGED-SYSTEM - - NAME - Generic Managed System - - KEYWORDS - manager; osi; cmis; unix; free, sourcelib - - ABSTRACT - The Generic Managed System (GMS) implements the - functions that would be common to any OSI managed - system. These include the parsing of CMIS requests, - selection of managed objects according to the scoping - and filtering rules, handling of notifications and - event forwarding discriminators etc. The intention is - that the implementors should use the GMS as a basis - for their own managed object implementations. A - support environment is provided to assist with this. - - MECHANISM - The GMS uses the UCL CMIP library plus a library of - C++ objects representing common managed objects and - attribute types. - - CAVEATS - The system is still experimental, is subject to change - and is not yet well documented. - - BUGS - See above. - - LIMITATIONS - None known. - - HARDWARE REQUIRED - Has been tested on SUN 3 and SUN 4 architectures. - - SOFTWARE REQUIRED - The ISODE protocol suite, BSD UNIX, UCL CMIP Library, - GNU C++ (g++). - - AVAILABILITY - The CMIP library and related management tools built - upon it, known as OSIMIS (OSI Management Information - Service), are publicly available from University - College London, England via FTP and FTAM. To obtain - information regarding a copy send email to - osimis-request@cs.ucl.ac.uk or call +44 71 380 7366.
- - - -NOCTools2 Working Group [Page 49] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog GETETHERS - - NAME - getethers - - KEYWORDS - Traffic; Ethernet; Ping; UNIX; Free - - ABSTRACT - Getethers runs through all addresses on an ethernet - segment (a.b.c.1 to a.b.c.254) and pings each address, - and then determines the ethernet address for that - host. It produces a list, in either plain ASCII, the - file format for the Excelan Lanalyzer, or the file - format for the Network General Sniffer, of - hostname/ethernet address pairs for all hosts on the - local network. The plain ASCII list optionally - includes the vendor name of the ethernet card in - each system, to aid in the determination of the - identity of unknown systems. - - MECHANISM - Getethers uses a raw IP socket to generate ICMP echo - requests and receive ICMP echo replies, and then - examines the kernel ARP table to determine the - ethernet address of each responding system. - - CAVEATS - Assumes that the ethernet it is looking at is either - a Class C IP network, or part of a Class B IP network - that is subnetted with a netmask of 255.255.255.0. - (This is easy to change, but it's compiled in.) - - BUGS - None known. - - LIMITATIONS - None. - - HARDWARE REQUIRED - Has been tested on Sun-3 and Sun-4 (SPARC) systems - under SunOS 4.1.x, DEC VAXes under 4.3BSD. - - SOFTWARE REQUIRED - Runs under SunOS 4.x and 4.3BSD; should be easy to - port to any other Berkeley-like system. Requires - raw sockets and the ioctl calls to get at the ARP - table. - - - -NOCTools2 Working Group [Page 50] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - Public domain, and freely distributable. Available - via anonymous FTP from harbor.ecn.purdue.edu; also has - been posted to comp.sources.unix. The current version - is Version 1.4 from May 1992.
- - Contact point: - Dave Curry - Purdue University - Engineering Computer Network - 1285 Electrical Engineering Bldg. - West Lafayette, IN 47907-1285 - davy@ecn.purdue.edu - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Dave Curry (see address above). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 51] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog GETONE_WELLFLEET - - NAME - getone, getmany, getroute, getarp, getaddr, getif, - getid. - - KEYWORDS - manager, routing, status; IP; NMS, SNMP; UNIX. - - ABSTRACT - These commands retrieve and format for display values - of one or several MIB variables (RFC1066) using the - SNMP (RFC1098). Getone and getmany retrieve arbitrary - MIB variables; getroute, getarp, getaddr, and getif - retrieve and display tabular information (routing - tables, ARP table, interface configuration, etc.), and - getid retrieves and displays system name, identifica- - tion and boot time. - - Getone retrieves and displays - the value of the designated MIB variable from the - specified target system. The SNMP community name to be - used for the retrieval can also be specified. Getmany - works similarly for groups of MIB variables rather than - individual values. The name of each variable, its - value and its data type is displayed. Getroute returns - information from the ipRoutingTable MIB structure, - displaying the retrieved information in an accessible - format. Getarp behaves similarly for the address - translation table; getaddr for the ipAddressTable; and - getif displays information from the interfaces table, - supplemented with information from the ipAddressTable. - Getid displays the system name, identification, ipFor- - warding state, and the boot time and date. All take a - system name or IP address as an argument and can - specify an SNMP community for the retrieval. One SNMP - query is performed for each row of the table. 
- - MECHANISM - Queries SNMP agent(s). - - CAVEATS - None. - - BUGS - None known. - - - - - -NOCTools2 Working Group [Page 52] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Distributed and supported for Sun 3 systems. - - SOFTWARE REQUIRED - Distributed and supported for SunOS 3.5 and 4.x. - - AVAILABILITY - Commercial product of: - Wellfleet Communications, Inc. - 12 DeAngelo Drive - Bedford, MA 01730-2204 - (617) 275-2400 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 53] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog HAMMER_ANVIL - - NAME - hammer & anvil - - KEYWORDS - benchmark, generator; IP; DOS; free. - - ABSTRACT - Hammer and Anvil are the benchmarking programs for IP - routers. Using these tools, gateways have been tested - for per-packet delay, router-generated traffic over- - head, maximum sustained throughput, etc. - - MECHANISM - Tests are performed on a gateway in an isolated - testbed. Hammer generates packets at controlled rates. - It can set the length and interpacket interval of a - packet stream. Anvil counts packet arrivals. - - CAVEATS - Hammer should not be run on a live network. - - BUGS - None reported. - - LIMITATIONS - Early versions of hammer could not produce inter-packet - intervals shorter than 55 usec. - - HARDWARE REQUIRED - Hammer runs on a PC/AT or compatible, and anvil - requires a PC or clone. Both use a Micom Interlan - NI5210 for LAN interface. - - SOFTWARE REQUIRED - MS-DOS. - - AVAILABILITY - Hammer and anvil are copyrighted, though free. Copies - are available from pub/eutil on husc6.harvard.edu. - - - - - - - - - - -NOCTools2 Working Group [Page 54] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog HOPCHECK - - NAME - hopcheck - - KEYWORDS - routing; IP; ping; DOS; free. 
- - ABSTRACT - Hopcheck is a tool that lists the gateways traversed by - packets sent from the hopcheck-resident PC to a desti- - nation. Hopcheck uses the same mechanism as traceroute - but is for use on IBM PC compatibles that have ethernet - connections. Hopcheck is part of a larger TCP/IP pack- - age that is known as ka9q that is for use with packet - radio. Ka9q can coexist on a PC with other TCP/IP - packages such as FTP Inc's PC/TCP, but must be used - independently of other packages. Ka9q was written by - Phil Karn. Hopcheck was added by Katie Stevens, - dkstevens@ucdavis.edu. Unlike traceroute, which - requires a UNIX kernel mod, hopcheck will run on the - standard, unmodified ka9q release. - - MECHANISM - See the description in traceroute. - - CAVEATS - See the description in traceroute. - - BUGS - None known. - - HARDWARE REQUIRED - IBM PC compatible with ethernet network interface card; - ethernet card supported through FTP spec packet driver. - - SOFTWARE REQUIRED - DOS. - - AVAILABILITY - Free for radio amateurs and educational institutions; - others should contact Phil Karn, karn@ka9q.bellcore.com. - Available via anonymous FTP at ucdavis.edu, in the - directory "dist/nethop". - - - - - - - -NOCTools2 Working Group [Page 55] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog INTERNET_ROVER - - NAME - Internet Rover - - KEYWORDS - status; IP, SMTP; curses, ping, spoof; UNIX; free, - sourcelib. - - ABSTRACT - Internet Rover is a prototype network monitor that uses - multiple protocol "modules" to test network functional- - ity. This package consists of two primary pieces of - code: the data collector and the problem display. - - There is one data collector that performs a series of - network tests, and maintains a list of problems with - the network. There can be many display processes all - displaying the current list of problems which is useful - in a multi-operator NOC. 
- - The display task uses curses, allowing many terminal - types to display the problem file either locally or - from a remote site. Full source is provided. The data - collector is easily configured and extensible. Contri- - butions such as additional protocol modules, and shell - script extensions are welcome. - - MECHANISM - A configuration file contains a list of nodes, - addresses, NodeUp? protocol test (ping in most cases), - and a list of further tests to be performed if the node - is in fact up. Modules are included to test TELNET, - FTP, and SMTP. If the configuration contains a test - that isn't recognized, a generic test is assumed, and a - filename is checked for existence. This way users can - create scripts that create a file if there is a prob- - lem, and the data collector simply checks the existence - of that file to determine if there is problem. - - CAVEATS - None. - - BUGS - None known. - - - - - - -NOCTools2 Working Group [Page 56] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LIMITATIONS - This tool does not yet have the capability to perform - actions based on the result of the test. Rather, it is - intended for a multi-operator environment, and simply - displays a list of what is wrong with the net. - - HARDWARE REQUIRED - This software is known to run on Suns and IBM RTs. - - SOFTWARE REQUIRED - Curses, 4.xBSD UNIX socket programming libraries, BSD - ping. - - AVAILABILITY - Full source available via anonymous FTP from merit.edu - (35.1.1.42) in the ~ftp/pub/inetrover directory. - Source and executables are public domain and can be - freely distributed for non-commercial use. This pack- - age is unsupported, but bug reports and fixes may be - sent to: wbn@merit.edu. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 57] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog IOZONE - - NAME - iozone - - KEYWORDS - benchmark; nfs;; dos,hp,unix,vmx; free. - - ABSTRACT - Software to assess the sequential file I/O capability - of a system. May be useful as reference to compare - against results obtained when files are accessed via - NFS, Andrew, etc. - - MECHANISM - This test writes a X MEGABYTE sequential file in Y - byte chunks, then rewinds it and reads it back. - [The size of the file should be big enough to factor - out the effect of any disk cache.]. Finally, - IOZONE deletes the temporary file. Options allow one to - vary X and Y. In addition, 'auto test' runs IOZONE - repeatedly using record sizes from 512 to 8192 bytes - (adjustable), and file sizes from 1 to 16 megabytes - (adjustable). It creates a table of results. - - CAVEATS - The file is written (filling any cache buffers), and - then read. If the cache is >= X MB, then most if not - all the reads will be satisfied from the cache. - However, if it is less than or equal to - .5X MB, then NONE of the reads will be satisfied from - the cache. This is because after the file is written, - a .5X MB cache will contain the upper .5 MB of the - test file, but we will start reading - from the beginning of the file (data which is no - longer in the cache). - - In order for this to be a fair test, the length of the - test file must be AT LEAST 2X the amount of disk cache - memory for your system. If not, you are really - testing the speed at which your CPU - can read blocks out of the cache (not a fair test). - - BUGS - none known at this time. - - - - - - -NOCTools2 Working Group [Page 58] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LIMITATIONS - IOZONE does not normally test the raw I/O speed of - your disk or system. It tests the speed of - sequential I/O to actual files.
- Therefore, this measurement factors in the efficiency - of your machine's file system, operating system, C - compiler, and C runtime library. It produces a - measurement which is the number of bytes - per second that your system can read or write to a file. - - HARDWARE REQUIRED - - This program has been ported and tested on the - following computer operating systems: - -Vendor Operating System Notes on compiling IOzone ------------------------------------------------------------------------ -Apollo Domain/OS no cc switches -- BSD domain -AT&T UNIX System V R4 -AT&T 6386WGS AT&T UNIX 5.3.2 define SYSTYPE_SYSV -Generic AT&T UNIX System V R3 may need cc -DSVR3 -Convergent Unisys/AT&T SVR3 cc -DCONVERGENT -o iozone iozone.c -Digital Equipment ULTRIX V4.1 -Digital Equipment VAX/VMS V5.4 see below ** -Digital Equipment VAX/VMS (POSIX) -Hewlett-Packard HP-UX 7.05 -IBM AIX Ver. 3 rel. 1 -Interactive UNIX System V R3 -Microsoft MS-DOS 3.3 tested Borland, Microsoft C -MIPS RISCos 4.52 -NeXt NeXt OS 2.x -OSF OSF/1 -Portable! POSIX 1003.1-1988 may need to define _POSIX_SOURCE -QNX QNX 4.0 -SCO UNIX System V/386 3.2.2 -SCO XENIX 2.3 -SCO XENIX 3.2 -Silicon Graphics UNIX cc -DSGI -o iozone iozone.c -Sony Microsystems UNIX same as MIPS -Sun Microsystems SUNOS 4.1.1 -Tandem Computers GUARDIAN 90 1. call the source file IOZONEC - 2. C/IN IOZONEC/IOZONE;RUNNABLE - 3. RUN IOZONE -Tandem Computers Non-Stop UX - -** for VMS, define iozone as a foreign command via this DCL command: - - $IOZONE :== $SYS$DISK:[]IOZONE.EXE - - - -NOCTools2 Working Group [Page 59] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - this lets you pass the command line arguments to IOZONE - - SOFTWARE REQUIRED - OS as shown in the hardware listing above. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - Author: Bill Norcott - 1060 Hyde Avenue - San Jose, CA 95129 - norcott_bill@tandem.com - - Availability: - This tool has been posted to comp.sources.misc.
- It is available from the usual archive sites. - Program can be located using ARCHIE or other - servers. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - This entry is maintained by the noctools editors. - Send email to noctools-request@merit.edu. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 60] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog LADDIS - - NAME - LADDIS - - KEYWORDS - benchmark, generator; - NFS; - spoof; - unix; - free. - - ABSTRACT - - "LADDIS: A Multi-Vendor and Vendor-Neutral SPEC NFS - Benchmark", Bruce Nelson, LADDIS Group & Auspex Systems. - - Over the past 24 months, engineers from Legato, - Auspex, Data General, DEC, Interphase, and Sun - (LADDIS) met regularly to create the LADDIS NFS - benchmark: an unbiased, standard, vendor-independent, - scalable NFS performance test. - - The purpose of the LADDIS benchmark is to give users a - credible and undisputed test of NFS performance, and - to give vendors a publishable standard performance - measure that customers can use for load planning, - system configuration, and equipment buying decisions. - Toward this end, the LADDIS benchmark is being adopted - by SPEC (the System Performance Evaluation - Cooperative, creators of SPECmarks) as the first - member of SPEC's System-level File Server (SFS) - benchmark suite." - - "In particular, we have had unexpected interest from - some router vendors in using LADDIS to both rate and - stress-test IP routers. This is because LADDIS can - send back-to-back full-size packet trains, and because - it can generate a 90%-Ethernet util on simulated - "real" NFS workloads, just like routers encounter in - the real world. But LADDIS is for local Ethernet or - FDDI nets only, not WAN." - - MECHANISM - Generates NFS requests and measures responsiveness of - the server. 
- - - - - -NOCTools2 Working Group [Page 61] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - "LADDIS is not released yet by SPEC, although a free - beta version, quite stable, is available now as - PRE-LADDIS. So you might want to put PRE-LADDIS in - your listing, noting that full LADDIS - availability from SPEC is expected by the end of 1992." - - BUGS - The licensee is requested to direct beta test comments - via electronic mail to: - "spec-preladdis-comments@riscee.pko.dec.com". - - This alias will forward all comments to the SPECSFS - mailing list (which includes the LADDIS Group). - - LIMITATIONS - LADDIS is for local Ethernet or FDDI nets only, not - WAN. - - HARDWARE REQUIRED - A host with LAN connectivity. Presumably, a host with - enough horsepower to generate an adequate work load. - - SOFTWARE REQUIRED - LADDIS is a sophisticated Unix-based NFS traffic - generator program. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - Date: Mon, 10 Feb 92 13:12:20 PST - From: bnelson (Bruce Nelson) - - Dear Person: - - The SPEC PRE-LADDIS beta test process became - operational on Monday, February 3, 1992. This email - describes the process as announced during the LADDIS - Group's presentation at UniForum '92 and - also at Interop '91. The content of the beta test - license and the license request process are consistent - with the proposals approved by the SPEC Steering - Committee at the January 1992 meeting in Milpitas, - California. - - The SPEC PRE-LADDIS beta test will consist of one beta - test version of PRE-LADDIS distributed ONLY by - electronic mail. The SPEC PRE-LADDIS Beta test - software is licensed by SPEC, not by the LADDIS - Group. - - - -NOCTools2 Working Group [Page 62] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - To obtain the PRE-LADDIS Beta test software, an - individual must: - - 1.
Request the SPEC PRE-LADDIS beta test License by - electronic mail to - "spec-preladdis-beta-test@riscee.pko.dec.com" with a - subject line of "Request SPEC PRE-LADDIS Beta Test - License". - 2. Print a hardcopy of the license and sign. - 3. Attach a cover letter written on the individual's - company letterhead requesting the PRE-LADDIS Beta - Test Kit. - 4. U.S. Mail the signed license and cover letter to: - SPEC PRE-LADDIS Beta Test - c/o NCGA, 2722 Merrilee Drive, Suite 200 - Fairfax, VA 22031 - - After completing these steps, the SPEC PRE-LADDIS beta - test kit will be emailed to the requestor from - riscee.pko.dec.com. The licensee is requested to - direct beta test comments via electronic mail - to "spec-preladdis-comments@riscee.pko.dec.com". This - alias will forward all comments to the SPECSFS mailing - list (which includes the - LADDIS Group). - - Note that PRE-LADDIS is ONLY available through - electronic mail and ONLY through the process listed - above in steps 1-4. If you do not have internet email - available to you (which is unlikely if you are - receiving THIS email), you must arrange delivery of - PRE-LADDIS through some email-capable part of your - organization, not through LADDIS members like Auspex, - DEC, Sun, etc. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - This entry is maintained by the NOCtools editors. - Send E-mail to noctools-request@merit.edu. - - - - - - - - - - - - - -NOCTools2 Working Group [Page 63] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog LAN_PATROL - - NAME - LAN Patrol - - KEYWORDS - security, traffic; ethernet, star; eavesdrop; DOS. - - ABSTRACT - LAN Patrol is a full-featured network analyzer that - provides essential information for effective fault and - performance management. It allows network managers to - easily monitor user activity, find traffic overloads, - plan for growth, test cable, uncover intruders, balance - network services, and so on. 
LAN Patrol uses state of - the art data collection techniques to monitor all - activity on a network, giving an accurate picture of - how it is performing. - - LAN Patrol's reports can be saved as ASCII files to - disk, and imported into spreadsheet or database pro- - grams for further analysis. - - MECHANISM - The LAN Patrol interface driver programs a standard - interface card to capture all traffic on a network seg- - ment. The driver operates from the background of a - standard PC, maintaining statistics for each station on - the network. The information can be viewed on the PC's - screen, or as a user-defined report output either to - file or printer. - - CAVEATS - None. Normal operation is completely passive, making - LAN Patrol transparent to the network. - - BUGS - None known. - - LIMITATIONS - LAN Patrol can monitor up to 10,000 packets/sec on an - AT class PC, and is limited to monitoring a maximum of - 1024 stations for intervals of up to 30 days. - - Because LAN Patrol operates at the physical level, it - will only see traffic for the segment on which it is - installed; it cannot see traffic across bridges. - - - - -NOCTools2 Working Group [Page 64] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HARDWARE REQUIRED - Computer: IBM PC/XT/AT, PS/2 Model 30, or compatible. - Requires 512K memory and a hard drive or double-sided - disk drive. - - Display: Color or monochrome text. Color display - allows color-coding of traffic information. - - Ethernet, StarLAN, LattisNet, or StarLAN 10 network - interface card. - - SOFTWARE REQUIRED - PC DOS, MS-DOS version 3.1 or greater. - - AVAILABILITY - LAN Patrol may be purchased through network dealers, - or directly from: - Legend Software, Inc.
- Phone: (201) 227-8771 - FAX: (201) 906-1151 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 65] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog LANVista - - NAME - LANVista - - KEYWORDS - analyzer, benchmark, debugger, generator, manager, traffic; - DECnet, Ethernet, IP, OSI, Ring; Eavesdrop, Proprietary; - DOS, Standalone. - - ABSTRACT - CXR/Digilog's LANVista family of protocol and statistical - analyzers provide the tools to troubleshoot an Ethernet and - Token Ring 4/16Mbps network. LANVista lets you capture - frames to RAM and or disk, generate traffic for stress - testing, test your network cable for fault isolation, and - decode all 7 layers of many popular protocol stacks. - LANVista's 100 family offers exceptional price/performance - and a wide range of options. Combined with an - integrated upgrade path to the fully distributed LANVista - 200 system, the 100 line provides a reasonably priced - entry into LAN management and protocol analysis. - - All LANVista models are fully operable under Microsoft - Windows. Under Windows, LANVista can be operated in - the background, gathering data and alarms as other - tasks are completed. Displayed data may easily be - cut from LANVista and pasted into other Windows - applications such as Excel, Lotus 1-2-3, Harvard - Graphics, etc. - - The versatile LANVista family can also be remotely - controlled through the use of PC Anywhere, Commute, - Carbon Copy, or other PC remote control packages. - This feature allows the use of "co-pilot" mode which - enables an operator at the central site to guide and - train a remote operator through network management or - analysis tasks. - - All LANVista models provide features vital to effective - network management and troubleshooting. 
Basic - capabilities include: Network database, statistics - based on the entire network and on a node basis, Token - Ring functional address statistics, Bridged traffic - statistics, Protocol statistics, logging of statistics - to a printer or file of user definable alarms, Hardware - Pre-Capture filtering, Post capture filtering, Playback of - captured data, Traffic simulation and On-line context - - - -NOCTools2 Working Group [Page 66] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - sensitive Help. - - Protocol Interpreters used for decoding network traffic - supported by LANVista include: TCP/IP, DECnet, Banyan - Vines, XNS/MS-Net, AppleTalk, IBM Token Ring, Novell, - 3Com 3+ Open, SNMP and OSI. - - MECHANISM - LANVista is available in three forms. A kit version which - consists of a plug-in PC card and Master software, a self - contained unit that packages the kit version in a portable - PC, and a Distributed system. The LANVista distributed - system allows slave units placed anywhere in the world to - be controlled from a single central location for - centralized management of an enterprise network. - LANVista's PC cards provides a physical interface to - the LAN and frame preprocessing power. The Master - software controls the PC card, and the display and - processing of information gathered from the network. - - CAVEATS - Optimal performance of LANVista's master software is achieved - with DOS 5.0 by utilizing RAMDRIVE.SYS, SMARTDRV.SYS and High - memory. - - BUGS - None Known. - - LIMITATIONS - None Known. - - HARDWARE REQUIRED - IBM PC AT, 386, 486 or compatible. - - SOFTWARE REQUIRED - DOS - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - LANVista is available worldwide. For information on a - local sales representative contact: - - CXR/DIGILOG - 900 Business Center Drive - Horsham, PA 19044 - Phone 1-800-DIGILOG - FAX: 215-956-0108 - - GSA schedule pricing is honored. 
- - - -NOCTools2 Working Group [Page 67] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - CXR/DIGILOG Help Desk 1-800-DIGILOG - Send email to: lanvista@digilog.uucp - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 68] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog LANPROBE - - NAME - LanProbe -- the HP 4990S LanProbe Distributed Analysis - System. - - KEYWORDS - alarm, manager, map, status, traffic; ethernet; eaves- - drop, NMS; proprietary. - - ABSTRACT - The LanProbe distributed monitoring system performs - remote and local monitoring of ethernet LANs in a pro- - tocol and vendor independent manner. - - LanProbe discovers each active node on a segment and - displays it on a map with its adapter card vendor name, - ethernet address, and IP address. Additional informa- - tion about the nodes, such as equipment type and physi- - cal location can be entered in to the data base by the - user. - - When the NodeLocator option is used, data on the actual - location of nodes is automatically entered and the map - becomes an accurate representation of the physical lay- - out of the segment. Thereafter when a new node is - installed and becomes active, or when a node is moved - or becomes inactive, the change is detected and shown - on the map in real time. The system also provides the - network manager with precise cable fault information - displayed on the map. - - Traffic statistics are gathered and displayed and can - be exported in (comma delimited) CSV format for further - analysis. Alerts can be set on user defined thres- - holds. - - Trace provides a remote protocol analyzer capability - with decodes for common protocols. - - Significant events (like power failure, cable breaks, - new node on network, broadcast IP source address seen, - etc.) 
are tracked in a log that is uploaded to Pro- - beView periodically. - - ProbeView generates reports that can be manipulated by - MSDOS based word processors, spreadsheets, and DBMS. - - - - -NOCTools2 Working Group [Page 69] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - The system consists of one or more LanProbe segment - monitors and ProbeView software running under Microsoft - Windows. The LanProbe segment monitor attaches to the - end of an ethernet segment and monitors all traffic. - Attachment can be direct to a thin or thick coax cable, - or via an external transceiver to fiber optic or twist- - ed pair cabling. Network data relating to the segment - is transferred to a workstation running ProbeView via - RS-232, ethernet, or a modem connection. - - ProbeView software, which runs on a PC/AT class works- - tation, presents network information in graphical - displays. - - The HP4992A NodeLocator option attaches to the opposite - end of the cable from the HP4991A LanProbe segment mon- - itor. It automatically locates the position of nodes - on the ethernet networks using coaxial cabling schemes. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - HP 4991A LanProbe segment monitor - HP 4992A NodeLocator (for optional capabilities) - 80386 based PC capable of running MS-Windows - - SOFTWARE REQUIRED - HP 4990A ProbeView - MSDOS 3.0 or higher and Microsoft Windows/286 2.1. - - AVAILABILITY - A commercial product available from: - Hewlett-Packard Company - P.O. Box 10301, - Palo Alto, CA 94303-0890 - - - - - - - - -NOCTools2 Working Group [Page 70] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog LANWATCH - - NAME - LANWatch - - KEYWORDS - alarm, analyzer, traffic; CHAOS, DECnet, DNS, ethernet, - IP, OSI, ring, SMTP, star; eavesdrop; DOS; library, - sourcelib. 
- - ABSTRACT - LANWatch 2.0 is an inexpensive, powerful and flexible - network analyzer that runs under DOS on personal com- - puters and requires no hardware modifications to either - the host or the network. LANWatch is an invaluable - tool for installing, troubleshooting, and monitoring - local area networks, and for developing and debugging - new protocols. Network managers using LANWatch can - inspect network traffic patterns and packet errors to - isolate performance problems and bottlenecks. Protocol - developers can use LANWatch to inspect and verify - proper protocol handling. Since LANWatch is a - software-only package which installs easily in existing - PCs, network technicians and field service engineers - can carry LANWatch in their briefcase for convenient - network analysis at remote sites. - - LANWatch has two operating modes: Display and Examine. - In Display Mode, LANWatch traces network traffic by - displaying captured packets in real time. Examine Mode - allows you to scroll back through stored packets to - inspect them in detail. To select a subset of packets - for display, storage or retrieval, there is an exten- - sive set of built-in filters. Using filters, LANWatch - collects only packets of interest, saving the user from - having to sort through all network traffic to isolate - specific packets. The built-in filters include alarm, - trigger, capture, load, save and search. They can be - controlled separately to match on source or destination - address, protocol, or packet contents at the hardware - and transport layers. LANWatch also includes suffi- - cient source code so users can modify the existing - filters and parsers or add new ones. - - The LANWatch distribution includes executables and - source for several post-processors: a TCP protocol - analyzer, a node-by-node traffic analyzer and a dump - file listing tool. 
- - - -NOCTools2 Working Group [Page 71] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - Uses many common PC network interfaces by placing them - in promiscuous mode and capturing traffic. - - CAVEATS - Most PC network interfaces will not capture 100% of the - traffic on a fully-loaded network (primarily missing - back-to-back packets). - - BUGS - None known. - - LIMITATIONS - LANWatch can't analyze what it doesn't see (see - Caveats). - - HARDWARE REQUIRED - LANWatch requires a PC or PS/2 with a supported network - interface card. - - SOFTWARE REQUIRED - LANWatch runs in DOS. Modification of the supplied - source code or creation of additional filters and - parsers requires Microsoft C 5.1 - - AVAILABILITY - LANWatch is commercially available from FTP Software, - Incorporated, 26 Princess Street, Wakefield, MA, 01880 - (617 246-0900). - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 72] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog LLL_ENTM - - NAME - ENTM -- Ethernet Traffic Monitor - - KEYWORDS - traffic; ethernet, IP; eavesdrop; VMS; free. - - ABSTRACT - ENTM is a screen-oriented utility that runs under - VAX/VMS. It monitors local ethernet traffic and - displays either a real time or cumulative, histogram - showing a percent breakdown of traffic by ethernet pro- - tocol type. The information in the display can be - reported based on packet count or byte count. The per- - cent of broadcast, multicast and approximate lost pack- - ets is reported as well. The screen display is updated - every three seconds. Additionally, a real time, slid- - ing history window may be displayed showing ethernet - traffic patterns for the last five minutes. - - ENTM can also report IP traffic statistics by packet - count or byte count. The IP histograms reflect infor- - mation collected at the TCP and UDP port level, includ- - ing ICMP type/code combinations. 
Both the ethernet and - IP histograms may be sorted by ASCII protocol/port name - or by percent-value. All screen displays can be saved - in a file for printing later. - - MECHANISM - This utility simply places the ethernet controller in - promiscuous mode and monitors the local area network - traffic. It preallocates 10 receive buffers and - attempts to keep 22 reads pending on the ethernet dev- - ice. - - CAVEATS - Placing the ethernet controller in promiscuous mode may - severely slow down a VAX system. Depending on the speed - of the VAX system and the amount of traffic on the lo- - cal ethernet, a large amount of CPU time may be spent - on the Interrupt Stack. Running this code on any pro- - duction system during operational hours is discouraged. - - BUGS - Due to a bug in the VAX/VMS ethernet/802 device driver, - IEEE 802 format packets may not always be detected. A - simple test is performed to "guess" which packets are - - - -NOCTools2 Working Group [Page 73] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - in IEEE 802 format (DSAP equal to SSAP). Thus, some - DSAP/SSAP pairs may be reported as an ethernet type, - while valid ethernet types may be reported as IEEE 802 - packets. - - In some hardware configurations, placing an ethernet - controller in promiscuous mode with automatic-restart - enabled will hang the controller. Our VAX 8650 hangs - running this code, while our uVAX IIs and uVAX IIIs do - not. - - Please report any additional bugs to the author at: - Allen Sturtevant - National Magnetic Fusion Energy Computer Center - Lawrence Livermore National Laboratory - P.O. Box 808; L-561 - Livermore, CA 94550 - Phone : (415) 422-8266 - E-Mail: sturtevant@ccc.nmfecc.gov - - LIMITATIONS - The user is required to have PHY_IO, TMPMBX and NETMBX - privileges.
When activated, the program first checks - that the user process as enough quotas remaining - (BYTLM, BIOLM, ASTLM and PAGFLQUO) to successfully run - the program without entering into an involuntary wait - state. Some quotas require a fairly generous setting. - - The contents of IEEE 802 packets are not examined. - Only the presence of IEEE 802 packets on the wire is - reported. - - The count of lost packets is approximated. If, after - each read completes on the ethernet device, the utility - detects that it has no reads pending on that device, - the lost packet counter is incremented by one. - - When the total number of bytes processed exceeds - 7fffffff hex, all counters are automatically reset to - zero. - - HARDWARE REQUIRED - A DEC ethernet controller. - - SOFTWARE REQUIRED - VAX/VMS version V5.1+. - - - - - -NOCTools2 Working Group [Page 74] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - For executables only, FTP to the ANONYMOUS account - (password GUEST) on CCC.NMFECC.GOV and GET the follow- - ing files: - - [ANONYMOUS.PROGRAMS.ENTM]ENTM.DOC (ASCII text) - [ANONYMOUS.PROGRAMS.ENTM]ENTM.EXE (binary) - [ANONYMOUS.PROGRAMS.ENTM]EN_TYPES.DAT (ASCII text) - [ANONYMOUS.PROGRAMS.ENTM]IP_TYPES.DAT (ASCII text) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 75] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog Interactive Network Map - - NAME - map -- Interactive Network Map - - KEYWORDS - manager, map; CHAOS, ethernet, IP, ring, star; NMS, - ping, SNMP, X; UNIX; free, sourcelib. - - ABSTRACT - Map draws a map of network connectivity and allows - interactive examination of information about various - components including whether hosts can be reached over - the network. 
- - The program is supplied with complete source and is - written in a modular fashion to make addition of dif- - ferent protocols stacks, displays, or hardcopy devices - relatively easy. This is one of the reasons why the - initial version supports at least two of each. Contri- - butions of additional drivers in any of these areas - will be welcome as well as porting to additional plat- - forms. - - MECHANISM - Net components are pinged by use of ICMP echo and, - optionally, CHAOS status requests and SNMP "gets." The - program initializes itself from static data stored in - the file system and therefore does not need to access - the network in order to get running (unless the static - files are network mounted). - - CAVEATS - As of publication, the tool is in beta release. - - BUGS - Several minor nits, documented in distribution files. - Bug discoveries should be reported by email to Bug- - Map@LCS.MIT.Edu. - - LIMITATIONS - See distribution file for an indepth discussion of sys- - tem capabilities and potential. - - HARDWARE REQUIRED - An X display is needed for interactive display of the - map, non-graphical interaction is available in non- - display mode. For hardcopy output a PostScript or Tek- - - - -NOCTools2 Working Group [Page 76] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - tronix 4692 printer is required. - - SOFTWARE REQUIRED - BSD UNIX or related OS. IP/ICMP is required; - CHAOS/STATUS and SNMP can be used but are optional. - X-Windows is required for interactive display of the - map. - - AVAILABILITY - The program is Copyright MIT. It is available via - anonymous FTP with a license making it free to use and - distribute for non-commercial purposes. FTP to host - FTP.LCS.MIT.Edu, directory nets. The complete - distribution is in map.tar.Z and some short - documentation files are there (as well as in the - distribution). Of most interest are ReadMe and Intro. 
- - To be added to the email forum that discusses the - software, or for other administrative details, send a - request to: MAP-Request@LCS.MIT.Edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 77] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog MCONNECT - - NAME - mconnect - - KEYWORDS - status; SMTP; spoof; UNIX. - - ABSTRACT - Mconnect allows an interactive session with a remote - mailer. Mail delivery problems can be diagnosed by - connecting to the remote mailer and issuing SMTP com- - mands directly. - - MECHANISM - Opens a TCP connection to remote SMTP on port 25. Pro- - vides local line buffering and editing, which is the - distinction between mconnect and a TELNET to port 25. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - Mconnect is not a large improvement over using a TELNET - connection to port 25. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX or related OS. - - AVAILABILITY - Available with 4.xBSD UNIX and related operating sys- - tems. - - - - - - - - - - - - - -NOCTools2 Working Group [Page 78] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog MIB-BROWSER - - NAME - MIB Browser - - KEYWORDS - manager; osi; cmis, x; unix; free, sourcelib. - - ABSTRACT - The MIB Browser is an X Windows HCI tool that allows - you to "browse" through the objects in a Management - Information Base (MIB). The browser is generic in that - it can connect to a CMIS agent without having any - prior knowledge of the structure of the MIB in the - agent. - - MECHANISM - CMIP is used to transfer the values of attributes - between the managed system and the browser. - - CAVEATS - None. - - BUGS - Unexpected termination of the agent can cause browser - to crash (ISODE bug!). - - HARDWARE REQUIRED - Unix workstation, has been tested on SUN 3 and SUN 4 - architectures. 
- - SOFTWARE REQUIRED - The ISODE protocol suite, BSD UNIX, X Windows, GNU C++ - (g++), Interviews (2.6). - - AVAILABILITY - The CMIP library and related management tools built - upon it, known as OSIMIS (OSI Management Information - Service), are publicly available from University - College London, England via FTP and FTAM. To obtain - information regarding a copy send email to - osimis-request@cs.ucl.ac.uk or call +44 71 380 7366. - - - - - - - - - -NOCTools2 Working Group [Page 79] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog MONET - - NAME - MONET -- the Hughes LAN Systems SNMP Network Management - Center (formerly the Hughes LAN Systems 9100) software - product runs on a Sun SPARCStation hardware platform. - - KEYWORDS - control, graphics, network topology,manager, routing, - status, traffic; bridge, configuration, performance, - alarm management, relational database, mib parser for - RDBMS, intelligent hub management, DECnet, ethernet, - IP; NMS, SNMP; UNIX. - - ABSTRACT - Monet provides the capability to manage and control - SNMP-based networking products from any vendor including - those from Hughes LAN Systems. - - A comprehensive relational database manages the data and - ensures easy access and control of resources throughout - the network. - - Monet provides multivendor management through its - advanced Mib master MIB parser that allows the parsing - of enterprise MIBs (ASN.1 format per RFC1212) directly - into the RDBMS for use by Monet's applications. - - Major features include: - - Remote access with X: - Use of the X/Motif user-interface, enabling remote - access to the all applications. - - Database Management - Stores and retrieves the information required to - administer and configure the network. It can be - used to: - - Store and recall configuration data for all - devices. - - Provide availability history for devices. - - Assign new internet addresses. 
- - Provide administrative information such as - physical location of devices, responsible - person, maintenance history, asset data, - hardware/software versions, etc. - - Full-function SQL interface. - - User-customizable RDBMS report generation. - - - -NOCTools2 Working Group [Page 80] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Graphics and Network Mapping - The Graphics module enables the user to view the - nodes in the network as "dynamic" icons in - heirarchical maps. The network is represented by - these heirarchical maps. Though there is a - library of device icons, cities and geographical - maps included, the user has access to a - graphics editor that allows customizing and the - creation of new icons and maps. - A Device's icon may be selected to: - - Register/deregister the device, - - Access the open alarms and acknowledge - faults for the selected device, - - Ping the device to determine accessibility, - - Draw graphs of any of the device's numeric - MIB objects, either the values as retrieved - in real-time or the history values - previously stored in the RDBMS by the - Performance Manager, - - Telnet to the device, - - Customize the graphical dynamics (color, - fill, rotation, etc.) of the device's icon - by associating them to the values of the - device's MIB objects. - - Configuration Management - - Retrieves configuration information from SNMP - devices. - - Stores device parameters in the RDBMS, with - common sets of parameters used for multiple - devices, or for multiple ports on a device, - stored only once in the RDBMS. - - Configures devices from the parameters stored in - the RDBMS, including those relating to TCP/IP, - DECnet and any other protocol/feature - configurable via SNMP. - - Polls devices to compare their current parameter - values with those in the database and produce - reports of the discrepancies. - - Collect data about the state of the network. 
- - Learn the parameters of the devices in the - network and populate the database. - - Performance Management - - Displays local network traffic graphically, by - packet size, protocol, network utilization, - sources and destinations of packets, etc. - - Provides for the scheduling of jobs to retrieve - - - -NOCTools2 Working Group [Page 81] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MIB values of a device and store them in the RDBMS - for review or summary reporting at a later time. - - Allows high/low thresholds to be set on retrieved - values with alarms generated when thresholds are - exceeded. - - Fault Management - - Provides availability monitoring and indicates - potential problems. - - Creates alarms from received SNMP traps, and from - other internally-generated conditions, - - Records alarms in the alarm log in the RDBMS. - - Lists alarms for selected set of devices, - according to various filter conditions, - - Possible causes and suggested actions for the - alarms are listed. - - New alarms are indicated by a flashing icon and - optional audio alert. - - Visual indication of alarms bubbles up the network - map heirarchy. - - Cumulative reports can be produced. - - Utilities Function - - View and/or terminate current NMC processes, - - Access to database maintenance utilities. - - MECHANISM - SNMP. - - CAVEATS - None reported. - - BUGS - None known. - - LIMITATIONS - Maximum number of nodes that can be monitored is - 18,000. This can include Hosts, Terminal Servers, PCs, - Routers, and Bridges. - - HARDWARE REQUIRED - The host for the NMC software is a Sun 4 desktop works- - tation. Recommended minimum hardware is the Sun IPX - Color workstation, with a 1/4" SCSI tape drive. - - SOFTWARE REQUIRED - MONET V5.0, which is provided on 1/4" tape format, runs on - the Sun 4.1.1 Operating System. 
- - - -NOCTools2 Working Group [Page 82] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - A commercial product of: - Hughes LAN Systems Inc. - 1225 Charleston Road - Mountain View, CA 94043 - Phone: (415) 966-7300 - Fax: (415) 960-3738 - RCA Telex: 276572 - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - kishoret@msgate.hls.com - kzm@hls.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 83] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NET_MONITOR - - NAME - net_monitor - - KEYWORDS - routing, status; DECnet, IP; curses, ping; UNIX, VMS; - free, sourcelib. - - ABSTRACT - Net_monitor uses ICMP echo (and DECnet reachability - information on VAX/VMS) to monitor a network. The mon- - itoring is very simplistic, but has proved useful. It - periodically tests whether hosts are reachable and - reports the results in a full-screen display. It - groups hosts together in common sets. If all hosts in - a set become unreachable, it makes a lot of racket with - bells, since it assumes that this means that some com- - mon piece of hardware that supports that set has - failed. The periodicity of the tests, hosts to test, - and groupings of hosts are controlled with a single - configuration file. - - The idea for this program came from the PC/IP monitor - facility, but is an entirely different program with - different functionality. - - MECHANISM - Reachability is tested using ICMP echo facilities for - TCP/IP hosts (and DECnet reachability information on - VAX/VMS). A DECnet node is considered reachable if it - appears in the list of hosts in a "show network" com- - mand issued on a routing node. - - CAVEATS - This facility has been found to be most useful when run - in a window on a workstation rather than on a terminal - connected to a host. 
It could be useful if ported to a - PC (looks easy using FTP Software's programming - libraries), but this has not been done. Curses is very - slow and cpu intensive on VMS, but the tool has been - run in a window on a VAXstation 2000. Just don't try - to run it on a terminal connected to a 11/750. - - BUGS - None known. - - - - - -NOCTools2 Working Group [Page 84] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LIMITATIONS - This tool is not meant to be a replacement for a more - comprehensive network management facility such as is - provided with SNMP. - - HARDWARE REQUIRED - A host with a network connection. - - SOFTWARE REQUIRED - Curses, 4.xBSD UNIX socket programming libraries (lim- - ited set) and some flavor of TCP/IP that supports ICMP - echo request (ping). It has been run on VAX/VMS run- - ning WIN/TCP and several flavors of 4BSD UNIX (includ- - ing SunOS 3.2, 4.0, and 4.3BSD). It could be ported to - any platform that provides a BSD-style programming li- - brary with an ICMP echo request facility and curses. - - AVAILABILITY - Requests should be sent to the author: - - Dale Smith - Asst Dir of Network Services - University of Oregon - Computing Center - Eugene, OR 97403-1211 - - Internet: dsmith@oregon.uoregon.edu. - BITNET: dsmith@oregon.bitnet - UUCP: ...hp-pcd!uoregon!dsmith - Voice: (503)686-4394 - - With the source code, a makefile is provided for most - any UNIX box and a VMS makefile compatible with the - make distributed with PMDF. A VMS DCL command file is - also provided, for use by those VMS sites without - "make." - - The author will attempt to fix bugs, but no support is - promised. The tool is copyrighted, but free (for now). - - - - - - - - - - - - -NOCTools2 Working Group [Page 85] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETLABS_CMOT_AGENT - - NAME - Netlabs CMOT Agent - - KEYWORDS - manager, status; IP, OSI; NMS. - - ABSTRACT - Netlabs' CMOT code debuted in Interop 89. 
The CMOT - code comes with an Extensible MIB, which allows users - to add new MIB variables. The code currently supports - all the MIB variables in RFC 1095 via the data types in - RFC 1065, as well as the emerging MIB-II, which is - currently in experimental stage. The CMOT has been - benchmarked at 100 Management Operations per Second - (MOPS) for a 1-MIPS machine. - - MECHANISM - The Netlabs CMOT agent supports the control and moni- - toring of network resources by use of CMOT message - exchanges. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Portable to most hardware. - - SOFTWARE REQUIRED - Portable to most operating systems. - - AVAILABILITY - Commercially available from: - Netlabs Inc - 11693 Chenault Street Ste 348 - Los Angeles CA 90049 - (213) 476-4070 - lam@netlabs.com (Anne Lam) - - - - - - -NOCTools2 Working Group [Page 86] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETLABS_DUAL_MANAGER - - NAME - Dual Manager - - KEYWORDS - alarm, control, manager, map, security, status; IP, - OSI; NMS, SNMP, X; UNIX; library. - - ABSTRACT - Netlabs' Dual Manager provides management of TCP/IP - networks using both SNMP and CMOT protocols. Such - management can be initiated either through the X- - Windows user interface (both Motif and Openlook), or - through OSI Network Management (CMIP) commands. The - Dual Manager provides for configuration, fault, secu- - rity and performance management. It provides extensive - map management features, including scanned maps in the - background. It provides simple mechanisms to extend - the MIB and assign specific lists of objects to - specific network elements, thereby providing for the - management of all vendors' specific MIB extensions. It - provides an optional relational DBMS for storing and - retrieving MIB and alarm information.
Finally, the - Dual Manager is an open platform, in that it provides - several Application Programming Interfaces (APIs) for - users to extend the functionality of the Dual Manager. - - The Dual Manager is expected to work as a TCP/IP - "branch manager" under DEC's EMA, AT&T's UNMA and other - OSI-conformant enterprise management architectures. - - MECHANISM - The Netlabs Dual Manager supports the control and moni- - toring of network resources by use of both CMOT and - SNMP message exchanges. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Runs on Sun/3 and Sun/4s. - - - -NOCTools2 Working Group [Page 87] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SOFTWARE REQUIRED - Available on System V or SCO Open Desktop environments. - Uses X-Windows for the user interface. - - AVAILABILITY - Commercially available from: - Netlabs Inc - 11693 Chenault Street Ste 348 - Los Angeles CA 90049 - (213) 476-4070 - lam@netlabs.com (Anne Lam) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 88] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETLABS_SNMP_AGENT - - NAME - Netlabs SNMP Agent. - - KEYWORDS - manager, status; IP; NMS, SNMP. - - ABSTRACT - Netlabs' SNMP code debuted in Interop 89, where it - showed interoperation of the code with several imple- - mentations on the show floor. The SNMP code comes with - an Extensible MIB, which allows users to add new MIB - variables. The code currently supports all the MIB - variables in RFC 1066 via the data types in RFC 1065, - as well as the emerging MIB-II, which is currently in - experimental stage. The SNMP has been benchmarked at - 200 Management Operations per Second (MOPS) for a 1- - MIPS machine. - - MECHANISM - The Netlabs SNMP agent supports the control and moni- - toring of network resources by use of SNMP message - exchanges. - - CAVEATS - None. 
- - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Portable to most hardware. - - SOFTWARE REQUIRED - Portable to most operating systems. - - AVAILABILITY - Commercially available from: - Netlabs Inc - 11693 Chenault Street Ste 348 - Los Angeles CA 90049 - (213) 476-4070 - lam@netlabs.com (Anne Lam) - - - - -NOCTools2 Working Group [Page 89] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NetMetrix-Load-Monitor - - NAME - NetMetrix Load Monitor - - KEYWORDS - alarm,traffic; Ethernet, FDDI, IP, Ring; Eavesdrop, - SNMP, X; UNIX; - - ABSTRACT - The NetMetrix Load Monitor is a distributed - client-server monitoring tool for ethernet, token - ring, and FDDI networks. A unique "dual" architecture - provides compatibility with both RMON and X windows. - RMON allows interoperability and an enterprise-wide - view, while X windows enables much more powerful, - intelligent applications at remote segments and saves - network bandwidth. - - The Load Monitor provides extensive traffic - statistics. It looks at load by time interval, source - node, destination node, application, protocol or - packet size. A powerful ZOOM feature allows extensive - correlational analysis which is displayed in a wide - variety of graphs and tables. - - You can answer questions such as: Which sources are - generating most of the load on the network when it is - most heavily loaded and where is this load going? - Which source/destination pairs generate the most - traffic over the day? Where should bridges and - routers be located to optimally partition the network? - How much load do applications, like the X Windows - protocol, put on the network and who is generating that - load when it is the greatest. - - A floating license allows easy access to the software - tool anywhere you need it. - - MECHANISM - NetMetrix turns the network interface into promiscuous - mode to capture packets. - - CAVEATS - none. - - BUGS - none known. 
- - - -NOCTools2 Working Group [Page 90] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LIMITATIONS - none. - - HARDWARE REQUIRED - SPARC system - - SOFTWARE REQUIRED - SunOS 4.0 or higher - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - NetMetrix is available from: - Sales Department - Metrix Network Systems, Inc. - One Tara Boulevard - Nashua, New Hampshire 03062 - telephone: 603-888-7000 - fax: 603-891-2796 - email: info@metrix.com - - Government agencies please note that NetMetrix is on the GSA - schedule. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Norma Shepperd - Marketing Administrator - 603-888-7000 - norma@metrix.com - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 91] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NetMetrix-NFS-Monitor - - NAME - NetMetrix NFS Monitor - - KEYWORDS - traffic; Ethernet, FDDI, NFS, Ring; Eavesdrop, SNMP, X; - UNIX - - ABSTRACT - The NetMetrix NFS Monitor is a distributed network - monitoring tool which monitors and graphs NFS load, - response time, retransmits, rejects and errors by - server, client, NFS procedure, or time - interval. Breakdown server activity by file system - and client activity by user. - - A powerful ZOOM feature lets you correlate monitoring - variables. You can see client/server relationships, - compare server performance, evaluate NFS performance - enhancement strategies. - - A floating license and the X Window protocol allows - monitoring of remote ethernet, token ring and FDDI - segments from a central enterprise-wide display. - - MECHANISM - NetMetrix turns the network interface into promiscuous - mode to capture packets. - - CAVEATS - none. - - BUGS - none known. - - LIMITATIONS - none. 
- - HARDWARE REQUIRED - SPARC system - - SOFTWARE REQUIRED - SunOS 4.0 or higher - - - - - - - -NOCTools2 Working Group [Page 92] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - NetMetrix is available from: - Sales Department - Metrix Network Systems, Inc. - One Tara Boulevard - Nashua, New Hampshire 03062 - telephone: 603-888-7000 - fax: 603-891-2796 - email: info@metrix.com - - Government agencies please note that NetMetrix is on - the GSA schedule. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Norma Shepperd - Marketing Administrator - 603-888-7000 - norma@metrix.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 93] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NetMetrix-Protocol-Analyzer - - NAME - NetMetrix Protocol Analyzer - - KEYWORDS - alarm, analyzer, traffic; DECnet, DNS, Ethernet, FDDI, - IP, OSI, NFS, Ring, SMTP; Eavesdrop, SNMP, X; UNIX; - Library - - ABSTRACT - The NetMetrix Protocol Analyzer is a distributed - client-server monitoring tool for ethernet, token - ring, and FDDI networks. A unique "dual" architecture - provides compatibility with both RMON and - X windows. RMON allows interoperability, while X - windows enables much more powerful, intelligent - applications at remote segments and saves network - bandwidth. - - With the Protocol Analyzer, you can decode and display - packets as they are being captured. Extensive filters - let you sift through packets either before or after - trace capture. The capture filter may be specified by - source, destination between hosts, protocol, packet - size, pattern match, or by a complete expression using - an extensive filter expression language. - - Full 7-layer packet decodes are available for all - major protocols including DECnet, Appletalk, Novell, - XNS, SNA, BANYAN, OSI and TCP/IP. 
The decodes for the - TCP/IP stack have all major protocols including NFS, - YP, DNS, SNMP, OSPF, etc. - - Request and reply packets are matched. Packets can be - displayed in summary, detail or hex, with multiple - views to see packet dialogues side by side. - - A complete developers' kit is available for custom - decodes. - - A floating license allows easy access to the software - tool anywhere you need it. - - MECHANISM - NetMetrix turns the network interface into promiscuous - mode to capture packets. - - - - -NOCTools2 Working Group [Page 94] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - none. - - BUGS - none known. - - LIMITATIONS - none. - - HARDWARE REQUIRED - SPARC system - - SOFTWARE REQUIRED - SunOS 4.0 or higher - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - NetMetrix is available from: - Sales Department - Metrix Network Systems, Inc. - One Tara Boulevard - Nashua, New Hampshire 03062 - telephone: 603-888-7000 - fax: 603-891-2796 - email: info@metrix.com - - Government agencies please note that NetMetrix is on the - GSA schedule. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Norma Shepperd - Marketing Administrator - 603-888-7000 - norma@metrix.com - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 95] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NetMetrix-Traffic-Generator - - NAME - NetMetrix Traffic Generator - - KEYWORDS - Debugger, Generator, Traffic; Ethernet, FDDI, IP, - Ring; Eavesdrop, SNMP, X; UNIX; Library - - ABSTRACT - The NetMetrix Traffic Generator is a distributed - software tool which allows you to simulate network - load or test packet dialogues between nodes on your - ethernet, token ring, or FDDI segments. The Traffic - Generator can also be used to test and validate - management station alarms, routers, bridges, hubs, etc.
- - An easy-to-use programming interface provides complete - flexibility over variables such as bandwidth, packet - sequence, and conditional responses. - - A floating license and the X Window System protocol - allows testing of remote ethernet, token ring and FDDI - segments from a central console. - - MECHANISM - NetMetrix turns the network interface into promiscuous - mode to capture packets. - - CAVEATS - none. - - BUGS - none known. - - LIMITATIONS - none. - - HARDWARE REQUIRED - SPARC system - - SOFTWARE REQUIRED - SunOS 4.0 or higher - - - - - - - - -NOCTools2 Working Group [Page 96] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - NetMetrix is available from: - Sales Department - Metrix Network Systems, Inc. - One Tara Boulevard - Nashua, New Hampshire 03062 - telephone: 603-888-7000 - fax: 603-891-2796 - email: info@metrix.com - - Government agencies please note that NetMetrix is on - the GSA schedule. - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Norma Shepperd - Marketing Administrator - 603-888-7000 - norma@metrix.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 97] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETMON_MITRE - - NAME - NETMON and iptrace - - KEYWORDS - traffic; IP; eavesdrop; UNIX; free. - - ABSTRACT - NETMON is a facility to enable communication of net- - working events from the BSD UNIX operating system to a - user-level network monitoring or management program. - Iptrace is a program interfacing to NETMON which logs - TCP-IP traffic for performance measurement and gateway - monitoring. It is easy to build other NETMON-based - tools using iptrace as a model. - - NETMON resides in the 4.3BSD UNIX kernel. It is - independent of hardware-specific code in UNIX. 
It is - transparent to protocol and network type, having no - internal assumptions about the network protocols being - recorded. It is installed in BSD-like kernels by - adding a standard function call (probe) to a few points - in the input and output routines of the protocols to be - logged. - - NETMON is analogous to Sun Microsystems' NIT, but the - interface tap function is extended by recording more - context information. Aside from the timestamp, the - choice of information recorded is up to the installer - of the probes. The NETMON probes added to the BSD IP - code supplied with the distribution include as context: - input and output queue lengths, identification of the - network interface, and event codes labeling packet dis- - cards. (The NETMON distribution is geared towards - measuring the performance of BSD networking protocols - in an IP gateway). - - NETMON is designed so that it can reside within the - monitored system with minimal interference to the net- - work processing. The estimated and measured overhead - is around five percent of packet processing. - - The user-level tool "iptrace" is provided with NETMON. - This program logs IP traffic, either at IP-level only, - or as it passes through the network interface drivers - as well. As a separate function, iptrace produces a - host traffic matrix output. Its third type of output - - - -NOCTools2 Working Group [Page 98] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - is abbreviated sampling, in which only a pre-set number - of packets from each new host pair is logged. The - three output types are configured dynamically, in any - combination. - - OSITRACE, another logging tool with a NETMON interface, - is available separately (and documented in a separate - entry in this catalog). - - MECHANISM - Access to the information logged by NETMON is through a - UNIX special file, /dev/netmon. User reads are blocked - until the buffer reaches a configurable level of full- - ness. 
- - Several other parameters of NETMON can be tuned at com- - pile time. A diagnostic program, netmonstat, is - included in the distribution. - - CAVEATS - None. - - BUGS - Bug reports and questions should be addressed to: - ie-tools@gateway.mitre.org - Requests to join this mailing list: - ie-tools-request@gateway.mitre.org - Questions and suggestions can also be directed to: - Allison Mankin (703)883-7907 - mankin@gateway.mitre.org - - LIMITATIONS - A NETMON interface for tcpdump and other UNIX protocol - analyzers is not included, but it is simple to write. - NETMON probes for a promiscuous ethernet interface are - similarly not included. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX-like network protocols or the ability to - install the BSD publicly available network protocols in - the system to be monitored. - - - - - - - -NOCTools2 Working Group [Page 99] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - The NETMON distribution is available by anonymous FTP - in pub/netmon.tar or pub/netmon.tar.Z from aelred- - 3.ie.org. A short user's and installation guide, - NETMON.doc, is available in the same location. The - NETMON distribution is provided "as is" and requires - retention of a copyright text in code derived from it. - It is copyrighted by the MITRE-Washington Networking - Center. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 100] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETMON_WINDOWS_SNMP_RESEARCH - - NAME - NETMON for Windows -- an SNMP-based network management - tool that runs under Microsoft Windows 3.0 from SNMP - Research. - - KEYWORDS - alarm, control, manager, map, routing; - DECnet, Ethernet, IP, OSI, ring, star; - NMS, SNMP; - DOS; - sourcelib. - - ABSTRACT - The NETMON application implements a powerful network - management station based on a low-cost DOS platform. 
- NETMON's network management tools for configuration, - performance, security, and fault management have been - used successfully with a wide assortment of wide- and - local-area-network topologies and medias. Multiprotocol - devices are supported including those using TCP/IP, - DECnet, and OSI protocols. - - Some features of NETMON's network management tools include: - - o Fault management tool displays a map of the network - configuration with node and link state indicated - in one of several colors to indicate current status; - o Configuration management tool may be used to edit the - network management information base stored in the - NMS to reflect changes occurring in the network; - o Graphs and tabular tools for use in fault and performance - management; - o Mechanisms by which additional variables, such as vendor- - specific variables, may be added; - o Alarms may be enabled to alert the operator of events - occurring in the network; - o Events are logged to disk; - o Output data may be transferred via flat files for - additional report generation by a variety of - statistical packages. - - The NETMON application comes complete with source code - including a powerful set of portable libraries for generating - and parsing SNMP messages. - - - - - -NOCTools2 Working Group [Page 101] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - The NETMON for Windows application is based on the - Simple Network Management Protocol (SNMP). Polling is - performed via the powerful SNMP get-next operator and - the SNMP get operator. Trap directed polling is used - to regulate the focus and intensity of the polling. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - The minimum system is a IBM 386 computer, or - compatible, with hard disk drive. - - SOFTWARE REQUIRED - DOS 5.0 or later, Windows 3.0 in 386 mode, and TCP/IP - kernel software from FTP Software. 
- - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 102] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETscout - - NAME - NETscout(tm) - - KEYWORDS - Alarm, Analyzer, Manager, Status, Traffic; - DECnet, Ethernet, IP, OSI, NFS, Ring, Star, Eavesdrop; - NMS, SNMP; - UNIX; - - ABSTRACT - The NETscout family of distributed LAN Analyzer - devices are intended to provide network users with a - comprehensive capability to identify and isolate fault - conditions in data communications networks. - NETscout has the capability to collect wide ranging - statistical data, to display selectively captured and - fully decoded network traffic, to set user-defined - alarm conditions, and to obtain real-time updates - from all segments of a widely dispersed internetwork - from a centralized SNMP-compatible network management - console. - - The NETscout family is based on standards so that - operation may be realized in heterogeneous networks - which constitute a multi-protocol, multi-topology, - multi-vendor environment. The fundamental standards - upon which NETscout is based are the Simple Network - Management Protocol (SNMP), which defines the protocol - for all inter-communications between NETscout devices, - and the Remote Monitoring Management Information Base - (RMON-MIB), which defines the type of information - which is to be gathered and made available to the - user for each network segment. 
- - NETscout clients provide a full array of monitoring - and analysis features including intelligent seven - level decoding of all major protocol stacks: - - DOD including TCP/IP XNS Novell - DECNET including LAT ISO APPLETALK - IBM Token Ring Vines NETBIOS/SMB - SNMP including RMON-MIB SUN-NFS SMT - - NETscout agents support all nine groups of the - RMON-MIB standard. NETscout agents can work with any - SNMP-based network management system and currently - - - -NOCTools2 Working Group [Page 103] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - support Ethernet and Token Ring. - - MECHANISM - The operation of the NETscout family is divided into - two distinct subcategories. The first is the "Client" - which is the user console from which operational - commands are issued and where all results and - diagnostic information are displayed. In a NETscout - topology it is feasible to have multiple clients - active simultaneously within a single network. The - second category is the "Agent", a hardware/software - device which is attached to a specific network - segment and which gathers statistical information for - that segment as well as providing a window into that - segment where network traffic may be observed and - gathered for more detailed user analysis. A - typical network will have multiple segments and - multiple agents up to the point of having one agent - for each logical network segment. - - NETscout Model 9210 is a software package which, when - combined in a Sun SPARCstation in conjunction with - SunNet Manager running under Open Windows, implements - the NETscout client function. SunNet Manager provides - the background operational tools for client operation - while the NETscout software provides - application-specific functions related to RMON-MIB - support as well as all software necessary to - perform the protocol decode function.
- SunNet Manager also implements a network map file - which includes a topographical display of the entire - network and is the mechanism for selecting - network elements to perform operations. - - NETscout Model 9215 is a software package that - operates in conjunction with SunNet Manager and - implements the statistics monitoring function only. - That is, it does not include the protocol - decode function or the mechanism to retrieve actual - data from a remote agent. It does, however, include - complete statistics gathering and event and alarm - generation. - - Frontier NETscout Models 9510 and 9515, and Model 9610 - and 9615 are agent software packages that implement - selected network diagnostic functions when loaded into - a Sun SPARCstation (9510, 9515) or a SynOptics - LattisNet Hub (9610, 9615) respectively which is - - - -NOCTools2 Working Group [Page 104] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - connected to an Ethernet network segment - using conventional network interface hardware. Models - 9510 and 9610 support all nine RMON-MIB groups - including "filters" and "packet capture" and thus - provide for complete protocol monitoring and decode - when used with a client - equipped with protocol decode software. Models 9515 - and 9615 include support for seven RMON-MIB groups - which excludes "filters" and "data capture" and - therefore perform network monitoring only through - collection and presentation of network statistics, - events, and alarms. All models also support the MIB2 - system and interface groups. - - Frontier NETscout Models 9520 and 9525, and Model 9620 - and 9625 are agent software packages that are - identical in function to their respective models - described above except that they are for use on - Token Ring segments. - - CAVEATS - The RMON-MIB standard for Token Ring applications has - not yet been formally released and is not approved.
- NETscout products correspond to the latest draft for - Token Ring functions and will be updated as - required to conform to the standard as it is approved. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Sun SPARCstation or LattisNet Hub depending upon Model - number. - - SOFTWARE REQUIRED - Sun OS 4.1.1 for client and agent, SunNet Manager for - client. - - - - - - - - - - - -NOCTools2 Working Group [Page 105] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - NETscout products are available commercially. For - information regarding your local representative, contact: - Frontier Software Development, Inc. - 1501 Main Street - Tewksbury, MA 01876 - Phone: 508-851-8872 - Fax: 508-851-6956 - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Marketing - Frontier Software - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 106] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETSTAT - - NAME - netstat - - KEYWORDS - routing; IP; UNIX, VMS; free. - - ABSTRACT - Netstat is a program that accesses network related data - structures within the kernel, then provides an ASCII - format at the terminal. Netstat can provide reports on - the routing table, TCP connections, TCP and UDP - "listens", and protocol memory management. - - MECHANISM - Netstat accesses operating system memory to read the - kernel routing tables. - - CAVEATS - Kernel data structures can change while netstat is run- - ning. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX or related OS, or VMS. - - AVAILABILITY - Available via anonymous FTP from uunet.uu.net, in - directory bsd-sources/src/ucb. Available with 4.xBSD - UNIX and related operating systems. 
For VMS, available - as part of TGV MultiNet IP software package, as well as - Wollongong's WIN/TCP. - - - - - - - - - - -NOCTools2 Working Group [Page 107] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NETWORK_INTEGRATOR - - NAME - Network Integrator I - - KEYWORDS - map, traffic; ethernet; UNIX. - - ABSTRACT - This tool monitors traffic on network segments. All - information is dumped to either a log file or, for - real-time viewing, to a command tool window. Data is - time-stamped according to date and time. Logging can - continue for up to 24 hours. - - The tool is flexible in data collection and presenta- - tion. Traffic filters can be specified according to - header values of numerous protocols, including those - used by Apple, DEC, Sun, HP, and Apollo. Bandwidth - utilization can be monitored, as well as actual load - and peak throughput. Additionally, the Network - Integrator can analyze a network's topology, and record - the location of all operational nodes on a network. - - Data can be displayed in six separate formats of bar - graphs. In addition, there are several routines for - producing statistical summaries of the data collected. - - MECHANISM - The tools work through RPC and XDR calls. - - CAVEATS - Although the tool adds only little traffic to a net- - work, generation of statistics from captured files - requires a significant portion of a workstation's CPU. - - BUGS - None known. - - LIMITATIONS - Must be root to run monitor. There does not seem to be - a limit to the number of nodes, since it monitors by - segments. The only major limitation is the amount of - disk space that a user can commit to the log files. - The size of the log files, however, can be controlled - through the tool's parameters. - - - - - -NOCTools2 Working Group [Page 108] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HARDWARE REQUIRED - Sun3 or Sun4. - - SOFTWARE REQUIRED - 4.0BSD UNIX or greater, or related OS. 
- - AVAILABILITY - Copyrighted, commercially available from - Network Integrators, - (408) 927-0412. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 109] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NFSwatch - - NAME - nfswatch - - KEYWORDS - Traffic; Ethernet, IP, NFS; Curses, Eavesdrop; UNIX; - Free - - ABSTRACT - Nfswatch monitors all incoming ethernet traffic to an - NFS file server and divides it into several - categories. The number and percentage of packets - received in each category is displayed on - the screen in a continuously updated display. - - By default, nfswatch monitors all packets destined for - the local host over a single network interface. - Options are provided to specify the specific interface - to be monitored, or all interfaces at once. NFS - traffic to the local host, to a remote host, from a - specific host, between two hosts, or all NFS traffic - on the network may be monitored. - - Categories of packets monitored and counted include: - ND Read, ND Write, NFS Read, NFS Write, NFS Mount, - Yellow Pages (NIS), RPC Authorization, Other RPC, TCP, - UDP, ICMP, RIP, ARP, RARP, Ethernet Broadcast, and - Other. - - Packets are also tallied either by file system or file - (specific files may be watched as an option), NFS - procedure name (RPC call), or NFS client hostname. - - Facilities for taking "snapshots" of the screen, as - well as saving data to a log file for later analysis - (the analysis tool is included) are also available. - - MECHANISM - Nfswatch uses the Network Interface Tap, nit(4) under - SunOS 4.x, and the Packet Filter, packetfilter(4), - under Ultrix 4.x, to place the ethernet interface into - promiscuous mode. It filters out NFS packets, and - decodes the file handles in order to determine how to - count the packet. 
- - - - - - -NOCTools2 Working Group [Page 110] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - Because the NFS file handle is a non-standard (server - private) piece of data, nfswatch must be modified to - understand file handles used by various - implementations. It currently knows - about the SunOS 4.x and Ultrix file handle formats. - - BUGS - Does not monitor FDDI interfaces. (It should be a - simple change, but neither author has access to a - system with FDDI interfaces for testing.) - - LIMITATIONS - Up to 256 exported file systems and 256 individual - files can be monitored at any time. - - Only NFS requests are counted; the NFS traffic - generated by a server in response to those packets - is not counted. - - HARDWARE REQUIRED - Any Ultrix system (VAX or DEC RISC hardware) - - SOFTWARE REQUIRED - Ultrix release 4.0 or later. For Ultrix 4.1, may - require the patched "if_ln.o" kernel module, available - from Digital's Customer Support Center. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - Copyrighted, but freely distributable. Available via - anonymous FTP from harbor.ecn.purdue.edu, - ftp.erg.sri.com, and gatekeeper.dec.com, as well as - numerous other sites around the Internet. The current - version is Version 3.0 from January 1991. - - Contact points: - - Dave Curry Jeff Mogul - Purdue University Digital Equipment Corp. - Engineering Computer Network Western Research Laboratory - 1285 Electrical Engineering Bldg. 100 Hamilton Avenue - West Lafayette, IN 47907-1285 Palo Alto, CA 94301 - davy@ecn.purdue.edu mogul@decwrl.dec.com - - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Dave Curry (see address above). - - - - -NOCTools2 Working Group [Page 111] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NHFSSTONE - - NAME - nhfsstone - - KEYWORDS - benchmark, generator; NFS; spoof; UNIX; free. 
- - ABSTRACT - Nhfsstone (pronounced n-f-s-stone, the "h" is silent) - is an NFS benchmarking program. It is used on an NFS - client to generate an artificial load with a particular - mix of NFS operations. It reports the average response - time of the server in milliseconds per call and the - load in calls per second. The nhfsstone distribution - includes a script, "nhfsnums" that converts test - results into plot(5) format so that they can be graphed - using graph(1) and other tools. - - MECHANISM - Nhfsstone is an NFS traffic generator. It adjusts its - calling patterns based on the client's kernel NFS - statistics and the elapsed time. Load can be generated - over a given time or number of NFS calls. - - CAVEATS - Nhfsstone will compete for system resources with other - applications. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - 4.xBSD-based UNIX - - - - - - - - - - - -NOCTools2 Working Group [Page 112] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - Available via anonymous FTP from bugs.cs.wisc.edu. - Alternatively, Legato Systems will provide the program - free of charge, if certain conditions are met. Send - name and both email and U.S. mail addresses to: - Legato Systems, Inc. - Nhfsstone - 260 Sheridan Avenue - Palo Alto, California 94306 - - A mailing list is maintained for regular information - and bug fixes: nhfsstone@legato.com or - uunet!legato.com!nhfsstone. To join the list: - nhfsstone-request@legato.com or - uunet!legato.com!nhfsstone-request. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 113] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NNSTAT - - NAME - NNStat - - KEYWORDS - manager, status, traffic; ethernet, IP; eavesdrop, NMS; - UNIX; free. 
- - ABSTRACT - NNStat is a collection of programs that provides an - internet statistic collecting capability. The NNStat - strategy for statistic collection is to collect traffic - statistics via a promiscuous ethernet tap on the local - networks, versus instrumenting the gateways. If all - traffic entering or leaving a network or set of net- - works traverses a local ethernet, then by stationing a - statistic gathering agent on each local network a pro- - file of network traffic can be gathered. Statistical - data is retrieved from the local agents by a global - manager. - - A program called "statspy" performs the data gathering - function. Essentially, statspy reads all packets on an - ethernet interface and records all information of - interest. Information of interest is gathered by exa- - mining each packet and determining if the source or - destination IP address is one that is being monitored, - typically a gateway address. If so then the contents - of the packet are examined to see if they match further - criteria. - - A program called "collect" performs global data collec- - tion. It periodically polls various statspy processes - in the domain of interest to retrieve locally logged - statistical data. - - The NNSTAT distribution comes with several sample awk - programs which process the logged output of the collect - program. - - MECHANISM - Local agents (statspy processes) collect raw traffic - data via a promiscuous ethernet tap. Statistical, fil- - tered or otherwise reduced data is retrieved from the - local agents by a global manager (the "collect" pro- - cess). - - - - -NOCTools2 Working Group [Page 114] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - None. - - BUGS - Bug fixes, extensions, and other pointers are discussed - in the electronic mail forum, bytecounters. To join, - send a request to bytecounters-request@venera.isi.edu. 
- Forum exchanges are archived in the file - bytecounters/bytecounters.mail, available via anonymous - FTP from venera.isi.edu. - - LIMITATIONS - NNStat presumes a topology of one or more long haul - networks gatewayed to local ethernets. - - A kernel mod required to run with SunOS4. These mods - are described in the bytecounters archive. - - HARDWARE REQUIRED - Ethernet interface. Sun 3, Sun 4 (SPARC), or PC RT - workstation. - - SOFTWARE REQUIRED - Distribution is for BSD UNIX, could easily be adapted - to any UNIX with promiscuous ethernet support. - - AVAILABILITY - Distribution is available via anonymous FTP from - venera.isi.edu, in file pub/NNStat.tar.Z. Documenta- - tion is in pub/NNStat.userdoc.ms.Z. - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 115] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NOCOL(8) - - NAME - nocol - network monitoring tools for an IP network - - SYNOPSIS - This is an overview of the NOCOL software. - - DESCRIPTION - NOCOL (Network Operations Center On-Line) is a - collection of network monitoring programs that run on - Unix systems. The software consists of a number of - monitoring agents that poll various parameters from any - system and put it in a format suitable for - post-processing. The post-processors can be a display - agent, an automated troubleshooting program, an - event logging program, etc. Presently, monitors for - tracking reachability, SNMP traps, data throughput - rate, and nameservers have been developed and are in - use. Addition of more monitoring agents is easy and - they will be added as necessary. A display agent- - nocol(1) using curses has already been developed. Work - on an "intelligent" module is currently in progress for - event logging and some automatic troubleshooting. - - All data collected by the monitoring agents follows a - fixed (non-readable) format. 
Each data entry is termed - an event in NOCOL, and each event has certain flags and - severity associated with it. The display agent - nocol(1), displays the output of these monitoring - agents depending on the severity of the event. There - can be multiple displays running simultanously and - all process the same set of monitored data. - - There are four levels of severity associated with an - event- CRITICAL, ERROR, WARNING and INFO. The severity - level is controlled independently by the monitoring - agents, and the decision to raise or set an event's - severity to any level depends on the logic imbedded in - the monitoring agent. - - As an example, for the pingmon(8) monitor, if a site is - unreachable via ping, it would be assigned a severity - of WARNING by pingmon, which would then elevate to - CRITICAL if the site is still unreachable after some - time. In the case of trapmon(8), an SNMP trap message - of EGP neighbor lost would be directly assigned a - severity level of CRITICAL, while an Warm Start trap is - - - -NOCTools2 Working Group [Page 116] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - assigned a severity of WARNING. - - The display agent (and other data post-processors) - would use this event severity to decide whether to - display it (or troubleshoot/log it) depending on the - user selected display severity level. - - The software is very flexible and allows enhancements - and development with a minimum amount of effort. The - display module processes all the files present in the - data directory, and displays them sequentially. This - allows new monitoring programs to simply start - generating data in the data directory and the display - module will automatically start displaying the new - data. The monitoring tools can be changed, and the only - element that has to remain common between all the - modules is the EVENT data structure. 
- - CURRENT MODULES - NOCOL presently consists of the following modules: - - nocol - which simply displays the data collected by the - monitoring agents. It uses the curses screen - management system to support a wide variety of terminal - types. The criterion for displaying an event is: - - 1. Severity level of the event is higher than the - severity level set in the display. - - 2. The display filter (if set) matches some string in - the event line. - - The display can be in regular 80 column mode or in - extended 132 column mode. Critical events are - displayed in reverse video (if the terminal type - supports it). Additional features like displaying - informational messages in a part of the window, - automatic resizing window sizes, operator - acknowledgement via a bell when a new event goes - critical are also available. - - ippingmon - which monitors the reachability of a site via "ICMP" - ping packets (ICMP was preferred over SNMP for many - obvious reasons). This program can use the default out- - put from the system's ping program, but an accompanying - program ( multiping) can ping multiple IP sites at the - - - -NOCTools2 Working Group [Page 117] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - same time and is preferable for monitoring a large list - of sites. A site is marked unreachable if a certain - number of packets is lost, and the severity level is - increased each time that the site tests unreachable. - - osipingmon - which is similar to the ippingmon module but uses the - OSI ping program instead. No multiple ping program for - OSI sites has been developed at this time. The only - requirement is that the system's ping program output - match the typical BSD IP ping program's output. - - nsmon - which monitors the nameservers (named) on the list of - specified hosts. It periodically sends an SOA query for - the default domain and if the queried nameservers - cannot resolve the query, then the site is elevated to - CRITICAL status. 
- - tpmon - For monitoring the throughput (kbits per second) to a - list of hosts. The program connects to the discard - socket on the remote machine (using a STREAM socket) - and sends large packets for a small amount of time to - evaluate the effective throughput. It elevates a site - to WARNING level if the throughput drops below a - certain threshold (set in the configuration file). - - trapmon - Converts all SNMP traps into a format suitable for - displaying using NOCOL. The severity of the various - traps is preset (and can be changed during compilation - time). - - - PLATFORM - Any Unix system with the curses screen management library - and IP (Internet Protocol) programming facility. It has been - tested on Sun Sparc 4.1.1, Ultrix, and NeXT systems. Porting - to other platforms might require minor adjustments depending - on the vagaries of the different vendors (mostly in the - include files). - - AVAILABILITY - NOCOL was developed at JvNCnet and has been in use for - monitoring the JvNCnet wide area network since 1989. - It is available via anonymous FTP from ftp.jvnc.net under - pub/jvncnet-packages/nocol.tar.Z. The system running at - - - -NOCTools2 Working Group [Page 118] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - JvNCet can be viewed by logging into the host nocol.jvnc.net - with username nocol (an rlogin instead of telnet will handle - your X window terminal types better). - To be added to the NOCOL mailing list (for future updates - and bug fixes), send a message to nocol-users- - request@jvnc.net with your email address. - - FUTURE DEVELOPMENTS - - Possible future enhancements are: - - 1. Event logging. - - 2. Addition of an automated troubleshooting mechanism - when a site severity level reaches a particular - level. - - 3. SNMP monitors to watch the state of certain vari- - ables (interface errors, packet rate, route state - changes). - - AUTHOR - The software was developed at JvNCnet over a period of time. 
- The overall design and initial development was done by Vikas - Aggarwal and Sze-Ying Wuu. Additional development is being - done and coordinated by Vikas Aggarwal (vikas@jvnc.net). - Copyright 1992 JvNCnet. (See the file COPYRIGHT for full - details) - - SEE ALSO - nocol(1) nocol(3) tpmon(8) tsmon(8) nsmon(8) - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 119] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NPRV - - NAME - NPRV -- IP Node/Protocol Reachability Verifier - - KEYWORDS - map, routing, status; IP; ping; VMS; free. - - ABSTRACT - NPRV is a full-screen, keypad-oriented utility that - runs under VAX/VMS. It allows the user to quickly scan - through a user-defined list of IP addresses (or domain - names) and verify a node's reachability. The node's - reachability is determined by performing an ICMP echo, - UDP echo and a TCP echo at alternating three second - intervals. The total number of packets sent and - received are displayed, as well as the minimum, average - and maximum round-trip times (in milliseconds) for each - type of echo. Additionally, a "trace route" function - is performed to determine the path from the local sys- - tem to the remote host. Once all of the trace route - information has filled the screen, a "snapshot" of the - screen can be written to a text file. Upon exiting the - utility, these text files can be used to generate a - logical network map showing host and gateway intercon- - nectivity. - - MECHANISM - The ICMP echo is performed by sending ICMP ECHO REQUEST - packets. The UDP and TCP echoes are performed by con- - necting to the UDP/TCP echo ports (port number 7). The - trace route information is compiled by sending alter- - nating ICMP ECHO REQUEST packets and UDP packets with - very large destination UDP port numbers (in two - passes). Each packet is initially sent with a TTL - (time to live) of 1. 
This should cause an ICMP TIME - EXCEEDED error to be generated by the first routing - gateway. Then each packet is sent with a TTL of 2. - This should cause an ICMP TIME EXCEEDED error to be - generated by the second routing gateway. Then each - packet is sent with a TTL of 3, and so on. This pro- - cess continues until an ICMP ECHO REPLY or UDP PORT - UNREACHABLE is received. This indicates that the - remote host has been reached and that the trace route - information is complete. - - - - - - -NOCTools2 Working Group [Page 120] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - This utility sends one echo packet per second (ICMP, - UDP or TCP), as well as sending out one trace route - packet per second. If a transmitted trace route packet - is returned in less than one second, another trace - route packet is sent in 100 milliseconds. This could - cause a significant amount of contention on the local - network. - - BUGS - None known. Please report any discovered bugs to the - author at: - Allen Sturtevant - National Magnetic Fusion Energy Computer Center - Lawrence Livermore National Laboratory - P.O. Box 808; L-561 - Livermore, CA 94550 - Phone : (415) 422-8266 - E-Mail: sturtevant@ccc.nmfecc.gov - - LIMITATIONS - The user is required to have SYSPRV privilege to per- - form the ICMP Echo and trace route functions. The - utility will still run with this privilege disabled, - but only the UDP Echo and TCP Echo information will be - displayed. This utility is written in C, but unfor- - tunately it cannot be easily ported over to UNIX since - many VMS system calls are used and all screen I/O is - done using the VMS Screen Management Routines. - - HARDWARE REQUIRED - Any network interface supported by TGV Incorporated's - MultiNet software. - - SOFTWARE REQUIRED - VAX/VMS V5.1+ and TGV Incorporated's MultiNet version - 2.0. 
- - AVAILABILITY - For executables only, FTP to the ANONYMOUS account - (password GUEST) on CCC.NMFECC.GOV (128.55.128.30) and - GET the following files: - - [ANONYMOUS.PROGRAMS.NPRV]NPRV.DOC (ASCII text) - [ANONYMOUS.PROGRAMS.NPRV]NPRV.EXE (binary) - [ANONYMOUS.PROGRAMS.NPRV]SAMPLE.IPA (ASCII text) - - - - - -NOCTools2 Working Group [Page 121] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog NSLOOKUP - - NAME - nslookup - - KEYWORDS - status; DNS, BIND; UNIX, VMS; free. - - ABSTRACT - Nslookup is an interactive program for querying - Internet Domain Name System (DNS) servers. It is - essentially a user-friendly front end to - the BIND "resolver" library routines. - - This program is useful for converting a hostname - into an IP address (and vice versa), determining - the name servers for a domain , listing - the contents of a domain, displaying any type of - DNS record, such as MX, CNAME, SOA, etc., - diagnosing name server problems. - - By default, nslookup will query - the default name server but you can specify a - different server on the command line or from a - configuration file. You can also specify - different values for the options that control the - resolver routines. - - MECHANISM - The program formats, sends and receives DNS - (RFC 1034) queries. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None known. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX or related OS, or VMS. - - - - - -NOCTools2 Working Group [Page 122] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - NSLookup is included in the BIND distribution. - - Available via anonymous FTP from uunet.uu.net, - in directory /networking/ip/dns/bind. Available - with 4.xBSD UNIX and related operating systems. - For VMS, available as part of TGV MultiNet IP - software package, as well as Wollongong's WIN/TCP. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 123] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog OSITRACE - - NAME - OSITRACE - - KEYWORDS - traffic; OSI; eavesdrop; UNIX; free. - - ABSTRACT - OSITRACE is a network performance tool that displays - information about ISO TP4 connections. One line of - output is displayed for each packet indicating the - time, source, destination, length, packet type, - sequence number, credit, and any optional parameters - contained in the packet. Numerous options are avail- - able to control the output of OSITRACE. - - To obtain packets to analyze, OSITRACE uses Sun - Microsystems' Network Interface Tap (NIT) in SunOS 3.4, - 3.5, and 4.0.X. OSITRACE may also obtain data from the - NETMON utility which is described as another tool - entry. - - In Sun systems, OSITRACE may be easily installed: OSI - kernel support is not needed, nor is any other form of - OSI software support. - - MECHANISM - This tool has been designed in such a way that code to - process different protocol suites may be easily added. - As such, OSITRACE also has the ability to trace the DOD - TCP protocols. - - CAVEATS - None. - - BUGS - Bug reports and questions should be addressed to: ie- - tools@gateway.mitre.org - - Requests to join this mailing list: ie-tools- - request@gateway.mitre.org - - Questions and suggestions can also be directed to: Greg - Hollingsworth, gregh@gateway.mitre.org - - LIMITATIONS - None reported. - - - -NOCTools2 Working Group [Page 124] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HARDWARE REQUIRED - No restriction. - - SOFTWARE REQUIRED - SunOS 3.4, 3.5, or 4.0.X, or BSD UNIX-like network pro- - tocols with NETMON installed. - - AVAILABILITY - OSITRACE is copyrighted by the MITRE-Washington Net- - working Center, but freely distributed "as is." 
It re- - quires retention of a copyright text in code derived - from it. The distribution is available by anonymous - FTP in pub/pdutrace.tar or pub/pdutrace.tar.Z from - aelred-3.ie.org. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 125] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog OVERVIEW - - NAME - OverVIEW - - KEYWORDS - manager, status; IP; NMS, SNMP; DOS. - - ABSTRACT - Network and internet monitor; Performance monitor; - Fully Graphic user interface; Event logging; TFTP boot - server - - MECHANISM - OverVIEW uses SNMP to query routers, gateways and - hosts. Also supports SGMP, PING and is committed to - CMIP/CMOT. The SNMP queries allow dynamic determina- - tion of configuration and state. Sets of related - queries allows monitoring of congestion and faults. - The hardware and software are sold as an integrated - package. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - 256 nodes, 256 nets - - HARDWARE REQUIRED - 80286, 640K, EGA, mouse. - - SOFTWARE REQUIRED - MS-DOS, OverVIEW, Network kernel, Mouse driver, SNMP - agents for monitored devices. - - AVAILABILITY - Fully supported product of Proteon, Inc. For more - information, contact: - Proteon, Inc. Phone: (508) 898-2800 - 2 Technology Drive Fax: (508) 366-8901 - Westborough, MA 01581 Telex: 928124 - - - - - - - -NOCTools2 Working Group [Page 126] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog PING - - NAME - ping - - KEYWORDS - generator, status; IP; ping; DOS, UNIX, VMS; free. - - ABSTRACT - Ping is perhaps the most basic tool for internet - management. It verifies that a remote IP implementa- - tion and the intervening networks and interfaces are - functional. It can be used to measure round trip - delay. Numerous versions of the ping program exist. - - MECHANISM - Ping is based on the ICMP ECHO_REQUEST message. 
- - CAVEATS - If run repeatedly, ping could generate high system - loads. - - BUGS - None known. - - LIMITATIONS - PC/TCP's ping is the only implementation known support - both loose and strict source routing. Though some ping - implementations support the ICMP "record route" - feature, the usefulness of this option for debugging - routes is limited by the fact that many gateways do not - correctly implement it. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - None. - - AVAILABILITY - Ping is widely included in TCP/IP distributions. Pub- - lic domain versions of ping are available via anonymous - FTP from uunet.uu.net, in directory bsd- - sources/src/etc, and from venera.isi.edu, in directory - pub. - - - - - - -NOCTools2 Working Group [Page 127] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog PROCESS-TCPWARE-SNMP - - NAME - SNMP agent - - KEYWORDS - alarm, manager, status, traffic; IP; SNMP; VMS;. - - ABSTRACT - The SNMP agent listens for and responds to network - management requests sent from SNMP-conforming network - management stations. The SNMP agent also sends SNMP - traps, under specific conditions, to identified trap - receivers. SNMP communities and generation of traps - are fully configurable. The SNMP agent supports all - MIB-II variables except the EGP group. - - MECHANISM - Network management variables are made available for - inspection and/or alteration by means of the Simple - Network Management Protocol (SNMP). - - CAVEATS - None. - - BUGS - No known bugs. - - LIMITATIONS - Does not yet provide the ability for sites to add - extra MIB definitions. - - HARDWARE REQUIRED - Supported VAX processors. 
- - SOFTWARE REQUIRED - VMS V4 or later - - AVAILABILITY - The SNMP agent is included in TCPware for VMS, a - commercial product available under license from: - Process Software Corporation - 959 Concord Street - Framingham, MA 01701 - +1 800 722 7770, +1 508 879 6994 (voice) - +1 508 879-0042 (FAX) TELEX 517891 - sales@process.com - - - - -NOCTools2 Working Group [Page 128] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog PROXYD - - NAME - proxyd -- SNMP proxy agent daemons from SNMP Research. - - KEYWORDS - control, management, status; - bridge, Ethernet, IP, OSI, ring, star; - NMS, SNMP; - UNIX; - library, sourcelib. - - ABSTRACT - SNMP proxy agents may be used to permit the monitoring - and controlling of network elements which are otherwise - not addressable using the SNMP management protocol - (e.g., a network bridge that implements a proprietary - management protocol). Similarly, SNMP proxy agents may - be used to protect SNMP agents from redundant network - management agents through the use of caches. Finally, - SNMP proxy agents may be used to implement elaborate - MIB access policies. - - The proxy agent daemon: - - - listens for SNMP queries and commands from logically - remote network management stations, - - translates and retransmits those as appropriate - network management queries or cache lookups, - - listens for and parses the responses, - - translates the responses into SNMP responses, and - - returns those responses as SNMP messages to the - network management station that originated the - transaction. - - The proxy agent daemon also emits SNMP traps to - identified trap receivers. The proxy agent daemon is - designed to make the addition of additional vendor- - specific variables a straight-forward task. The proxy - application comes complete with source code including a - powerful set of portable libraries for generating and - parsing SNMP messages and a set of command line utilities. 
- - MECHANISM - Network management variables are made available for - inspection and/or alteration by means of the Simple - Network Management Protocol (SNMP). - - - - -NOCTools2 Working Group [Page 129] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - This application is a template for proxy application - writers. - - Only a few of the many LanBridge 100 variables are - supported. - - HARDWARE REQUIRED - System from Sun Microsystems, Incorporated. - - SOFTWARE REQUIRED - Sun OS 3.5 or 4.x. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 130] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog PROXYD_SNMP_RESEARCH - - NAME - proxyd -- SNMP proxy agent daemons from SNMP Research. - - KEYWORDS - control, management, status; - bridge, Ethernet, IP, OSI, ring, star; - NMS, SNMP; - UNIX; - library, sourcelib. - - ABSTRACT - SNMP proxy agents may be used to permit the monitoring - and controlling of network elements which are otherwise - not addressable using the SNMP management protocol - (e.g., a network bridge that implements a proprietary - management protocol). Similarly, SNMP proxy agents may - be used to protect SNMP agents from redundant network - management agents through the use of caches. Finally, - SNMP proxy agents may be used to implement elaborate - MIB access policies. 
- - The proxy agent daemon: - - - listens for SNMP queries and commands from logically - remote network management stations, - - translates and retransmits those as appropriate - network management queries or cache lookups, - - listens for and parses the responses, - - translates the responses into SNMP responses, and - - returns those responses as SNMP messages to the - network management station that originated the - transaction. - - The proxy agent daemon also emits SNMP traps to - identified trap receivers. The proxy agent daemon is - designed to make the addition of additional vendor- - specific variables a straight-forward task. The proxy - application comes complete with source code including a - powerful set of portable libraries for generating and - parsing SNMP messages and a set of command line utilities. - - MECHANISM - Network management variables are made available for - inspection and/or alteration by means of the Simple - Network Management Protocol (SNMP). - - - - -NOCTools2 Working Group [Page 131] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - This application is a template for proxy application - writers. - - Only a few of the many LanBridge 100 variables are - supported. - - HARDWARE REQUIRED - System from Sun Microsystems, Incorporated. - - SOFTWARE REQUIRED - Sun OS 3.5 or 4.x. 
- - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 132] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog QUERY - - NAME - query, ripquery - - KEYWORDS - routing; IP; spoof; UNIX; free. - - ABSTRACT - Query allows remote viewing of a gateway's routing - tables. - - MECHANISM - Query formats and sends a RIP request or POLL command - to a destination gateway. - - CAVEATS - Query is intended to be used a a tool for debugging - gateways, not for network management. SNMP is the pre- - ferred protocol for network management. - - BUGS - None known. - - LIMITATIONS - The polled gateway must run RIP. - - HARDWARE REQUIRED - No restriction. - - SOFTWARE REQUIRED - 4.3BSD UNIX or related OS. - - AVAILABILITY - Available with routed and gated distributions. - - Routed may be obtained via anonymous FTP from - uunet.uu.net, in file bsd- - sources/src/network/routed.tar.Z. - - Gated may be obtained via anonymous FTP from - devvax.tn.cornell.edu. Distribution files are in - directory pub/gated. - - - - - - - - -NOCTools2 Working Group [Page 133] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SAS-CPE - - NAME - SAS/CPE(tm) for Open Systems Software - - KEYWORDS - manager, status; - bridge, ethernet, FDDI, IP, OSI, NFS; - X; - DOS, HP, UNIX; - library. - - ABSTRACT - SAS/CPE(tm) for Open Systems software is an integrated system designed - to facilitate the analysis and presentation of computer performance - and resource utilization data. SAS/CPE software features include: - - . 
Processing of raw computer and network performance data into - detail-level SAS data sets. - . Conversion and validation of logged data values to forms - more useful for display and analysis (e.g., I/O counts - are converted to I/O rates per second). - . Numerous sample reports on performance data processed by - SAS/CPE software. - . Reduction of logged performance data into daily, weekly, - monthly or yearly summarized values. - . Menu-driven interface to the creation and management of multiple - performance data bases. - . Menu-driven report designing interface that allows users with no - programming knowledge to create and manage custom reports from - their performance data base. No SAS coding is needed for this - interface. - - MECHANISM - SAS/CPE for Open Systems processes and reports data - from SNMP and other proprietary monitoring protocols, - as well as du and accounting. - - CAVEATS - The product is currently in alpha testing. - - BUGS - None known. - - LIMITATIONS - None reported. - - - - - -NOCTools2 Working Group [Page 134] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HARDWARE REQUIRED - HP, SUN or IBM Workstation - - SOFTWARE REQUIRED - The SAS(r) System Base Software, SAS/GRAPH Software and - SAS/CPE for Open System Software - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - SAS/CPE for Open Systems Software is available from: - SAS Institute Inc. - SAS Campus Drive - Cary, NC 27513 - Phone 919-677-8000 - FAX 919-677-8123 - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Send email to snodjs@mvs.sas.com. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 135] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SNIFFER - - NAME - Sniffer - - KEYWORDS - analyzer, generator, traffic; DECnet, ethernet, IP, - NFS, OSI, ring, SMTP, star; eavesdrop; standalone. 
- - ABSTRACT - The Network General Sniffer is a protocol analyzer for - performing LAN diagnostics, monitoring, traffic genera- - tion, and troubleshooting. The Sniffer protocol - analyzer has the capability of capturing every packet - on a network and of decoding all seven layers of the - OSI protocol model. Capture frame selection is based - on several different filters: protocol content at lower - levels; node addresses; pattern matching (up to 8 - logically-related patterns of 32 bytes each); and des- - tination class. Users may extend the protocol - interpretation capability of the Sniffer by writing - their own customized protocol interpreters and linking - them to the Sniffer software. - - The Sniffer displays network traffic information and - performance statistics in real time, in user-selectable - formats. Numeric station addresses are translated to - symbolic names or manufacturer ID names. Network - activities measured include frames accepted, Kbytes - accepted, and buffer use. Each network version has - additional counters for activities specific to that - network. Network activity is expressed as - frames/second, Kbytes/second, or per cent of network - bandwidth utilization. - - Data collection by the Sniffer may be output to printer - or stored to disk in either print-file or spread-sheet - format. - - Protocol suites understood by the Sniffer include: - Banyan Vines, IBM Token-Ring, Novell Netware, XNS/MS- - Net (3Com 3+), DECnet, TCP/IP (including SNMP and - applications-layer protocols such as FTP, SMTP, and - TELNET), X Windows (for X version 11), NFS, and several - SUN proprietary protocols (including mount, pmap, RPC, - and YP). Supported LANs include: ethernet, Token-ring - (4Mb and 16Mb versions), ARCNET, StarLAN, IBM PC Net- - work (Broadband), and Apple Localtalk Network. 
- - - -NOCTools2 Working Group [Page 136] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - The Sniffer is a self-contained, portable protocol - analyzer that requires only AC line power and connection - to a network to operate. Normally passive (except when - in Traffic Generator mode), it captures images of all - or of selected frames in a working buffer, ready for - immediate analysis and display. - - The Sniffer is a standalone device. Two platforms are - available: one for use with single network topologies, - the other for use with multi-network topologies. Both - include Sniffer core software, a modified network - interface card (or multiple cards), and optional proto- - col interpreter suites. - - All Sniffer functions may be remotely controlled from a - modem-connected PC. Output from the Sniffer can be - imported to database or spreadsheet packages. - - CAVEATS - In normal use, the Sniffer is a passive device, and so - will not adversely affect network performance. Perfor- - mance degradation will be observed, of course, if the - Sniffer is set to Traffic Generator mode and connected - to an active network. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - None. The Sniffer is a self-contained unit, and - includes its own interface card. It installs into a - network as would any normal workstation. - - SOFTWARE REQUIRED - None. - - - - - - - - - - - - -NOCTools2 Working Group [Page 137] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - The Sniffer is available commercially. For information - on your local representative, call or write: - Network General Corporation - 4200 Bohannon Drive - Menlo Park, CA 94025 - Phone: 415-688-2700 - Fax: 415-321-0855 - - For acquisition by government agencies, the Sniffer is - included on the GSA schedule.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 138] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SNMP_DEVELOPMENT_KIT - - NAME - The SNMP Development Kit - - KEYWORDS - manager, status; IP; NMS, SNMP; UNIX; free, sourcelib. - - ABSTRACT - The SNMP Development Kit comprises C Language source - code for a programming library that facilitates access - to the management services of the SNMP (RFC 1098). - Sources are also included for a few simple client - applications whose main purpose is to illustrate the - use of the library. Example client applications query - remote SNMP agents in a variety of modes, and generate - or collect SNMP traps. Code for an example SNMP agent - that supports a subset of the Internet MIB (RFC 1066) - is also included. - - MECHANISM - The Development Kit facilitates development of SNMP- - based management applications -- both clients and - agents. Example applications execute SNMP management - operations according to the values of command line - arguments. - - CAVEATS - None. - - BUGS - Fixed in the next release. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - The SNMP library source code is highly portable and - runs on a wide range of platforms. - - SOFTWARE REQUIRED - The SNMP library source code has almost no operating - system dependencies and runs in a wide range of - environments. Certain portions of the example SNMP - agent code are specific to the 4.3BSD implementation of - the UNIX system for the DEC MicroVAX. - - - - - -NOCTools2 Working Group [Page 139] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - The Development Kit is available via anonymous FTP from - host allspice.lcs.mit.edu. 
The copyright for the - Development Kit is held by the Massachusetts Institute - of Technology, and the Kit is distributed without - charge according to the terms set forth in its code and - documentation. The distribution takes the form of a - UNIX tar file. - - Bug reports, questions, suggestions, or complaints may - be mailed electronically to snmp-dk@ptt.lcs.mit.edu, - although no response in any form is guaranteed. Dis- - tribution via UUCP mail may be arranged by contacting - the same address. Requests for hard-copy documentation - or copies of the distribution on magnetic media are - never honored. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 140] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SNMP_Libraries_SNMP_RESEARCH - - NAME - SNMP Libraries and Utilities from SNMP Research. - - KEYWORDS - alarm, control, manager, map, security, status; - bridge, DECnet, Ethernet, FDDI, IP, OSI, ring, star; - NMS, SNMP; - DOS, UNIX, VMS; - sourcelib. - - ABSTRACT - The SNMP Libraries and Utilities serve two purposes: - - 1) to act as building blocks for the construction of - SNMP-based agent and manager applications; and - - 2) to act as network management tools for network - fire fighting and report generation. - - The libraries perform ASN.1 parsing and generation tasks - for both network management station applications and - network management agent applications. These libraries - hide the details of ASN.1 parsing and generation from - application writers and make it unnecessary for them to - be expert in these areas. The libraries are very robust - with considerable error checking designed in. The - several command line utilities include applications for - retrieving one or many variables, retrieving tables, or - effecting commands via the setting of remote network - management variables. - - MECHANISM - The parsing is performed via recursive descent methods. 
- Messages are passed via the Simple Network Management - Protocol (SNMP). - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - The monitored and managed nodes must implement the SNMP - over UDP per RFC 1157 or must be reachable via a proxy - agent. - - - -NOCTools2 Working Group [Page 141] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HARDWARE REQUIRED - This software has been ported to numerous platforms - including workstations, general-purpose timesharing - systems, and embedded hardware in intelligent network - devices such as repeaters, bridges, and routers. - - SOFTWARE REQUIRED - C compiler, TCP/IP library. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 142] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SNMP_PACKAGED_AGENT_SNMP_RESEARCH - - NAME - SNMP Packaged Agent System -- an SNMP host/gateway - agent daemon including a complete protocol stack and - runtime environment required to support an SNMP Agent - from SNMP Research. - - KEYWORDS - control, manager, status; - bridge, Ethernet, FDDI, IP, OSI, ring, star; - NMS, SNMP; - DOS, standalone, UNIX; - sourcelib. - - ABSTRACT - The snmpd agent daemon listens for and responds to - network management queries and commands from logically - remote network management stations. The agent daemon - also emits SNMP traps to identified trap receivers. - The agent daemon is designed to make the addition of - additional vendor-specific variables a - straight-forward task. 
The snmpd application comes - complete with source code including a powerful set of - portable libraries for generating and parsing SNMP - messages and a set of command line utilities. - - The Packaged Agent System is designed to aid the - hardware manufacturer who is not experienced with the - TCP/IP protocol suite. A lightweight, non-preemptive - scheduler/tasking system for faster execution and less - impact on slow CPUs is included in the package. - Development environment is either MS DOS or UNIX. - - MECHANISM - Network management variables are made available for - inspection and/or alteration by means of the Simple - Network Management Protocol (SNMP). - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - - - -NOCTools2 Working Group [Page 143] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HARDWARE REQUIRED - The Motorola 68XXX and the Intel 8088 and X86 - platforms are fully supported. Other platforms can be - supported. Contact SNMP Research for details. - - This software has been ported to numerous platforms - including workstations, general-purpose timesharing - systems, and embedded hardware in intelligent network - devices such as repeaters, bridges, and routers. - - SOFTWARE REQUIRED - C compiler. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 144] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SNMPD_SNMP_RESEARCH - - NAME - snmpd -- an SNMP host/gateway agent daemon from SNMP - Research. 
- - KEYWORDS - control, manager, status; - bridge, Ethernet, FDDI, IP, OSI, ring, star; - NMS, SNMP; - DOS, UNIX; - sourcelib. - - ABSTRACT - The snmpd agent daemon listens for and responds to - network management queries and commands from logically - remote network management stations. The agent daemon - also emits SNMP traps to identified trap receivers. The - agent daemon is architected to make the addition of - additional vendor-specific variables a straight-forward - task. The snmpd application comes complete with source - code including a powerful set of portable libraries for - generating and parsing SNMP messages and a set of - command line utilities. - - MECHANISM - Network management variables are made available for - inspection and/or alteration by means of the Simple - Network Management Protocol (SNMP). - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - Only operating system variables available without - source code modifications to the operating system and - device drivers are supported. - - HARDWARE REQUIRED - This software has been ported to numerous platforms - including workstations, general-purpose timesharing - systems, and embedded hardware in intelligent network - devices such as repeaters, bridges, and routers. - - - - - -NOCTools2 Working Group [Page 145] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SOFTWARE REQUIRED - C compiler.
- - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 146] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SPIDERMONITOR - - NAME - SpiderMonitor P220, K220 and - SpiderAnalyzer P320, K320 - - KEYWORDS - alarm, analyzer, generator, traffic; DECnet, ethernet, - IP, OSI; eavesdrop; standalone; sourcelib. - - ABSTRACT - The SpiderMonitor and SpiderAnalyzer are protocol - analyzers for performing ethernet LAN diagnostics, mon- - itoring, traffic generation, and troubleshooting. The - SpiderMonitor has the capability of capturing every - packet on a network and of decoding the first four - layers of the OSI protocol model. The SpiderAnalyzer - has additional software for decoding higher protocol - layers. Protocol suites understood: TCP/IP (including - SNMP and applications-layer protocols), OSI, XNS, DEC- - net and IPX. User-definable decodes can be written in - 'C' with the Microsoft version 5.0 'C' compiler. A - decode guide is provided. - - The SpiderAnalyzer supports multiple simultaneous - filters for capturing packets using predefined patterns - and error states. Filter patterns can also trigger on - NOT matching 1 or more filters, an alarm, or a speci- - fied time. - - The SpiderAnalyzer can also employ TDR (Time Domain - Reflectometry) to find media faults, open or short cir- - cuits, or transceiver faults. It can transmit OSI, - XNS, and Xerox link-level echo packets to user- - specified stations, performs loop round tests. 
- - In traffic generation mode, the SpiderAnalyzer has the - ability to generate packets at random intervals of ran- - dom lengths or any combination of random or fixed - interval or length, generation of packets with CRC - errors, or packets that are too short, or packets that - are too long. - - Output from the SpiderMonitor/Analyzer can be imported - to database or spreadsheet packages. - - - - - - -NOCTools2 Working Group [Page 147] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - The SpiderMonitor and Spider Analyzer are available as - stand-alone, IBM PC compatible packages based upon a - Compaq III portable system, or as plug-in boards for - any IBM XT/AT compatible machine. The model 220 (Spi- - derMonitor) systems provide a functional base suited - for most network management needs. The model 320 (Spi- - derAnalyzer) systems provide extended functionality in - the development mode and traffic generation mode as - well as more filtering capabilities than the 220 models. - - CAVEATS - Traffic generation will congest an operational ether- - net. - - BUGS - None known. - - LIMITATIONS - Monitoring of up to 1024 stations and buffering of up - to 1500 packets. The model 220 provides for 3 filters - with a filter depth of 46 bytes. The model 320 pro- - vides for 4 filters and a second level of filtering - with a filter depth of 64 bytes. - - HARDWARE REQUIRED - PX20s are self contained, the KX20s require an IBM - PC/XT-AT compatible machine with 5 megabytes of hard - disk storage and the spare slot into which the board - kit is plugged. - - SOFTWARE REQUIRED - None. The SpiderAnalyzer requires the Microsoft 'C' - Compiler, Version 5.0 for writing user defined decodes. - - AVAILABILITY - The SpiderMonitor/Analyzer is available commercially. - For information on your local representative, call or - write: - Spider Systems, Inc.
- 12 New England Executive Park - Burlington, MA 01803 - Telephone: 617-270-3510 - FAX: 617-270-9818 - - - - - - - -NOCTools2 Working Group [Page 148] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SPIMS - - NAME - SPIMS -- the Swedish Institute of Computer Science - (SICS) Protocol Implementation Measurement - System tool. - - KEYWORDS - benchmark, debugger; IP, OSI; spoof; UNIX. - - ABSTRACT - SPIMS is used to measure the performance of protocol - and "protocol-like" services including response time - (two-way delay), throughput and the time to open and - close connections. It has been used to: - - o benchmark alternative protocol implementations, - - o observe how performance varies when parameters in - specific implementations have been varied (i.e., - to tune parameters). - - SPIMS currently has interfaces to the DoD Internet Pro- - tocols: UDP, TCP, FTP, SunRPC, the OSI protocols from - the ISODE 4.0 distribution package: FTAM, ROSE, ISO TP0 - and to Sunlink 5.2 ISO TP4 as well as Stanford's VMTP. - Also available are a rudimentary set of benchmarks, - stubs for new protocol interfaces and a user manual. - - For an example of the use of SPIMS to tune protocols, - see: - Nordmark & Cheriton, "Experiences from VMTP: How - to achieve low response time," IFIP WG6.1/6.4: - Protocols for High-Speed Networks, May 1989, - Zurich. To be published. - - For an example of how SPIMS can be used to benchmark - protocols, see: - - Gunningberg, Bjorkman, Nordmark, Sjodin, Pink & - Stromqvist "Application Protocols and Performance - Benchmarks", IEEE Communications Magazine, June - 1989, Vol. 27, No.6, pp 30-36. 
- - Sjodin, Gunningberg, Nordmark, & Pink, "Towards - Protocol Benchmarks", IFIP WG6.1/6.4 Protocols - for High-Speed Networks, May 1989, Zurich, pp - 57-67 - - - -NOCTools2 Working Group [Page 149] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - SPIMS runs as user processes and uses a TCP connection - for measurement set-up. Measurements take place - between processes over the measured protocol. SPIMS - generates messages and transfers them via the measured - protocol service according to a user-supplied specifi- - cation. SPIMS has a unique measurement specification - language that is used to specify a measurement session. - In the language there are constructs for different - application types (e.g., bulk data transfer), for - specifying frequency and sequence of messages, for dis- - tribution over message sizes and for combining basic - specifications. These specifications are independent - of both protocols and protocol implementations and can - be used for benchmarking. For more details on the - internals of SPIMS, see: - - Nordmark & Gunningberg, "SPIMS: A Tool for Protocol - Implementation Performance Measurements" Proc. of 13:th - Conf. on Local Computer Networks, Minneapolis 1989, pp - 222-229. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - SPIMS is implemented on UNIX, including SunOS 4., - 4.3BSD UNIX, DN (UNIX System V, with extensions) and - Ultrix 2.0/3.0. It requires a TCP connection for meas- - urement set-up. No kernel modifications or any modifi- - cations to measured protocols are required. - - - - - - - - - - - -NOCTools2 Working Group [Page 150] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - SPIMS is not in the public domain and the software is - covered by licenses.
Use of the SPIMS software - represents acceptance of the terms and conditions of - the licenses. - The licenses are enclosed in the distribution package. - Licenses and SPIMS cover letter can also be obtained - via an Internet FTP connection without getting the whole - software. The retrieval procedure is identical to the - below university distribution via FTP. The file to - retrieve is pub/spims-dist/licenses.tar.Z - - There are two different distribution classes depending on - requesting organization: - - 1. Universities and non-profit organizations. - - To these organizations, SPIMS source code is distributed - free of charge. There are two ways to get the software: - - 1. FTP. - If you have an Internet FTP connection, you - can use anonymous FTP to sics.se - [192.16.123.90], and retrieve the file - pub/spims-dist/dist910304.tar.Z - (this is a .6MB compressed tar image) in - BINARY mode. Log in as user anonymous and at - the password prompt, use your complete - electronic mail address. - - 2. On a Sun 1/4-inch cartridge tape. - For mailing, a handling fee of US$150.00 will be - charged. Submit a bank check with the request. - Do not send tapes or envelopes. - - 2. Commercial organizations. - - These organizations can choose between a license for - commercial use, or a license for internal research - only and no commercial use whatsoever. - - For internal research use only: - - The SPIMS source code is distributed for a one - time fee of US$500.00. Organizations - interested in the research prototype need to - contact us via e-mail and briefly motivate why - they qualify (non-commercial use) for the - - - -NOCTools2 Working Group [Page 151] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - research prototype. - They will thereafter get a permission to - obtain a copy from the same distribution - source as for universities. - - Commercial use: - - A commercial version of SPIMS will eventually - be distributed and supported by a commercial - partner.
In the meantime we will distribute - the research prototype (source code) to - interested organizations without any guaranty - or support. Contact SICS for further - information. - - For more information about the research prototype - distribution and about a commercial license, contact: - - Swedish Institute of Computer Science - Att: Birgitta Klingenberg - P.O. Box 1263 - S-164 28 Kista - SWEDEN - - e-address: spims@sics.se - Phone: +46-8-7521500, Fax: +46-8-7517230 - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Bengt Ahlgren - Swedish Institute of Computer Science - Box 1263 - S-164 28 KISTA, SWEDEN - - Email: bengta@sics.se - Tel: +46 8 752 1562 (direct) - or +46 8 752 1500 - Fax: +46 8 751 7230 - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 152] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog SPRAY_SUN - - NAME - spray - - KEYWORDS - benchmark, generator; IP; ping; UNIX. - - ABSTRACT - Spray is a traffic generation tool that generates RPC - or UDP packets, or ICMP Echo Requests. The packets are - sent to a remote procedure call application at the des- - tination host. The count of received packets is - retrieved from the remote application after a certain - number of packets have been transmitted. The differ- - ence in packets received versus packets sent represents - (on a LAN) the packets that the destination host had to - drop due to increasing queue length. A measure of - throughput relative to system speed and network load - can thus be obtained. - - MECHANISM - See above. - - CAVEATS - Spray can congest a network. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - SunOS - - AVAILABILITY - Supplied with SunOS.
- - - - - - - - - - -NOCTools2 Working Group [Page 153] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog TCPDUMP - - NAME - tcpdump - - KEYWORDS - traffic; ethernet, IP, NFS; UNIX, VMS; free. - - ABSTRACT - Tcpdump can interpret and print headers for the follow- - ing protocols: ethernet, IP, ICMP, TCP, UDP, NFS, ND, - ARP/RARP, AppleTalk. Tcpdump has proven useful for - examining and evaluating the retransmission and window - management operations of TCP implementations. - - MECHANISM - Much like etherfind, tcpdump writes a log file of the - frames traversing an ethernet interface. Each output - line includes the time a packet is received, the type - of packet, and various values from its header. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - Public domain version requires a kernel patch for - SunOS. TCPware for VMS - currently interprets headers - for IP, TCP, UDP, and ICMP only. - - HARDWARE REQUIRED - Any Ultrix system (VAX or DEC RISC hardware) - - SOFTWARE REQUIRED - Ultrix release 4.0 or later. For Ultrix 4.1, may - require the patched "if_ln.o" kernel module, available - from Digital's Customer Support Center. - - - - - - - - - - - - -NOCTools2 Working Group [Page 154] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - Available, though subject to copyright restrictions, - via anonymous FTP from ftp.ee.lbl.gov. The source and - documentation for the tool is in compressed tar format, - in file tcpdump.tar.Z. Also available from - spam.itstd.sri.com, in directory pub. For VMS hosts - with DEC ethernet controllers, available as part of TGV - MultiNet IP software package and TCPware for VMS from - Process Software Corporation. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 155] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog TCPLOGGER - - NAME - tcplogger - - KEYWORDS - traffic; IP; eavesdrop; UNIX; free. - - ABSTRACT - Tcplogger consists of modifications to the 4.3BSD UNIX - source code, and a large library of post-processing - software. Tcplogger records timestamped information - from TCP and IP packets that are sent and received on a - specified connection. For each TCP packet, information - such as sequence number, acknowledgement sequence - number, packet size, and header flags is recorded. For - an IP packet, header length, packet length and TTL - values are recorded. Customized use of the TCP option - field allows the detection of lost or duplicate pack- - ets. - - MECHANISM - Routines of 4.3BSD UNIX in the netinet directory have - been modified to append information to a log in memory. - The log is read continuously by a user process and - written to a file. A TCP option has been added to - start the logging of a connection. Lots of post- - processing software has been written to analyze the - data. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - To get a log at both ends of the connection, the modi- - fied kernel should be run at both the hosts. - - All connections are logged in a single file, but - software is provided to filter out the record of a sin- - gle connection. - - HARDWARE REQUIRED - No restrictions. - - - - - -NOCTools2 Working Group [Page 156] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SOFTWARE REQUIRED - 4.3BSD UNIX (as modified for this tool). - - AVAILABILITY - Free, although a 4.3BSD license is required. Contact - Olafur Gudmundsson (ogud@cs.umd.edu). 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 157] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog TOKENVIEW_PROTEON - - NAME - TokenVIEW - - KEYWORDS - control, manager, status; ring; NMS, proprietary; DOS. - - ABSTRACT - Network Management tool for 4/16 Mbit IEEE 802.5 Token - Ring Networks. Monitors active nodes and ring errors. - Maintains database of nodes, wire centers and their - connections. Separate network management ring allows - remote configuration of wire centers. - - MECHANISM - A separate network management ring used with Proteon - Intelligent Wire Centers allows wire center configura- - tion information to be read and modified from a single - remote workstation. A log of network events used with - a database contain nodes, wire centers and their con- - nections, facilitates tracking and correction of net- - work errors. Requires an "E" series PROM, sold with - package. - - CAVEATS - Currently, only ISA bus cards support the required E - series PROM. - - BUGS - None known. - - LIMITATIONS - 256 nodes, 1 net. - - HARDWARE REQUIRED - 512K RAM, CGA or better, hard disk, mouse supported. - - SOFTWARE REQUIRED - MS-DOS, optional mouse driver - - AVAILABILITY - Fully supported product of Proteon, Inc. Previously - sold as Advanced Network Manager (ANM). For more in- - formation, contact: - Proteon, Inc. Phone: (508) 898-2800 - 2 Technology Drive Fax: (508) 366-8901 - Westborough, MA 01581 Telex: 928124 - - - -NOCTools2 Working Group [Page 158] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog TRACEROUTE - - NAME - traceroute - - KEYWORDS - routing; IP; ping; UNIX, VMS; free. - - ABSTRACT - Traceroute is a tool that allows the route taken by - packets from source to destination to be discovered. 
- It can be used for situations where the IP record route - option would fail, such as intermediate gateways dis- - carding packets, routes that exceed the capacity of a - datagram, or intermediate IP implementations that don't - support record route. Round trip delays between the - source and intermediate gateways are also reported - allowing the determination of individual gateways con- - tribution to end-to-end delay. - - Enhanced versions of traceroute have been developed - that allow specification of loose source routes for - datagrams. This allows one to investigate the return - path from remote machines back to the local host. - - MECHANISM - Traceroute relies on the ICMP TIME_EXCEEDED error - reporting mechanism. When an IP packet is received by - a gateway with a time-to-live value of 0, an ICMP - packet is sent to the host which generated the packet. - By sending packets to a destination with a TTL of 0, - the next hop can be identified as the source of the - ICMP TIME EXCEEDED message. By incrementing the TTL - field the subsequent hops can be identified. Each - packet sent out is also time stamped. The time stamp - is returned as part of the ICMP packet so a round trip - delay can be calculated. - - CAVEATS - Some IP implementations forward packets with a TTL of - 0, thus escaping identification. Others use the TTL - field in the arriving packet as the TTL for the ICMP - error reply, which delays identification. - - Sending datagrams with the source route option will - cause some gateways to crash. It is considered poor - form to repeat this behavior. - - - - -NOCTools2 Working Group [Page 159] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - BUGS - None known. - - LIMITATIONS - Most versions of UNIX have errors in the raw IP code - that require kernel mods for the standard version of - traceroute to work.
A version of traceroute exists - that runs without kernel mods under SunOS 3.5 (see - below), but it only operates over an ethernet inter- - face. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX or related OS, or VMS. - - AVAILABILITY - Available by anonymous FTP from ftp.ee.lbl.gov, in file - traceroute.tar.Z. It is also available from - uc.msc.umn.edu. - - A version of traceroute that supports Loose Source - Record Route, along with the source code of the - required kernel modifications and a Makefile for - installing them, is available via anonymous FTP from - zerkalo.harvard.edu, in directory pub, file - traceroute_pkg.tar.Z. - - A version of traceroute that runs under SunOS 3.5 and - does NOT require kernel mods is available via anonymous - FTP from dopey.cs.unc.edu, in file - ~ftp/pub/traceroute.tar.Z. - - For VMS, traceroute is available as part of TGV Mul- - tiNet IP software package. - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 160] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog TRPT - - NAME - TRPT -- transliterate protocol trace - - KEYWORDS - traffic; IP; eavesdrop; UNIX; free. - - ABSTRACT - TRPT displays a trace of a TCP socket events. When no - options are supplied, TRPT prints all the trace records - found in a system, grouped according to TCP connection - protocol control block (PCB). 
- - An example of TRPT output is: - - 38241 ESTABLISHED:input - [e0531003..e0531203)@6cc5b402(win=4000) -> ESTA- - BLISHED - 38241 ESTABLISHED:user RCVD -> ESTABLISHED - 38266 ESTABLISHED:output - 6cc5b402@e0531203(win=4000) -> ESTABLISHED - 38331 ESTABLISHED:input - [e0531203..e0531403)@6cc5b402(win=4000) - -> CLOSE_WAIT - 38331 CLOSE_WAIT:output - 6cc5b402@e0531404(win=3dff) -> CLOSE_WAIT - 38331 CLOSE_WAIT:user RCVD -> CLOSE_WAIT - 38343 LAST_ACK:output - 6cc5b402@e0531404(win=4000) -> LAST_ACK - 38343 CLOSE_WAIT:user DISCONNECT -> LAST_ACK - 38343 LAST_ACK:user DETACH -> LAST_ACK - - MECHANISM - TRPT interrogates the buffer of TCP trace records that - is created when a TCP socket is marked for debugging. - - CAVEATS - Prior to using TRPT, an analyst should take steps to - isolate the problem connection and find the address of - its protocol control blocks. - - BUGS - None reported. - - - - - - - -NOCTools2 Working Group [Page 161] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LIMITATIONS - A socket must have the debugging option set for TRPT to - operate. Another problem is that the output format of - TRPT is difficult. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX or related OS. - - AVAILABILITY - Included with BSD and SunOS distributions. Available - via anonymous FTP from uunet.uu.net, in file bsd- - sources/src/etc/trpt.tar.Z. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 162] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog TTCP - - NAME - TTCP - - KEYWORDS - benchmark, generator; IP; ping; UNIX, VMS; free. - - ABSTRACT - TTCP is a traffic generator that can be used for test- - ing end-to-end throughput. It is good for evaluating - TCP/IP implementations. - - MECHANISM - Cooperating processes are started on two hosts. They - open a TCP connection and transfer a high volume of - data.
Delay and throughput are calculated. - - CAVEATS - Will greatly increase system load. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - No restrictions. - - SOFTWARE REQUIRED - BSD UNIX or related OS, or VMS. - - AVAILABILITY - Source for BSD UNIX is available via anonymous FTP from - vgr.brl.mil, in file ftp/pub/ttcp.c, and from sgi.com, - in file sgi/src/ttcp.c. A version of TTCP has also - been submitted to the USENET news group - comp.sources.unix. For VMS, ttcp.c is included in the - MultiNet Programmer's Kit, a standard feature of TGV - MultiNet IP software package. - - - - - - - - - - -NOCTools2 Working Group [Page 163] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog UNISYS-PARAMAX - - NAME - Paramax Network Security Server - - KEYWORDS - alarm, control, manager, security, status; - ethernet, FDDI, IP; X; UNIX. - - ABSTRACT - The Paramax Network Security Server (NSS) is a - security officer's tool for centralized security - management of TCP/IP-based networks. The NSS provides - capability for collection, on-line storage, - maintenance, and correlation of audit data from hosts, - workstations, servers, and network devices. Through - the X window based user interface, a security officer - can review and analyze this audit data at the NSS, - select and request filtered portions of host audit - data, and receive and analyze security alerts from - across the network. The NSS supports centralized - access control of network resources through its - capability to create and update user and host access - permissions data. The user access permissions data - identifies network addresses that each user is - permitted to access. The host access permissions data - identifies network addresses between which - communication is permitted. 
The NSS supports - centralized management of user authentication data - (user IDs and passwords) and other user data for use - by hosts, workstations, and servers in the network. - It generates pseudo-random pronounceable passwords for - selection and assignment to users by the security officer. - - The NSS deadman timer locks the NSS screen or logs the - security officer off the NSS after periods of - inactivity. A biometric authentication device is - optional for rigorous fingerprint authentication of - users at the NSS, and logins to the NSS itself are - permitted only at the console. The NSS currently - provides centralized security management for a System High - Network. It is being upgraded for a Compartmented Mode - environment. - - - - - - - - -NOCTools2 Working Group [Page 164] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - The NSS uses the Audit Information Transfer Protocol - (AITP) for the transfer of security alerts and audit - data. AITP is NOT proprietary, and the specification - is available from the address listed below. Access to - the NSS audit database is provided via the Structured - Query Language (SQL). - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Hardware required is a Sun 4 (SPARCStation) with a color - monitor, at least 600 MB disk, and 150 MB 1/4" - cartridge tape drive. - - SOFTWARE REQUIRED - SunOS Version 4.1.1 running the Sun OpenWindows X - windowing environment and the SYBASE Relational Data - Base Management System. 
- - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - Commercially available from: - Paramax Systems Corporation - 5151 Camino Ruiz - Camarillo, California 93011-6004 - 805-987-6811 - Peter Vazzana - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - Paramax Systems Corporation - 5151 Camino Ruiz - Camarillo, California 93011-6004 - 805-987-6811 - Nina Lewis - - - - - - - - - - -NOCTools2 Working Group [Page 165] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog WOLLONGONG-MANAGER - - NAME - Management Station, Release 3.0 - - KEYWORDS - manager; ; snmp, x; sun, dec, dos;. - - ABSTRACT - Management Station is a network management software - product that supports SNMP. Release 3.0 implements a - distributed network management architecture that helps - solve the scalability and reliability limitations of - using a single cpu for all SNMP management tasks. - Additionally, there are many applications provided - that are all user-configurable. The following - applications and their functionality is listed below: - - General Info: - - X Windows, 11.4 based implemented with OSF/Motif 1.1.1 - toolkit. X Windows interface for all configuration - files. Most applications have "verbose" mode for - display of SNMP PDU traffic. On-line help and - Reference manual pages. ANSI C compliant. - - Network Management Daemon: - - Responsible for device discovery, trap/alarm - management and fault monitoring for the network map. - Connection with other distributed daemons and any - connected stations is accomplished with SNMP/TCP. - Configured via Manager MIB; also incorporates SMUX MIB - (RFC 1227). Sends any information to INGRES, Oracle - or Sybase via an ESQL interface. User-defined actions - include: send alarm to map; send info to flat file; - execute ESQL command; call any UNIX system command; - forward traps and filter user-defined alarms. 
- User-defined alarms can use any boolean expression and - MIB variable expressions can be combined with AND/OR - statements. - - MIB Compiler - - ASN.1 MIB compiler with X Windows interface. Accepts - RFC 1155 and 1212 format. Most vendor-specific MIBs - and proposed Internet standard MIBs already included. - - - - -NOCTools2 Working Group [Page 166] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Network Map - - Comprehensive network monitoring map with click and - drag interface, hierarchical and virtual views. - Toolkit and preferences applications, device - discovery. Uses /etc/hosts file, NIS or DNS for - device resolution. Background pixmapping capability, - user-definable menu bar, network manager and console - operator modes via UNIX group permissions. Multiple - map use without limitation. - - MIB Form and MIB Form Editor - - User-designed, X-based SNMP applications. Alias for - MIB variables and interprets returned values. GET - NEXT and SET capability. User-defined polling and - multi-device [agent] capability. Configured via X - interface. - - MIB Chart and MIB Chart Editor - - Choice of strip chart, packed strip chart or bar - graphs. User-specified polling interval, MIB - variable(s) or MIB expressions using arithmetic - operands. Plot actual value, delta or delta/interval. - Plot multiple MIB expressions from multiple agents - simultaneously. X Windows interface. Pause polling - and grid options. - - MIB Tool - - X Windows application for the general viewing and - 'walking' of MIB trees. GET NEXT and SET options. - Window for viewing RFC 1212 MIB definitions. Command - line interface option. - - Application Programming Interface - - Complete set of APIs for developers to write SNMP - applications in character mode or X Windows. - - MECHANISM - Management Station uses SNMP and ICMP Echo Request to - monitor and control SNMP Agents. Network management - daemon implements Wollongong's Manager MIB, SNMP over - TCP and the SMUX protocol.
- - - - - -NOCTools2 Working Group [Page 167] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - CAVEATS - none. - - BUGS - See Product Release Notice. - - LIMITATIONS - Limitations on number of management agents and network - management daemons not known at this time. - - HARDWARE REQUIRED - Sun SPARC workstations and servers - DEC DECstations and DECsystems - Motorola MPC (Delta 8000 series) - 3/486 PC and PC-compatible - - 16 MB RAM - n20 MB free disk space for installation - Color monitor strongly recommended - - SOFTWARE REQUIRED - SunOS 4.1-1 or greater & OpenWindows 2.0 or greater (SUN) - X Windows, 11.4 or greater - RISC ULTRIX 4.1 or greater (DEC) - R32V2 (Motorola) - Open Desktop 1.1 or greater (3/486) - - Provided on 1/4" cartridge, TK-50 or 3 1/2" diskettes, - as appropriate, in cpio format. - - AVAILABILITY - A commercial product of: - - The Wollongong Group, Inc. - 1129 San Antonio Rd - Palo Alto, CA. 94303 - ph.: (800) 962 - 8649 (in California) - (800) 872 - 8649 (outside California) - fax: (415) 962 - 0286 - - - - - - - - - - - - -NOCTools2 Working Group [Page 168] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog XNETDB - - NAME - Xnetdb - - KEYWORDS - database, manager, map, monitoring, status; IP; Ping, - SNMP, Unix, X; free. - - ABSTRACT - Xnetdb is a network monitoring tool based on X Windows - and SNMP which also has integrated database and - statistic viewing capabilities. Xnetdb will determine - and display the status of routers and circuits it has - been told to monitor by querying the designated sites - and displaying the result. It can also query the - status of certain designated SNMP variables, such as a - default route for an important router. Additionally, - it also has integrated database functionality in that - it can display additional information about a site or - circuit such as the equipment at the site, the contact - person(s) for the site, and other useful information. 
- Finally it can gather designated statistical - information about a circuit and display it on demand. - - MECHANISM - Xnetdb uses SNMP or ping to monitor things which its - configured to monitor. It dynamically builds a - network map on its display by querying entities and - obtaining IP addresses and subnet masks. A - configuration file tells xnetdb which IP hosts you - want to monitor. - - CAVEATS - While "ping" can be used to monitor hosts, more useful - results are obtained using SNMP. - - BUGS - Bugs and other assorted topics are discussed on the - xnetdb mailing list. To join, send a note to - "xnetdb-request@oar.net". - - LIMITATIONS - None. - - HARDWARE REQUIRED - No restrictions. - - - - -NOCTools2 Working Group [Page 169] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - SOFTWARE REQUIRED - Most any variety of UNIX plus X-Windows and/or - OpenWindows. - - AVAILABILITY - Available via anonymous ftp from ftp.oar.net - (currently 131.187.1.102) in the directory /pub/src. - Special arrangements can be made for sites without - direct IP access by sending a note to - "xnetdb-request@oar.net". There are minimal licensing - restrictions - these are detailed within the package. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 170] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog XNETMON_SNMP_RESEARCH - - NAME - XNETMON -- an X windows based SNMP network management - station from SNMP Research. - - KEYWORDS - alarm, benchmark, control, debugger, manager, map, - reference, security, status, traffic; - bridge, DECnet, Ethernet, FDDI, IP, OSI, ring, star; - NMS, Ping, SNMP, X; - UNIX; - Sourcelib. - - ABSTRACT - The XNETMON application implements a powerful network - management station based on the X window system. 
- XNETMON's network management tools for configuration, - performance, security, and fault management have been - used successfully with a wide assortment of wide- and - local-area-network topologies and medias. - Multiprotocol devices are supported - including those using TCP/IP, DECnet, and OSI - protocols. - - Some features of XNETMON's network management tools include: - - o Fault management tool displays a map of the network - configuration with node and link state indicated - in one of several colors to indicate current status; - o Configuration management tool may be used to edit the - network management information base stored in the - NMS to reflect changes occurring in the network; - o Graphs and tabular tools for use in fault and performance - management (e.g. XNETPERFMON); - o Mechanisms by which additional variables, such as vendor- - specific variables, may be added; - o Alarms may be enabled to alert the operator of events - occurring in the network; - o Events are logged to disk; - o Output data may be transferred via flat files for - additional report generation by a variety of - statistical packages. - - The XNETMON application comes complete with source - code including a powerful set of portable libraries - for generating and parsing SNMP messages. - - - - -NOCTools2 Working Group [Page 171] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - MECHANISM - XNETMON is based on the Simple Network Management - Protocol (SNMP). Polling is performed via the - powerful SNMP get-next operator and the SNMP get - operator. Trap-directed polling is used to regulate - focus and intensity of the polling. - - CAVEATS - None. - - BUGS - None known. - - LIMITATIONS - Monitored and managed nodes must implement the SNMP over - UDP per RFC 1157 or must be reachable via a proxy agent. - - HARDWARE REQUIRED - X windows workstation with UDP socket library. - Monochrome is acceptable, but color is far superior. 
- - SOFTWARE REQUIRED - X windows version 11 release 4 or later or MOTIF. - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 172] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog XNETMON_WELLFLEET - - NAME - xnetmon, xpmon - - KEYWORDS - alarm, manager, map, status; IP; NMS, SNMP; UNIX. - - ABSTRACT - Xnetmon and xpmon provide graphical representation of - performance and status of SNMP-capable network ele- - ments. Xnetmon presents a schematic network map - representing the up/down status of network elements; - xpmon draws a pen plot style graph of the change over - time of any arbitrary MIB object (RFC1066). Both xnet- - mon and xpmon use the SNMP (RFC1098) for retrieving - status and performance data. - - MECHANISM - Xnetmon polls network elements for the status of their - interfaces on a controllable polling interval. Pop-up - windows displaying the values of any MIB variable are - supported by separate polls. When SNMP traps are - received from a network element, that element and all - adjacent elements are immediately re-polled to update - their status. The layout of the network map is stati- - cally configured. Xpmon repeatedly polls (using SNMP) - the designated network element for the value of the - designated MIB variable on the user-specified interval. - The change in the variable is then plotted on the strip - chart. The strip chart regularly adjusts its scale to - the current maximum value on the graph. - - CAVEATS - Polling intervals should be chosen with care so as not - to affect system performance adversely. - - BUGS - None known. 
- - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Distributed and supported for Sun-3 systems. - - SOFTWARE REQUIRED - SunOS 3.5 or 4.x; X11, release 2 or 3. - - - -NOCTools2 Working Group [Page 173] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY - Commercial product of: - Wellfleet Communications, Inc. - 12 DeAngelo Drive - Bedford, MA 01730-2204 - (617) 275-2400 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 174] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog XNETPERFMON_SNMP_RESEARCH - - NAME - xnetperfmon -- a graphical network performance and - fault management tool from SNMP Research. - - KEYWORDS - manager, security, status; - DECnet, Ethernet, IP, OSI, ring, star; - NMS, SNMP, X; - DOS, UNIX, VMS; - sourcelib. - - ABSTRACT - Xnetperfmon is a XNETMON tool used to produce plots of - SNMP variables in graphical displays. The manager may - easily customize the labels, step size, update interval, - and variables to be plotted to produce graphs for fault - and performance management. Scales automatically adjust - whenever a point to be plotted would go off scale. - - MECHANISM - The xnetperfmon application communicates with remote - agents or proxy agents via the Simple Network Management - Protocol (SNMP). - - CAVEATS - All plots for a single invocation of xnetperfmon must be - for variables provided by a single network management - agent. However, multiple invocations of xnetperfmon may - be active on a single display simultaneously or proxy - agents may be used to summarize information at a common - point. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Systems supporting X windows. 
- - SOFTWARE REQUIRED - XNETMON from SNMP Research and X Version 11 release 4 or - later (option MOTIF) - - - - - -NOCTools2 Working Group [Page 175] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - AVAILABILITY AND CONTACT POINT FOR INFORMATION ABOUT THIS TOOL - This is a commercial product available under license - from: - - SNMP Research - 3001 Kimberlin Heights Road - Knoxville, TN 37920-9716 - Attn: John Southwood, Sales and Marketing - (615) 573-1434 (Voice) (615) 573-9197 (FAX) - - CONTACT POINT FOR CHANGES TO THIS CATALOG ENTRY - users@seymour1.cs.utk.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 176] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - Internet Tool Catalog XUP_HP - - NAME - xup - - KEYWORDS - status; ping, X; HP. - - ABSTRACT - Xup uses the X-Windows to display the status of an - "interesting" set of hosts. - - MECHANISM - Xup uses ping to determine host status. - - CAVEATS - Polling for status increases network load. - - BUGS - None known. - - LIMITATIONS - None reported. - - HARDWARE REQUIRED - Runs only on HP series 300 and 800 workstations. - - SOFTWARE REQUIRED - Version 10 of X-Windows. - - AVAILABILITY - A standard command for the HP 300 & 800 Workstations. - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 177] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -Appendix: "No-Writeups" - - This section contains references to tools which are known to exist, - but which have not been fully cataloged. If anyone wishes to author - an entry for one of these tools please contact: noctools- - request@merit.edu. - - Each mention is separated by a form feed for improved readability. - If you intend to actually print-out this section of the catalog, then - you should probably strip-out the form feeds. - -tuecho.c - -/* - * Send / receive TCP or UDP echos in any of a number of bizarre ways. - * - * Joel P.
Bion, March 1990 - * Copyright (c) 1990 cisco Systems. All rights reserved. - * - * This "tuecho" program is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Prompts as: - * Host: -- host to send echos to -- can be name or a.b.c.d -- - * Enter protocol (0 = UDP, 1 = TCP) [0]: -- UDP or TCP - * Size of data portion (bytes) [100]: -- bytes in data, excluding - * headers -- Number of bursts [5]: -- number of bursts of packets to - * send -- Packets per burst [1]: -- packets per burst, all sent AT - * ONCE -- Timeout (seconds) [2]: -- how long to wait for data - * Pause interval (seconds) [0]: -- Pause interval between bursts of - * frames - * Type of pattern (specify = 0, increment = 1) [1]: - * -- if 0 specified, allow you to specify a 16bit pattern - -- as four hex digits (see below). If 1, will create a - -- "incrementing", cycling pattern from 0x0000 -> 0xffff - -- ->. - * Enter pattern (hex value) [abcd]: -- if "0" specified above - */ - -Availability: - ftp.uu.net:/networking/cisco/tuecho.c - ftp.cisco.com:tuecho.c - - - - - - - - -NOCTools2 Working Group [Page 178] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -SPY An NFS monitoring/tracing tool - -Availability: - A postscript file describing SPY is located on - ftp.uu.net:/networking/ip/nfs/spy.ps.Z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 179] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -NFSTRACE - - This is the rpcspy/nfstrace package. - - It is described in detail in the paper "NFS Tracing by Passive - Network Monitoring", which appeared in the January, 1992 USENIX - conference. - - You'll need either a DEC machine running ULTRIX (with the - packetfilter installed in the kernel) or a Sun running SunOS 4.x - (with NIT). Or you'll need to do a bit of hacking. 
- - The package differs slightly from the version in the paper: - - - - The handle->name translation facility has been removed. It's - just too fragile to include in the general release. If you need it, - contact me directly and I'll be happy to mail you the code. - - - The output format is a wee-bit different. - - - The IBM-RT Enet filter version is also not included, since I seem to - be the only person in the world running it. RTs are really too slow - for this anyway. - - To configure the package, edit the makefile in the obvious (to me at - least) way. - - Note that not all versions of SunOS NIT have working versions of - the packet timestamp mechanism. Try to set the -DSTAMPS option in - the makefile, and if that doesn't work, take it out. - - If you are actually going to use this to gather traces, I'd like to - hear from you! Please send email, and share your results/traces if - your organization will allow it. I maintain a mailing list of users - for updates, etc. Send me mail to be added to it. - - Happy tracing. - Matt Blaze - Department of Computer Science - Princeton University - 35 Olden Street - Princeton, NJ 08544 - mab@cs.princeton.edu - 609-258-3946 - - Availability: - ftp.uu.net:/networking/ip/nfs/nfstrace.shar (or check archie) - - - -NOCTools2 Working Group [Page 180] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - LAMER - - # Lame delegation notifier - # Author: Bryan Beecher - # Last Modified: 6/25/92 - # - # To make use of this software, you need to be running the - # University of Michigan release of BIND 4.8.3, or any version - # of named that supports the LAME_DELEGATION patches posted to - # USENET. The U-M release is available via anonymous ftp from - # terminator.cc.umich.edu:/unix/dns/bind4.8.3.tar.Z. - # - # You must also have a copy of query(1) and host(1). These - # are also available via anonymous ftp in the aforementioned - # place.
- # ------------------------------------------------------------- - - # ------------------------------------------------------------- - # handle arguments - # ------------------------------------------------------------- - # -d - # This flag is used to append a dot-day suffix to the LOGFILE. - # Handy where log files are kept around for the last week - # and contain a day suffix. - # - # -f - # Change the LOGFILE value altogether. - # - # -w - # Count up all of the DNS statistics for the whole week. - # - # -v - # Be verbose. - # - # -t - # Test mode. Do not send mail to the lame delegation - # hostmasters. - - Availability: - ftp.uu.net:/networking/ip/dns/lamer.tar.Z (or check archie) - - - - - - - - - - - -NOCTools2 Working Group [Page 181] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - HOST - - host - look up host names using domain server - -SYNOPSIS - host [-v] [-a] [-t querytype] [options] name [server] - host [-v] [-a] [-t querytype] [options] -l domain [server] - host [-v] [options] -H [-D] [-E] [-G] domain - host [-v] [options] -C domain - host [-v] [options] -A host - -DESCRIPTION - host looks for information about Internet hosts or domains. - It gets this information from a set of interconnected - servers that are spread across the world. By default, it - simply converts between host names and Internet addresses. - However, with the -t, -a and -v options, it can be used to - find all of the information about hosts or domains that is - maintained by the domain nameserver. - -/* - * Extensively modified by E. Wassenaar, Nikhef-H, - * - * The officially maintained source of this program is available - * via anonymous ftp from machine 'ftp.nikhef.nl' [192.16.199.1] - * in the directory '/pub/network' as 'host.tar.Z' - * - * Also available in this directory are patched versions of the - * BIND 4.8.3 nameserver and resolver library which you may need - * to fully exploit the features of this program, although they - * are not mandatory. 
See the file 'README_FIRST' for details. - * - * You are kindly requested to report bugs and make suggestions - * for improvements to the author at the given email address, - * and to not re-distribute your own modifications to others. - */ -/* - * New features - * - * - Major overhaul of the whole code. - * - Very rigid error checking, with more verbose error messages. - * - Zone listing section completely rewritten. - * - It is now possible to do recursive listings into subdomains. - * - Maintain resource record statistics during zone listings. - * - Maintain count of hosts during zone listings. - * - Exploit multiple server addresses if available. - * - Option to exploit only primary server for zone transfers. - * - Option to exclude info from names that do not reside in a domain. - - - -NOCTools2 Working Group [Page 182] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - * - Implement timeout handling during connect and read. - * - Write resource record output to optional logfile. - * - Special MB tracing by recursively expanding MR and MG records. - * - Special mode to check SOA records at each nameserver for domain. - * - Special mode to check inverse mappings of host addresses. - * - Code is extensively documented. - */ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 183] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -PINGs - -Many many versions of the PING program exist. -Each implementation has its own set of additional features. -Here are a few more PINGs that are worth taking a look at. - -Version on ftp.cc.berkeley.edu:pub/ping: - This version has duplicate packet detection, Record Route, - ability to specify data pattern for packets, flood pinging, an - interval option, Multicast support, etc. 
- -Version on nikhefh.nikhef.nl:/pub/network/rping.tar.Z: - 'rping' is just like 'ping', but only a single probe packet - is sent to test the reachability of a destination. - As an option, the loose source routing facility is used - to show the roundtrip route the packet has taken. - Multiple addresses of remote hosts are tried until one - responds. As an option, each of multiple addresses can be - probed unconditionally. - Contains a patch for making loose source routing work in - case you have a SUN with an OMNINET ethernet controller. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 184] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -VRFY - -vrfy.tar.Z (Version 921021) - 'vrfy' is a tool to verify email addresses and mailing lists. - In its simplest form it takes an address "user@domain", figures - out the MX hosts for "domain", and issues the SMTP command VRFY - at the primary MX host (optionally all), or at "domain" itself - if no MX hosts exist. Without "domain" it goes to "localhost". - More complex capabilities are: recursively expanding forward - files or mailing lists, and detecting mail forwarding loops. - Full-blown RFC822 address specifications are understood. - Syntax checking can be carried out either locally or remotely. - Various options are provided to exploit alternative protocol - suites if necessary, and to print many forms of verbose output. - Obvious limitations exist, but on average it works pretty well. - Needless to say you need internet (nameserver and SMTP) access. - See the man page and the extensive documentation in the source - for further details. 
- -Please send comments and suggestions to Eric Wassenaar - -If you want to receive notification of updates, please send an email -with the keyword "subscribe" in the subject or the body to the address - - -available as: nikhefh.nikhef.nl:/pub/network/vrfy.tar.Z - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 185] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -XNETLOAD - -NAME - xnetload - ethernet load average display for X - -SYNOPSIS - xnetload[-toolkitoption ...] [-scale integer] - [-update seconds] [-hl color] [-highlight color] - [-jumpscroll pixels] [-label string] [-nolabel] host - -DESCRIPTION - The xnetload program displays a periodically updating histo- - gram of the ethernet load average for the specified host. - The resulting graph is scaled as 0% to 100%, where 0% - corresponds to 0mbs and 100% corresponds to 10mbs. NOTE: - The specified host must be running rpc.etherd. - -This program has been run using X11R4 and X11R5, under the following -operating systems: - - SUNOS 4.1.0 - SUNOS 4.1.1 - ULTRIX V4.2 - IRIX 3.3.2 - -Assuming the Imake templates and Rules are in order and in the proper -place on your system, these programs should compile and link -straightforward by running the following sequence: - - xmkmf - make - -Then, as root, issue the following: - - make install - make install.man - -Then, on your host system, (or on any other system you can rlogin or rsh -into) start the etherd daemon with the following (must be root): - - /usr/etc/rpc.etherd le0 & - -where le0 is the mnemonic for the primary ethernet interface. - -To start the xnetload program, the following command line is suggested: - - ./xnetload -hl red host & - - - - -NOCTools2 Working Group [Page 186] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -where "host" is the name of any reachable network node (including -LOCALHOST) that is running the etherd daemon. 
A small xload window -should appear on your local display with nine horizontal lines. The -label: - "Ethernet Load %" -should appear in the upper left hand corner, just below any additional -title bars or other decorations provided by your window manager. If the -program comes up without the nine lines, or without the "Ethernet Load" -label, then either your resource file is not properly installed in the -appropriate app-defaults directory, or you may have picked up the wrong -xnetload image. Try re-running "make install" as root, or be sure to -include the "./" in front of the command name. - -Good Luck! - -The following changes have been made to this directory since R3: - - o Now use Athena StripChart widget. - - o Understands WM_DELETE_WINDOW. - - o 3-26-92 Modified from xload to xnetload by Roger Smith, - Sterling Software at NASA-Ames Research Center, - Mountain View, Calif. rsmith@proteus.arc.nasa.gov - -Availability: - ftp proteus.arc.nasa.gov:pub/XEnetload.tar.Z (or check archie) - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 187] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -NETTEST - - nettest, nettestd - Performs client and server functions for - timing data throughput - - The nettest and nettestd commands invoke client and server - programs that are used for timing data throughput of various - methods of interprocess communication. For TCP and OSI con- - nections, the nettest program establishes a connection with - the nettestd program, and then it does count writes of size - bytes, followed by count reads of size bytes. For UDP, the - nettest program performs only writes; reads are not per- - formed. The nettestd program, if used with UDP connections, - reads the data packets and prints a message for each data - packet it receives. 
The number and size of the reads and - writes may not correlate with the number and size of the - actual data packets that are transferred; it depends on the - protocol that is chosen. If you append an optional k (or K) - to the size, count, or bufsize value, the number specified - is multiplied by 1024. - - This source for nettest and nettestd are provided on an "as is" - basis. Cray Research does not provide any support for this code - (unless you are a customer who has purchased the UNICOS operating - system). - - We will gladly take bug reports for nettest/nettestd. Suggested - fixes are prefered to just bug reports. Changes to allow - nettest/nettestd to run on other architectures are also welcomed. We - will try to incorporate bugfixes and update the publicly available - code, but we can make no guarantees. - - For copyright information, see the notice in each source file. - - Send bug-reports/fixes to: - E-mail: dab@cray.com - U.S. Mail: David Borman - Cray Research, Inc. - 655F Lone Oak Drive - Eagan, MN 55121 - Notes: - - 1) The -b option to nettestd has not been tested... - 2) The ISO code should work on a 4.4BSD system, but the - gethostinfo() routine is specific to UNICOS... - - Availability: - ftp sgi.com:/sgi/src/nettest - - - -NOCTools2 Working Group [Page 188] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - ETHERCK - - etherck is a simple program that displays Sun ethernet statistics. - If you have a high percents of input errors that are due to "out of - buffers", then you can run the "iepatch" script to patch a kernel - that uses the Intel ethernet chip ("ie"). A back of the envelope - calculation shows that a .25% input error rate gives about a 10% - degradation of NFS performance if 8k packets are being used. - - In our environment at Legato, patching the ie buffer allocation made - the input error rate drop more than 2 orders of magnitude. 
This was - after we had applied other networking fixes (e.g., using Prestoserve, - going from thin wire to twisted pair) and pushed a higher load on the - server. - - Note that both etherck and iepatch must be run by root (or you can - make etherck setgid kmem). - - Availability: - send EMAIL to: request@legato.com - with a Subject line: send unsupported etherck - - The following is part of the 'help' file from the Legato Email - Server: - - This message comes to you from the request server at Legato.COM, - request@Legato.COM. It received a message from you asking for help. - - The request server is a mail-response program. That means that you - mail it a request, and it mails back the response. - - The request server is a very dumb program. It does not have much - error checking. If you don't send it the commands that it - understands, it will just answer "I don't understand you". - - The request server has 4 commands. Each command must be the first - word on a line. The request server reads your entire message before - it does anything, so you can have several different commands in a - single message. The request server treats the "Subject:" header line - just like any other line of the message. You can use any combination - of upper and lower case letters in the commands. - - The request server's files are organized into a series of directories - and subdirectories. Each directory has an index, and each - subdirectory has an index. The top-level index gives you an overview - of what is in the subdirectories, and the index for each subdirectory - tells you what is in it. - - - - -NOCTools2 Working Group [Page 189] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - The server has 4 commands: - - "help" command: The command "help" or "send help" causes the server to - send you the help file. You already know this, of course, - because you are reading the help file. 
No other commands are - honored in a message that asks for help (the server figures - that you had better read the help message before you do - anything else). - - SEND a request to Legato to get the rest of the help file! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 190] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - - NETCK - - netck is a shar file that contains the sources to build "netck", a - network checker that uses the rstat(3R) protocol to gather and print - statistics from machines on the network. netck is useful to help - understand what part of what machines are potential NFS bottlenecks. - To get this file, send email to the request server with the command - "send unsupported netck". - - Availability: - same as ETHERCK (send email To: request@legato.com; subject: - HELP) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 191] - -RFC 1470 FYI: Network Management Tool Catalog June 1993 - - -References - - [1] Stine, R., Editor, "FYI on a Network Management Tool Catalog: - Tools for Monitoring and Debugging TCP/IP Internets and - Interconnected Devices", FYI 2, RFC 1147, Sparta, Inc., April - 1990. - -Security Considerations - - Security issues are not discussed in this memo. - -Authors' Addresses - - Robert M. Enger - Advanced Network and Services - 1875 Campus Commons Drive, Suite 220 - Reston, VA. 22091-1552 - - Phone: 703-758-7722 - EMail: enger@reston.ans.net - - - Joyce K. 
Reynolds - Information Sciences Institute - University of Southern California - 4676 Admiralty Way - Marina del Rey, CA 90292 - - Phone: (310) 822-1511 - Email: JKREY@ISI.EDU - - - - - - - - - - - - - - - - - - - - - -NOCTools2 Working Group [Page 192] - \ No newline at end of file diff --git a/kernel/picotcp/RFC/rfc1624.txt b/kernel/picotcp/RFC/rfc1624.txt deleted file mode 100644 index fe9fc01..0000000 --- a/kernel/picotcp/RFC/rfc1624.txt +++ /dev/null @@ -1,339 +0,0 @@ - - - - - - -Network Working Group A. Rijsinghani, Editor -Request for Comments: 1624 Digital Equipment Corporation -Updates: 1141 May 1994 -Category: Informational - - - Computation of the Internet Checksum - via Incremental Update - -Status of this Memo - - This memo provides information for the Internet community. This memo - does not specify an Internet standard of any kind. Distribution of - this memo is unlimited. - -Abstract - - This memo describes an updated technique for incremental computation - of the standard Internet checksum. It updates the method described - in RFC 1141. - -Table of Contents - - 1. Introduction .......................................... 1 - 2. Notation and Equations ................................ 2 - 3. Discussion ............................................ 2 - 4. Examples .............................................. 3 - 5. Checksum verification by end systems .................. 4 - 6. Historical Note ....................................... 4 - 7. Acknowledgments ....................................... 5 - 8. Security Considerations ............................... 5 - 9. Conclusions ........................................... 5 - 10. Author's Address ..................................... 5 - 11. References ........................................... 6 - -1. Introduction - - Incremental checksum update is useful in speeding up several - types of operations routinely performed on IP packets, such as - TTL update, IP fragmentation, and source route update. 
- - RFC 1071, on pages 4 and 5, describes a procedure to - incrementally update the standard Internet checksum. The - relevant discussion, though comprehensive, was not complete. - Therefore, RFC 1141 was published to replace this description - on Incremental Update. In particular, RFC 1141 provides a - more detailed exposure to the procedure described in RFC 1071. - However, it computes a result for certain cases that differs - - - -Rijsinghani [Page 1] - -RFC 1624 Incremental Internet Checksum May 1994 - - - from the one obtained from scratch (one's complement of one's - complement sum of the original fields). - - For the sake of completeness, this memo briefly highlights key - points from RFCs 1071 and 1141. Based on these discussions, - an updated procedure to incrementally compute the standard - Internet checksum is developed and presented. - -2. Notation and Equations - - Given the following notation: - - HC - old checksum in header - C - one's complement sum of old header - HC' - new checksum in header - C' - one's complement sum of new header - m - old value of a 16-bit field - m' - new value of a 16-bit field - - RFC 1071 states that C' is: - - C' = C + (-m) + m' -- [Eqn. 1] - = C + (m' - m) - - As RFC 1141 points out, the equation above is not useful for direct - use in incremental updates since C and C' do not refer to the actual - checksum stored in the header. In addition, it is pointed out that - RFC 1071 did not specify that all arithmetic must be performed using - one's complement arithmetic. - - Finally, complementing the above equation to get the actual checksum, - RFC 1141 presents the following: - - HC' = ~(C + (-m) + m') - = HC + (m - m') - = HC + m + ~m' -- [Eqn. 2] - -3. Discussion - - Although this equation appears to work, there are boundary conditions - under which it produces a result which differs from the one obtained - by checksum computation from scratch. This is due to the way zero is - handled in one's complement arithmetic. 
- - In one's complement, there are two representations of zero: the all - zero and the all one bit values, often referred to as +0 and -0. - One's complement addition of non-zero inputs can produce -0 as a - result, but never +0. Since there is guaranteed to be at least one - - - -Rijsinghani [Page 2] - -RFC 1624 Incremental Internet Checksum May 1994 - - - non-zero field in the IP header, and the checksum field in the - protocol header is the complement of the sum, the checksum field can - never contain ~(+0), which is -0 (0xFFFF). It can, however, contain - ~(-0), which is +0 (0x0000). - - RFC 1141 yields an updated header checksum of -0 when it should be - +0. This is because it assumed that one's complement has a - distributive property, which does not hold when the result is 0 (see - derivation of [Eqn. 2]). - - The problem is avoided by not assuming this property. The correct - equation is given below: - - HC' = ~(C + (-m) + m') -- [Eqn. 3] - = ~(~HC + ~m + m') - -4. Examples - - Consider an IP packet header in which a 16-bit field m = 0x5555 - changes to m' = 0x3285. Also, the one's complement sum of all other - header octets is 0xCD7A. - - Then the header checksum would be: - - HC = ~(0xCD7A + 0x5555) - = ~0x22D0 - = 0xDD2F - - The new checksum via recomputation is: - - HC' = ~(0xCD7A + 0x3285) - = ~0xFFFF - = 0x0000 - - Using [Eqn. 2], as specified in RFC 1141, the new checksum is - computed as: - - HC' = HC + m + ~m' - = 0xDD2F + 0x5555 + ~0x3285 - = 0xFFFF - - which does not match that computed from scratch, and moreover can - never obtain for an IP header. - - - - - - - - -Rijsinghani [Page 3] - -RFC 1624 Incremental Internet Checksum May 1994 - - - Applying [Eqn. 3] to the example above, we get the correct result: - - HC' = ~(C + (-m) + m') - = ~(0x22D0 + ~0x5555 + 0x3285) - = ~0xFFFF - = 0x0000 - -5. 
Checksum verification by end systems - - If an end system verifies the checksum by including the checksum - field itself in the one's complement sum and then comparing the - result against -0, as recommended by RFC 1071, it does not matter if - an intermediate system generated a -0 instead of +0 due to the RFC - 1141 property described here. In the example above: - - 0xCD7A + 0x3285 + 0xFFFF = 0xFFFF - 0xCD7A + 0x3285 + 0x0000 = 0xFFFF - - However, implementations exist which verify the checksum by computing - it and comparing against the header checksum field. - - It is recommended that intermediate systems compute incremental - checksum using the method described in this document, and end systems - verify checksum as per the method described in RFC 1071. - - The method in [Eqn. 3] is slightly more expensive than the one in RFC - 1141. If this is a concern, the two additional instructions can be - eliminated by subtracting complements with borrow [see Sec. 7]. This - would result in the following equation: - - HC' = HC - ~m - m' -- [Eqn. 4] - - In the example shown above, - - HC' = HC - ~m - m' - = 0xDD2F - ~0x5555 - 0x3285 - = 0x0000 - -6. Historical Note - - A historical aside: the fact that standard one's complement - arithmetic produces negative zero results is one of its main - drawbacks; it makes for difficulty in interpretation. In the CDC - 6000 series computers [4], this problem was avoided by using - subtraction as the primitive in one's complement arithmetic (i.e., - addition is subtraction of the complement). - - - - - -Rijsinghani [Page 4] - -RFC 1624 Incremental Internet Checksum May 1994 - - -7. 
Acknowledgments - - The contribution of the following individuals to the work that led to - this document is acknowledged: - - Manu Kaycee - Ascom Timeplex, Incorporated - Paul Koning - Digital Equipment Corporation - Tracy Mallory - 3Com Corporation - Krishna Narayanaswamy - Digital Equipment Corporation - Atul Pandya - Digital Equipment Corporation - - The failure condition was uncovered as a result of IP testing on a - product which implemented the RFC 1141 algorithm. It was analyzed, - and the updated algorithm devised. This algorithm was also verified - using simulation. It was also shown that the failure condition - disappears if the checksum verification is done as per RFC 1071. - -8. Security Considerations - - Security issues are not discussed in this memo. - -9. Conclusions - - It is recommended that either [Eqn. 3] or [Eqn. 4] be the - implementation technique used for incremental update of the standard - Internet checksum. - -10. Author's Address - - Anil Rijsinghani - Digital Equipment Corporation - 550 King St - Littleton, MA 01460 - - Phone: (508) 486-6786 - EMail: anil@levers.enet.dec.com - - - - - - - - - - - - - - - -Rijsinghani [Page 5] - -RFC 1624 Incremental Internet Checksum May 1994 - - -11. References - - [1] Postel, J., "Internet Protocol - DARPA Internet Program Protocol - Specification", STD 5, RFC 791, DARPA, September 1981. - - [2] Braden, R., Borman, D., and C. Partridge, "Computing the Internet - Checksum", RFC 1071, ISI, Cray Research, BBN Laboratories, - September 1988. - - [3] Mallory, T., and A. Kullberg, "Incremental Updating of the - Internet Checksum", RFC 1141, BBN Communications, January 1990. - - [4] Thornton, J., "Design of a Computer -- the Control - Data 6600", Scott, Foresman and Company, 1970. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Rijsinghani [Page 6] - diff --git a/kernel/picotcp/RFC/rfc1644.txt b/kernel/picotcp/RFC/rfc1644.txt deleted file mode 100644 index 2aca5a6..0000000 --- a/kernel/picotcp/RFC/rfc1644.txt +++ /dev/null @@ -1,2131 +0,0 @@ - - - - - - -Network Working Group R. Braden -Request for Comments: 1644 ISI -Category: Experimental July 1994 - - T/TCP -- TCP Extensions for Transactions - Functional Specification - -Status of this Memo - - This memo describes an Experimental Protocol for the Internet - community, and requests discussion and suggestions for improvements. - It does not specify an Internet Standard. Distribution is unlimited. - -Abstract - - This memo specifies T/TCP, an experimental TCP extension for - efficient transaction-oriented (request/response) service. This - backwards-compatible extension could fill the gap between the current - connection-oriented TCP and the datagram-based UDP. - - This work was supported in part by the National Science Foundation - under Grant Number NCR-8922231. - -Table of Contents - - 1. INTRODUCTION .................................................. 2 - 2. OVERVIEW ..................................................... 3 - 2.1 Bypassing the Three-Way Handshake ........................ 4 - 2.2 Transaction Sequences .................................... 6 - 2.3 Protocol Correctness ..................................... 8 - 2.4 Truncating TIME-WAIT State ............................... 12 - 2.5 Transition to Standard TCP Operation ..................... 14 - 3. FUNCTIONAL SPECIFICATION ..................................... 17 - 3.1 Data Structures .......................................... 17 - 3.2 New TCP Options .......................................... 17 - 3.3 Connection States ........................................ 19 - 3.4 T/TCP Processing Rules ................................... 25 - 3.5 User Interface ........................................... 28 - 4. 
IMPLEMENTATION ISSUES ........................................ 30 - 4.1 RFC-1323 Extensions ...................................... 30 - 4.2 Minimal Packet Sequence .................................. 31 - 4.3 RTT Measurement .......................................... 31 - 4.4 Cache Implementation ..................................... 32 - 4.5 CPU Performance .......................................... 32 - 4.6 Pre-SYN Queue ............................................ 33 - 6. ACKNOWLEDGMENTS .............................................. 34 - 7. REFERENCES ................................................... 34 - APPENDIX A. ALGORITHM SUMMARY ................................... 35 - - - -Braden [Page 1] - -RFC 1644 Transaction/TCP July 1994 - - - Security Considerations .......................................... 38 - Author's Address ................................................. 38 - -1. INTRODUCTION - - TCP was designed to around the virtual circuit model, to support - streaming of data. Another common mode of communication is a - client-server interaction, a request message followed by a response - message. The request/response paradigm is used by application-layer - protocols that implement transaction processing or remote procedure - calls, as well as by a number of network control and management - protocols (e.g., DNS and SNMP). Currently, many Internet user - programs that need request/response communication use UDP, and when - they require transport protocol functions such as reliable delivery - they must effectively build their own private transport protocol at - the application layer. - - Request/response, or "transaction-oriented", communication has the - following features: - - (a) The fundamental interaction is a request followed by a response. - - (b) An explicit open or close phase may impose excessive overhead. - - (c) At-most-once semantics is required; that is, a transaction must - not be "replayed" as the result of a duplicate request packet. 
- - (d) The minimum transaction latency for a client should be RTT + - SPT, where RTT is the round-trip time and SPT is the server - processing time. - - (e) In favorable circumstances, a reliable request/response - handshake should be achievable with exactly one packet in each - direction. - - This memo concerns T/TCP, an backwards-compatible extension of TCP to - provide efficient transaction-oriented service in addition to - virtual-circuit service. T/TCP provides all the features listed - above, except for (e); the minimum exchange for T/TCP is three - segments. - - In this memo, we use the term "transaction" for an elementary - request/response packet sequence. This is not intended to imply any - of the semantics often associated with application-layer transaction - processing, like 3-phase commits. It is expected that T/TCP can be - used as the transport layer underlying such an application-layer - service, but the semantics of T/TCP is limited to transport-layer - services such as reliable, ordered delivery and at-most-once - - - -Braden [Page 2] - -RFC 1644 Transaction/TCP July 1994 - - - operation. - - An earlier memo [RFC-1379] presented the concepts involved in T/TCP. - However, the real-world usefulness of these ideas depends upon - practical issues like implementation complexity and performance. To - help explore these issues, this memo presents a functional - specification for a particular embodiment of the ideas presented in - RFC-1379. However, the specific algorithms in this memo represent a - later evolution than RFC-1379. In particular, Appendix A in RFC-1379 - explained the difficulties in truncating TIME-WAIT state. However, - experience with an implementation of the RFC-1379 algorithms in a - workstation later showed that accumulation of TCB's in TIME-WAIT - state is an intolerable problem; this necessity led to a simple - solution for truncating TIME-WAIT state, described in this memo. 
- - Section 2 introduces the T/TCP extensions, and section 3 contains the - complete specification of T/TCP. Section 4 discusses some - implementation issues, and Appendix A contains an algorithmic - summary. This document assumes familiarity with the standard TCP - specification [STD-007]. - -2. OVERVIEW - - The TCP protocol is highly symmetric between the two ends of a - connection. This symmetry is not lost in T/TCP; for example, T/TCP - supports TCP's symmetric simultaneous open from both sides (Section - 2.3 below). However, transaction sequences use T/TCP in a highly - unsymmetrical manner. It is convenient to use the terms "client - host" and "server host" for the host that initiates a connection and - the host that responds, respectively. - - The goal of T/TCP is to allow each transaction, i.e., each - request/response sequence, to be efficiently performed as a single - incarnation of a TCP connection. Standard TCP imposes two - performance problems for transaction-oriented communication. First, - a TCP connection is opened with a "3-way handshake", which must - complete successfully before data can be transferred. The 3-way - handshake adds an extra RTT (round trip time) to the latency of a - transaction. - - The second performance problem is that closing a TCP connection - leaves one or both ends in TIME-WAIT state for a time 2*MSL, where - MSL is the maximum segment lifetime (defined to be 120 seconds). - TIME-WAIT state severely limits the rate of successive transactions - between the same (host,port) pair, since a new incarnation of the - connection cannot be opened until the TIME-WAIT delay expires. 
RFC- - 1379 explained why the alternative approach, using a different user - port for each transaction between a pair of hosts, also limits the - - - -Braden [Page 3] - -RFC 1644 Transaction/TCP July 1994 - - - transaction rate: (1) the 16-bit port space limits the rate to - 2**16/240 transactions per second, and (2) more practically, an - excessive amount of kernel space would be occupied by TCP state - blocks in TIME-WAIT state [RFC-1379]. - - T/TCP solves these two performance problems for transactions, by (1) - bypassing the 3-way handshake (3WHS) and (2) shortening the delay in - TIME-WAIT state. - - 2.1 Bypassing the Three-Way Handshake - - T/TCP introduces a 32-bit incarnation number, called a "connection - count" (CC), that is carried in a TCP option in each segment. A - distinct CC value is assigned to each direction of an open - connection. A T/TCP implementation assigns monotonically - increasing CC values to successive connections that it opens - actively or passively. - - T/TCP uses the monotonic property of CC values in initial - segments to bypass the 3WHS, using a mechanism that we call TCP - Accelerated Open (TAO). Under the TAO mechanism, a host caches a - small amount of state per remote host. Specifically, a T/TCP host - that is acting as a server keeps a cache containing the last valid - CC value that it has received from each different client host. If - an initial segment (i.e., a segment containing a SYN bit but - no ACK bit) from a particular client host carries a CC value - larger than the corresponding cached value, the monotonic property - of CC's ensures that the segment must be new and can - therefore be accepted immediately. Otherwise, the server host - does not know whether the segment is an old duplicate or was - simply delivered out of order; it therefore executes a normal 3WHS - to validate the . Thus, the TAO mechanism provides an - optimization, with the normal TCP mechanism as a fallback. 
- - The CC value carried in non- segments is used to protect - against old duplicate segments from earlier incarnations of the - same connection (we call such segments 'antique duplicates' for - short). In the case of short connections (e.g., transactions), - these CC values allow TIME-WAIT state delay to be safely discuss - in Section 2.3. - - T/TCP defines three new TCP options, each of which carries one - 32-bit CC value. These options are named CC, CC.NEW, and CC.ECHO. - The CC option is normally used; CC.NEW and CC.ECHO have special - functions, as follows. - - - - - - -Braden [Page 4] - -RFC 1644 Transaction/TCP July 1994 - - - (a) CC.NEW - - Correctness of the TAO mechanism requires that clients - generate monotonically increasing CC values for successive - connection initiations. These values can be generated using - a simple global counter. There are certain circumstances - (discussed below in Section 2.2) when the client knows that - monotonicity may be violated; in this case, it sends a CC.NEW - rather than a CC option in the initial segment. - Receiving a CC.NEW causes the server to invalidate its cache - entry and do a 3WHS. - - (b) CC.ECHO - - When a server host sends a segment, it echoes the - connection count from the initial in a CC.ECHO option, - which is used by the client host to validate the - segment. - - Figure 1 illustrates the TAO mechanism bypassing a 3WHS. The - cached CC values, denoted by cache.CC[host], are shown on each - side. The server host compares the new CC value x in segment #1 - against x0, its cached value for client host A; this comparison is - called the "TAO test". Since x > x0, the must be new and - can be accepted immediately; the data in the segment can therefore - be delivered to the user process B, and the cached value is - updated. If the TAO test failed (x <= x0), the server host would - do a normal three-way handshake to validate the segment, but - the cache would not be updated. 
- - - - - - - - - - - - - - - - - - - - - - -Braden [Page 5] - -RFC 1644 Transaction/TCP July 1994 - - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - cache.CC[A] - V - - [ x0 ] - - #1 --> --> (TAO test OK (x > x0) => - data1->user_B and - cache.CC[A]= x; ) - - [ x ] - #2 <-- <-- - (data2->user_A;) - - - Figure 1. TAO: Three-Way Handshake is Bypassed - - - The CC value x is echoed in a CC.ECHO option in the - segment (#2); the client side uses this option to validate the - segment. Since segment #2 is valid, its data2 is delivered to the - client user process. Segment #2 also carries B's CC value; this - is used by A to validate non-SYN segments from B, as explained in - Section 2.4. - - Implementing the T/TCP extensions expands the connection control - block (TCB) to include the two CC values for the connection; call - these variables TCB.CCsend and TCB.CCrecv (or CCsend, CCrecv for - short). For example, the sequence shown in Figure 1 sets - TCB.CCsend = x and TCB.CCrecv = y at host A, and vice versa at - host B. Any segment that is received with a CC option containing - a value SEG.CC different from TCB.CCsend will be rejected as an - antique duplicate. - - 2.2 Transaction Sequences - - T/TCP applies the TAO mechanism described in the previous section - to perform a transaction sequence. Figure 2 shows a minimal - transaction, when the request and response data can each fit into - a single segment. This requires three segments and completes in - one round-trip time (RTT). If the TAO test had failed on segment - #1, B would have queued data1 and the FIN for later processing, - and then it would have returned a segment to A, to - perform a normal 3WHS. 
- - - - -Braden [Page 6] - -RFC 1644 Transaction/TCP July 1994 - - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - #1 SYN-SENT* --> --> CLOSE-WAIT* - (TAO test OK) - (data1->user_B) - - <-- LAST-ACK* - #2 TIME-WAIT <-- - (data2->user_A) - - - #3 TIME-WAIT --> --> CLOSED - - (timeout) - CLOSED - - Figure 2: Minimal T/TCP Transaction Sequence - - - T/TCP extensions require additional connection states, e.g., the - SYN-SENT*, CLOSE-WAIT*, and LAST-ACK* states shown in Figure 2. - Section 3.3 describes these new connection states. - - To obtain the minimal 3-segment sequence shown in Figure 2, the - server host must delay acknowledging segment #1 so the response - may be piggy-backed on segment #2. If the application takes - longer than this delay to compute the response, the normal TCP - retransmission mechanism in TCP B will send an acknowledgment to - forestall a retransmission from TCP A. Figure 3 shows an example - of a slow server application. Although the sequence in Figure 3 - does contain a 3-way handshake, the TAO mechanism has allowed the - request data to be accepted immediately, so that the client still - sees the minimum latency. - - - - - - - - - - - - - - -Braden [Page 7] - -RFC 1644 Transaction/TCP July 1994 - - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - #1 SYN-SENT* --> --> CLOSE-WAIT* - (TAO test OK => - data1->user_B) - - (timeout) - #2 FIN-WAIT-1 <-- <-- CLOSE-WAIT* - - - #3 FIN-WAIT-1 --> --> CLOSE-WAIT - - - #4 TIME-WAIT <-- <-- LAST-ACK - (data2->user_A) - - #5 TIME_WAIT --> --> CLOSED - - (timeout) - CLOSED - - Figure 3: Acknowledgment Timeout in Server - - - 2.3 Protocol Correctness - - This section fills in more details of the TAO mechanism and - provides an informal sketch of why the T/TCP protocol works. - - CC values are 32-bit integers. The TAO test requires the same - kind of modular arithmetic that is used to compare two TCP - sequence numbers. 
We assume that the boundary between y < z and z - < y for two CC values y and z occurs when they differ by 2**31, - i.e., by half the total CC space. - - The essential requirement for correctness of T/TCP is this: - - CC values must advance at a rate slower than 2**31 [R1] - counts per 2*MSL - - where MSL denotes the maximum segment lifetime in the Internet. - The requirement [R1] is easily met with a 32-bit CC. For example, - it will allow 10**6 transactions per second with the very liberal - MSL of 1000 seconds [RFC-1379]. This is well in excess of the - - - -Braden [Page 8] - -RFC 1644 Transaction/TCP July 1994 - - - transaction rates achievable with current operating systems and - network latency. - - Assume for the present that successive connections from client A - to server B contain only monotonically increasing CC values. That - is, if x(i) and x(i+1) are CC values carried in two successive - initial segments from the same host, then x(i+1) > x(i). - Assuming the requirement [R1], the CC space cannot wrap within the - range of segments that can be outstanding at one time. Therefore, - those successive segments from a given host that have not - exceeded their MSL must contain an ordered set of CC values: - - x(1) < x(2) < x(3) ... < x(n), - - where the modular comparisons have been replaced by simple - arithmetic comparisons. Here x(n) is the most recent acceptable - , which is cached by the server. If the server host receives - a segment containing a CC option with value y where y > - x(n), that must be newer; an antique duplicate SYN with CC - value greater than x(n) must have exceeded its MSL and vanished. - Hence, monotonic CC values and the TAO test prevent erroneous - replay of antique s. - - There are two possible reasons for a client to generate non- - monotonic CC values: (a) the client may have crashed and - restarted, causing the generated CC values to jump backwards; or - (b) the generated CC values may have wrapped around the finite - space. 
Wraparound may occur because CC generation is global to - all connections. Suppose that host A sends a transaction to B, - then sends more than 2**31 transactions to other hosts, and - finally sends another transaction to B. From B's viewpoint, CC - will have jumped backward relative to its cached value. - - In either of these two cases, the server may see the CC value jump - backwards only after an interval of at least MSL since the last - segment from the same client host. In case (a), client host - restart, this is because T/TCP retains TCP's explicit "Quiet Time" - of an MSL interval [STD-007]. In case (b), wraparound, [R1] - ensures that a time of at least MSL must have passed before the CC - space wraps around. Hence, there is no possibility that a TAO - test will succeed erroneously due to either cause of non- - monotonicity; i.e., there is no chance of replays due to TAO. - - However, although CC values jumping backwards will not cause an - error, it may cause a performance degradation due to unnecessary - 3WHS's. This results from the generated CC values jumping - backwards through approximately half their range, so that all - succeeding TAO tests fail until the generated CC values catch up - - - -Braden [Page 9] - -RFC 1644 Transaction/TCP July 1994 - - - to the cached value. To avoid this degradation, a client host - sends a CC.NEW option instead of a CC option in the case of either - system restart or CC wraparound. Receiving CC.NEW forces a 3WHS, - but when this 3WHS completes successfully the server cache is - updated to the new CC value. To detect CC wraparound, the client - must cache the last CC value it sent to each server. It therefore - maintains cache.CCsent[B] for each server B. If this cached value - is undefined or if it is larger than the next CC value generated - at the client, then the client sends a CC.NEW instead of a CC - option in the next SYN segment.
- - This is illustrated in Figure 4, which shows the scenario for the - first transaction from A to B after the client host A has crashed - and recovered. A similar sequence occurs if x is not greater than - cache.CCsent[B], i.e., if there is a wraparound of the generated - CC values. Because segment #1 contains a CC.NEW option, the - server host invalidates the cache entry and does a 3WHS; however, - it still sets B's TCB.CCrecv for this connection to x. TCP B uses - this CCrecv value to validate the ACK segment (#3) that - completes the 3WHS. Receipt of this segment updates cache.CC[A], - since the cache entry was previously undefined. (If a 3WHS always - updated the cache, then out-of-order SYN segments could cause the - cached value to jump backwards, possibly allowing replays). - Finally, the CC.ECHO option in the SYN,ACK segment #2 defines - A's cache.CCsent entry. - - This algorithm delays updating cache.CCsent[] until the SYN has - been ACK'd. This allows the undefined cache.CCsent value to be used - as a "first-time switch" to force reliable resynchronization of the - cached value at the server after a crash or wraparound. - - When we use the term "cache", we imply that the value can be - discarded at any time without introducing erroneous behavior - although it may degrade performance. - - (a) If a server host receives an initial SYN from client A but - has no cached value cache.CC[A], the server simply forces a - 3WHS to validate the segment. - - (b) If a client host has no cached value cache.CCsent[B] when it - needs to send an initial SYN segment, the client simply - sends a CC.NEW option in the segment. This forces a 3WHS at - the server. - - - - - - - - -Braden [Page 10] - -RFC 1644 Transaction/TCP July 1994 - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - cache.CCsent[B] cache.CC[A] - V V - - (Crash and restart) - [ ?? ] [ x0 ] - - #1 --> --> (invalidate cache; - queue data1; - 3-way handshake) - - [ ?? ] [ ?? 
] - #2 <-- <-- - (cache.CCsent[B]= x;) - - [ x ] [ ?? ] - - #3 --> --> data1->user_B; - cache.CC[A]= x; - - [ x ] [ x ] - - Figure 4. Client Host Restarting - - - So far, we have considered only correctness of the TAO mechanism - for bypassing the 3WHS. We must also protect a connection against - antique duplicate non-SYN segments. In standard TCP, such - protection is one of the functions of the TIME-WAIT state delay. - (The other function is the TCP full-duplex close semantics, which - we need to preserve; that is discussed below in Section 2.5). In - order to achieve a high rate of transaction processing, it must be - possible to truncate this TIME-WAIT state delay without exposure - to antique duplicate segments [RFC-1379]. - - For short connections (e.g., transactions), the CC values assigned - to each direction of the connection can be used to protect against - antique duplicate non-SYN segments. Here we define "short" as a - duration less than MSL. Suppose that there is a connection that - uses the CC values TCB.CCsend = x and TCB.CCrecv = y. By the - requirement [R1], neither x nor y can be reused for a new - connection from the same remote host for a time at least 2*MSL. - If the connection has been in existence for a time less than MSL, - then its CC values will not be reused for a period that exceeds - MSL, and therefore all antique duplicates with that CC value must - vanish before it is reused. Thus, for "short" connections we can - - - -Braden [Page 11] - -RFC 1644 Transaction/TCP July 1994 - - - guard against antique non-SYN segments by simply checking the CC - value in the segment againsts TCB.CCrecv. Note that this check - does not use the monotonic property of the CC values, only that - they not cycle in less than 2*MSL. Again, the quiet time at - system restart protects against errors due to crash with loss of - state. - - If the connection duration exceeds MSL, safety from old duplicates - still requires a TIME-WAIT delay of 2*MSL. 
Thus, truncation of - TIME-WAIT state is only possible for short connections. (This - problem has also been noticed by Shankar and Lee [ShankarLee93]). - This difference in behavior for long and for short connections - does create a slightly complex service model for applications - using T/TCP. An application has two different strategies for - multiple connections. For "short" connections, it should use a - fixed port pair and use the T/TCP mechanism to get rapid and - efficient transaction processing. For connections whose durations - are of the order of MSL or longer, it should use a different user - port for each successive connection, as is the current practice - with unmodified TCP. The latter strategy will cause excessive - overhead (due to TCB's in TIME-WAIT state) if it is applied to - high-frequency short connections. If an application makes the - wrong choice, its attempt to open a new connection may fail with a - "busy" error. If connection durations may range between long and - short, an application may have to be able to switch strategies - when one fails. - - 2.4 Truncating TIME-WAIT State - - Truncation of TIME-WAIT state is necessary to achieve high - transaction rates. As Figure 2 illustrates, a standard - transaction leaves the client end of the connection in TIME-WAIT - state. This section explains the protocol implications of - truncating TIME-WAIT state, when it is allowed (i.e., when the - connection has been in existence for less than MSL). In this - case, the client host should be able to interrupt TIME-WAIT state - to initiate a new incarnation of the same connection (i.e., using - the same host and ports). This will send an initial - segment. - - It is possible for the new to arrive at the server before - the retransmission state from the previous incarnation is gone, as - shown in Figure 5. Here the final (segment #3) from the - previous incarnation is lost, leaving retransmission state at B. 
- However, the client received segment #2 and thinks the transaction - completed successfully, so it can initiate a new transaction by - sending segment #4. When this arrives at the server - host, it must implicitly acknowledge segment #2, signalling - - - -Braden [Page 12] - -RFC 1644 Transaction/TCP July 1994 - - - success to the server application, deleting the old TCB, and - creating a new TCB, as shown in Figure 5. Still assuming that the - new is known to be valid, the server host marks the new - connection half-synchronized and delivers data3 to the server - application. (The details of how this is accomplished are - presented in Section 3.3.) - - The earlier discussion of the TAO mechanism assumed that the - previous incarnation was closed before a new arrived at the - server. However, TAO cannot be used to validate the if - there is still state from the previous incarnation, as shown in - Figure 5; in this case, it would be exceedingly awkward to perform - a 3WHS if the TAO test should fail. Fortunately, a modified - version of the TAO test can still be performed, using the state in - the earlier TCB rather than the cached state. - - (A) If the segment contains a CC or CC.NEW option, the - value SEG.CC from this option is compared with TCB.CCrecv, - the CC value in the still-existing state block of the - previous incarnation. If SEG.CC > TCB.CCrecv, the new - segment must be valid. - - (B) Otherwise, the is an old duplicate and is simply - discarded. - - Truncating TIME-WAIT state may be looked upon as composing an - extended state machine that joins the state machines of the two - incarnations, old and new. It may be described by introducing new - intermediate states (which we call I-states), with transitions - that join the two diagrams and share some state from each. I- - states are detailed in Section 3.3. - - Notice also segment #2' in Figure 5. 
TCP's mechanism to recover - from half-open connections (see Figure 10 of [STD-007]) cause TCP - A to send a RST when 2' arrives, which would incorrectly make B - think that the previous transaction did not complete successfully. - The half-open recovery mechanism must be defeated in this case, by - A ignoring segment #2'. - - - - - - - - - - - - - -Braden [Page 13] - -RFC 1644 Transaction/TCP July 1994 - - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - #1 --> <...,FIN,CC=x> --> LAST-ACK* - - #2 <-- <...ACK(FIN),data2,FIN,CC=y,CC.ECHO=x> <--- LAST-ACK* - TIME-WAIT - (data2->user_A) - - - #3 TIME-WAIT --> --> X (DROP) - - (New Active Open) (New Passive Open) - - #4 SYN-SENT* --> ... - - LISTEN-LA - #2' (discard) <-- <...ACK(FIN),data2,FIN,CC=y> <--- (retransmit) - - #4 SYN-SENT* ... --> ESTABLISHED* - SYN OK (see text) => - {Ack seg #2; - Delete old TCB; - Create new TCB; - data3 -> user_B; - cache.CC[A]= z;} - - Figure 5: Truncating TIME-WAIT State: SYN as Implicit ACK - - - 2.5 Transition to Standard TCP Operation - - T/TCP includes all normal TCP semantics, and it will continue to - operate exactly like TCP when the particular assumptions for - transactions do not hold. There is no limit on the size of an - individual transaction, and behavior of T/TCP should merge - seamlessly from pure transaction operation as shown in Figure 2, - to pure streaming mode for sending large files. All the sequences - shown in [STD-007] are still valid, and the inherent symmetry of - TCP is preserved. - - Figure 6 shows a possible sequence when the request and response - messages each require two segments. Segment #2 is a non-SYN - segment that contains a TCP option. To avoid compatibility - problems with existing TCP implementations, the client side should - - - -Braden [Page 14] - -RFC 1644 Transaction/TCP July 1994 - - - send segment #2 only if cache.CCsent[B] is defined, i.e., only if - host A knows that host B plays the new game. 
- - - - TCP A (Client) TCP B (Server) - _______________ ______________ - - CLOSED LISTEN - - - #1 SYN-SENT* --> --> ESTABLISHED* - (TAO test OK => - data1-> user) - - #2 SYN-SENT* --> --> CLOSE-WAIT* - (data2-> user) - - CLOSE-WAIT* - #3 FIN-WAIT-2 <-- <-- - (data3->user) - - #4 TIME_WAIT <-- <-- LAST-ACK* - (data4->user) - - #5 TIME-WAIT --> --> CLOSED - - - Figure 6. Multi-Packet Request/Response Sequence - - Figure 7 shows a more complex example, one possible sequence with - TAO combined with simultaneous open and close. This may be - compared with Figure 8 of [STD-007]. - - - - - - - - - - - - - - - - - - -Braden [Page 15] - -RFC 1644 Transaction/TCP July 1994 - - - - TCP A TCP B - _______________ ______________ - - CLOSED CLOSED - - #1 SYN-SENT* --> ... - - #2 CLOSING* <-- <-- SYN-SENT* - (TAO test OK => - data2->user_A - - #3 CLOSING* --> ... - - #1' ... --> CLOSING* - (TAO test OK => - data1->user_B) - - #4 TIME-WAIT <-- <-- CLOSING* - - #5 TIME-WAIT --> ... - - #3' ... --> TIME-WAIT - - #6 TIME-WAIT <-- <--- TIME-WAIT - - #5' TIME-WAIT ... --> TIME-WAIT - - (timeout) (timeout) - CLOSED CLOSED - - Figure 7: Simultaneous Open and Close - - - - - - - - - - - - - - - - - - - -Braden [Page 16] - -RFC 1644 Transaction/TCP July 1994 - - -3. FUNCTIONAL SPECIFICATION - - 3.1 Data Structures - - A connection count is an unsigned 32-bit integer, with the value - zero excluded. Zero is used to denote an undefined value. - - A host maintains a global connection count variable CCgen, and - each connection control block (TCB) contains two new connection - count variables, TCB.CCsend and TCB.CCrecv. Whenever a TCB is - created for the active or passive end of a new connection, CCgen - is incremented by 1 and placed in TCB.CCsend of the TCB; however, - if the previous CCgen value was 0xffffffff (-1), then the next - value should be 1. TCB.CCrecv is initialized to zero (undefined). - - T/TCP adds a per-host cache to TCP. 
An entry in this cache for - foreign host fh includes two CC values, cache.CC[fh] and - cache.CCsent[fh]. It may include other values, as discussed in - Sections 4.3 and 4.4. According to [STD-007], a TCP is not - permitted to send a segment larger than the default size 536, - unless it has received a larger value in an MSS (Maximum Segment - Size) option. This could constrain the client to use the default - MSS of 536 bytes for every request. To avoid this constraint, a - T/TCP may cache the MSS option values received from remote hosts, - and we allow a TCP to use a cached MSS option value for the - initial SYN segment. - - When the client sends an initial SYN segment containing data, it - does not have a send window for the server host. This is not a - great difficulty; we simply define a default initial window; our - current suggestion is 4K. Such a non-zero default should be - conditioned upon the existence of a cached connection count for - the foreign host, so that data may be included on an initial SYN - segment only if cache.CC[foreign host] is non-zero. - - In TCP, the window is dynamically adjusted to provide congestion - control/avoidance [Jacobson88]. It is possible that a particular - path might not be able to absorb an initial burst of 4096 bytes - without congestive losses. If this turns out to be a problem, it - should be possible to cache the congestion threshold for the path - and use this value to determine the maximum size of the initial - packet burst created by a request. - - 3.2 New TCP Options - - Three new TCP options are defined: CC, CC.NEW, and CC.ECHO. Each - carries a connection count SEG.CC. The complete rules for sending - and processing these options are given in Section 3.4 below.
- - - -Braden [Page 17] - -RFC 1644 Transaction/TCP July 1994 - - - CC Option - - Kind: 11 - - Length: 6 - - +--------+--------+--------+--------+--------+--------+ - |00001011|00000110| Connection Count: SEG.CC | - +--------+--------+--------+--------+--------+--------+ - Kind=11 Length=6 - - This option may be sent in an initial SYN segment, and it may - be sent in other segments if a CC or CC.NEW option has been - received for this incarnation of the connection. Its SEG.CC - value is the TCB.CCsend value from the sender's TCB. - - CC.NEW Option - - Kind: 12 - - Length: 6 - - +--------+--------+--------+--------+--------+--------+ - |00001100|00000110| Connection Count: SEG.CC | - +--------+--------+--------+--------+--------+--------+ - Kind=12 Length=6 - - This option may be sent instead of a CC option in an initial - segment (i.e., SYN but not ACK bit), to indicate that the - SEG.CC value may not be larger than the previous value. Its - SEG.CC value is the TCB.CCsend value from the sender's TCB. - - CC.ECHO Option - - Kind: 13 - - Length: 6 - - +--------+--------+--------+--------+--------+--------+ - |00001101|00000110| Connection Count: SEG.CC | - +--------+--------+--------+--------+--------+--------+ - Kind=13 Length=6 - - This option must be sent (in addition to a CC option) in a - segment containing both a SYN and an ACK bit, if the initial - SYN segment contained a CC or CC.NEW option. Its SEG.CC value - is the SEG.CC value from the initial SYN. - - - - -Braden [Page 18] - -RFC 1644 Transaction/TCP July 1994 - - - A CC.ECHO option should be sent only in a segment and - should be ignored if it is received in any other segment. - - 3.3 Connection States - - T/TCP requires new connection states and state transitions. - Figure 8 shows the resulting finite state machine; see [RFC-1379] - for a detailed development. 
If all state names ending in stars - are removed from Figure 8, the state diagram reduces to the - standard TCP state machine (see Figure 6 of [STD-007]), with two - exceptions: - - * STD-007 shows a direct transition from SYN-RECEIVED to FIN- - WAIT-1 state when the user issues a CLOSE call. This - transition is suspect; a more accurate description of the - state machine would seem to require the intermediate SYN- - RECEIVED* state shown in Figure 8. - - * In STD-007, a user CLOSE call in SYN-SENT state causes a - direct transition to CLOSED state. The extended diagram of - Figure 8 forces the connection to open before it closes, - since calling CLOSE to terminate the request in SYN-SENT - state is normal behavior for a transaction client. In the - case that no data has been sent in SYN-SENT state, it is - reasonable for a user CLOSE call to immediately enter CLOSED - state and delete the TCB. - - Each of the new states in Figure 8 bears a starred name, created - by suffixing a star onto a standard TCP state. Each "starred" - state bears a simple relationship to the corresponding "unstarred" - state. - - o SYN-SENT* and SYN-RECEIVED* differ from the SYN-SENT and - SYN-RECEIVED state, respectively, in recording the fact that - a FIN needs to be sent. - - o The other starred states indicate that the connection is - half-synchronized (hence, a SYN bit needs to be sent). 
- - - - - - - - - - - - - -Braden [Page 19] - -RFC 1644 Transaction/TCP July 1994 - - - ________ g ________ - | |<------------| | - | CLOSED |------------>| LISTEN | - |________| h ------|________| - | / | | - | / i| j| - | / | | - a| a'/ | _V______ ________ - | / j | |ESTAB- | e' | CLOSE- | - | / -----------|-->| LISHED*|------------>| WAIT*| - | / / | |________| |________| - | / / | | | | | - | / / | | c| d'| c| - ____V_V_ / _______V | __V_____ | __V_____ - | SYN- | b' | SYN- |c | |ESTAB- | e | | CLOSE- | - | SENT |------>|RECEIVED|---|->| LISHED|----------|->| WAIT | - |________| |________| | |________| | |________| - | | | | | | - | | | | __V_____ | - | | | | | LAST- | | - d'| d'| d'| d| | ACK* | | - | | | | |________| | - | | | | | | - | | ______V_ | ________ |c' |d - | k | | FIN- | | e''' | | | | - | -------|-->| WAIT-1*|---|------>|CLOSING*| | | - | / | |________| | |________| | | - | / | | | | | | - | / | c'| | c'| | | - ___V___ / ____V___ V_____V_ ____V___ V____V__ - | SYN- | b'' | SYN- | c | FIN- | e'' | | | LAST- | - | SENT* |---->|RECEIVD*|---->| WAIT-1 |---->|CLOSING | | ACK | - |________| |________| |________| |________| |________| - | | | - f| f| f'| - ___V____ ____V___ ___V____ - | FIN- | e |TIME- | T | | - | WAIT-2 |---->| WAIT |-->| CLOSED | - |________| |________| |________| - - - Figure 8A: Basic T/TCP State Diagram - - - - - - - - - -Braden [Page 20] - -RFC 1644 Transaction/TCP July 1994 - - - ________________________________________________________________ - | | - | Label Event / Action | - | _____ ________________________ | - | | - | a Active OPEN / create TCB, snd SYN | - | a' Active OPEN / snd SYN | - | b rcv SYN [no TAO]/ snd ACK(SYN) | - | b' rcv SYN [no TAO]/ snd SYN,ACK(SYN) | - | b'' rcv SYN [no TAO]/ snd SYN,FIN,ACK(SYN) | - | c rcv ACK(SYN) / | - | c' rcv ACK(SYN) / snd FIN | - | d CLOSE / snd FIN | - | d' CLOSE / snd SYN,FIN | - | e rcv FIN / snd ACK(FIN) | - | e' rcv FIN / snd SYN,ACK(FIN) | - | e'' rcv FIN / snd FIN,ACK(FIN) | - | 
e''' rcv FIN / snd SYN,FIN,ACK(FIN) | - | f rcv ACK(FIN) / | - | f' rcv ACK(FIN) / delete TCB | - | g CLOSE / delete TCB | - | h passive OPEN / create TCB | - | i (= b') rcv SYN [no TAO]/ snd SYN,ACK(SYN) | - | j rcv SYN [TAO OK] / snd SYN,ACK(SYN) | - | k rcv SYN [TAO OK] / snd SYN,FIN,ACK(SYN) | - | T timeout=2MSL / delete TCB | - | | - | | - | Figure 8B. Definition of State Transitions | - |________________________________________________________________| - - This simple correspondence leads to an alternative state model, - which makes it easy to incorporate the new states in an existing - implementation. Each state in the extended FSM is defined by the - triplet: - - (old_state, SENDSYN, SENDFIN) - - where 'old_state' is a standard TCP state and SENDFIN and SENDSYN - are Boolean flags see Figure 9. The SENDFIN flag is turned on (on - the client side) by a SEND(... EOF=YES) call, to indicate that a - FIN should be sent in a state which would not otherwise send a - FIN. The SENDSYN flag is turned on when the TAO test succeeds to - indicate that the connection is only half synchronized; as a - result, a SYN will be sent in a state which would not otherwise - send a SYN. - - - - - -Braden [Page 21] - -RFC 1644 Transaction/TCP July 1994 - - - ________________________________________________________________ - | | - | New state: Old_state: SENDSYN: SENDFIN: | - | __________ __________ ______ ______ | - | | - | SYN-SENT* => SYN-SENT FALSE TRUE | - | | - | SYN-RECEIVED* => SYN-RECEIVED FALSE TRUE | - | | - | ESTABLISHED* => ESTABLISHED TRUE FALSE | - | | - | CLOSE-WAIT* => CLOSE-WAIT TRUE FALSE | - | | - | LAST-ACK* => LAST-ACK TRUE FALSE | - | | - | FIN-WAIT-1* => FIN-WAIT-1 TRUE FALSE | - | | - | CLOSING* => CLOSING TRUE FALSE | - | | - | | - | Figure 9: Alternative State Definitions | - |________________________________________________________________| - - - Here is a more complete description of these boolean variables. 
- - * SENDFIN - - SENDFIN is turned on by the SEND(...EOF=YES) call, and turned - off when FIN-WAIT-1 state is entered. It may only be on in - SYN-SENT* and SYN-RECEIVED* states. - - SENDFIN has two effects. First, it causes a FIN to be sent - on the last segment of data from the user. Second, it causes - the SYN-SENT[*] and SYN-RECEIVED[*] states to transition - directly to FIN-WAIT-1, skipping ESTABLISHED state. - - * SENDSYN - - The SENDSYN flag is turned on when an initial SYN segment is - received and passes the TAO test. SENDSYN is turned off when - the SYN is acknowledged (specifically, when there is no RST - or SYN bit and SND.UNA < SEG.ACK). - - SENDSYN has three effects. First, it causes the SYN bit to - be set in segments sent with the initial sequence number - (ISN). Second, it causes a transition directly from LISTEN - state to ESTABLISHED*, if there is no FIN bit, or otherwise - - - -Braden [Page 22] - -RFC 1644 Transaction/TCP July 1994 - - - to CLOSE-WAIT*. Finally, it allows data to be received and - processed (passed to the application) even if the segment - does not contain an ACK bit. - - According to the state model of the basic TCP specification [STD- - 007], the server side must explicitly issue a passive OPEN call, - creating a TCB in LISTEN state, before an initial SYN may be - accepted. To accommodate truncation of TIME-WAIT state within - this model, it is necessary to add the five "I-states" shown in - Figure 10. The I-states are: LISTEN-LA, LISTEN-LA*, LISTEN-CL, - LISTEN-CL*, and LISTEN-TW. These are 'bridge states' between the - state diagrams of two successive incarnations. - Here D is the duration of the previous connection, i.e., the - elapsed time since the connection opened. The transitions labeled - with lower-case letters are taken from Figure 8.
- - Fortunately, many TCP implementations have a different user - interface model, in which the use can issue a generic passive open - ("listen") call; thereafter, when a matching initial SYN arrives, - a new TCB in LISTEN state is automatically generated. With this - user model, the I-states of Figure 10 are unnecessary. - - For example, suppose an initial SYN segment arrives for a - connection that is in LAST-ACK state. If this segment carries a - CC option and if SEG.CC is greater than TCB.CCrecv in the existing - TCB, the "q" transition shown in Figure 10 can be made directly - from the LAST-ACK state. That is, the previous TCB is processed - as if an ACK(FIN) had arrived, causing the user to be notified of - a successful CLOSE and the TCB to be deleted. Then processing of - the new SYN segment is repeated, using a new TCB that is generated - automatically. The same principle can be used to avoid - implementing any of the I-states. - - - - - - - - - - - - - - - - - - - -Braden [Page 23] - -RFC 1644 Transaction/TCP July 1994 - - - ______________________________ -| P: Passive OPEN / | -| | -| Q: Rcv SYN, special TAO test | d'| d| -| (see text) / Delete TCB, | ________ ___V____ | -| create TCB, snd SYN | |LISTEN- | P | LAST- | | -| | | LA* |<-----| ACK* | | -| Q': (same as Q) if D < MSL | |________| |________| | -| | | | | | -| R: Rcv ACK(FIN) / Delete TCB,| Q| c'| c'| | -| create TCB | | | | | -| | | ___V____ V______V -| S': Active OPEN if D < MSL / | | |LISTEN- | P | LAST- | -| Delete TCB, create TCB, | | | LA |<-----| ACK | -| snd SYN. 
| | |________| |________| -|______________________________| | | | | - | Q| R| f| - ________ ________ | | | | - e''' | | P |LISTEN- | | | V V - ---->|CLOSING*|----->| CL* | | | LISTEN CLOSED - |________| |________| | | - | | Q| | | - c'| c'| V V V - | | ESTABLISHED* - ____V___ V_______ - e'' | | P |LISTEN- | - ---->|CLOSING |------>| CL | - |________| |________| - | R| Q| - f| V V - | LISTEN ESTABLISHED* - ____V___ _________ - e |TIME- | P | LISTEN- | - ---->| WAIT |------------->| TW | - |________| |_________| - / | | | | - S'/ T| T| Q'| |S' - | _____V_ h _____V__ | V - | | |-------->| | | SYN-SENT - | | CLOSED |<--------| LISTEN | | - | |________| ------|________| | - | | / | j| | - | a| a'/ i| V V - | | / | ESTABLISHED* - V V V V - SYN-SENT ... - - Figure 10: I-States for TIME-WAIT Truncation - - - -Braden [Page 24] - -RFC 1644 Transaction/TCP July 1994 - - - 3.4 T/TCP Processing Rules - - This section summarizes the rules for sending and processing the - T/TCP options. - - INITIALIZATION - - I1: All cache entries cache.CC[*] and cache.CCsent[*] are - undefined (zero) when a host system initializes, and CCgen - is set to a non-zero value. - - I2: A new TCB is initialized with TCB.CCrecv = 0 and - TCB.CCsend = current CCgen value; CCgen is then - incremented. If the result is zero, CCgen is incremented - again. - - - SENDING SEGMENTS - - S1: Sending initial Segment - - An initial segment is sent with either a CC option - or a CC.NEW option. If cache.CCsent[fh] is undefined or - if TCB.CCsend < cache.CCsent[fh], then the option - CC.NEW(TCB.CCsend) is sent and cache.CCsent[fh] is set to - zero. Otherwise, the option CC(TCB.CCsend) is sent and - cache.CCsent[fh] is set to CCsend. - - S2: Sending Segment - - If the sender's TCB.CCrecv is non-zero, then a - segment is sent with both a CC(TCB.CCsend) option and a - CC.ECHO (TCB.CCrecv) option. 
- - S3: Sending Non-SYN Segment - - A non-SYN segment is sent with a CC(TCB.CCsend) option if - the TCB.CCrecv value is non-zero, or if the state is SYN- - SENT or SYN-SENT* and cache.CCsent[fh] is non-zero (this - last is required to send CC options in the segments - following the first of a multi-segment request message; - see segment #2 in Figure 6). - - RECEIVING INITIAL SEGMENT - - Suppose that a server host receives a segment containing a SYN - bit but no ACK bit in LISTEN, SYN-SENT, or SYN-SENT* state. - - - - -Braden [Page 25] - -RFC 1644 Transaction/TCP July 1994 - - - R1.1:If the segment contains a CC or CC.NEW option, - SEG.CC is stored into TCB.CCrecv of the new TCB. - - R1.2:If the segment contains a CC option and if the local cache - entry cache.CC[fh] is defined and if - SEG.CC > cache.CC[fh], then the TAO test is passed and the - connection is half-synchronized in the incoming direction. - The server host replaces the cache.CC[fh] value by SEG.CC, - passes any data in the segment to the user, and processes - a FIN bit if present. - - Acknowledgment of the SYN is delayed to allow piggybacking - on a response segment. - - R1.3:If SEG.CC <= cache.CC[fh] (the TAO test has failed), or if - cache.CC[fh] is undefined, or if there is no CC option - (but possibly a CC.NEW option), the server host proceeds - with normal TCP processing. If the connection was in - LISTEN state, then the host executes a 3-way handshake - using the standard TCP rules. In the SYN-SENT or SYN- - SENT* state (i.e., the simultaneous open case), the TCP - sends ACK(SYN) and enters SYN-RECEIVED state. - - R1.4:If there is no CC option (but possibly a CC.NEW option), - then the server host sets cache.CC[fh] undefined (zero). - Receiving an ACK for a SYN (following application of rule - R1.3) will update cache.CC[fh], by rule R3. 
- - Suppose that an initial segment containing a CC or CC.NEW - option arrives in an I-state (i.e., a state with a name of the - form 'LISTEN-xx', where xx is one of TW, LA, LA*, CL, or CL*): - - R1.5:If the state is LISTEN-TW, then the duration of the - current connection is compared with MSL. If duration > - MSL then send a RST: - - - - drop the packet, and return. - - R1.6:Perform a special TAO test: compare SEG.CC with - TCB.CCrecv. - - If SEG.CC is greater, then processing is performed as if - an ACK(FIN) had arrived: signal the application that the - previous close completed successfully and delete the - previous TCB. Then create a new TCB in LISTEN state and - reprocess the SYN segment against the new TCB. - - - -Braden [Page 26] - -RFC 1644 Transaction/TCP July 1994 - - - Otherwise, silently discard the segment. - - RECEIVING SYN,ACK SEGMENT - - Suppose that a client host receives a SYN,ACK segment for a - connection in SYN-SENT or SYN-SENT* state. - - R2.1:If SEG.ACK is not acceptable (see [STD-007]) and - cache.CCsent[fh] is non-zero, then simply drop the segment - without sending a RST. (The new SYN that the client is - (re-)transmitting will eventually acknowledge any - outstanding data and FIN at the server.) - - R2.2:If the segment contains a CC.ECHO option whose SEG.CC is - different from TCB.CCsend, then the segment is - unacceptable and is dropped. - - R2.3:If cache.CCsent[fh] is zero, then it is set to TCB.CCsend. - - R2.4:If the segment contains a CC option, its SEG.CC is stored - into TCB.CCrecv of the TCB. - - RECEIVING ACK SEGMENT IN SYN-RECEIVED STATE - - R3.1:If a segment contains a CC option whose SEG.CC differs - from TCB.CCrecv, then the segment is unacceptable and is - dropped. - - R3.2:Otherwise, a 3-way handshake has completed successfully at - the server side. If the segment contains a CC option and - if cache.CC[fh] is zero, then cache.CC[fh] is replaced by - TCB.CCrecv.
- - RECEIVING OTHER SEGMENT - - R4: Any other segment received with a CC option is - unacceptable if SEG.CC differs from TCB.CCrecv. However, - a RST segment is exempted from this test. - - OPEN REQUEST - - To allow truncation of TIME-WAIT state, the following changes - are made in the state diagram for OPEN requests (see Figure - 10): - - O1.1:A new passive open request is allowed in any of the - states: LAST-ACK, LAST-ACK*, CLOSING, CLOSING*, or TIME- - WAIT. This causes a transition to the corresponding I- - - - -Braden [Page 27] - -RFC 1644 Transaction/TCP July 1994 - - - state (see Figure 10), which retains the previous state, - including the retransmission queue and timer. - - O1.2:A new active open request is allowed in TIME-WAIT or - LISTEN-TW state, if the elapsed time since the current - connection opened is less than MSL. The result is to - delete the old TCB and create a new one, send a new SYN - segment, and enter SYN-SENT or SYN-SENT* state (depending - upon whether or not the SYN segment contains a FIN bit). - - Finally, T/TCP has a provision to improve performance for the case - of a client that "sprays" transactions rapidly using many - different server hosts and/or ports. If TCB.CCrecv in the TCB is - non-zero (and still assuming that the connection duration is less - than MSL), then the TIME-WAIT delay may be set to min(K*RTO, - 2*MSL). Here RTO is the measured retransmission timeout time and - the constant K is currently specified to be 8. - - 3.5 User Interface - - STD-007 defines a prototype user interface ("transport service") - that implements the virtual circuit service model [STD-007, - Section 3.8]. One addition to this interface is required for - transaction processing: a new Boolean flag "end-of-file" (EOF), - added to the SEND call.
A generic SEND call becomes: - - Send - - Format: SEND (local connection name, buffer address, - byte count, PUSH flag, URGENT flag, EOF flag [,timeout]) - - The following text would be added to the description of SEND in - [STD-007]: - - If the EOF (End-Of-File) flag is set, any remaining queued - data is pushed and the connection is closed. Just as with the - CLOSE call, all data being sent is delivered reliably before - the close takes effect, and data may continue to be received - on the connection after completion of the SEND call. - - Figure 8A shows a skeleton sequence of user calls by which a - client could initiate a transaction. The SEND call initiates a - transaction request to the foreign socket (host and port) - specified in the passive OPEN call. The predicate "recv_EOF" - tests whether or not a FIN has been received on the connection; - this might be implemented using the STATUS command of [STD-007], - or it might be implemented by some operating-system-dependent - mechanism. When recv_EOF returns TRUE, the connection has been - - - -Braden [Page 28] - -RFC 1644 Transaction/TCP July 1994 - - - completely closed and the client end of the connection is in - TIME-WAIT state. - - __________________________________________________________________ - | | - | | - | OPEN(local_port, foreign_socket, PASSIVE) -> conn_name; | - | | - | SEND(conn_name, request_buffer, length, | - | PUSH=YES, URG=NO, EOF=YES); | - | | - | while (not recv_EOF(conn_name)) { | - | | - | RECEIVE(conn_name, reply_buffer, length) -> count; | - | | - | | - | } | - | | - | | - | Figure 8A: Client Side User Interface | - |__________________________________________________________________| - - If a client is going to send a rapid series of such requests to - the same foreign_socket, it should use the same local_port for - all. This will allow truncation of TIME-WAIT state. 
Otherwise, - it could leave local_port wild, allowing TCP to choose successive - local ports for each call, realizing that each transaction may - leave behind a significant control block overhead in the kernel. - - Figure 8B shows a basic sequence of server calls. The server - application waits for a request to arrive and then reads and - processes it until a FIN arrives (recv_EOF returns TRUE). At this - time, the connection is half-closed. The SEND call used to return - the reply completes the close in the other direction. It should - be noted that the use of SEND(... EOF=YES) in Figure 4B instead of - a SEND, CLOSE sequence is only an optimization; it allows - piggybacking the FIN in order to minimize the number of segments. - It should have little effect on transaction latency. - - - - - - - - - - - - - -Braden [Page 29] - -RFC 1644 Transaction/TCP July 1994 - - - __________________________________________________________________ - | | - | | - | OPEN(local_port, ANY_SOCKET, PASSIVE) -> conn_name; | - | | - | | - | | - | STATUS(conn_name) -> foreign_socket | - | | - | while (not recv_EOF(conn_name)) { | - | | - | RECEIVE(conn_name, request_buffer, length) -> count; | - | | - | | - | } | - | | - | | - | | - | SEND(conn_name, reply_buffer, length, | - | PUSH=YES, URG=NO, EOF=YES); | - | | - | | - | Figure 8B: Server Side User Interface | - |__________________________________________________________________| - - -4. IMPLEMENTATION ISSUES - - 4.1 RFC-1323 Extensions - - A recently-proposed set of TCP enhancements [RFC-1323] defines a - Timestamps option, which carries two 32-bit timestamp values. - This option is used to accurately measure round-trip time (RTT). - The same option is also used in a procedure known as "PAWS" - (Protect Against Wrapped Sequence) to prevent erroneous data - delivery due to a combination of old duplicate segments and - sequence number reuse at very high bandwidths. 
The approach to - transactions specified in this memo is independent of the RFC-1323 - enhancements, but implementation of RFC-1323 is desirable for all - TCP's. - - The RFC-1323 extensions share several common implementation issues - with the T/TCP extensions. Both require that TCP headers carry - options. Accommodating options in TCP headers requires changes in - the way that the maximum segment size is determined, to prevent - inadvertent IP fragmentation. Both require some additional state - variable in the TCB, which may or may not cause implementation - difficulties. - - - -Braden [Page 30] - -RFC 1644 Transaction/TCP July 1994 - - - 4.2 Minimal Packet Sequence - - Most TCP implementations will require some small modifications to - allow the minimal packet sequence for a transaction shown in - Figure 2. - - Many TCP implementations contain a mechanism to delay - acknowledgments of some subset of the data segments, to cut down - on the number of acknowledgment segments and to allow piggybacking - on the reverse data flow (typically character echoes). To obtain - minimal packet exchanges for transactions, it is necessary to - delay the acknowledgment of some control bits, in an analogous - manner. In particular, the segment that is to be sent - in ESTABLISHED* or CLOSE-WAIT* state should be delayed. Note that - the amount of delay is determined by the minimum RTO at the - transmitter; it is a parameter of the communication protocol, - independent of the application. We propose to use the same delay - parameter (and if possible, the same mechanism) that is used for - delaying data acknowledgments. - - To get the FIN piggy-backed on the reply data (segment #3 in - Figure 2), those implementations that have an implied PUSH=YES on - all SEND calls will need to augment the user interface so that - PUSH=NO can be set for transactions. - - 4.3 RTT Measurement - - Transactions introduce new issues into the problem of measuring - round trip times [Jacobson88].
- - (a) With the minimal 3-segment exchange, there can be exactly one - RTT measurement in each direction for each transaction. - Since dynamic estimation of RTT cannot take place within a - single transaction, it must take place across successive - transactions. Therefore, cacheing the measured RTT and RTT - variance values is essential for transaction processing; in - normal virtual circuit communication, such cacheing is only - desirable. - - (b) At the completion of a transaction, the values for RTT and - RTT variance that are retained in the cache must be some - average of previous values with the values measured during - the transaction that is completing. This raises the question - of the time constant for this average; quite different - dynamic considerations hold for transactions than for file - transfers, for example. - - (c) An RTT measurement by the client will yield the value: - - - -Braden [Page 31] - -RFC 1644 Transaction/TCP July 1994 - - - T = RTT + min(SPT, ATO), - - where SPT (server processing time) was defined in the - introduction, and ATO is the timeout period for sending a - delayed ACK. Thus, the measured RTT includes SPT, which may - be arbitrarily variable; however, the resulting variability - of the measured T cannot exceed ATO. (In a popular TCP - implementation, for example, ATO = 200ms, so that the - variance of SPT makes a relatively small contribution to the - variance of RTT.) - - (d) Transactions sample the RTT at random times, which are - determined by the client and the server applications rather - than by the network dynamics. When there are long pauses - between transactions, cached path properties will be poor - predictors of current values in the network. - - Thus, the dynamics of RTT measurement for transactions differ from - those for virtual circuits. RTT measurements should work - correctly for very short connections but reduce to the current TCP - algorithms for long-lasting connections. 
Further study of this - issue is needed. - - 4.4 Cache Implementation - - This extension requires a per-host cache of connection counts. - This cache may also contain values of the smoothed RTT, RTT - variance, congestion avoidance threshold, and MSS values. - Depending upon the implementation details, it may be simplest to - build a new cache for these values; another possibility is to use - the routing cache that should already be included in the host - [RFC-1122]. - - Implementation of the cache may be simplified because it is - consulted only when a connection is established; thereafter, the - CC values relevant to the connection are kept in the TCB. This - means that a cache entry may be safely reused during the lifetime - of a connection, avoiding the need for locking. - - 4.5 CPU Performance - - TCP implementations are customarily optimized for streaming of - data at high speeds, not for opening or closing connections. - Jacobson's Header Prediction algorithm [Jacobson90] handles the - simple common cases of in-sequence data and ACK segments when - streaming data. To provide good performance for transactions, an - implementation might be able to do an analogous "header - prediction" specifically for the minimal request and the response - - - -Braden [Page 32] - -RFC 1644 Transaction/TCP July 1994 - - - segments. - - The overhead of UDP provides a lower bound on the overhead of - TCP-based transaction processing. It will probably not be - possible to reach this bound for TCP transactions, since opening a - TCP connection involves creating a significant amount of state - that is not required by UDP. - - McKenney and Dove [McKenney92] have pointed out that transaction - processing applications of TCP can stress the performance of the - demultiplexing algorithm, i.e., the algorithm used to look up the - TCB when a segment arrives. They advocate the use of hash-table - techniques rather than a linear search.
The effect of - demultiplexing on performance may become especially acute for a - transaction client using the extended TCP described here, due to - TCB's left in TIME-WAIT state. A high rate of transactions from a - given client will leave a large number of TCB's in TIME-WAIT - state, until their timeout expires. If the TCP implementation - uses a linear search for demultiplexing, all of these control - blocks must be traversed in order to discover that the new - association does not exist. In this circumstance, performance of - a hash table lookup should not degrade severely due to - transactions. - - 4.6 Pre-SYN Queue - - Suppose that segment #1 in Figure 4 is lost in the network; when - segment #2 arrives in LISTEN state, it will be ignored by the TCP - rules (see [STD-007] p.66, "fourth other text and control"), and - must be retransmitted. It would be possible for the server side - to queue any ACK-less data segments received in LISTEN state and - to "replay" the segments in this queue when a SYN segment does - arrive. A data segment received with an ACK bit, which is the - normal case for existing TCP's, would still generate a RST - segment. - - Note that queueing segments in LISTEN state is different from - queueing out-of-order segments after the connection is - synchronized. In LISTEN state, the sequence number corresponding - to the left window edge is not yet known, so that the segment - cannot be trimmed to fit within the window before it is queued. - In fact, no processing should be done on a queued segment while - the connection is still in LISTEN state. Therefore, a new "pre- - SYN queue" would be needed. A timeout would be required, to flush - the Pre-SYN Queue in case a SYN segment was not received. - - Although implementation of a pre-SYN queue is not difficult in BSD - TCP, its limited contribution to throughput probably does not - - - -Braden [Page 33] - -RFC 1644 Transaction/TCP July 1994 - - - justify the effort. - -6.
ACKNOWLEDGMENTS - - I am very grateful to Dave Clark for pointing out bugs in RFC-1379 - and for helping me to clarify the model. I also wish to thank Greg - Minshall, whose probing questions led to further elucidation of the - issues in T/TCP. - -7. REFERENCES - - [Jacobson88] Jacobson, V., "Congestion Avoidance and Control", ACM - SIGCOMM '88, Stanford, CA, August 1988. - - [Jacobson90] Jacobson, V., "4BSD Header Prediction", Comp Comm - Review, v. 20, no. 2, April 1990. - - [McKenney92] McKenney, P., and K. Dove, "Efficient Demultiplexing - of Incoming TCP Packets", ACM SIGCOMM '92, Baltimore, MD, October - 1992. - - [RFC-1122] Braden, R., Ed., "Requirements for Internet Hosts -- - Communications Layers", STD-3, RFC-1122, USC/Information Sciences - Institute, October 1989. - - [RFC-1323] Jacobson, V., Braden, R., and D. Borman, "TCP Extensions - for High Performance", RFC-1323, LBL, USC/Information Sciences - Institute, Cray Research, February 1991. - - [RFC-1379] Braden, R., "Transaction TCP -- Concepts", RFC-1379, - USC/Information Sciences Institute, September 1992. - - [ShankarLee93] Shankar, A. and D. Lee, "Modulo-N Incarnation - Numbers for Cache-Based Transport Protocols", Report CS-TR-3046/ - UIMACS-TR-93-24, University of Maryland, March 1993. - - [STD-007] Postel, J., "Transmission Control Protocol - DARPA - Internet Program Protocol Specification", STD-007, RFC-793, - USC/Information Sciences Institute, September 1981. - - - - - - - - - - - - -Braden [Page 34] - -RFC 1644 Transaction/TCP July 1994 - - -APPENDIX A. ALGORITHM SUMMARY - - This appendix summarizes the additional processing rules introduced - by T/TCP. We define the following symbols: - - Options - - CC(SEG.CC): TCP Connection Count (CC) Option - CC.NEW(SEG.CC): TCP CC.NEW option - CC.ECHO(SEG.CC): TCP CC.ECHO option - - Here SEG.CC is option value in segment.
- - Per-Connection State Variables in TCB - - CCsend: CC value to be sent in segments - CCrecv: CC value to be received in segments - Elapsed: Duration of connection - - Global Variables: - - CCgen: CC generator variable - cache.CC[fh]: Cache entry: Last CC value received. - cache.CCsent[fh]: Cache entry: Last CC value sent. - - - PSEUDO-CODE SUMMARY: - - Passive OPEN => { - Create new TCB; - } - - Active OPEN => { - - CCrecv = 0; - CCsend = CCgen; - If (CCgen == 0xffffffff) then Set CCgen = 1; - else Set CCgen = CCgen + 1. - - } - - - Send initial {SYN} segment => { - - If (cache.CCsent[fh] == 0 OR CCsend < cache.CCsent[fh] ) then { - - Include CC.NEW(CCsend) option in segment; - Set cache.CCsent[fh] = 0; - - - -Braden [Page 35] - -RFC 1644 Transaction/TCP July 1994 - - - } - else { - - Include CC(CCsend) option in segment; - Set cache.CCsent[fh] = CCsend; - } - } - - - Send {SYN,ACK} segment => { - - If (CCrecv != 0) then - Include CC(CCsend), CC.ECHO(CCrecv) options in segment. - } - - - Receive {SYN} segment in LISTEN, SYN-SENT, or SYN-SENT* state => { - - If state == LISTEN then { - CCrecv = 0; - CCsend = CCgen; - If (CCgen == 0xffffffff) then Set CCgen = 1; - else Set CCgen = CCgen + 1. - } - - If (Segment contains CC option OR - Segment contains CC.NEW option) then - Set CCrecv = SEG.CC. - - if (Segment contains CC option AND - cache.CC[fh] != 0 AND - SEG.CC > cache.CC[fh] ) then { /* TAO Test OK */ - - Set cache.CC[fh] = CCrecv; - - - } - - - If (Segment does not contain CC option) then - Set cache.CC[fh] = 0; - - . - } - - Receive {SYN} segment in LISTEN-TW, LISTEN-LA, LISTEN-LA*, LISTEN-CL, - or LISTEN-CL* state => { - - - - -Braden [Page 36] - -RFC 1644 Transaction/TCP July 1994 - - - If ( (Segment contains CC option AND CCrecv != 0 ) then { - - If (state = LISTEN-TW AND Elapsed > MSL ) then - . - - if (SEG.CC > CCrecv ) then { - ; - ; - . - /* Expect to match new TCB - * in LISTEN state. - */ - } - } - else - . 
- } - - - Receive {SYN,ACK} segment => { - - if (Segment contains CC.ECHO option AND - SEG.CC != CCsend) then - . - - if (Segment contains CC option) then { - Set CCrecv = SEG.CC. - - if (cache.CC[fh] is undefined) then - Set cache.CC[fh] = CCrecv. - } - } - - - Send non-SYN segment => { - - if (CCrecv != 0 OR - (cache.CCsent[fh] != 0 AND - state is SYN-SENT or SYN-SENT*)) then - Include CC(CCsend) option in segment. - } - - - Receive non-SYN segment in SYN-RECEIVED state => { - - if (Segment contains CC option AND RST bit is off) { - if (SEG.CC != CCrecv) then - . - - if (cache.CC[fh] is undefined) then - Set cache.CC[fh] = CCrecv. - } - } - - - Receive non-SYN segment in (state >= ESTABLISHED) => { - - if (Segment contains CC option AND RST bit is off) { - if (SEG.CC != CCrecv) then - . - } - } - - -Security Considerations - - Security issues are not discussed in this memo. - -Author's Address - - Bob Braden - University of Southern California - Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292 - - Phone: (310) 822-1511 - EMail: Braden@ISI.EDU - - - - - - - - - - - - - - - - - - - -Braden [Page 38] - diff --git a/kernel/picotcp/RFC/rfc1661.txt b/kernel/picotcp/RFC/rfc1661.txt deleted file mode 100644 index 02112bd..0000000 --- a/kernel/picotcp/RFC/rfc1661.txt +++ /dev/null @@ -1,2976 +0,0 @@ - - - - - - -Network Working Group W. Simpson, Editor -Request for Comments: 1661 Daydreamer -STD: 51 July 1994 -Obsoletes: 1548 -Category: Standards Track - - - The Point-to-Point Protocol (PPP) - - - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. 
- - -Abstract - - The Point-to-Point Protocol (PPP) provides a standard method for - transporting multi-protocol datagrams over point-to-point links. PPP - is comprised of three main components: - - 1. A method for encapsulating multi-protocol datagrams. - - 2. A Link Control Protocol (LCP) for establishing, configuring, - and testing the data-link connection. - - 3. A family of Network Control Protocols (NCPs) for establishing - and configuring different network-layer protocols. - - This document defines the PPP organization and methodology, and the - PPP encapsulation, together with an extensible option negotiation - mechanism which is able to negotiate a rich assortment of - configuration parameters and provides additional management - functions. The PPP Link Control Protocol (LCP) is described in terms - of this mechanism. - - -Table of Contents - - - 1. Introduction .......................................... 1 - 1.1 Specification of Requirements ................... 2 - 1.2 Terminology ..................................... 3 - - 2. PPP Encapsulation ..................................... 4 - - -Simpson [Page i] - RFC 1661 Point-to-Point Protocol July 1994 - - - 3. PPP Link Operation .................................... 6 - 3.1 Overview ........................................ 6 - 3.2 Phase Diagram ................................... 6 - 3.3 Link Dead (physical-layer not ready) ............ 7 - 3.4 Link Establishment Phase ........................ 7 - 3.5 Authentication Phase ............................ 8 - 3.6 Network-Layer Protocol Phase .................... 8 - 3.7 Link Termination Phase .......................... 9 - - 4. The Option Negotiation Automaton ...................... 11 - 4.1 State Transition Table .......................... 12 - 4.2 States .......................................... 14 - 4.3 Events .......................................... 16 - 4.4 Actions ......................................... 
21 - 4.5 Loop Avoidance .................................. 23 - 4.6 Counters and Timers ............................. 24 - - 5. LCP Packet Formats .................................... 26 - 5.1 Configure-Request ............................... 28 - 5.2 Configure-Ack ................................... 29 - 5.3 Configure-Nak ................................... 30 - 5.4 Configure-Reject ................................ 31 - 5.5 Terminate-Request and Terminate-Ack ............. 33 - 5.6 Code-Reject ..................................... 34 - 5.7 Protocol-Reject ................................. 35 - 5.8 Echo-Request and Echo-Reply ..................... 36 - 5.9 Discard-Request ................................. 37 - - 6. LCP Configuration Options ............................. 39 - 6.1 Maximum-Receive-Unit (MRU) ...................... 41 - 6.2 Authentication-Protocol ......................... 42 - 6.3 Quality-Protocol ................................ 43 - 6.4 Magic-Number .................................... 45 - 6.5 Protocol-Field-Compression (PFC) ................ 48 - 6.6 Address-and-Control-Field-Compression (ACFC) - - SECURITY CONSIDERATIONS ...................................... 51 - REFERENCES ................................................... 51 - ACKNOWLEDGEMENTS ............................................. 51 - CHAIR'S ADDRESS .............................................. 52 - EDITOR'S ADDRESS ............................................. 52 - - - - - - - - - - -Simpson [Page ii] - RFC 1661 Point-to-Point Protocol July 1994 - - -1. Introduction - - The Point-to-Point Protocol is designed for simple links which - transport packets between two peers. These links provide full-duplex - simultaneous bi-directional operation, and are assumed to deliver - packets in order. It is intended that PPP provide a common solution - for easy connection of a wide variety of hosts, bridges and routers - [1]. 
- - Encapsulation - - The PPP encapsulation provides for multiplexing of different - network-layer protocols simultaneously over the same link. The - PPP encapsulation has been carefully designed to retain - compatibility with most commonly used supporting hardware. - - Only 8 additional octets are necessary to form the encapsulation - when used within the default HDLC-like framing. In environments - where bandwidth is at a premium, the encapsulation and framing may - be shortened to 2 or 4 octets. - - To support high speed implementations, the default encapsulation - uses only simple fields, only one of which needs to be examined - for demultiplexing. The default header and information fields - fall on 32-bit boundaries, and the trailer may be padded to an - arbitrary boundary. - - Link Control Protocol - - In order to be sufficiently versatile to be portable to a wide - variety of environments, PPP provides a Link Control Protocol - (LCP). The LCP is used to automatically agree upon the - encapsulation format options, handle varying limits on sizes of - packets, detect a looped-back link and other common - misconfiguration errors, and terminate the link. Other optional - facilities provided are authentication of the identity of its peer - on the link, and determination when a link is functioning properly - and when it is failing. - - Network Control Protocols - - Point-to-Point links tend to exacerbate many problems with the - current family of network protocols. For instance, assignment and - management of IP addresses, which is a problem even in LAN - environments, is especially difficult over circuit-switched - point-to-point links (such as dial-up modem servers). These - problems are handled by a family of Network Control Protocols - (NCPs), which each manage the specific needs required by their - - - -Simpson [Page 1] - RFC 1661 Point-to-Point Protocol July 1994 - - - respective network-layer protocols. These NCPs are defined in - companion documents. 
- - Configuration - - It is intended that PPP links be easy to configure. By design, - the standard defaults handle all common configurations. The - implementor can specify improvements to the default configuration, - which are automatically communicated to the peer without operator - intervention. Finally, the operator may explicitly configure - options for the link which enable the link to operate in - environments where it would otherwise be impossible. - - This self-configuration is implemented through an extensible - option negotiation mechanism, wherein each end of the link - describes to the other its capabilities and requirements. - Although the option negotiation mechanism described in this - document is specified in terms of the Link Control Protocol (LCP), - the same facilities are designed to be used by other control - protocols, especially the family of NCPs. - - - -1.1. Specification of Requirements - - In this document, several words are used to signify the requirements - of the specification. These words are often capitalized. - - MUST This word, or the adjective "required", means that the - definition is an absolute requirement of the specification. - - MUST NOT This phrase means that the definition is an absolute - prohibition of the specification. - - SHOULD This word, or the adjective "recommended", means that there - may exist valid reasons in particular circumstances to - ignore this item, but the full implications must be - understood and carefully weighed before choosing a - different course. - - MAY This word, or the adjective "optional", means that this - item is one of an allowed set of alternatives. An - implementation which does not include this option MUST be - prepared to interoperate with another implementation which - does include the option. - - - - - - -Simpson [Page 2] - RFC 1661 Point-to-Point Protocol July 1994 - - -1.2. 
Terminology - - This document frequently uses the following terms: - - datagram The unit of transmission in the network layer (such as IP). - A datagram may be encapsulated in one or more packets - passed to the data link layer. - - frame The unit of transmission at the data link layer. A frame - may include a header and/or a trailer, along with some - number of units of data. - - packet The basic unit of encapsulation, which is passed across the - interface between the network layer and the data link - layer. A packet is usually mapped to a frame; the - exceptions are when data link layer fragmentation is being - performed, or when multiple packets are incorporated into a - single frame. - - peer The other end of the point-to-point link. - - silently discard - The implementation discards the packet without further - processing. The implementation SHOULD provide the - capability of logging the error, including the contents of - the silently discarded packet, and SHOULD record the event - in a statistics counter. - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 3] - RFC 1661 Point-to-Point Protocol July 1994 - - -2. PPP Encapsulation - - The PPP encapsulation is used to disambiguate multiprotocol - datagrams. This encapsulation requires framing to indicate the - beginning and end of the encapsulation. Methods of providing framing - are specified in companion documents. - - A summary of the PPP encapsulation is shown below. The fields are - transmitted from left to right. - - +----------+-------------+---------+ - | Protocol | Information | Padding | - | 8/16 bits| * | * | - +----------+-------------+---------+ - - - Protocol Field - - The Protocol field is one or two octets, and its value identifies - the datagram encapsulated in the Information field of the packet. - The field is transmitted and received most significant octet - first. - - The structure of this field is consistent with the ISO 3309 - extension mechanism for address fields. 
All Protocols MUST be - odd; the least significant bit of the least significant octet MUST - equal "1". Also, all Protocols MUST be assigned such that the - least significant bit of the most significant octet equals "0". - Frames received which don't comply with these rules MUST be - treated as having an unrecognized Protocol. - - Protocol field values in the "0***" to "3***" range identify the - network-layer protocol of specific packets, and values in the - "8***" to "b***" range identify packets belonging to the - associated Network Control Protocols (NCPs), if any. - - Protocol field values in the "4***" to "7***" range are used for - protocols with low volume traffic which have no associated NCP. - Protocol field values in the "c***" to "f***" range identify - packets as link-layer Control Protocols (such as LCP). - - - - - - - - - - - -Simpson [Page 4] - RFC 1661 Point-to-Point Protocol July 1994 - - - Up-to-date values of the Protocol field are specified in the most - recent "Assigned Numbers" RFC [2]. This specification reserves - the following values: - - Value (in hex) Protocol Name - - 0001 Padding Protocol - 0003 to 001f reserved (transparency inefficient) - 007d reserved (Control Escape) - 00cf reserved (PPP NLPID) - 00ff reserved (compression inefficient) - - 8001 to 801f unused - 807d unused - 80cf unused - 80ff unused - - c021 Link Control Protocol - c023 Password Authentication Protocol - c025 Link Quality Report - c223 Challenge Handshake Authentication Protocol - - Developers of new protocols MUST obtain a number from the Internet - Assigned Numbers Authority (IANA), at IANA@isi.edu. - - - Information Field - - The Information field is zero or more octets. The Information - field contains the datagram for the protocol specified in the - Protocol field. - - The maximum length for the Information field, including Padding, - but not including the Protocol field, is termed the Maximum - Receive Unit (MRU), which defaults to 1500 octets. 
By - negotiation, consenting PPP implementations may use other values - for the MRU. - - - Padding - - On transmission, the Information field MAY be padded with an - arbitrary number of octets up to the MRU. It is the - responsibility of each protocol to distinguish padding octets from - real information. - - - - - - -Simpson [Page 5] - RFC 1661 Point-to-Point Protocol July 1994 - - -3. PPP Link Operation - -3.1. Overview - - In order to establish communications over a point-to-point link, each - end of the PPP link MUST first send LCP packets to configure and test - the data link. After the link has been established, the peer MAY be - authenticated. - - Then, PPP MUST send NCP packets to choose and configure one or more - network-layer protocols. Once each of the chosen network-layer - protocols has been configured, datagrams from each network-layer - protocol can be sent over the link. - - The link will remain configured for communications until explicit LCP - or NCP packets close the link down, or until some external event - occurs (an inactivity timer expires or network administrator - intervention). - - - -3.2. Phase Diagram - - In the process of configuring, maintaining and terminating the - point-to-point link, the PPP link goes through several distinct - phases which are specified in the following simplified state diagram: - - +------+ +-----------+ +--------------+ - | | UP | | OPENED | | SUCCESS/NONE - | Dead |------->| Establish |---------->| Authenticate |--+ - | | | | | | | - +------+ +-----------+ +--------------+ | - ^ | | | - | FAIL | FAIL | | - +<--------------+ +----------+ | - | | | - | +-----------+ | +---------+ | - | DOWN | | | CLOSING | | | - +------------| Terminate |<---+<----------| Network |<-+ - | | | | - +-----------+ +---------+ - - Not all transitions are specified in this diagram. The following - semantics MUST be followed. - - - - - - - -Simpson [Page 6] - RFC 1661 Point-to-Point Protocol July 1994 - - -3.3. 
Link Dead (physical-layer not ready) - - The link necessarily begins and ends with this phase. When an - external event (such as carrier detection or network administrator - configuration) indicates that the physical-layer is ready to be used, - PPP will proceed to the Link Establishment phase. - - During this phase, the LCP automaton (described later) will be in the - Initial or Starting states. The transition to the Link Establishment - phase will signal an Up event to the LCP automaton. - - Implementation Note: - - Typically, a link will return to this phase automatically after - the disconnection of a modem. In the case of a hard-wired link, - this phase may be extremely short -- merely long enough to detect - the presence of the device. - - - -3.4. Link Establishment Phase - - The Link Control Protocol (LCP) is used to establish the connection - through an exchange of Configure packets. This exchange is complete, - and the LCP Opened state entered, once a Configure-Ack packet - (described later) has been both sent and received. - - All Configuration Options are assumed to be at default values unless - altered by the configuration exchange. See the chapter on LCP - Configuration Options for further discussion. - - It is important to note that only Configuration Options which are - independent of particular network-layer protocols are configured by - LCP. Configuration of individual network-layer protocols is handled - by separate Network Control Protocols (NCPs) during the Network-Layer - Protocol phase. - - Any non-LCP packets received during this phase MUST be silently - discarded. - - The receipt of the LCP Configure-Request causes a return to the Link - Establishment phase from the Network-Layer Protocol phase or - Authentication phase. - - - - - - - - -Simpson [Page 7] - RFC 1661 Point-to-Point Protocol July 1994 - - -3.5. 
Authentication Phase - - On some links it may be desirable to require a peer to authenticate - itself before allowing network-layer protocol packets to be - exchanged. - - By default, authentication is not mandatory. If an implementation - desires that the peer authenticate with some specific authentication - protocol, then it MUST request the use of that authentication - protocol during Link Establishment phase. - - Authentication SHOULD take place as soon as possible after link - establishment. However, link quality determination MAY occur - concurrently. An implementation MUST NOT allow the exchange of link - quality determination packets to delay authentication indefinitely. - - Advancement from the Authentication phase to the Network-Layer - Protocol phase MUST NOT occur until authentication has completed. If - authentication fails, the authenticator SHOULD proceed instead to the - Link Termination phase. - - Only Link Control Protocol, authentication protocol, and link quality - monitoring packets are allowed during this phase. All other packets - received during this phase MUST be silently discarded. - - Implementation Notes: - - An implementation SHOULD NOT fail authentication simply due to - timeout or lack of response. The authentication SHOULD allow some - method of retransmission, and proceed to the Link Termination - phase only after a number of authentication attempts has been - exceeded. - - The implementation responsible for commencing Link Termination - phase is the implementation which has refused authentication to - its peer. - - - -3.6. Network-Layer Protocol Phase - - Once PPP has finished the previous phases, each network-layer - protocol (such as IP, IPX, or AppleTalk) MUST be separately - configured by the appropriate Network Control Protocol (NCP). - - Each NCP MAY be Opened and Closed at any time. 
- - - - - -Simpson [Page 8] - RFC 1661 Point-to-Point Protocol July 1994 - - - Implementation Note: - - Because an implementation may initially use a significant amount - of time for link quality determination, implementations SHOULD - avoid fixed timeouts when waiting for their peers to configure a - NCP. - - After a NCP has reached the Opened state, PPP will carry the - corresponding network-layer protocol packets. Any supported - network-layer protocol packets received when the corresponding NCP is - not in the Opened state MUST be silently discarded. - - Implementation Note: - - While LCP is in the Opened state, any protocol packet which is - unsupported by the implementation MUST be returned in a Protocol- - Reject (described later). Only protocols which are supported are - silently discarded. - - During this phase, link traffic consists of any possible combination - of LCP, NCP, and network-layer protocol packets. - - - -3.7. Link Termination Phase - - PPP can terminate the link at any time. This might happen because of - the loss of carrier, authentication failure, link quality failure, - the expiration of an idle-period timer, or the administrative closing - of the link. - - LCP is used to close the link through an exchange of Terminate - packets. When the link is closing, PPP informs the network-layer - protocols so that they may take appropriate action. - - After the exchange of Terminate packets, the implementation SHOULD - signal the physical-layer to disconnect in order to enforce the - termination of the link, particularly in the case of an - authentication failure. The sender of the Terminate-Request SHOULD - disconnect after receiving a Terminate-Ack, or after the Restart - counter expires. The receiver of a Terminate-Request SHOULD wait for - the peer to disconnect, and MUST NOT disconnect until at least one - Restart time has passed after sending a Terminate-Ack. PPP SHOULD - proceed to the Link Dead phase. 
- - Any non-LCP packets received during this phase MUST be silently - discarded. - - - - -Simpson [Page 9] - RFC 1661 Point-to-Point Protocol July 1994 - - - Implementation Note: - - The closing of the link by LCP is sufficient. There is no need - for each NCP to send a flurry of Terminate packets. Conversely, - the fact that one NCP has Closed is not sufficient reason to cause - the termination of the PPP link, even if that NCP was the only NCP - currently in the Opened state. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 10] - RFC 1661 Point-to-Point Protocol July 1994 - - -4. The Option Negotiation Automaton - - The finite-state automaton is defined by events, actions and state - transitions. Events include reception of external commands such as - Open and Close, expiration of the Restart timer, and reception of - packets from a peer. Actions include the starting of the Restart - timer and transmission of packets to the peer. - - Some types of packets -- Configure-Naks and Configure-Rejects, or - Code-Rejects and Protocol-Rejects, or Echo-Requests, Echo-Replies and - Discard-Requests -- are not differentiated in the automaton - descriptions. As will be described later, these packets do indeed - serve different functions. However, they always cause the same - transitions. 
- - Events Actions - - Up = lower layer is Up tlu = This-Layer-Up - Down = lower layer is Down tld = This-Layer-Down - Open = administrative Open tls = This-Layer-Started - Close= administrative Close tlf = This-Layer-Finished - - TO+ = Timeout with counter > 0 irc = Initialize-Restart-Count - TO- = Timeout with counter expired zrc = Zero-Restart-Count - - RCR+ = Receive-Configure-Request (Good) scr = Send-Configure-Request - RCR- = Receive-Configure-Request (Bad) - RCA = Receive-Configure-Ack sca = Send-Configure-Ack - RCN = Receive-Configure-Nak/Rej scn = Send-Configure-Nak/Rej - - RTR = Receive-Terminate-Request str = Send-Terminate-Request - RTA = Receive-Terminate-Ack sta = Send-Terminate-Ack - - RUC = Receive-Unknown-Code scj = Send-Code-Reject - RXJ+ = Receive-Code-Reject (permitted) - or Receive-Protocol-Reject - RXJ- = Receive-Code-Reject (catastrophic) - or Receive-Protocol-Reject - RXR = Receive-Echo-Request ser = Send-Echo-Reply - or Receive-Echo-Reply - or Receive-Discard-Request - - - - - - - - - - -Simpson [Page 11] - RFC 1661 Point-to-Point Protocol July 1994 - - -4.1. State Transition Table - - The complete state transition table follows. States are indicated - horizontally, and events are read vertically. State transitions and - actions are represented in the form action/new-state. Multiple - actions are separated by commas, and may continue on succeeding lines - as space requires; multiple actions may be implemented in any - convenient order. The state may be followed by a letter, which - indicates an explanatory footnote. The dash ('-') indicates an - illegal transition. 
- - | State - | 0 1 2 3 4 5 -Events| Initial Starting Closed Stopped Closing Stopping -------+----------------------------------------------------------- - Up | 2 irc,scr/6 - - - - - Down | - - 0 tls/1 0 1 - Open | tls/1 1 irc,scr/6 3r 5r 5r - Close| 0 tlf/0 2 2 4 4 - | - TO+ | - - - - str/4 str/5 - TO- | - - - - tlf/2 tlf/3 - | - RCR+ | - - sta/2 irc,scr,sca/8 4 5 - RCR- | - - sta/2 irc,scr,scn/6 4 5 - RCA | - - sta/2 sta/3 4 5 - RCN | - - sta/2 sta/3 4 5 - | - RTR | - - sta/2 sta/3 sta/4 sta/5 - RTA | - - 2 3 tlf/2 tlf/3 - | - RUC | - - scj/2 scj/3 scj/4 scj/5 - RXJ+ | - - 2 3 4 5 - RXJ- | - - tlf/2 tlf/3 tlf/2 tlf/3 - | - RXR | - - 2 3 4 5 - - - - - - - - - - - - - - - -Simpson [Page 12] - RFC 1661 Point-to-Point Protocol July 1994 - - - - | State - | 6 7 8 9 -Events| Req-Sent Ack-Rcvd Ack-Sent Opened -------+----------------------------------------- - Up | - - - - - Down | 1 1 1 tld/1 - Open | 6 7 8 9r - Close|irc,str/4 irc,str/4 irc,str/4 tld,irc,str/4 - | - TO+ | scr/6 scr/6 scr/8 - - TO- | tlf/3p tlf/3p tlf/3p - - | - RCR+ | sca/8 sca,tlu/9 sca/8 tld,scr,sca/8 - RCR- | scn/6 scn/7 scn/6 tld,scr,scn/6 - RCA | irc/7 scr/6x irc,tlu/9 tld,scr/6x - RCN |irc,scr/6 scr/6x irc,scr/8 tld,scr/6x - | - RTR | sta/6 sta/6 sta/6 tld,zrc,sta/5 - RTA | 6 6 8 tld,scr/6 - | - RUC | scj/6 scj/7 scj/8 scj/9 - RXJ+ | 6 6 8 9 - RXJ- | tlf/3 tlf/3 tlf/3 tld,irc,str/5 - | - RXR | 6 7 8 ser/9 - - - The states in which the Restart timer is running are identifiable by - the presence of TO events. Only the Send-Configure-Request, Send- - Terminate-Request and Zero-Restart-Count actions start or re-start - the Restart timer. The Restart timer is stopped when transitioning - from any state where the timer is running to a state where the timer - is not running. - - The events and actions are defined according to a message passing - architecture, rather than a signalling architecture. 
If an action is - desired to control specific signals (such as DTR), additional actions - are likely to be required. - - [p] Passive option; see Stopped state discussion. - - [r] Restart option; see Open event discussion. - - [x] Crossed connection; see RCA event discussion. - - - - - - -Simpson [Page 13] - RFC 1661 Point-to-Point Protocol July 1994 - - -4.2. States - - Following is a more detailed description of each automaton state. - - Initial - - In the Initial state, the lower layer is unavailable (Down), and - no Open has occurred. The Restart timer is not running in the - Initial state. - - Starting - - The Starting state is the Open counterpart to the Initial state. - An administrative Open has been initiated, but the lower layer is - still unavailable (Down). The Restart timer is not running in the - Starting state. - - When the lower layer becomes available (Up), a Configure-Request - is sent. - - Closed - - In the Closed state, the link is available (Up), but no Open has - occurred. The Restart timer is not running in the Closed state. - - Upon reception of Configure-Request packets, a Terminate-Ack is - sent. Terminate-Acks are silently discarded to avoid creating a - loop. - - Stopped - - The Stopped state is the Open counterpart to the Closed state. It - is entered when the automaton is waiting for a Down event after - the This-Layer-Finished action, or after sending a Terminate-Ack. - The Restart timer is not running in the Stopped state. - - Upon reception of Configure-Request packets, an appropriate - response is sent. Upon reception of other packets, a Terminate- - Ack is sent. Terminate-Acks are silently discarded to avoid - creating a loop. - - Rationale: - - The Stopped state is a junction state for link termination, - link configuration failure, and other automaton failure modes. - These potentially separate states have been combined. 
- - There is a race condition between the Down event response (from - - - -Simpson [Page 14] - RFC 1661 Point-to-Point Protocol July 1994 - - - the This-Layer-Finished action) and the Receive-Configure- - Request event. When a Configure-Request arrives before the - Down event, the Down event will supercede by returning the - automaton to the Starting state. This prevents attack by - repetition. - - Implementation Option: - - After the peer fails to respond to Configure-Requests, an - implementation MAY wait passively for the peer to send - Configure-Requests. In this case, the This-Layer-Finished - action is not used for the TO- event in states Req-Sent, Ack- - Rcvd and Ack-Sent. - - This option is useful for dedicated circuits, or circuits which - have no status signals available, but SHOULD NOT be used for - switched circuits. - - Closing - - In the Closing state, an attempt is made to terminate the - connection. A Terminate-Request has been sent and the Restart - timer is running, but a Terminate-Ack has not yet been received. - - Upon reception of a Terminate-Ack, the Closed state is entered. - Upon the expiration of the Restart timer, a new Terminate-Request - is transmitted, and the Restart timer is restarted. After the - Restart timer has expired Max-Terminate times, the Closed state is - entered. - - Stopping - - The Stopping state is the Open counterpart to the Closing state. - A Terminate-Request has been sent and the Restart timer is - running, but a Terminate-Ack has not yet been received. - - Rationale: - - The Stopping state provides a well defined opportunity to - terminate a link before allowing new traffic. After the link - has terminated, a new configuration may occur via the Stopped - or Starting states. - - Request-Sent - - In the Request-Sent state an attempt is made to configure the - connection. 
A Configure-Request has been sent and the Restart - timer is running, but a Configure-Ack has not yet been received - - - -Simpson [Page 15] - RFC 1661 Point-to-Point Protocol July 1994 - - - nor has one been sent. - - Ack-Received - - In the Ack-Received state, a Configure-Request has been sent and a - Configure-Ack has been received. The Restart timer is still - running, since a Configure-Ack has not yet been sent. - - Ack-Sent - - In the Ack-Sent state, a Configure-Request and a Configure-Ack - have both been sent, but a Configure-Ack has not yet been - received. The Restart timer is running, since a Configure-Ack has - not yet been received. - - Opened - - In the Opened state, a Configure-Ack has been both sent and - received. The Restart timer is not running. - - When entering the Opened state, the implementation SHOULD signal - the upper layers that it is now Up. Conversely, when leaving the - Opened state, the implementation SHOULD signal the upper layers - that it is now Down. - - - -4.3. Events - - Transitions and actions in the automaton are caused by events. - - Up - - This event occurs when a lower layer indicates that it is ready to - carry packets. - - Typically, this event is used by a modem handling or calling - process, or by some other coupling of the PPP link to the physical - media, to signal LCP that the link is entering Link Establishment - phase. - - It also can be used by LCP to signal each NCP that the link is - entering Network-Layer Protocol phase. That is, the This-Layer-Up - action from LCP triggers the Up event in the NCP. - - Down - - This event occurs when a lower layer indicates that it is no - - - -Simpson [Page 16] - RFC 1661 Point-to-Point Protocol July 1994 - - - longer ready to carry packets. - - Typically, this event is used by a modem handling or calling - process, or by some other coupling of the PPP link to the physical - media, to signal LCP that the link is entering Link Dead phase. 
- - It also can be used by LCP to signal each NCP that the link is - leaving Network-Layer Protocol phase. That is, the This-Layer- - Down action from LCP triggers the Down event in the NCP. - - Open - - This event indicates that the link is administratively available - for traffic; that is, the network administrator (human or program) - has indicated that the link is allowed to be Opened. When this - event occurs, and the link is not in the Opened state, the - automaton attempts to send configuration packets to the peer. - - If the automaton is not able to begin configuration (the lower - layer is Down, or a previous Close event has not completed), the - establishment of the link is automatically delayed. - - When a Terminate-Request is received, or other events occur which - cause the link to become unavailable, the automaton will progress - to a state where the link is ready to re-open. No additional - administrative intervention is necessary. - - Implementation Option: - - Experience has shown that users will execute an additional Open - command when they want to renegotiate the link. This might - indicate that new values are to be negotiated. - - Since this is not the meaning of the Open event, it is - suggested that when an Open user command is executed in the - Opened, Closing, Stopping, or Stopped states, the - implementation issue a Down event, immediately followed by an - Up event. Care must be taken that an intervening Down event - cannot occur from another source. - - The Down followed by an Up will cause an orderly renegotiation - of the link, by progressing through the Starting to the - Request-Sent state. This will cause the renegotiation of the - link, without any harmful side effects. - - Close - - This event indicates that the link is not available for traffic; - - - -Simpson [Page 17] - RFC 1661 Point-to-Point Protocol July 1994 - - - that is, the network administrator (human or program) has - indicated that the link is not allowed to be Opened. 
When this - event occurs, and the link is not in the Closed state, the - automaton attempts to terminate the connection. Futher attempts - to re-configure the link are denied until a new Open event occurs. - - Implementation Note: - - When authentication fails, the link SHOULD be terminated, to - prevent attack by repetition and denial of service to other - users. Since the link is administratively available (by - definition), this can be accomplished by simulating a Close - event to the LCP, immediately followed by an Open event. Care - must be taken that an intervening Close event cannot occur from - another source. - - The Close followed by an Open will cause an orderly termination - of the link, by progressing through the Closing to the Stopping - state, and the This-Layer-Finished action can disconnect the - link. The automaton waits in the Stopped or Starting states - for the next connection attempt. - - Timeout (TO+,TO-) - - This event indicates the expiration of the Restart timer. The - Restart timer is used to time responses to Configure-Request and - Terminate-Request packets. - - The TO+ event indicates that the Restart counter continues to be - greater than zero, which triggers the corresponding Configure- - Request or Terminate-Request packet to be retransmitted. - - The TO- event indicates that the Restart counter is not greater - than zero, and no more packets need to be retransmitted. - - Receive-Configure-Request (RCR+,RCR-) - - This event occurs when a Configure-Request packet is received from - the peer. The Configure-Request packet indicates the desire to - open a connection and may specify Configuration Options. The - Configure-Request packet is more fully described in a later - section. - - The RCR+ event indicates that the Configure-Request was - acceptable, and triggers the transmission of a corresponding - Configure-Ack. 
- - The RCR- event indicates that the Configure-Request was - - - -Simpson [Page 18] - RFC 1661 Point-to-Point Protocol July 1994 - - - unacceptable, and triggers the transmission of a corresponding - Configure-Nak or Configure-Reject. - - Implementation Note: - - These events may occur on a connection which is already in the - Opened state. The implementation MUST be prepared to - immediately renegotiate the Configuration Options. - - Receive-Configure-Ack (RCA) - - This event occurs when a valid Configure-Ack packet is received - from the peer. The Configure-Ack packet is a positive response to - a Configure-Request packet. An out of sequence or otherwise - invalid packet is silently discarded. - - Implementation Note: - - Since the correct packet has already been received before - reaching the Ack-Rcvd or Opened states, it is extremely - unlikely that another such packet will arrive. As specified, - all invalid Ack/Nak/Rej packets are silently discarded, and do - not affect the transitions of the automaton. - - However, it is not impossible that a correctly formed packet - will arrive through a coincidentally-timed cross-connection. - It is more likely to be the result of an implementation error. - At the very least, this occurance SHOULD be logged. - - Receive-Configure-Nak/Rej (RCN) - - This event occurs when a valid Configure-Nak or Configure-Reject - packet is received from the peer. The Configure-Nak and - Configure-Reject packets are negative responses to a Configure- - Request packet. An out of sequence or otherwise invalid packet is - silently discarded. - - Implementation Note: - - Although the Configure-Nak and Configure-Reject cause the same - state transition in the automaton, these packets have - significantly different effects on the Configuration Options - sent in the resulting Configure-Request packet. - - Receive-Terminate-Request (RTR) - - This event occurs when a Terminate-Request packet is received. 
- The Terminate-Request packet indicates the desire of the peer to - - - -Simpson [Page 19] - RFC 1661 Point-to-Point Protocol July 1994 - - - close the connection. - - Implementation Note: - - This event is not identical to the Close event (see above), and - does not override the Open commands of the local network - administrator. The implementation MUST be prepared to receive - a new Configure-Request without network administrator - intervention. - - Receive-Terminate-Ack (RTA) - - This event occurs when a Terminate-Ack packet is received from the - peer. The Terminate-Ack packet is usually a response to a - Terminate-Request packet. The Terminate-Ack packet may also - indicate that the peer is in Closed or Stopped states, and serves - to re-synchronize the link configuration. - - Receive-Unknown-Code (RUC) - - This event occurs when an un-interpretable packet is received from - the peer. A Code-Reject packet is sent in response. - - Receive-Code-Reject, Receive-Protocol-Reject (RXJ+,RXJ-) - - This event occurs when a Code-Reject or a Protocol-Reject packet - is received from the peer. - - The RXJ+ event arises when the rejected value is acceptable, such - as a Code-Reject of an extended code, or a Protocol-Reject of a - NCP. These are within the scope of normal operation. The - implementation MUST stop sending the offending packet type. - - The RXJ- event arises when the rejected value is catastrophic, - such as a Code-Reject of Configure-Request, or a Protocol-Reject - of LCP! This event communicates an unrecoverable error that - terminates the connection. - - Receive-Echo-Request, Receive-Echo-Reply, Receive-Discard-Request - (RXR) - - This event occurs when an Echo-Request, Echo-Reply or Discard- - Request packet is received from the peer. The Echo-Reply packet - is a response to an Echo-Request packet. There is no reply to an - Echo-Reply or Discard-Request packet. - - - - - - -Simpson [Page 20] - RFC 1661 Point-to-Point Protocol July 1994 - - -4.4. 
Actions - - Actions in the automaton are caused by events and typically indicate - the transmission of packets and/or the starting or stopping of the - Restart timer. - - Illegal-Event (-) - - This indicates an event that cannot occur in a properly - implemented automaton. The implementation has an internal error, - which should be reported and logged. No transition is taken, and - the implementation SHOULD NOT reset or freeze. - - This-Layer-Up (tlu) - - This action indicates to the upper layers that the automaton is - entering the Opened state. - - Typically, this action is used by the LCP to signal the Up event - to a NCP, Authentication Protocol, or Link Quality Protocol, or - MAY be used by a NCP to indicate that the link is available for - its network layer traffic. - - This-Layer-Down (tld) - - This action indicates to the upper layers that the automaton is - leaving the Opened state. - - Typically, this action is used by the LCP to signal the Down event - to a NCP, Authentication Protocol, or Link Quality Protocol, or - MAY be used by a NCP to indicate that the link is no longer - available for its network layer traffic. - - This-Layer-Started (tls) - - This action indicates to the lower layers that the automaton is - entering the Starting state, and the lower layer is needed for the - link. The lower layer SHOULD respond with an Up event when the - lower layer is available. - - This results of this action are highly implementation dependent. - - This-Layer-Finished (tlf) - - This action indicates to the lower layers that the automaton is - entering the Initial, Closed or Stopped states, and the lower - layer is no longer needed for the link. The lower layer SHOULD - respond with a Down event when the lower layer has terminated. 
- - - -Simpson [Page 21] - RFC 1661 Point-to-Point Protocol July 1994 - - - Typically, this action MAY be used by the LCP to advance to the - Link Dead phase, or MAY be used by a NCP to indicate to the LCP - that the link may terminate when there are no other NCPs open. - - This results of this action are highly implementation dependent. - - Initialize-Restart-Count (irc) - - This action sets the Restart counter to the appropriate value - (Max-Terminate or Max-Configure). The counter is decremented for - each transmission, including the first. - - Implementation Note: - - In addition to setting the Restart counter, the implementation - MUST set the timeout period to the initial value when Restart - timer backoff is used. - - Zero-Restart-Count (zrc) - - This action sets the Restart counter to zero. - - Implementation Note: - - This action enables the FSA to pause before proceeding to the - desired final state, allowing traffic to be processed by the - peer. In addition to zeroing the Restart counter, the - implementation MUST set the timeout period to an appropriate - value. - - Send-Configure-Request (scr) - - A Configure-Request packet is transmitted. This indicates the - desire to open a connection with a specified set of Configuration - Options. The Restart timer is started when the Configure-Request - packet is transmitted, to guard against packet loss. The Restart - counter is decremented each time a Configure-Request is sent. - - Send-Configure-Ack (sca) - - A Configure-Ack packet is transmitted. This acknowledges the - reception of a Configure-Request packet with an acceptable set of - Configuration Options. - - Send-Configure-Nak (scn) - - A Configure-Nak or Configure-Reject packet is transmitted, as - appropriate. This negative response reports the reception of a - - - -Simpson [Page 22] - RFC 1661 Point-to-Point Protocol July 1994 - - - Configure-Request packet with an unacceptable set of Configuration - Options. 
- - Configure-Nak packets are used to refuse a Configuration Option - value, and to suggest a new, acceptable value. Configure-Reject - packets are used to refuse all negotiation about a Configuration - Option, typically because it is not recognized or implemented. - The use of Configure-Nak versus Configure-Reject is more fully - described in the chapter on LCP Packet Formats. - - Send-Terminate-Request (str) - - A Terminate-Request packet is transmitted. This indicates the - desire to close a connection. The Restart timer is started when - the Terminate-Request packet is transmitted, to guard against - packet loss. The Restart counter is decremented each time a - Terminate-Request is sent. - - Send-Terminate-Ack (sta) - - A Terminate-Ack packet is transmitted. This acknowledges the - reception of a Terminate-Request packet or otherwise serves to - synchronize the automatons. - - Send-Code-Reject (scj) - - A Code-Reject packet is transmitted. This indicates the reception - of an unknown type of packet. - - Send-Echo-Reply (ser) - - An Echo-Reply packet is transmitted. This acknowledges the - reception of an Echo-Request packet. - - - -4.5. Loop Avoidance - - The protocol makes a reasonable attempt at avoiding Configuration - Option negotiation loops. However, the protocol does NOT guarantee - that loops will not happen. As with any negotiation, it is possible - to configure two PPP implementations with conflicting policies that - will never converge. It is also possible to configure policies which - do converge, but which take significant time to do so. Implementors - should keep this in mind and SHOULD implement loop detection - mechanisms or higher level timeouts. - - - - - -Simpson [Page 23] - RFC 1661 Point-to-Point Protocol July 1994 - - -4.6. Counters and Timers - - Restart Timer - - There is one special timer used by the automaton. The Restart - timer is used to time transmissions of Configure-Request and - Terminate-Request packets. 
Expiration of the Restart timer causes - a Timeout event, and retransmission of the corresponding - Configure-Request or Terminate-Request packet. The Restart timer - MUST be configurable, but SHOULD default to three (3) seconds. - - Implementation Note: - - The Restart timer SHOULD be based on the speed of the link. - The default value is designed for low speed (2,400 to 9,600 - bps), high switching latency links (typical telephone lines). - Higher speed links, or links with low switching latency, SHOULD - have correspondingly faster retransmission times. - - Instead of a constant value, the Restart timer MAY begin at an - initial small value and increase to the configured final value. - Each successive value less than the final value SHOULD be at - least twice the previous value. The initial value SHOULD be - large enough to account for the size of the packets, twice the - round trip time for transmission at the link speed, and at - least an additional 100 milliseconds to allow the peer to - process the packets before responding. Some circuits add - another 200 milliseconds of satellite delay. Round trip times - for modems operating at 14,400 bps have been measured in the - range of 160 to more than 600 milliseconds. - - Max-Terminate - - There is one required restart counter for Terminate-Requests. - Max-Terminate indicates the number of Terminate-Request packets - sent without receiving a Terminate-Ack before assuming that the - peer is unable to respond. Max-Terminate MUST be configurable, - but SHOULD default to two (2) transmissions. - - Max-Configure - - A similar counter is recommended for Configure-Requests. Max- - Configure indicates the number of Configure-Request packets sent - without receiving a valid Configure-Ack, Configure-Nak or - Configure-Reject before assuming that the peer is unable to - respond. Max-Configure MUST be configurable, but SHOULD default - to ten (10) transmissions. 
- - - - -Simpson [Page 24] - RFC 1661 Point-to-Point Protocol July 1994 - - - Max-Failure - - A related counter is recommended for Configure-Nak. Max-Failure - indicates the number of Configure-Nak packets sent without sending - a Configure-Ack before assuming that configuration is not - converging. Any further Configure-Nak packets for peer requested - options are converted to Configure-Reject packets, and locally - desired options are no longer appended. Max-Failure MUST be - configurable, but SHOULD default to five (5) transmissions. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 25] - RFC 1661 Point-to-Point Protocol July 1994 - - -5. LCP Packet Formats - - There are three classes of LCP packets: - - 1. Link Configuration packets used to establish and configure a - link (Configure-Request, Configure-Ack, Configure-Nak and - Configure-Reject). - - 2. Link Termination packets used to terminate a link (Terminate- - Request and Terminate-Ack). - - 3. Link Maintenance packets used to manage and debug a link - (Code-Reject, Protocol-Reject, Echo-Request, Echo-Reply, and - Discard-Request). - - In the interest of simplicity, there is no version field in the LCP - packet. A correctly functioning LCP implementation will always - respond to unknown Protocols and Codes with an easily recognizable - LCP packet, thus providing a deterministic fallback mechanism for - implementations of other versions. - - Regardless of which Configuration Options are enabled, all LCP Link - Configuration, Link Termination, and Code-Reject packets (codes 1 - through 7) are always sent as if no Configuration Options were - negotiated. In particular, each Configuration Option specifies a - default value. This ensures that such LCP packets are always - recognizable, even when one end of the link mistakenly believes the - link to be open. 
- - Exactly one LCP packet is encapsulated in the PPP Information field, - where the PPP Protocol field indicates type hex c021 (Link Control - Protocol). - - A summary of the Link Control Protocol packet format is shown below. - The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - - Code - - The Code field is one octet, and identifies the kind of LCP - - - -Simpson [Page 26] - RFC 1661 Point-to-Point Protocol July 1994 - - - packet. When a packet is received with an unknown Code field, a - Code-Reject packet is transmitted. - - Up-to-date values of the LCP Code field are specified in the most - recent "Assigned Numbers" RFC [2]. This document concerns the - following values: - - 1 Configure-Request - 2 Configure-Ack - 3 Configure-Nak - 4 Configure-Reject - 5 Terminate-Request - 6 Terminate-Ack - 7 Code-Reject - 8 Protocol-Reject - 9 Echo-Request - 10 Echo-Reply - 11 Discard-Request - - - Identifier - - The Identifier field is one octet, and aids in matching requests - and replies. When a packet is received with an invalid Identifier - field, the packet is silently discarded without affecting the - automaton. - - Length - - The Length field is two octets, and indicates the length of the - LCP packet, including the Code, Identifier, Length and Data - fields. The Length MUST NOT exceed the MRU of the link. - - Octets outside the range of the Length field are treated as - padding and are ignored on reception. When a packet is received - with an invalid Length field, the packet is silently discarded - without affecting the automaton. - - Data - - The Data field is zero or more octets, as indicated by the Length - field. The format of the Data field is determined by the Code - field. 
- - - - - - - - -Simpson [Page 27] - RFC 1661 Point-to-Point Protocol July 1994 - - -5.1. Configure-Request - - Description - - An implementation wishing to open a connection MUST transmit a - Configure-Request. The Options field is filled with any desired - changes to the link defaults. Configuration Options SHOULD NOT be - included with default values. - - Upon reception of a Configure-Request, an appropriate reply MUST - be transmitted. - - A summary of the Configure-Request packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Options ... - +-+-+-+-+ - - - Code - - 1 for Configure-Request. - - Identifier - - The Identifier field MUST be changed whenever the contents of the - Options field changes, and whenever a valid reply has been - received for a previous request. For retransmissions, the - Identifier MAY remain unchanged. - - Options - - The options field is variable in length, and contains the list of - zero or more Configuration Options that the sender desires to - negotiate. All Configuration Options are always negotiated - simultaneously. The format of Configuration Options is further - described in a later chapter. - - - - - - - - - -Simpson [Page 28] - RFC 1661 Point-to-Point Protocol July 1994 - - -5.2. Configure-Ack - - Description - - If every Configuration Option received in a Configure-Request is - recognizable and all values are acceptable, then the - implementation MUST transmit a Configure-Ack. The acknowledged - Configuration Options MUST NOT be reordered or modified in any - way. - - On reception of a Configure-Ack, the Identifier field MUST match - that of the last transmitted Configure-Request. 
Additionally, the - Configuration Options in a Configure-Ack MUST exactly match those - of the last transmitted Configure-Request. Invalid packets are - silently discarded. - - A summary of the Configure-Ack packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Options ... - +-+-+-+-+ - - - Code - - 2 for Configure-Ack. - - Identifier - - The Identifier field is a copy of the Identifier field of the - Configure-Request which caused this Configure-Ack. - - Options - - The Options field is variable in length, and contains the list of - zero or more Configuration Options that the sender is - acknowledging. All Configuration Options are always acknowledged - simultaneously. - - - - - - - - -Simpson [Page 29] - RFC 1661 Point-to-Point Protocol July 1994 - - -5.3. Configure-Nak - - Description - - If every instance of the received Configuration Options is - recognizable, but some values are not acceptable, then the - implementation MUST transmit a Configure-Nak. The Options field - is filled with only the unacceptable Configuration Options from - the Configure-Request. All acceptable Configuration Options are - filtered out of the Configure-Nak, but otherwise the Configuration - Options from the Configure-Request MUST NOT be reordered. - - Options which have no value fields (boolean options) MUST use the - Configure-Reject reply instead. - - Each Configuration Option which is allowed only a single instance - MUST be modified to a value acceptable to the Configure-Nak - sender. The default value MAY be used, when this differs from the - requested value. 
- - When a particular type of Configuration Option can be listed more - than once with different values, the Configure-Nak MUST include a - list of all values for that option which are acceptable to the - Configure-Nak sender. This includes acceptable values that were - present in the Configure-Request. - - Finally, an implementation may be configured to request the - negotiation of a specific Configuration Option. If that option is - not listed, then that option MAY be appended to the list of Nak'd - Configuration Options, in order to prompt the peer to include that - option in its next Configure-Request packet. Any value fields for - the option MUST indicate values acceptable to the Configure-Nak - sender. - - On reception of a Configure-Nak, the Identifier field MUST match - that of the last transmitted Configure-Request. Invalid packets - are silently discarded. - - Reception of a valid Configure-Nak indicates that when a new - Configure-Request is sent, the Configuration Options MAY be - modified as specified in the Configure-Nak. When multiple - instances of a Configuration Option are present, the peer SHOULD - select a single value to include in its next Configure-Request - packet. - - Some Configuration Options have a variable length. Since the - Nak'd Option has been modified by the peer, the implementation - MUST be able to handle an Option length which is different from - - - -Simpson [Page 30] - RFC 1661 Point-to-Point Protocol July 1994 - - - the original Configure-Request. - - A summary of the Configure-Nak packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Options ... - +-+-+-+-+ - - - Code - - 3 for Configure-Nak. 
- - Identifier - - The Identifier field is a copy of the Identifier field of the - Configure-Request which caused this Configure-Nak. - - Options - - The Options field is variable in length, and contains the list of - zero or more Configuration Options that the sender is Nak'ing. - All Configuration Options are always Nak'd simultaneously. - - - -5.4. Configure-Reject - - Description - - If some Configuration Options received in a Configure-Request are - not recognizable or are not acceptable for negotiation (as - configured by a network administrator), then the implementation - MUST transmit a Configure-Reject. The Options field is filled - with only the unacceptable Configuration Options from the - Configure-Request. All recognizable and negotiable Configuration - Options are filtered out of the Configure-Reject, but otherwise - the Configuration Options MUST NOT be reordered or modified in any - way. - - On reception of a Configure-Reject, the Identifier field MUST - match that of the last transmitted Configure-Request. - Additionally, the Configuration Options in a Configure-Reject MUST - - - -Simpson [Page 31] - RFC 1661 Point-to-Point Protocol July 1994 - - - be a proper subset of those in the last transmitted Configure- - Request. Invalid packets are silently discarded. - - Reception of a valid Configure-Reject indicates that when a new - Configure-Request is sent, it MUST NOT include any of the - Configuration Options listed in the Configure-Reject. - - A summary of the Configure-Reject packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Options ... - +-+-+-+-+ - - - Code - - 4 for Configure-Reject. 
- - Identifier - - The Identifier field is a copy of the Identifier field of the - Configure-Request which caused this Configure-Reject. - - Options - - The Options field is variable in length, and contains the list of - zero or more Configuration Options that the sender is rejecting. - All Configuration Options are always rejected simultaneously. - - - - - - - - - - - - - - - - - - -Simpson [Page 32] - RFC 1661 Point-to-Point Protocol July 1994 - - -5.5. Terminate-Request and Terminate-Ack - - Description - - LCP includes Terminate-Request and Terminate-Ack Codes in order to - provide a mechanism for closing a connection. - - An implementation wishing to close a connection SHOULD transmit a - Terminate-Request. Terminate-Request packets SHOULD continue to - be sent until Terminate-Ack is received, the lower layer indicates - that it has gone down, or a sufficiently large number have been - transmitted such that the peer is down with reasonable certainty. - - Upon reception of a Terminate-Request, a Terminate-Ack MUST be - transmitted. - - Reception of an unelicited Terminate-Ack indicates that the peer - is in the Closed or Stopped states, or is otherwise in need of - re-negotiation. - - A summary of the Terminate-Request and Terminate-Ack packet formats - is shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - - Code - - 5 for Terminate-Request; - - 6 for Terminate-Ack. - - Identifier - - On transmission, the Identifier field MUST be changed whenever the - content of the Data field changes, and whenever a valid reply has - been received for a previous request. For retransmissions, the - Identifier MAY remain unchanged. 
- - On reception, the Identifier field of the Terminate-Request is - copied into the Identifier field of the Terminate-Ack packet. - - - - -Simpson [Page 33] - RFC 1661 Point-to-Point Protocol July 1994 - - - Data - - The Data field is zero or more octets, and contains uninterpreted - data for use by the sender. The data may consist of any binary - value. The end of the field is indicated by the Length. - - - -5.6. Code-Reject - - Description - - Reception of a LCP packet with an unknown Code indicates that the - peer is operating with a different version. This MUST be reported - back to the sender of the unknown Code by transmitting a Code- - Reject. - - Upon reception of the Code-Reject of a code which is fundamental - to this version of the protocol, the implementation SHOULD report - the problem and drop the connection, since it is unlikely that the - situation can be rectified automatically. - - A summary of the Code-Reject packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Rejected-Packet ... - +-+-+-+-+-+-+-+-+ - - - Code - - 7 for Code-Reject. - - Identifier - - The Identifier field MUST be changed for each Code-Reject sent. - - Rejected-Packet - - The Rejected-Packet field contains a copy of the LCP packet which - is being rejected. It begins with the Information field, and does - not include any Data Link Layer headers nor an FCS. The - Rejected-Packet MUST be truncated to comply with the peer's - - - -Simpson [Page 34] - RFC 1661 Point-to-Point Protocol July 1994 - - - established MRU. - - - -5.7. Protocol-Reject - - Description - - Reception of a PPP packet with an unknown Protocol field indicates - that the peer is attempting to use a protocol which is - unsupported. 
This usually occurs when the peer attempts to - configure a new protocol. If the LCP automaton is in the Opened - state, then this MUST be reported back to the peer by transmitting - a Protocol-Reject. - - Upon reception of a Protocol-Reject, the implementation MUST stop - sending packets of the indicated protocol at the earliest - opportunity. - - Protocol-Reject packets can only be sent in the LCP Opened state. - Protocol-Reject packets received in any state other than the LCP - Opened state SHOULD be silently discarded. - - A summary of the Protocol-Reject packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Rejected-Protocol | Rejected-Information ... - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Code - - 8 for Protocol-Reject. - - Identifier - - The Identifier field MUST be changed for each Protocol-Reject - sent. - - Rejected-Protocol - - The Rejected-Protocol field is two octets, and contains the PPP - Protocol field of the packet which is being rejected. - - - -Simpson [Page 35] - RFC 1661 Point-to-Point Protocol July 1994 - - - Rejected-Information - - The Rejected-Information field contains a copy of the packet which - is being rejected. It begins with the Information field, and does - not include any Data Link Layer headers nor an FCS. The - Rejected-Information MUST be truncated to comply with the peer's - established MRU. - - - -5.8. Echo-Request and Echo-Reply - - Description - - LCP includes Echo-Request and Echo-Reply Codes in order to provide - a Data Link Layer loopback mechanism for use in exercising both - directions of the link. 
This is useful as an aid in debugging, - link quality determination, performance testing, and for numerous - other functions. - - Upon reception of an Echo-Request in the LCP Opened state, an - Echo-Reply MUST be transmitted. - - Echo-Request and Echo-Reply packets MUST only be sent in the LCP - Opened state. Echo-Request and Echo-Reply packets received in any - state other than the LCP Opened state SHOULD be silently - discarded. - - - A summary of the Echo-Request and Echo-Reply packet formats is shown - below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Magic-Number | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - - Code - - 9 for Echo-Request; - - 10 for Echo-Reply. - - - -Simpson [Page 36] - RFC 1661 Point-to-Point Protocol July 1994 - - - Identifier - - On transmission, the Identifier field MUST be changed whenever the - content of the Data field changes, and whenever a valid reply has - been received for a previous request. For retransmissions, the - Identifier MAY remain unchanged. - - On reception, the Identifier field of the Echo-Request is copied - into the Identifier field of the Echo-Reply packet. - - Magic-Number - - The Magic-Number field is four octets, and aids in detecting links - which are in the looped-back condition. Until the Magic-Number - Configuration Option has been successfully negotiated, the Magic- - Number MUST be transmitted as zero. See the Magic-Number - Configuration Option for further explanation. - - Data - - The Data field is zero or more octets, and contains uninterpreted - data for use by the sender. The data may consist of any binary - value. The end of the field is indicated by the Length. - - - -5.9. 
Discard-Request - - Description - - LCP includes a Discard-Request Code in order to provide a Data - Link Layer sink mechanism for use in exercising the local to - remote direction of the link. This is useful as an aid in - debugging, performance testing, and for numerous other functions. - - Discard-Request packets MUST only be sent in the LCP Opened state. - On reception, the receiver MUST silently discard any Discard- - Request that it receives. - - - - - - - - - - - - - -Simpson [Page 37] - RFC 1661 Point-to-Point Protocol July 1994 - - - A summary of the Discard-Request packet format is shown below. The - fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Magic-Number | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - Code - - 11 for Discard-Request. - - Identifier - - The Identifier field MUST be changed for each Discard-Request - sent. - - Magic-Number - - The Magic-Number field is four octets, and aids in detecting links - which are in the looped-back condition. Until the Magic-Number - Configuration Option has been successfully negotiated, the Magic- - Number MUST be transmitted as zero. See the Magic-Number - Configuration Option for further explanation. - - Data - - The Data field is zero or more octets, and contains uninterpreted - data for use by the sender. The data may consist of any binary - value. The end of the field is indicated by the Length. - - - - - - - - - - - - - - - - -Simpson [Page 38] - RFC 1661 Point-to-Point Protocol July 1994 - - -6. LCP Configuration Options - - LCP Configuration Options allow negotiation of modifications to the - default characteristics of a point-to-point link. 
If a Configuration - Option is not included in a Configure-Request packet, the default - value for that Configuration Option is assumed. - - Some Configuration Options MAY be listed more than once. The effect - of this is Configuration Option specific, and is specified by each - such Configuration Option description. (None of the Configuration - Options in this specification can be listed more than once.) - - The end of the list of Configuration Options is indicated by the - Length field of the LCP packet. - - Unless otherwise specified, all Configuration Options apply in a - half-duplex fashion; typically, in the receive direction of the link - from the point of view of the Configure-Request sender. - - Design Philosophy - - The options indicate additional capabilities or requirements of - the implementation that is requesting the option. An - implementation which does not understand any option SHOULD - interoperate with one which implements every option. - - A default is specified for each option which allows the link to - correctly function without negotiation of the option, although - perhaps with less than optimal performance. - - Except where explicitly specified, acknowledgement of an option - does not require the peer to take any additional action other than - the default. - - It is not necessary to send the default values for the options in - a Configure-Request. - - - A summary of the Configuration Option format is shown below. The - fields are transmitted from left to right. - - 0 1 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Data ... - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - - - -Simpson [Page 39] - RFC 1661 Point-to-Point Protocol July 1994 - - - Type - - The Type field is one octet, and indicates the type of - Configuration Option. Up-to-date values of the LCP Option Type - field are specified in the most recent "Assigned Numbers" RFC [2]. 
- This document concerns the following values: - - 0 RESERVED - 1 Maximum-Receive-Unit - 3 Authentication-Protocol - 4 Quality-Protocol - 5 Magic-Number - 7 Protocol-Field-Compression - 8 Address-and-Control-Field-Compression - - - Length - - The Length field is one octet, and indicates the length of this - Configuration Option including the Type, Length and Data fields. - - If a negotiable Configuration Option is received in a Configure- - Request, but with an invalid or unrecognized Length, a Configure- - Nak SHOULD be transmitted which includes the desired Configuration - Option with an appropriate Length and Data. - - Data - - The Data field is zero or more octets, and contains information - specific to the Configuration Option. The format and length of - the Data field is determined by the Type and Length fields. - - When the Data field is indicated by the Length to extend beyond - the end of the Information field, the entire packet is silently - discarded without affecting the automaton. - - - - - - - - - - - - - - - - -Simpson [Page 40] - RFC 1661 Point-to-Point Protocol July 1994 - - -6.1. Maximum-Receive-Unit (MRU) - - Description - - This Configuration Option may be sent to inform the peer that the - implementation can receive larger packets, or to request that the - peer send smaller packets. - - The default value is 1500 octets. If smaller packets are - requested, an implementation MUST still be able to receive the - full 1500 octet information field in case link synchronization is - lost. - - Implementation Note: - - This option is used to indicate an implementation capability. - The peer is not required to maximize the use of the capacity. - For example, when a MRU is indicated which is 2048 octets, the - peer is not required to send any packet with 2048 octets. The - peer need not Configure-Nak to indicate that it will only send - smaller packets, since the implementation will always require - support for at least 1500 octets. 
- - A summary of the Maximum-Receive-Unit Configuration Option format is - shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Maximum-Receive-Unit | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Type - - 1 - - Length - - 4 - - Maximum-Receive-Unit - - The Maximum-Receive-Unit field is two octets, and specifies the - maximum number of octets in the Information and Padding fields. - It does not include the framing, Protocol field, FCS, nor any - transparency bits or bytes. - - - - -Simpson [Page 41] - RFC 1661 Point-to-Point Protocol July 1994 - - -6.2. Authentication-Protocol - - Description - - On some links it may be desirable to require a peer to - authenticate itself before allowing network-layer protocol packets - to be exchanged. - - This Configuration Option provides a method to negotiate the use - of a specific protocol for authentication. By default, - authentication is not required. - - An implementation MUST NOT include multiple Authentication- - Protocol Configuration Options in its Configure-Request packets. - Instead, it SHOULD attempt to configure the most desirable - protocol first. If that protocol is Configure-Nak'd, then the - implementation SHOULD attempt the next most desirable protocol in - the next Configure-Request. - - The implementation sending the Configure-Request is indicating - that it expects authentication from its peer. If an - implementation sends a Configure-Ack, then it is agreeing to - authenticate with the specified protocol. An implementation - receiving a Configure-Ack SHOULD expect the peer to authenticate - with the acknowledged protocol. - - There is no requirement that authentication be full-duplex or that - the same protocol be used in both directions. 
It is perfectly - acceptable for different protocols to be used in each direction. - This will, of course, depend on the specific protocols negotiated. - - A summary of the Authentication-Protocol Configuration Option format - is shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Authentication-Protocol | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - - Type - - 3 - - - - - -Simpson [Page 42] - RFC 1661 Point-to-Point Protocol July 1994 - - - Length - - >= 4 - - Authentication-Protocol - - The Authentication-Protocol field is two octets, and indicates the - authentication protocol desired. Values for this field are always - the same as the PPP Protocol field values for that same - authentication protocol. - - Up-to-date values of the Authentication-Protocol field are - specified in the most recent "Assigned Numbers" RFC [2]. Current - values are assigned as follows: - - Value (in hex) Protocol - - c023 Password Authentication Protocol - c223 Challenge Handshake Authentication Protocol - - - Data - - The Data field is zero or more octets, and contains additional - data as determined by the particular protocol. - - - -6.3. Quality-Protocol - - Description - - On some links it may be desirable to determine when, and how - often, the link is dropping data. This process is called link - quality monitoring. - - This Configuration Option provides a method to negotiate the use - of a specific protocol for link quality monitoring. By default, - link quality monitoring is disabled. - - The implementation sending the Configure-Request is indicating - that it expects to receive monitoring information from its peer. - If an implementation sends a Configure-Ack, then it is agreeing to - send the specified protocol. 
An implementation receiving a - Configure-Ack SHOULD expect the peer to send the acknowledged - protocol. - - There is no requirement that quality monitoring be full-duplex or - - - -Simpson [Page 43] - RFC 1661 Point-to-Point Protocol July 1994 - - - that the same protocol be used in both directions. It is - perfectly acceptable for different protocols to be used in each - direction. This will, of course, depend on the specific protocols - negotiated. - - A summary of the Quality-Protocol Configuration Option format is - shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Quality-Protocol | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - - Type - - 4 - - Length - - >= 4 - - Quality-Protocol - - The Quality-Protocol field is two octets, and indicates the link - quality monitoring protocol desired. Values for this field are - always the same as the PPP Protocol field values for that same - monitoring protocol. - - Up-to-date values of the Quality-Protocol field are specified in - the most recent "Assigned Numbers" RFC [2]. Current values are - assigned as follows: - - Value (in hex) Protocol - - c025 Link Quality Report - - - Data - - The Data field is zero or more octets, and contains additional - data as determined by the particular protocol. - - - - - - -Simpson [Page 44] - RFC 1661 Point-to-Point Protocol July 1994 - - -6.4. Magic-Number - - Description - - This Configuration Option provides a method to detect looped-back - links and other Data Link Layer anomalies. This Configuration - Option MAY be required by some other Configuration Options such as - the Quality-Protocol Configuration Option. By default, the - Magic-Number is not negotiated, and zero is inserted where a - Magic-Number might otherwise be used. 
- - Before this Configuration Option is requested, an implementation - MUST choose its Magic-Number. It is recommended that the Magic- - Number be chosen in the most random manner possible in order to - guarantee with very high probability that an implementation will - arrive at a unique number. A good way to choose a unique random - number is to start with a unique seed. Suggested sources of - uniqueness include machine serial numbers, other network hardware - addresses, time-of-day clocks, etc. Particularly good random - number seeds are precise measurements of the inter-arrival time of - physical events such as packet reception on other connected - networks, server response time, or the typing rate of a human - user. It is also suggested that as many sources as possible be - used simultaneously. - - When a Configure-Request is received with a Magic-Number - Configuration Option, the received Magic-Number is compared with - the Magic-Number of the last Configure-Request sent to the peer. - If the two Magic-Numbers are different, then the link is not - looped-back, and the Magic-Number SHOULD be acknowledged. If the - two Magic-Numbers are equal, then it is possible, but not certain, - that the link is looped-back and that this Configure-Request is - actually the one last sent. To determine this, a Configure-Nak - MUST be sent specifying a different Magic-Number value. A new - Configure-Request SHOULD NOT be sent to the peer until normal - processing would cause it to be sent (that is, until a Configure- - Nak is received or the Restart timer runs out). - - Reception of a Configure-Nak with a Magic-Number different from - that of the last Configure-Nak sent to the peer proves that a link - is not looped-back, and indicates a unique Magic-Number. If the - Magic-Number is equal to the one sent in the last Configure-Nak, - the possibility of a looped-back link is increased, and a new - Magic-Number MUST be chosen. 
In either case, a new Configure- - Request SHOULD be sent with the new Magic-Number. - - If the link is indeed looped-back, this sequence (transmit - Configure-Request, receive Configure-Request, transmit Configure- - - - -Simpson [Page 45] - RFC 1661 Point-to-Point Protocol July 1994 - - - Nak, receive Configure-Nak) will repeat over and over again. If - the link is not looped-back, this sequence might occur a few - times, but it is extremely unlikely to occur repeatedly. More - likely, the Magic-Numbers chosen at either end will quickly - diverge, terminating the sequence. The following table shows the - probability of collisions assuming that both ends of the link - select Magic-Numbers with a perfectly uniform distribution: - - Number of Collisions Probability - -------------------- --------------------- - 1 1/2**32 = 2.3 E-10 - 2 1/2**32**2 = 5.4 E-20 - 3 1/2**32**3 = 1.3 E-29 - - - Good sources of uniqueness or randomness are required for this - divergence to occur. If a good source of uniqueness cannot be - found, it is recommended that this Configuration Option not be - enabled; Configure-Requests with the option SHOULD NOT be - transmitted and any Magic-Number Configuration Options which the - peer sends SHOULD be either acknowledged or rejected. In this - case, looped-back links cannot be reliably detected by the - implementation, although they may still be detectable by the peer. - - If an implementation does transmit a Configure-Request with a - Magic-Number Configuration Option, then it MUST NOT respond with a - Configure-Reject when it receives a Configure-Request with a - Magic-Number Configuration Option. That is, if an implementation - desires to use Magic Numbers, then it MUST also allow its peer to - do so. If an implementation does receive a Configure-Reject in - response to a Configure-Request, it can only mean that the link is - not looped-back, and that its peer will not be using Magic- - Numbers. 
In this case, an implementation SHOULD act as if the - negotiation had been successful (as if it had instead received a - Configure-Ack). - - The Magic-Number also may be used to detect looped-back links - during normal operation, as well as during Configuration Option - negotiation. All LCP Echo-Request, Echo-Reply, and Discard- - Request packets have a Magic-Number field. If Magic-Number has - been successfully negotiated, an implementation MUST transmit - these packets with the Magic-Number field set to its negotiated - Magic-Number. - - The Magic-Number field of these packets SHOULD be inspected on - reception. All received Magic-Number fields MUST be equal to - either zero or the peer's unique Magic-Number, depending on - whether or not the peer negotiated a Magic-Number. - - - -Simpson [Page 46] - RFC 1661 Point-to-Point Protocol July 1994 - - - Reception of a Magic-Number field equal to the negotiated local - Magic-Number indicates a looped-back link. Reception of a Magic- - Number other than the negotiated local Magic-Number, the peer's - negotiated Magic-Number, or zero if the peer didn't negotiate one, - indicates a link which has been (mis)configured for communications - with a different peer. - - Procedures for recovery from either case are unspecified, and may - vary from implementation to implementation. A somewhat - pessimistic procedure is to assume a LCP Down event. A further - Open event will begin the process of re-establishing the link, - which can't complete until the looped-back condition is - terminated, and Magic-Numbers are successfully negotiated. A more - optimistic procedure (in the case of a looped-back link) is to - begin transmitting LCP Echo-Request packets until an appropriate - Echo-Reply is received, indicating a termination of the looped- - back condition. - - A summary of the Magic-Number Configuration Option format is shown - below. The fields are transmitted from left to right. 
- - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Magic-Number - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - Magic-Number (cont) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Type - - 5 - - Length - - 6 - - Magic-Number - - The Magic-Number field is four octets, and indicates a number - which is very likely to be unique to one end of the link. A - Magic-Number of zero is illegal and MUST always be Nak'd, if it is - not Rejected outright. - - - - - - - -Simpson [Page 47] - RFC 1661 Point-to-Point Protocol July 1994 - - -6.5. Protocol-Field-Compression (PFC) - - Description - - This Configuration Option provides a method to negotiate the - compression of the PPP Protocol field. By default, all - implementations MUST transmit packets with two octet PPP Protocol - fields. - - PPP Protocol field numbers are chosen such that some values may be - compressed into a single octet form which is clearly - distinguishable from the two octet form. This Configuration - Option is sent to inform the peer that the implementation can - receive such single octet Protocol fields. - - As previously mentioned, the Protocol field uses an extension - mechanism consistent with the ISO 3309 extension mechanism for the - Address field; the Least Significant Bit (LSB) of each octet is - used to indicate extension of the Protocol field. A binary "0" as - the LSB indicates that the Protocol field continues with the - following octet. The presence of a binary "1" as the LSB marks - the last octet of the Protocol field. Notice that any number of - "0" octets may be prepended to the field, and will still indicate - the same value (consider the two binary representations for 3, - 00000011 and 00000000 00000011). - - When using low speed links, it is desirable to conserve bandwidth - by sending as little redundant data as possible. 
The Protocol- - Field-Compression Configuration Option allows a trade-off between - implementation simplicity and bandwidth efficiency. If - successfully negotiated, the ISO 3309 extension mechanism may be - used to compress the Protocol field to one octet instead of two. - The large majority of packets are compressible since data - protocols are typically assigned with Protocol field values less - than 256. - - Compressed Protocol fields MUST NOT be transmitted unless this - Configuration Option has been negotiated. When negotiated, PPP - implementations MUST accept PPP packets with either double-octet - or single-octet Protocol fields, and MUST NOT distinguish between - them. - - The Protocol field is never compressed when sending any LCP - packet. This rule guarantees unambiguous recognition of LCP - packets. - - When a Protocol field is compressed, the Data Link Layer FCS field - is calculated on the compressed frame, not the original - - - -Simpson [Page 48] - RFC 1661 Point-to-Point Protocol July 1994 - - - uncompressed frame. - - A summary of the Protocol-Field-Compression Configuration Option - format is shown below. The fields are transmitted from left to - right. - - 0 1 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Type - - 7 - - Length - - 2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 49] - RFC 1661 Point-to-Point Protocol July 1994 - - -6.6. Address-and-Control-Field-Compression (ACFC) - - Description - - This Configuration Option provides a method to negotiate the - compression of the Data Link Layer Address and Control fields. By - default, all implementations MUST transmit frames with Address and - Control fields appropriate to the link framing. - - Since these fields usually have constant values for point-to-point - links, they are easily compressed. 
This Configuration Option is - sent to inform the peer that the implementation can receive - compressed Address and Control fields. - - If a compressed frame is received when Address-and-Control-Field- - Compression has not been negotiated, the implementation MAY - silently discard the frame. - - The Address and Control fields MUST NOT be compressed when sending - any LCP packet. This rule guarantees unambiguous recognition of - LCP packets. - - When the Address and Control fields are compressed, the Data Link - Layer FCS field is calculated on the compressed frame, not the - original uncompressed frame. - - A summary of the Address-and-Control-Field-Compression configuration - option format is shown below. The fields are transmitted from left - to right. - - 0 1 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Type - - 8 - - Length - - 2 - - - - - - - -Simpson [Page 50] - RFC 1661 Point-to-Point Protocol July 1994 - - -Security Considerations - - Security issues are briefly discussed in sections concerning the - Authentication Phase, the Close event, and the Authentication- - Protocol Configuration Option. - - - -References - - [1] Perkins, D., "Requirements for an Internet Standard Point-to- - Point Protocol", RFC 1547, Carnegie Mellon University, - December 1993. - - [2] Reynolds, J., and Postel, J., "Assigned Numbers", STD 2, RFC - 1340, USC/Information Sciences Institute, July 1992. - - -Acknowledgements - - This document is the product of the Point-to-Point Protocol Working - Group of the Internet Engineering Task Force (IETF). Comments should - be submitted to the ietf-ppp@merit.edu mailing list. - - Much of the text in this document is taken from the working group - requirements [1]; and RFCs 1171 & 1172, by Drew Perkins while at - Carnegie Mellon University, and by Russ Hobby of the University of - California at Davis. 
- - William Simpson was principally responsible for introducing - consistent terminology and philosophy, and the re-design of the phase - and negotiation state machines. - - Many people spent significant time helping to develop the Point-to- - Point Protocol. The complete list of people is too numerous to list, - but the following people deserve special thanks: Rick Adams, Ken - Adelman, Fred Baker, Mike Ballard, Craig Fox, Karl Fox, Phill Gross, - Kory Hamzeh, former WG chair Russ Hobby, David Kaufman, former WG - chair Steve Knowles, Mark Lewis, former WG chair Brian Lloyd, John - LoVerso, Bill Melohn, Mike Patton, former WG chair Drew Perkins, Greg - Satz, John Shriver, Vernon Schryver, and Asher Waldfogel. - - Special thanks to Morning Star Technologies for providing computing - resources and network access support for writing this specification. - - - - - - - -Simpson [Page 51] - RFC 1661 Point-to-Point Protocol July 1994 - - -Chair's Address - - The working group can be contacted via the current chair: - - Fred Baker - Advanced Computer Communications - 315 Bollay Drive - Santa Barbara, California 93117 - - fbaker@acc.com - - - -Editor's Address - - Questions about this memo can also be directed to: - - William Allen Simpson - Daydreamer - Computer Systems Consulting Services - 1384 Fontaine - Madison Heights, Michigan 48071 - - Bill.Simpson@um.cc.umich.edu - bsimpson@MorningStar.com - - - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 52] - - diff --git a/kernel/picotcp/RFC/rfc1662.txt b/kernel/picotcp/RFC/rfc1662.txt deleted file mode 100644 index 18b7c24..0000000 --- a/kernel/picotcp/RFC/rfc1662.txt +++ /dev/null @@ -1,1440 +0,0 @@ - - - - - - -Network Working Group W. 
Simpson, Editor -Request for Comments: 1662 Daydreamer -STD: 51 July 1994 -Obsoletes: 1549 -Category: Standards Track - - - PPP in HDLC-like Framing - - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - - -Abstract - - The Point-to-Point Protocol (PPP) [1] provides a standard method for - transporting multi-protocol datagrams over point-to-point links. - - This document describes the use of HDLC-like framing for PPP - encapsulated packets. - - -Table of Contents - - - 1. Introduction .......................................... 1 - 1.1 Specification of Requirements ................... 2 - 1.2 Terminology ..................................... 2 - - 2. Physical Layer Requirements ........................... 3 - - 3. The Data Link Layer ................................... 4 - 3.1 Frame Format .................................... 5 - 3.2 Modification of the Basic Frame ................. 7 - - 4. Octet-stuffed framing ................................. 8 - 4.1 Flag Sequence ................................... 8 - 4.2 Transparency .................................... 8 - 4.3 Invalid Frames .................................. 9 - 4.4 Time Fill ....................................... 9 - 4.4.1 Octet-synchronous ............................... 9 - 4.4.2 Asynchronous .................................... 9 - 4.5 Transmission Considerations ..................... 10 - 4.5.1 Octet-synchronous ............................... 10 - 4.5.2 Asynchronous .................................... 10 - - -Simpson [Page i] - RFC 1662 HDLC-like Framing July 1994 - - - 5. Bit-stuffed framing ................................... 11 - 5.1 Flag Sequence ................................... 
11 - 5.2 Transparency .................................... 11 - 5.3 Invalid Frames .................................. 11 - 5.4 Time Fill ....................................... 11 - 5.5 Transmission Considerations ..................... 12 - - 6. Asynchronous to Synchronous Conversion ................ 13 - - 7. Additional LCP Configuration Options .................. 14 - 7.1 Async-Control-Character-Map (ACCM) .............. 14 - - APPENDICES ................................................... 17 - A. Recommended LCP Options ............................... 17 - B. Automatic Recognition of PPP Frames ................... 17 - C. Fast Frame Check Sequence (FCS) Implementation ........ 18 - C.1 FCS table generator ............................. 18 - C.2 16-bit FCS Computation Method ................... 19 - C.3 32-bit FCS Computation Method ................... 21 - - SECURITY CONSIDERATIONS ...................................... 24 - REFERENCES ................................................... 24 - ACKNOWLEDGEMENTS ............................................. 25 - CHAIR'S ADDRESS .............................................. 25 - EDITOR'S ADDRESS ............................................. 25 - - - - -1. Introduction - - This specification provides for framing over both bit-oriented and - octet-oriented synchronous links, and asynchronous links with 8 bits - of data and no parity. These links MUST be full-duplex, but MAY be - either dedicated or circuit-switched. - - An escape mechanism is specified to allow control data such as - XON/XOFF to be transmitted transparently over the link, and to remove - spurious control data which may be injected into the link by - intervening hardware and software. - - Some protocols expect error free transmission, and either provide - error detection only on a conditional basis, or do not provide it at - all. PPP uses the HDLC Frame Check Sequence for error detection. 
- This is commonly available in hardware implementations, and a - software implementation is provided. - - - - - - -Simpson [Page 1] - RFC 1662 HDLC-like Framing July 1994 - - -1.1. Specification of Requirements - - In this document, several words are used to signify the requirements - of the specification. These words are often capitalized. - - MUST This word, or the adjective "required", means that the - definition is an absolute requirement of the specification. - - MUST NOT This phrase means that the definition is an absolute - prohibition of the specification. - - SHOULD This word, or the adjective "recommended", means that there - may exist valid reasons in particular circumstances to - ignore this item, but the full implications must be - understood and carefully weighed before choosing a - different course. - - MAY This word, or the adjective "optional", means that this - item is one of an allowed set of alternatives. An - implementation which does not include this option MUST be - prepared to interoperate with another implementation which - does include the option. - - -1.2. Terminology - - This document frequently uses the following terms: - - datagram The unit of transmission in the network layer (such as IP). - A datagram may be encapsulated in one or more packets - passed to the data link layer. - - frame The unit of transmission at the data link layer. A frame - may include a header and/or a trailer, along with some - number of units of data. - - packet The basic unit of encapsulation, which is passed across the - interface between the network layer and the data link - layer. A packet is usually mapped to a frame; the - exceptions are when data link layer fragmentation is being - performed, or when multiple packets are incorporated into a - single frame. - - peer The other end of the point-to-point link. - - silently discard - The implementation discards the packet without further - processing. 
The implementation SHOULD provide the - capability of logging the error, including the contents of - the silently discarded packet, and SHOULD record the event - in a statistics counter. - - -Simpson [Page 2] - RFC 1662 HDLC-like Framing July 1994 - - -2. Physical Layer Requirements - - PPP is capable of operating across most DTE/DCE interfaces (such as, - EIA RS-232-E, EIA RS-422, and CCITT V.35). The only absolute - requirement imposed by PPP is the provision of a full-duplex circuit, - either dedicated or circuit-switched, which can operate in either an - asynchronous (start/stop), bit-synchronous, or octet-synchronous - mode, transparent to PPP Data Link Layer frames. - - Interface Format - - PPP presents an octet interface to the physical layer. There is - no provision for sub-octets to be supplied or accepted. - - Transmission Rate - - PPP does not impose any restrictions regarding transmission rate, - other than that of the particular DTE/DCE interface. - - Control Signals - - PPP does not require the use of control signals, such as Request - To Send (RTS), Clear To Send (CTS), Data Carrier Detect (DCD), and - Data Terminal Ready (DTR). - - When available, using such signals can allow greater functionality - and performance. In particular, such signals SHOULD be used to - signal the Up and Down events in the LCP Option Negotiation - Automaton [1]. When such signals are not available, the - implementation MUST signal the Up event to LCP upon - initialization, and SHOULD NOT signal the Down event. - - Because signalling is not required, the physical layer MAY be - decoupled from the data link layer, hiding the transient details - of the physical transport. This has implications for mobility in - cellular radio networks, and other rapidly switching links. - - When moving from cell to cell within the same zone, an - implementation MAY choose to treat the entire zone as a single - link, even though transmission is switched among several - frequencies. 
The link is considered to be with the central - control unit for the zone, rather than the individual cell - transceivers. However, the link SHOULD re-establish its - configuration whenever the link is switched to a different - administration. - - Due to the bursty nature of data traffic, some implementations - have chosen to disconnect the physical layer during periods of - - - -Simpson [Page 3] - RFC 1662 HDLC-like Framing July 1994 - - - inactivity, and reconnect when traffic resumes, without informing - the data link layer. Robust implementations should avoid using - this trick over-zealously, since the price for decreased setup - latency is decreased security. Implementations SHOULD signal the - Down event whenever "significant time" has elapsed since the link - was disconnected. The value for "significant time" is a matter of - considerable debate, and is based on the tariffs, call setup - times, and security concerns of the installation. - - - -3. The Data Link Layer - - PPP uses the principles described in ISO 3309-1979 HDLC frame - structure, most recently the fourth edition 3309:1991 [2], which - specifies modifications to allow HDLC use in asynchronous - environments. - - The PPP control procedures use the Control field encodings described - in ISO 4335-1979 HDLC elements of procedures, most recently the - fourth edition 4335:1991 [4]. - - This should not be construed to indicate that every feature of the - above recommendations is included in PPP. Each feature included - is explicitly described in the following sections. - - To remain consistent with standard Internet practice, and avoid - confusion for people used to reading RFCs, all binary numbers in the - following descriptions are in Most Significant Bit to Least - Significant Bit order, reading from left to right, unless otherwise - indicated. Note that this is contrary to standard ISO and CCITT - practice which orders bits as transmitted (network bit order).
Keep - this in mind when comparing this document with the international - standards documents. - - - - - - - - - - - - - - - - - -Simpson [Page 4] - RFC 1662 HDLC-like Framing July 1994 - - -3.1. Frame Format - - A summary of the PPP HDLC-like frame structure is shown below. This - figure does not include bits inserted for synchronization (such as - start and stop bits for asynchronous links), nor any bits or octets - inserted for transparency. The fields are transmitted from left to - right. - - +----------+----------+----------+ - | Flag | Address | Control | - | 01111110 | 11111111 | 00000011 | - +----------+----------+----------+ - +----------+-------------+---------+ - | Protocol | Information | Padding | - | 8/16 bits| * | * | - +----------+-------------+---------+ - +----------+----------+----------------- - | FCS | Flag | Inter-frame Fill - |16/32 bits| 01111110 | or next Address - +----------+----------+----------------- - - The Protocol, Information and Padding fields are described in the - Point-to-Point Protocol Encapsulation [1]. - - Flag Sequence - - Each frame begins and ends with a Flag Sequence, which is the - binary sequence 01111110 (hexadecimal 0x7e). All implementations - continuously check for this flag, which is used for frame - synchronization. - - Only one Flag Sequence is required between two frames. Two - consecutive Flag Sequences constitute an empty frame, which is - silently discarded, and not counted as a FCS error. - - Address Field - - The Address field is a single octet, which contains the binary - sequence 11111111 (hexadecimal 0xff), the All-Stations address. - Individual station addresses are not assigned. The All-Stations - address MUST always be recognized and received. - - The use of other address lengths and values may be defined at a - later time, or by prior agreement. Frames with unrecognized - Addresses SHOULD be silently discarded. 
- - - - - - -Simpson [Page 5] - RFC 1662 HDLC-like Framing July 1994 - - - Control Field - - The Control field is a single octet, which contains the binary - sequence 00000011 (hexadecimal 0x03), the Unnumbered Information - (UI) command with the Poll/Final (P/F) bit set to zero. - - The use of other Control field values may be defined at a later - time, or by prior agreement. Frames with unrecognized Control - field values SHOULD be silently discarded. - - Frame Check Sequence (FCS) Field - - The Frame Check Sequence field defaults to 16 bits (two octets). - The FCS is transmitted least significant octet first, which - contains the coefficient of the highest term. - - A 32-bit (four octet) FCS is also defined. Its use may be - negotiated as described in "PPP LCP Extensions" [5]. - - The use of other FCS lengths may be defined at a later time, or by - prior agreement. - - The FCS field is calculated over all bits of the Address, Control, - Protocol, Information and Padding fields, not including any start - and stop bits (asynchronous) nor any bits (synchronous) or octets - (asynchronous or synchronous) inserted for transparency. This - also does not include the Flag Sequences nor the FCS field itself. - - When octets are received which are flagged in the Async- - Control-Character-Map, they are discarded before calculating - the FCS. - - For more information on the specification of the FCS, see the - Appendices. - - The end of the Information and Padding fields is found by locating - the closing Flag Sequence and removing the Frame Check Sequence - field. - - - - - - - - - - - - - -Simpson [Page 6] - RFC 1662 HDLC-like Framing July 1994 - - -3.2. Modification of the Basic Frame - - The Link Control Protocol can negotiate modifications to the standard - HDLC-like frame structure. However, modified frames will always be - clearly distinguishable from standard frames. 
- - Address-and-Control-Field-Compression - - When using the standard HDLC-like framing, the Address and Control - fields contain the hexadecimal values 0xff and 0x03 respectively. - When other Address or Control field values are in use, Address- - and-Control-Field-Compression MUST NOT be negotiated. - - On transmission, compressed Address and Control fields are simply - omitted. - - On reception, the Address and Control fields are decompressed by - examining the first two octets. If they contain the values 0xff - and 0x03, they are assumed to be the Address and Control fields. - If not, it is assumed that the fields were compressed and were not - transmitted. - - By definition, the first octet of a two octet Protocol field - will never be 0xff (since it is not even). The Protocol field - value 0x00ff is not allowed (reserved) to avoid ambiguity when - Protocol-Field-Compression is enabled and the first Information - field octet is 0x03. - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 7] - RFC 1662 HDLC-like Framing July 1994 - - -4. Octet-stuffed framing - - This chapter summarizes the use of HDLC-like framing with 8-bit - asynchronous and octet-synchronous links. - - - -4.1. Flag Sequence - - The Flag Sequence indicates the beginning or end of a frame. The - octet stream is examined on an octet-by-octet basis for the value - 01111110 (hexadecimal 0x7e). - - - -4.2. Transparency - - An octet stuffing procedure is used. The Control Escape octet is - defined as binary 01111101 (hexadecimal 0x7d), most significant bit - first. - - As a minimum, sending implementations MUST escape the Flag Sequence - and Control Escape octets. - - After FCS computation, the transmitter examines the entire frame - between the two Flag Sequences. 
Each Flag Sequence, Control Escape - octet, and any octet which is flagged in the sending Async-Control- - Character-Map (ACCM), is replaced by a two octet sequence consisting - of the Control Escape octet followed by the original octet - exclusive-or'd with hexadecimal 0x20. - - This is bit 5 complemented, where the bit positions are numbered - 76543210 (the 6th bit as used in ISO numbered 87654321 -- BEWARE - when comparing documents). - - Receiving implementations MUST correctly process all Control Escape - sequences. - - On reception, prior to FCS computation, each octet with value less - than hexadecimal 0x20 is checked. If it is flagged in the receiving - ACCM, it is simply removed (it may have been inserted by intervening - data communications equipment). Each Control Escape octet is also - removed, and the following octet is exclusive-or'd with hexadecimal - 0x20, unless it is the Flag Sequence (which aborts a frame). - - A few examples may make this more clear. Escaped data is transmitted - on the link as follows: - - - - -Simpson [Page 8] - RFC 1662 HDLC-like Framing July 1994 - - - - 0x7e is encoded as 0x7d, 0x5e. (Flag Sequence) - 0x7d is encoded as 0x7d, 0x5d. (Control Escape) - 0x03 is encoded as 0x7d, 0x23. (ETX) - - Some modems with software flow control may intercept outgoing DC1 and - DC3 ignoring the 8th (parity) bit. This data would be transmitted on - the link as follows: - - 0x11 is encoded as 0x7d, 0x31. (XON) - 0x13 is encoded as 0x7d, 0x33. (XOFF) - 0x91 is encoded as 0x7d, 0xb1. (XON with parity set) - 0x93 is encoded as 0x7d, 0xb3. (XOFF with parity set) - - - - -4.3. Invalid Frames - - Frames which are too short (less than 4 octets when using the 16-bit - FCS), or which end with a Control Escape octet followed immediately - by a closing Flag Sequence, or in which octet-framing is violated (by - transmitting a "0" stop bit where a "1" bit is expected), are - silently discarded, and not counted as a FCS error. - - - -4.4. 
Time Fill - -4.4.1. Octet-synchronous - - There is no provision for inter-octet time fill. - - The Flag Sequence MUST be transmitted during inter-frame time fill. - - -4.4.2. Asynchronous - - Inter-octet time fill MUST be accomplished by transmitting continuous - "1" bits (mark-hold state). - - Inter-frame time fill can be viewed as extended inter-octet time - fill. Doing so can save one octet for every frame, decreasing delay - and increasing bandwidth. This is possible since a Flag Sequence may - serve as both a frame end and a frame begin. After having received - any frame, an idle receiver will always be in a frame begin state. - - - - -Simpson [Page 9] - RFC 1662 HDLC-like Framing July 1994 - - - Robust transmitters should avoid using this trick over-zealously, - since the price for decreased delay is decreased reliability. Noisy - links may cause the receiver to receive garbage characters and - interpret them as part of an incoming frame. If the transmitter does - not send a new opening Flag Sequence before sending the next frame, - then that frame will be appended to the noise characters causing an - invalid frame (with high reliability). - - It is suggested that implementations will achieve the best results by - always sending an opening Flag Sequence if the new frame is not - back-to-back with the last. Transmitters SHOULD send an open Flag - Sequence whenever "appreciable time" has elapsed after the prior - closing Flag Sequence. The maximum value for "appreciable time" is - likely to be no greater than the typing rate of a slow typist, about - 1 second. - - - -4.5. Transmission Considerations - -4.5.1. Octet-synchronous - - The definition of various encodings and scrambling is the - responsibility of the DTE/DCE equipment in use, and is outside the - scope of this specification. - - -4.5.2. Asynchronous - - All octets are transmitted least significant bit first, with one - start bit, eight bits of data, and one stop bit. 
There is no - provision for seven bit asynchronous links. - - - - - - - - - - - - - - - - - - -Simpson [Page 10] - RFC 1662 HDLC-like Framing July 1994 - - -5. Bit-stuffed framing - - This chapter summarizes the use of HDLC-like framing with bit- - synchronous links. - - - -5.1. Flag Sequence - - The Flag Sequence indicates the beginning or end of a frame, and is - used for frame synchronization. The bit stream is examined on a - bit-by-bit basis for the binary sequence 01111110 (hexadecimal 0x7e). - - The "shared zero mode" Flag Sequence "011111101111110" SHOULD NOT be - used. When not avoidable, such an implementation MUST ensure that - the first Flag Sequence detected (the end of the frame) is promptly - communicated to the link layer. Use of the shared zero mode hinders - interoperability with bit-synchronous to asynchronous and bit- - synchronous to octet-synchronous converters. - - - -5.2. Transparency - - After FCS computation, the transmitter examines the entire frame - between the two Flag Sequences. A "0" bit is inserted after all - sequences of five contiguous "1" bits (including the last 5 bits of - the FCS) to ensure that a Flag Sequence is not simulated. - - On reception, prior to FCS computation, any "0" bit that directly - follows five contiguous "1" bits is discarded. - - - -5.3. Invalid Frames - - Frames which are too short (less than 4 octets when using the 16-bit - FCS), or which end with a sequence of more than six "1" bits, are - silently discarded, and not counted as a FCS error. - - - -5.4. Time Fill - - There is no provision for inter-octet time fill. - - The Flag Sequence SHOULD be transmitted during inter-frame time fill. - However, certain types of circuit-switched links require the use of - - - -Simpson [Page 11] - RFC 1662 HDLC-like Framing July 1994 - - - mark idle (continuous ones), particularly those that calculate - accounting based on periods of bit activity. 
When mark idle is used - on a bit-synchronous link, the implementation MUST ensure at least 15 - consecutive "1" bits between Flags during the idle period, and that - the Flag Sequence is always generated at the beginning of a frame - after an idle period. - - This differs from practice in ISO 3309, which allows 7 to 14 bit - mark idle. - - - -5.5. Transmission Considerations - - All octets are transmitted least significant bit first. - - The definition of various encodings and scrambling is the - responsibility of the DTE/DCE equipment in use, and is outside the - scope of this specification. - - While PPP will operate without regard to the underlying - representation of the bit stream, lack of standards for transmission - will hinder interoperability as surely as lack of data link - standards. At speeds of 56 Kbps through 2.0 Mbps, NRZ is currently - most widely available, and on that basis is recommended as a default. - - When configuration of the encoding is allowed, NRZI is recommended as - an alternative, because of its relative immunity to signal inversion - configuration errors, and instances when it MAY allow connection - without an expensive DSU/CSU. Unfortunately, NRZI encoding - exacerbates the missing x1 factor of the 16-bit FCS, so that one - error in 2**15 goes undetected (instead of one in 2**16), and triple - errors are not detected. Therefore, when NRZI is in use, it is - recommended that the 32-bit FCS be negotiated, which includes the x1 - factor. - - At higher speeds of up to 45 Mbps, some implementors have chosen the - ANSI High Speed Synchronous Interface [HSSI]. While this experience - is currently limited, implementors are encouraged to cooperate in - choosing transmission encoding. - - - - - - - - - - - -Simpson [Page 12] - RFC 1662 HDLC-like Framing July 1994 - - -6. 
Asynchronous to Synchronous Conversion - - There may be some use of asynchronous-to-synchronous converters (some - built into modems and cellular interfaces), resulting in an - asynchronous PPP implementation on one end of a link and a - synchronous implementation on the other. It is the responsibility of - the converter to do all stuffing conversions during operation. - - To enable this functionality, synchronous PPP implementations MUST - always respond to the Async-Control-Character-Map Configuration - Option with the LCP Configure-Ack. However, acceptance of the - Configuration Option does not imply that the synchronous - implementation will do any ACCM mapping. Instead, all such octet - mapping will be performed by the asynchronous-to-synchronous - converter. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 13] - RFC 1662 HDLC-like Framing July 1994 - - -7. Additional LCP Configuration Options - - The Configuration Option format and basic options are already defined - for LCP [1]. - - Up-to-date values of the LCP Option Type field are specified in the - most recent "Assigned Numbers" RFC [10]. This document concerns the - following values: - - 2 Async-Control-Character-Map - - - - -7.1. Async-Control-Character-Map (ACCM) - - Description - - This Configuration Option provides a method to negotiate the use - of control character transparency on asynchronous links. - - Each end of the asynchronous link maintains two Async-Control- - Character-Maps. The receiving ACCM is 32 bits, but the sending - ACCM may be up to 256 bits. This results in four distinct ACCMs, - two in each direction of the link. - - For asynchronous links, the default receiving ACCM is 0xffffffff. - The default sending ACCM is 0xffffffff, plus the Control Escape - and Flag Sequence characters themselves, plus whatever other - outgoing characters are flagged (by prior configuration) as likely - to be intercepted. 
- - For other types of links, the default value is 0, since there is - no need for mapping. - - The default inclusion of all octets less than hexadecimal 0x20 - allows all ASCII control characters [6] excluding DEL (Delete) - to be transparently communicated through all known data - communications equipment. - - The transmitter MAY also send octets with values in the range 0x40 - through 0xff (except 0x5e) in Control Escape format. Since these - octet values are not negotiable, this does not solve the problem - of receivers which cannot handle all non-control characters. - Also, since the technique does not affect the 8th bit, this does - not solve problems for communications links that can send only 7- - bit characters. - - - - -Simpson [Page 14] - RFC 1662 HDLC-like Framing July 1994 - - - Note that this specification differs in detail from later - amendments, such as 3309:1991/Amendment 2 [3]. However, such - "extended transparency" is applied only by "prior agreement". - Use of the transparency methods in this specification - constitute a prior agreement with respect to PPP. - - For compatibility with 3309:1991/Amendment 2, the transmitter - MAY escape DEL and ACCM equivalents with the 8th (most - significant) bit set. No change is required in the receiving - algorithm. - - Following ACCM negotiation, the transmitter SHOULD cease - escaping DEL. - - However, it is rarely necessary to map all control characters, and - often it is unnecessary to map any control characters. The - Configuration Option is used to inform the peer which control - characters MUST remain mapped when the peer sends them. - - The peer MAY still send any other octets in mapped format, if it - is necessary because of constraints known to the peer. The peer - SHOULD Configure-Nak with the logical union of the sets of mapped - octets, so that when such octets are spuriously introduced they - can be ignored on receipt. 
- - A summary of the Async-Control-Character-Map Configuration Option - format is shown below. The fields are transmitted from left to - right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | ACCM - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - ACCM (cont) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - Type - - 2 - - Length - - 6 - - - - - - -Simpson [Page 15] - RFC 1662 HDLC-like Framing July 1994 - - - ACCM - - The ACCM field is four octets, and indicates the set of control - characters to be mapped. The map is sent most significant octet - first. - - Each numbered bit corresponds to the octet of the same value. If - the bit is cleared to zero, then that octet need not be mapped. - If the bit is set to one, then that octet MUST remain mapped. For - example, if bit 19 is set to zero, then the ASCII control - character 19 (DC3, Control-S) MAY be sent in the clear. - - Note: The least significant bit of the least significant octet - (the final octet transmitted) is numbered bit 0, and would map - to the ASCII control character NUL. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Simpson [Page 16] - RFC 1662 HDLC-like Framing July 1994 - - -A. Recommended LCP Options - - The following Configurations Options are recommended: - - High Speed links - - Magic Number - Link Quality Monitoring - No Address and Control Field Compression - No Protocol Field Compression - - - Low Speed or Asynchronous links - - Async Control Character Map - Magic Number - Address and Control Field Compression - Protocol Field Compression - - - -B. Automatic Recognition of PPP Frames - - It is sometimes desirable to detect PPP frames, for example during a - login sequence. 
The following octet sequences all begin valid PPP - LCP frames: - - 7e ff 03 c0 21 - 7e ff 7d 23 c0 21 - 7e 7d df 7d 23 c0 21 - - Note that the first two forms are not a valid username for Unix. - However, only the third form generates a correctly checksummed PPP - frame, whenever 03 and ff are taken as the control characters ETX and - DEL without regard to parity (they are correct for an even parity - link) and discarded. - - Many implementations deal with this by putting the interface into - packet mode when one of the above username patterns are detected - during login, without examining the initial PPP checksum. The - initial incoming PPP frame is discarded, but a Configure-Request is - sent immediately. - - - - - - - - - -Simpson [Page 17] - RFC 1662 HDLC-like Framing July 1994 - - -C. Fast Frame Check Sequence (FCS) Implementation - - The FCS was originally designed with hardware implementations in - mind. A serial bit stream is transmitted on the wire, the FCS is - calculated over the serial data as it goes out, and the complement of - the resulting FCS is appended to the serial stream, followed by the - Flag Sequence. - - The receiver has no way of determining that it has finished - calculating the received FCS until it detects the Flag Sequence. - Therefore, the FCS was designed so that a particular pattern results - when the FCS operation passes over the complemented FCS. A good - frame is indicated by this "good FCS" value. - - - -C.1. FCS table generator - - The following code creates the lookup table used to calculate the - FCS-16. - - /* - * Generate a FCS-16 table. - * - * Drew D. Perkins at Carnegie Mellon University. - * - * Code liberally borrowed from Mohsen Banan and D. Hugh Redelmeier. - */ - - /* - * The FCS-16 generator polynomial: x**0 + x**5 + x**12 + x**16. 
- */ - #define P 0x8408 - - - main() - { - register unsigned int b, v; - register int i; - - printf("typedef unsigned short u16;\n"); - printf("static u16 fcstab[256] = {"); - for (b = 0; ; ) { - if (b % 8 == 0) - printf("\n"); - - v = b; - for (i = 8; i--; ) - - - -Simpson [Page 18] - RFC 1662 HDLC-like Framing July 1994 - - - v = v & 1 ? (v >> 1) ^ P : v >> 1; - - printf("\t0x%04x", v & 0xFFFF); - if (++b == 256) - break; - printf(","); - } - printf("\n};\n"); - } - - - -C.2. 16-bit FCS Computation Method - - The following code provides a table lookup computation for - calculating the Frame Check Sequence as data arrives at the - interface. This implementation is based on [7], [8], and [9]. - - /* - * u16 represents an unsigned 16-bit number. Adjust the typedef for - * your hardware. - */ - typedef unsigned short u16; - - /* - * FCS lookup table as calculated by the table generator. - */ - static u16 fcstab[256] = { - 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, - 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, - 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, - 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, - 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, - 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, - 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, - 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, - 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, - 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, - 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, - 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, - 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, - 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, - 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, - 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, - 0x8408, 
0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, - 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, - 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, - 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, - - - -Simpson [Page 19] - RFC 1662 HDLC-like Framing July 1994 - - - 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, - 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, - 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, - 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, - 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, - 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, - 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, - 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, - 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, - 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, - 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, - 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 - }; - - #define PPPINITFCS16 0xffff /* Initial FCS value */ - #define PPPGOODFCS16 0xf0b8 /* Good final FCS value */ - - /* - * Calculate a new fcs given the current fcs and the new data. 
- */ - u16 pppfcs16(fcs, cp, len) - register u16 fcs; - register unsigned char *cp; - register int len; - { - ASSERT(sizeof (u16) == 2); - ASSERT(((u16) -1) > 0); - while (len--) - fcs = (fcs >> 8) ^ fcstab[(fcs ^ *cp++) & 0xff]; - - return (fcs); - } - - /* - * How to use the fcs - */ - tryfcs16(cp, len) - register unsigned char *cp; - register int len; - { - u16 trialfcs; - - /* add on output */ - trialfcs = pppfcs16( PPPINITFCS16, cp, len ); - trialfcs ^= 0xffff; /* complement */ - cp[len] = (trialfcs & 0x00ff); /* least significant byte first */ - cp[len+1] = ((trialfcs >> 8) & 0x00ff); - - - - -Simpson [Page 20] - RFC 1662 HDLC-like Framing July 1994 - - - /* check on input */ - trialfcs = pppfcs16( PPPINITFCS16, cp, len + 2 ); - if ( trialfcs == PPPGOODFCS16 ) - printf("Good FCS\n"); - } - - - -C.3. 32-bit FCS Computation Method - - The following code provides a table lookup computation for - calculating the 32-bit Frame Check Sequence as data arrives at the - interface. - - /* - * The FCS-32 generator polynomial: x**0 + x**1 + x**2 + x**4 + x**5 - * + x**7 + x**8 + x**10 + x**11 + x**12 + x**16 - * + x**22 + x**23 + x**26 + x**32. - */ - - /* - * u32 represents an unsigned 32-bit number. Adjust the typedef for - * your hardware. 
- */ - typedef unsigned long u32; - - static u32 fcstab_32[256] = - { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, - 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, - 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, - 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, - 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, - 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, - 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, - 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, - 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, - 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, - 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, - 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, - 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, - 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, - - - -Simpson [Page 21] - RFC 1662 HDLC-like Framing July 1994 - - - 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, - 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, - 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, - 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, - 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, - 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, - 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, - 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, - 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, - 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, - 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, - 
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, - 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, - 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, - 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, - 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, - 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, - 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, - 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, - 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, - 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, - 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, - 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, - 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, - 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, - 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, - 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, - 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, - 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d - }; - - #define PPPINITFCS32 0xffffffff /* Initial FCS value */ - #define PPPGOODFCS32 0xdebb20e3 /* Good final FCS value */ - - - -Simpson [Page 22] - RFC 1662 HDLC-like Framing July 1994 - - - /* - * Calculate a new FCS given the current FCS and the new data. 
- */ - u32 pppfcs32(fcs, cp, len) - register u32 fcs; - register unsigned char *cp; - register int len; - { - ASSERT(sizeof (u32) == 4); - ASSERT(((u32) -1) > 0); - while (len--) - fcs = (((fcs) >> 8) ^ fcstab_32[((fcs) ^ (*cp++)) & 0xff]); - - return (fcs); - } - - /* - * How to use the fcs - */ - tryfcs32(cp, len) - register unsigned char *cp; - register int len; - { - u32 trialfcs; - - /* add on output */ - trialfcs = pppfcs32( PPPINITFCS32, cp, len ); - trialfcs ^= 0xffffffff; /* complement */ - cp[len] = (trialfcs & 0x00ff); /* least significant byte first */ - cp[len+1] = ((trialfcs >>= 8) & 0x00ff); - cp[len+2] = ((trialfcs >>= 8) & 0x00ff); - cp[len+3] = ((trialfcs >> 8) & 0x00ff); - - /* check on input */ - trialfcs = pppfcs32( PPPINITFCS32, cp, len + 4 ); - if ( trialfcs == PPPGOODFCS32 ) - printf("Good FCS\n"); - } - - - - - - - - - - - - - -Simpson [Page 23] - RFC 1662 HDLC-like Framing July 1994 - - -Security Considerations - - As noted in the Physical Layer Requirements section, the link layer - might not be informed when the connected state of the physical layer - has changed. This results in possible security lapses due to over- - reliance on the integrity and security of switching systems and - administrations. An insertion attack might be undetected. An - attacker which is able to spoof the same calling identity might be - able to avoid link authentication. - - - -References - - [1] Simpson, W., Editor, "The Point-to-Point Protocol (PPP)", - STD 50, RFC 1661, Daydreamer, July 1994. - - [2] ISO/IEC 3309:1991(E), "Information Technology - - Telecommunications and information exchange between systems - - High-level data link control (HDLC) procedures - Frame - structure", International Organization For Standardization, - Fourth edition 1991-06-01. 
- - [3] ISO/IEC 3309:1991/Amd.2:1992(E), "Information Technology - - Telecommunications and information exchange between systems - - High-level data link control (HDLC) procedures - Frame - structure - Amendment 2: Extended transparency options for - start/stop transmission", International Organization For - Standardization, 1992-01-15. - - [4] ISO/IEC 4335:1991(E), "Information Technology - - Telecommunications and information exchange between systems - - High-level data link control (HDLC) procedures - Elements of - procedures", International Organization For Standardization, - Fourth edition 1991-09-15. - - [5] Simpson, W., Editor, "PPP LCP Extensions", RFC 1570, - Daydreamer, January 1994. - - [6] ANSI X3.4-1977, "American National Standard Code for - Information Interchange", American National Standards - Institute, 1977. - - [7] Perez, "Byte-wise CRC Calculations", IEEE Micro, June 1983. - - [8] Morse, G., "Calculating CRC's by Bits and Bytes", Byte, - September 1986. - - - - -Simpson [Page 24] - RFC 1662 HDLC-like Framing July 1994 - - - [9] LeVan, J., "A Fast CRC", Byte, November 1987. - - [10] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC - 1340, USC/Information Sciences Institute, July 1992. - - - -Acknowledgements - - This document is the product of the Point-to-Point Protocol Working - Group of the Internet Engineering Task Force (IETF). Comments should - be submitted to the ietf-ppp@merit.edu mailing list. - - This specification is based on previous RFCs, where many - contributions have been acknowleged. - - The 32-bit FCS example code was provided by Karl Fox (Morning Star - Technologies). - - Special thanks to Morning Star Technologies for providing computing - resources and network access support for writing this specification. 
- - - -Chair's Address - - The working group can be contacted via the current chair: - - Fred Baker - Advanced Computer Communications - 315 Bollay Drive - Santa Barbara, California 93117 - - fbaker@acc.com - - -Editor's Address - - Questions about this memo can also be directed to: - - William Allen Simpson - Daydreamer - Computer Systems Consulting Services - 1384 Fontaine - Madison Heights, Michigan 48071 - - Bill.Simpson@um.cc.umich.edu - bsimpson@MorningStar.com - - -Simpson [Page 25] - - - - - diff --git a/kernel/picotcp/RFC/rfc1693.txt b/kernel/picotcp/RFC/rfc1693.txt deleted file mode 100644 index 0ee3e3f..0000000 --- a/kernel/picotcp/RFC/rfc1693.txt +++ /dev/null @@ -1,2019 +0,0 @@ - - - - - - -Network Working Group T. Connolly -Request for Comments: 1693 P. Amer -Category: Experimental P. Conrad - University of Delaware - November 1994 - - - An Extension to TCP : Partial Order Service - -Status of This Memo - - This memo defines an Experimental Protocol for the Internet - community. This memo does not specify an Internet standard of any - kind. Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited - -IESG Note: - - Note that the work contained in this memo does not describe an - Internet standard. The Transport AD and Transport Directorate do not - recommend the implementation of the TCP modifications described. - However, outside the context of TCP, we find that the memo offers a - useful analysis of how misordered and incomplete data may be handled. - See, for example, the discussion of Application Layer Framing by D. - Clark and D. Tennenhouse in, "Architectural Considerations for a New - Generation of Protocols", SIGCOM 90 Proceedings, ACM, September 1990. - -Abstract - - This RFC introduces a new transport mechanism for TCP based upon - partial ordering. The aim is to present the concepts of partial - ordering and promote discussions on its usefulness in network - communications. 
Distribution of this memo is unlimited. - -Introduction - - A service which allows partial order delivery and partial reliability - is one which requires some, but not all objects to be received in the - order transmitted while also allowing objects to be transmitted - unreliably (i.e., some may be lost). - - The realization of such a service requires, (1) communication and/or - negotiation of what constitutes a valid ordering and/or loss-level, - and (2) an algorithm which enables the receiver to ascertain the - deliverability of objects as they arrive. These issues are addressed - here - both conceptually and formally - summarizing the results of - research and initial implementation efforts. - - - - -Connolly, Amer & Conrad [Page 1] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - The authors envision the use of a partial order service within a - connection-oriented, transport protocol such as TCP providing a - further level of granularity to the transport user in terms of the - type and quality of offered service. This RFC focuses specifically - on extending TCP to provide partial order connections. - - The idea of a partial order service is not limited to TCP. It may be - considered a useful option for any transport protocol and we - encourage researchers and practitioners to investigate further the - most effective uses for partial ordering whether in a next-generation - TCP, or another general purpose protocol such as XTP, or perhaps - within a "special purpose" protocol tailored to a specific - application and network profile. - - Finally, while the crux of this RFC relates to and introduces a new - way of considering object ordering, a number of other classic - transport mechanisms are also seen in a new light - among these are - reliability, window management and data acknowledgments. 
- - Keywords: partial order, quality of service, reliability, multimedia, - client/server database, Windows, transport protocol - -Table of Contents - - 1. Introduction and motivation .................................. 3 - 2. Partial Order Delivery ....................................... 4 - 2.1 Example 1: Remote Database .................................. 4 - 2.2 Example 2: Multimedia ....................................... 8 - 2.3 Example 3: Windows Screen Refresh ........................... 9 - 2.4 Potential Savings ........................................... 10 - 3. Reliability vs. Order ........................................ 12 - 3.1 Reliability Classes ......................................... 13 - 4. Partial Order Connection ..................................... 15 - 4.1 Connection Establishment .................................... 16 - 4.2 Data Transmission ........................................... 19 - 4.2.1 Sender .................................................... 22 - 4.2.2 Receiver .................................................. 25 - 5. Quantifying and Comparing Partial Order Services ............. 30 - 6. Future Direction ............................................. 31 - 7. Summary ...................................................... 32 - 8. References ................................................... 34 - Security Considerations ......................................... 35 - Authors' Addresses .............................................. 36 - - - - - - - - -Connolly, Amer & Conrad [Page 2] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - -1. Introduction and motivation - - Current applications that need to communicate objects (i.e., octets, - packets, frames, protocol data units) usually choose between a fully - ordered service such as that currently provided by TCP and one that - does not guarantee any ordering such as that provided by UDP. 
A - similar "all-or-nothing" choice is made for object reliability - - reliable connections which guarantee all objects will be delivered - verses unreliable data transport which makes no guarantee. What is - more appropriate for some applications is a partial order and/or - partial reliability service where a subset of objects being - communicated must arrive in the order transmitted, yet some objects - may arrive in a different order, and some (well specified subset) of - the objects may not arrive at all. - - One motivating application for a partial order service is the - emerging area of multimedia communications. Multimedia traffic is - often characterized either by periodic, synchronized parallel streams - of information (e.g., combined audio-video), or by structured image - streams (e.g., displays of multiple overlapping and nonoverlapping - windows). These applications have a high degree of tolerance for - less-than-fully-ordered data transport as well as data loss. Thus - they are ideal candidates for using a partial order, partial - reliability service. In general, any application which communicates - parallel and/or independent data structures may potentially be able - to profit from a partial order service. - - A second application that could benefit from a partial order service - involves remote or distributed databases. Imagine the case where a - database user transmitting queries to a remote server expects objects - (or records) to be returned in some order, although not necessarily - total order. For example a user writing an SQL data query might - specify this with the "order by" clause. There exist today a great - number of commercial implementations of distributed databases which - utilize - and thus are penalized by - an ordered delivery service. - - Currently these applications must use and pay for a fully - ordered/fully reliable service even though they do not need it. 
The - introduction of partial services allows applications to lower the - demanded quality of service (QOS) of the communication assuming that - such a service is more efficient and less costly. In effect, a - partial order extends the service level from two extremes - ordered - and unordered - to a range of discreet values encompassing both of - the extremes and all possible partial orderings in between. A - similar phenomenon is demonstrated in the area of reliability. - - - - - - -Connolly, Amer & Conrad [Page 3] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - It is worth mentioning that a TCP implementation providing a partial - order service, as described here, would be able to communicate with a - non-partial order implementation simply by recognizing this fact at - connection establishment - hence this extension is backward - compatible with earlier versions of TCP. Furthermore, it is - conceivable for a host to support the sending-half (or receiving- - half) of a partial order connection alone to reduce the size of the - TCP as well as the effort involved in the implementation. Similar - "levels of conformance" have been proposed in other internet - extensions such as [Dee89] involving IP multicasting. - - This RFC proceeds as follows. The principles of partial order - delivery, published in [ACCD93a], are presented in Section 2. The - notion of partial reliability, published in [ACCD93b], is introduced - in Section 3 followed by an explanation of "reliability classes". - Then, the practical issues involved with setting up and maintaining a - Partial Order Connection (POC) within a TCP framework are addressed - in Section 4 looking first at connection establishment, and then - discussing the sender's role and the receiver's role. Section 5 - provides insights into the expected performance improvements of a - partial order service over an ordered service and Section 6 discusses - some future directions. 
Concluding remarks are given in Section 7. - -2. Partial Order Delivery - - Partial order services are needed and can be employed as soon as a - complete ordering is not mandatory. When two objects can be - delivered in either order, there is no need to use an ordered service - that must delay delivery of the second one transmitted until the - first arrives as the following examples demonstrate. - -2.1 Example 1: Remote Database - - Simpson's Sporting Goods (SSG) has recently installed a state-of- - the-art enterprise-wide network. Their first "network application" - is a client/server SQL database with the following four records, - numbered {1 2 3 4} for convenience: - - SALESPERSON LOCATION CHARGES DESCRIPTION - ------------- ----------------- --------- ----------------- - 1 Anderson Atlanta, GA $4,200 Camping Gear - 2 Baker Boston, MA $849 Camping Gear - 3 Crowell Boston, MA $9,500 Sportswear - 4 Dykstra Wash., DC $1,000 Sportswear - - SSG employees running the client-side of the application can query - the database server from any location in the enterprise net using - standard SQL commands and the results will be displayed on their - - - -Connolly, Amer & Conrad [Page 4] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - screen. From the employee's perspective, the network is completely - reliable and delivers data (records) in an order that conforms to - their SQL request. In reality though, it is the transport layer - protocol which provides the reliability and order on top of an - unreliable network layer - one which introduces loss, duplication, - and disorder. - - Consider the four cases in Figure 1 - in the first query (1.a), - ordered by SALESPERSON, the records have only one acceptable order at - the destination, 1,2,3,4. This is evident due to the fact that there - are four distinct salespersons. 
If record 2 is received before - record 1 due to a network loss during transmission, the transport - service can not deliver it and must therefore buffer it until record - 1 arrives. An ordered service, also referred to as a virtual circuit - or FIFO channel, provides the desired level of service in this case. - - At the other extreme, an unordered service is motivated in Figure 1.d - where the employee has implicitly specified that any ordering is - valid simply by omitting the "order by" clause. Here any of 4! = 24 - delivery orderings would satisfy the application, or from the - transport layer's point of view, all records are immediately - deliverable as soon as they arrive from the network. No record needs - to buffered should it arrive out of sequential order. As notation, 4 - ordered objects are written 1;2;3;4 and 4 unordered objects are - written using a parallel operator: 1||2||3||4. - - Figures 1.b and 1.c demonstrate two possible partial orders that - permit 2 and 4 orderings respectively at the destination. Using the - notation just described, the valid orderings for the query in 1.b are - specified as 1;(2||3);4, which is to say that record 1 must be - delivered first followed by record 2 and 3 in either order followed - by record 4. Likewise, the ordering for 1.c is (1||2);(3||4). In - these two cases, an ordered service is too strict and an unordered - service is not strict enough. 
- - - - - - - - - - - - - - - - - -Connolly, Amer & Conrad [Page 5] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - +-----------------------------------------------------------------+ - | SELECT SALESPERSON, LOCATION, CHARGES, DESCRIPTION | - | FROM BILLING_TABLE | - | | - | SALESPERSON LOCATION CHARGES DESCRIPTION | - | ------------- ----------------- --------- --------------- | - | 1 Anderson Atlanta, GA $4,200 Camping Gear | - | 2 Baker Boston, MA $849 Camping Gear | - | 3 Crowell Boston, MA $9,500 Sportswear | - | 4 Dykstra Wash., DC $1,000 Sportswear | - +=================================================================+ - |a - ORDER BY SALESPERSON | - | | - | 1,2,3,4 1,2,3,4 | - | | - | Sender -----------> NETWORK --------------> Receiver | - | (1 valid ordering) | - +-----------------------------------------------------------------+ - |b - ORDER BY LOCATION | - | 1,2,3,4 | - | 1,2,3,4 1,3,2,4 | - | | - | Sender -----------> NETWORK --------------> Receiver | - | (2 valid orderings) | - +-----------------------------------------------------------------+ - |c - ORDER BY DESCRIPTION | - | 1,2,3,4 | - | 2,1,3,4 | - | 1,2,3,4 1,2,4,3 | - | 2,1,4,3 | - | | - | Sender -----------> NETWORK --------------> Receiver | - | (4 valid orderings) | - +-----------------------------------------------------------------+ - |d - (no order by clause) | - | 1,2,3,4 | - | 1,2,4,3 | - | 1,2,3,4 ... | - | 4,3,2,1 | - | | - | Sender -----------> NETWORK --------------> Receiver | - | (4!=24 valid orderings) | - +-----------------------------------------------------------------+ - Figure 1: Ordered vs. Partial Ordered vs. Unordered Delivery - - It is vital for the transport layer to recognize the exact - requirements of the application and to ensure that these are met. 
- However, there is no inherent need to exceed these requirements; on - - - -Connolly, Amer & Conrad [Page 6] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - the contrary, by exceeding these requirements unecessary resources - are consumed. This example application requires a reliable - connection - all records must eventually be delivered - but has some - flexibility when it comes to record ordering. - - In this example, each query has a different partial order. In total, - there exist 16 different partial orders for the desired 4 records. - For an arbitrary number of objects N, there exist many possible - partial orders each of which accepts some number of valid orderings - between 1 and N! (which correspond to the ordered and unordered - cases respectively). For some classes of partial orders, the number - of valid orderings can be calculated easily, for others this - calculation is intractable. An in-depth discussion on calculating - and comparing the number of orderings for a given partial order can - be found in [ACCD93a]. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Connolly, Amer & Conrad [Page 7] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - -2.2 Example 2: Multimedia - - A second example application that motivates a partial order service - is a multimedia broadcast involving video, audio and text components. - Consider an extended presentation of the evening news - extended to - include two distinct audio channels, a text subtitle and a closed- - captioned sign language video for the hearing impaired, in addition - to the normal video signal, as modeled by the following diagram. 
- - (left audio) (right audio) - +------+ +------+ - | ++++ | | ++++ | - | ++++ | | ++++ | - +------+ +------+ - =================================================== - I +---------------+I - I | |I - I | (hand signs) |I - I | |I - I +---------------+I - I I - I I - I (Main Video) I - I I - I I - I I - I I - I +------------------------------------------+ I - I | (text subtitle) | I - I +------------------------------------------+ I - I I - =================================================== - Figure 2: Multimedia broadcast example - - The multimedia signals have differing characteristics. The main video - signal may consist of full image graphics at a rate of 30 images/sec - while the video of hand signs requires a lower quality, say 10 - images/sec. Assume the audio signals are each divided into 60 sound - fragments/sec and the text object each second consists of either (1) - new text, (2) a command to keep the previous second of text, or (3) a - command for no subtitle. - - During a one-second interval of the broadcast, a sender transmits 30 - full-motion video images, 10 closed-captioned hand sign images, 60 - packets of a digitized audio signal for each of the audio streams and - a single text packet. The following diagram then might represent the - characteristics of the multimedia presentation in terms of the media - types, the number of each, and their ordering. Objects connected by a - - - -Connolly, Amer & Conrad [Page 8] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - horizontal line must be received in order, while those in parallel - have no inherent ordering requirement. 
- -+----------------------------------------------------------------------+ -| | -| |-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-...-o-|-o-|-o-| right audio | -| | | | | | | | | | | | | (60/sec) | -| | | | | | | | | | | | | | -| |-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-|-o-...-o-|-o-|-o-| left audio | -| | | | | | | | (60/sec) | -| | | | | | | | | -| |---o---|---o---|---o---|---o---|---...---|---o---| normal video | -| | | | (30/sec) | -| | | | | -| |-----------o-----------|--------o--...--------o--| hand signs | -| | | (10/sec) | -| | | | -| |-----------------------------o-----...-----------| text | -| | | (1/sec) | -| | -+----------------------------------------------------------------------+ - Figure 3: Object ordering in multimedia application - - Of particular interest to our discussion of partial ordering is the - fact that, while objects of a given media type generally must be - received in order, there exists flexibility between the separate - "streams" of multimedia data (where a "stream" represents the - sequence of objects for a specific media type). Another significant - characteristic of this example is the repeating nature of the object - orderings. Figure 3 represents a single, one-second, partial order - snapshot in a stream of possibly thousands of repeating sequential - periods of communication. - - It is assumed that further synchronization concerns in presenting the - objects are addressed by a service provided on top of the proposed - partial order service. Temporal ordering for synchronized playback - is considered, for example, in [AH91, HKN91]. - -2.3 Example 3: Windows Screen Refresh - - A third example to motivate a partial order service involves - refreshing a workstation screen/display containing multiple windows - from a remote source. 
In this case, objects (icons, still or video - images) that do not overlap have a "parallel" relationship (i.e., - their order of refreshing is independent) while overlapping screen - objects have a "sequential" relationship and should be delivered in - order. Therefore, the way in which the windows overlap induces a - partial order. - - - -Connolly, Amer & Conrad [Page 9] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Consider the two cases in Figure 4. A sender wishes to refresh a - remote display that contains four active windows (objects) named {1 2 - 3 4}. Assume the windows are transmitted in numerical order and the - receiving application refreshes windows as soon as the transport - service delivers them. If the windows are configured as in Figure - 4a, then there exist two different orderings for redisplay, namely - 1,2,3,4 or 1,3,2,4. If window 2 is received before window 1, the - transport service cannot deliver it or an incorrect image will be - displayed. In Figure 4b, the structure of the windows results in six - possible orderings - 1,2,3,4 or 1,3,2,4 or 1,3,4,2 or 3,4,1,2 or - 3,1,4,2 or 3,1,2,4. - - +================================+============================+ - |a +-----------+ |b +----------+ | - | | 1 | | | 1 | | - | | | | | +----------+ | - | +---------+ +----------+ | +-----| 2 | | - | | 2 |----| 3 | | | | | - | | +-----------+ | | +----------+ | - | | | 4 | | | +----------+ | - | +-----| |-------+ | | 3 | | - | | | | | +----------+ | - | +-----------+ | +------| 4 | | - | | | | | - | | +----------+ | - | | | - | 1;(2||3);4 | (1;2)||(3;4) | - +================================+============================+ - Figure 4: Window screen refresh - -2.4 Potential Savings - - In each of these examples, the valid orderings are strictly dependent - upon, and must be specified by the application. 
Intuitively, as the - number of acceptable orderings increases, the amount of resources - utilized by a partial order transport service, in terms of buffers - and retransmissions, should decrease as compared to a fully ordered - transport service thus also decreasing the overall cost of the - connection. Just how much lower will depend largely upon the - flexibility of the application and the quality of the underlying - network. - - As an indication of the potential for improved service, let us - briefly look at the case where a database has the following 14 - records. - - - - - - -Connolly, Amer & Conrad [Page 10] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - SALESPERSON LOCATION CHARGES DESCRIPTION - ------------- ----------------- --------- --------------- - 1 Anderson Washington $4,200 Camping Gear - 2 Anderson Philadelphia $2,000 Golf Equipment - 3 Anderson Boston $450 Bowling shoes - 4 Baker Boston $849 Sportswear - 5 Baker Washington $3,100 Weights - 6 Baker Washington $2000 Camping Gear - 7 Baker Atlanta $290 Baseball Gloves - 8 Baker Boston $1,500 Sportswear - 9 Crowell Boston $9,500 Camping Gear - 10 Crowell Philadelphia $6,000 Exercise Bikes - 11 Crowell New York $1,500 Sportswear - 12 Dykstra Atlanta $1,000 Sportswear - 13 Dykstra Dallas $15,000 Rodeo Gear - 14 Dykstra Miami $3,200 Golf Equipment - - Using formulas derived in [ACCD93a] one may calculate the total - number of valid orderings for any partial order that can be - represented in the notation mentioned previously. For the case where - a user specifies "ORDER BY SALESPERSON", the partial order above can - be expressed as, - - (1||2||3);(4||5||6||7||8);(9||10||11);(12||13||14) - - Of the 14!=87,178,291,200 total possible combinations, there exist - 25,920 valid orderings at the destination. 
A service that may - deliver the records in any of these 25,920 orderings has a great deal - more flexibility than in the ordered case where there is only 1 valid - order for 14 objects. It is interesting to consider the real - possibility of hundreds or even thousands of objects and the - potential savings in communication costs. - - In all cases, the underlying network is assumed to be unreliable and - may thus introduce loss, duplication, and disorder. It makes no - sense to put a partial order service on top of a reliable network. - While the exact amount of unreliability in a network may vary and is - not always well understood, initial experimental research indicates - that real world networks, for example the service provided by the - Internet's IP level, "yield high losses, duplicates and reorderings - of packets" [AS93,BCP93]. The authors plan to conduct further - experimentation into measuring Internet network unreliability. This - information would say a great deal about the practical merit of a - partial order service. - - - - - - - -Connolly, Amer & Conrad [Page 11] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - -3. Reliability vs. Order - - While TCP avoids the loss of even a single object, in fact for many - applications, there exists a genuine ability to tolerate loss. - Losing one frame per second in a 30 frame per second video or losing - a segment of its accompanying audio channel is usually not a problem. - Bearing this in mind, it is of value to consider a quality of service - that combines a partial order with a level of tolerated loss (partial - reliability). Traditionally there exist 4 services: reliable- - ordered, reliable-unordered, unreliable-ordered, and unreliable- - unordered. See Figure 5. Reliable-ordered service (denoted by a - single point) represents the case where all objects are delivered in - the order transmitted. File transfer is an example application - requiring such a service. 
- - reliable-ordered reliable-unordered - | | - | | - v v - zero loss-->*---------------------------------* - min loss-->|<-- |<-- - . | | - . |<-- |<-- - | | - |<-- unreliable- |<-- unreliable- - RELIABILITY | ordered | unordered - |<-- |<-- - | | - |<-- |<-- - max loss-->| | - +-+--+--+--+--+--+--+--+--+--+--+-+ - ordered partial ordered unordered - - ORDER - - Figure 5: Quality Of Service: Reliability vs. Order - - Traditional Service Types - - In a reliable-unordered service (also a single point), all objects - must be delivered, but not necessarily according to the order - transmitted; in fact, any order will suffice. Some transaction - processing applications such as credit card verification require such - a service. - - Unreliable-ordered service allows some objects to be lost. Those - that are delivered, however, must arrive in relative order (An - "unreliable" service does not necessarily lose objects; rather, it - may do so without failing to provide its advertised quality of - - - -Connolly, Amer & Conrad [Page 12] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - service; e.g., the postal system provides an unreliable service). - Since there are varying degrees of unreliability, this service is - represented by a set of points in Figure 5. An unreliable-ordered - service is applicable to packet-voice or teleconferencing - applications. - - Finally unreliable-unordered service allows objects to be lost and - delivered in any order. This is the kind of service used for normal - e-mail (without acknowledgment receipts) and electronic announcements - or junk e-mail. - - As mentioned previously, the concept of a partial order expands the - order dimension from the two extremes of ordered and unordered to a - range of discrete possibilities as depicted in Figure 6. 
- Additionally, as will be discussed presently, the notion of - reliability is extended to allow for varying degrees of reliability - on a per-object basis providing even greater flexibility and improved - resource utilization. - - reliable-PO - - | | | | | | | | | | | | - | | | | | | | | | | | | - v v v v v v v v v v v v - zero loss-->*---------------------------------* - min loss-->| . . . . . . . . . . . | - . | . . . . . . . . . . . | - . | . . . . . . . . . . . | - | . . . . . . | - RELIABILITY | . . . unreliable-PO . . . | - | . . . . . . . . . . . | - | . . . . . . . . . . . | - | . . . . . . . . . . . | - | . . . . . . . . . . . | - max loss-->| . . . . . . . . . . . | - +-+--+--+--+--+--+--+--+--+--+--+-+ - ordered partial ordered unordered - - ORDER - - Figure 6: Quality Of Service: Reliability vs. Order - Partial - Order Service - -3.1 Reliability Classes - - When considering unreliable service, one cannot assume that all - objects are equal with regards to their reliability. This - classification is reasonable if all objects are identical (e.g., - - - -Connolly, Amer & Conrad [Page 13] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - video frames in a 30 frame/second film). Many applications, such as - multimedia systems, however, often contain a variety of object types. - Thus three object reliability classes are proposed: BART-NL, BART-L, - and NBART-L. Objects are assigned to one of these classes depending - on their temporal value as will be show presently. - - BART-NL objects must be delivered to the destination. These objects - have temporal value that lasts for an entire established connection - and require reliable delivery (NL = No Loss allowed). An example of - BART-NL objects would be the database records in Example 2.1 or the - windows in the screen refresh in Example 2.3. If all objects are of - type BART-NL, the service is reliable. 
One possible way to assure - eventual delivery of a BART-NL object in a protocol is for the sender - to buffer it, start a timeout timer, and retransmit it if no ACK - arrives before the timeout. The receiver in turn returns an ACK when - the object has safely arrived and been delivered (BART = Buffers, - ACKs, Retransmissions, Timers). - - BART-L objects are those that have temporal value over some - intermediate amount of time - enough to permit timeout and - retransmission, but not everlasting. Once the temporal value of - these objects has expired, it is better to presume them lost than to - delay further the delivery pipeline of information. One possibility - for deciding when an object's usefulness has expired is to require - each object to contain information defining its precise temporal - value [DS93]. An example of a BART-L object would be a movie - subtitle, sent in parallel with associated film images, which is - valuable any time during a twenty second film sequence. If not - delivered sometime during the first ten seconds, the subtitle loses - its value and can be presumed lost. These objects are buffered- - ACKed-retransmitted up to a certain point in time and then presumed - lost. - - NBART-L objects are those with temporal values too short to bother - timing out and retransmitting. An example of a NBART-L object would - be a single packet of speech in a packetized phone conversation or - one image in a 30 image/sec film. A sender transmits these objects - once and the service makes a best effort to deliver them. If the one - attempt is unsuccessful, no further attempts are made. - - An obvious question comes to mind - what about NBART-NL objects? Do - such objects exist? The authors have considered the notion of - communicating an object without the use of BART and still being able - to provide a service without loss. 
Perhaps with the use of forward - error correction this may become a viable alternative and could - certainly be included in the protocol. However, for our purposes in - this document, only the first three classifications will be - considered. - - - -Connolly, Amer & Conrad [Page 14] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - While classic transport protocols generally treat all objects - equally, the sending and receiving functions of a protocol providing - partial order/partial reliability service will behave differently for - each class of object. For example, a sender buffers and, if - necessary, retransmits any BART-NL or BART-L objects that are not - acknowledged within a predefined timeout period. On the contrary, - NBART-L objects are forgotten as soon as they are transmitted. - -4. Partial Order Connection - - The implementation of a protocol that provides partial order service - requires, at a minimum, (1) communication of the partial ordering - between the two endpoints, and (2) dynamic evaluation of the - deliverability of objects as they arrive at the receiver. In - addition, this RFC describes the mechanisms needed to (3) initiate a - connection, (4) provide varying degrees of reliability for the - objects being transmitted, and (5) improve buffer utilization at the - sender based on object reliability. - - Throughout the discussion of these issues, the authors use the - generic notion of "objects" in describing the service details. Thus, - one of the underlying requirements of a partial order service is the - ability to handle such an abstraction (e.g., recognize object - boundaries). The details of object management are implementation - dependent and thus are not specified in this RFC. However, as this - represents a potential fundamental change to the TCP protocol, some - discussion is in order. 
- - At one extreme, it is possible to consider octets as objects and - require that the application specify the partial order accordingly - (octet by octet). This likely would entail an inordinate amount of - overhead, processing each octet on an individual basis (literally - breaking up contiguous segments to determine which, if any, octets - are deliverable and which are not). At the other extreme, the - transport protocol could maintain object atomicity regardless of size - - passing arbitrarily large data structures to IP for transmission. - At the sending side of the connection this would actually work since - IP is prepared to perform source fragmentation, however, there is no - guarantee that the receiving IP will be able to reassemble the - fragments! IP relies on the TCP max segment size to prevent this - situation from occurring[LMKQ89]. - - A more realistic approach given the existing IP constraints might be - to maintain the current notion of a TCP max segment size for the - lower-layer interface with IP while allowing a much larger object - size at the upper-layer interface. Of course this presents some - additional complexities. First of all, the transport layer will now - have to be concerned with fragmentation/reassembly of objects larger - - - -Connolly, Amer & Conrad [Page 15] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - than the max segment size and secondly, the increased object sizes - will require significantly more buffer space at the receiver if we - want to buffer the object until it arrives in entirety. - Alternatively, one may consider delivering "fragments" of an object - as they arrive as long as the ordering of the fragments is correct - and the application is able to process the fragments (this notion of - fragmented delivery is discussed further in Section 6). 
- -4.1 Connection Establishment - - By extending the transport paradigm to allow partial ordering and - reliability classes, a user application may be able to take advantage - of a more efficient data transport facility by negotiating the - optimal service level which is required - no more, no less. This is - accomplished by specifying these variables as QOS parameters or, in - TCP terminology, as options to be included in the TCP header [Pos81]. - - A TCP implementation that provides a partial order service requires - the use of two new TCP options. The first is an enabling option - "POC-permitted" (Partial Order Connection Permitted) that may be used - in a SYN segment to request a partial order service. The other is - the "POC-service-profile" option which is used periodically to - communicate the service characteristics. This second option may be - sent only after successful transmission and acknowledgment of the - POC-permitted option. - - A user process issuing either an active or passive OPEN may choose to - include the POC-permitted option if the application can benefit from - the use of a partial order service and in fact, in cases where the - viability of such service is unknown, it is suggested that the option - be used and that the decision be left to the user's peer. - - For example, a multimedia server might issue a passive OPEN with the - POC-permitted option in preparation for the connection by a remote - user. - - Upon reception of a segment with the POC-permitted option, the - receiving user has the option to respond with a similar POC-permitted - indication or may reject a partial order connection if the - application does not warrant the service or the receiving user is - simply unable to provide such a service (e.g., does not recognize the - POC-permitted option). - - In the event that simultaneous initial segments are exchanged, - the TCP will initiate a partial order connection only if both sides - include the POC-permitted option. 
- - - - - -Connolly, Amer & Conrad [Page 16] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - A brief example should help to demonstrate this procedure. The - following notation (a slight simplification on that employed in RFC - 793) will be used. Each line is numbered for reference purposes. - TCP-A (on the left) will play the role of the receiver and TCP-B will - be the sender. Right arrows (-->) indicate departure of a TCP - segment from TCP-A to TCP-B, or arrival of a segment at B from A. - Left arrows indicate the reverse. TCP states represent the state - AFTER the departure or arrival of the segment (whose contents are - shown in the center of the line). Liberties are taken with the - contents of the segments where only the fields of interest are shown. - - TCP-A TCP-B - - 1. CLOSED LISTEN - - 2. SYN-SENT --> --> SYN-RECEIVED - - 3. ESTABLISHED <-- <-- SYN-RECEIVED - - 4. ESTABLISHED --> --> ESTABLISHED - - Figure 7. Basic 3-Way handshake for a partial order connection - - In line 1 of Figure 7, the sending user has already issued a passive - OPEN with the POC-permitted option and is waiting for a connection. - In line 2, the receiving user issues an active OPEN with the same - option which in turn prompts TCP-A to send a SYN segment with the - POC-permitted option and enter the SYN-SENT state. TCP-B is able to - confirm the use of a PO connection and does so in line 3, after which - TCP-A enters the established state and completes the connection with - an ACK segment in line 4. - - In the event that either side is unable to provide partial order - service, the POC-permitted option will be omitted and normal TCP - processing will ensue. - - For completeness, the authors include the following specification for - both the POC-permitted option and the POC-service-profile option in a - format consistent with the TCP specification document [Pos81]. 
- - TCP POC-permitted Option: - - Kind: 9 Length: - 2 bytes - - +-----------+-------------+ - | Kind=9 | Length=2 | - +-----------+-------------+ - - - - -Connolly, Amer & Conrad [Page 17] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - TCP POC-service-profile Option: - - Kind: 10 Length: 3 bytes - - 1 bit 1 bit 6 bits - +----------+----------+------------+----------+--------+ - | Kind=10 | Length=3 | Start_flag | End_flag | Filler | - +----------+----------+------------+----------+--------+ - - The first option represents a simple indicator communicated between - the two peer transport entities and needs no further explanation. - The second option serves to communicate the information necessary to - carry out the job of the protocol - the type of information which is - typically found in the header of a TCP segment - and raises some - interesting questions. - - Standard TCP maintains a 60-byte maximum header size on all segments. - The obvious intuition behind this rule is that one would like to - minimize the amount of overhead information present in each packet - while simultaneously increasing the payload, or data, section. While - this is acceptable for most TCP connections today, a partial-order - service would necessarily require that significantly more control - information be passed between transport entities at certain points - during a connection. Maintaining the strict interpretation of this - rule would prove to be inefficient. If, for example, the service - profile occupied a total of 400 bytes (a modest amount as will be - confirmed in the next section), then one would have to fragment this - information across at least 10 segments, allocating 20 bytes per - segment for the normal TCP header. 
- - Instead, the authors propose that the service profile be carried in - the data section of the segment and that the 3-byte POC-service- - profile option described above be placed in the header to indicate - the presence of this information. Upon reception of such a segment, - the TCP extracts the service profile and uses it appropriately as - will be discussed in the following sections. - - The option itself, as shown here, contains two 1-bit flags necessary - to handle the case where the service profile does not fit in a single - TCP segment. The "Start_flag" indicates that the information in the - data section represents the beginning of the service profile and the - "End_flag" represents the converse. For service profiles which fit - completely in a single segment, both flags will be set to 1. - Otherwise, the Start_flag is set in the initial segment and the - End_flag in the final segment allowing the peer entity to reconstruct - the entire service profile (using the normal sequence numbers in the - segment header). The "Filler" field serves merely to complete the - third byte of the option. - - - -Connolly, Amer & Conrad [Page 18] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Note that the length of the service profile may vary during the - connection as the order or reliability requirements of the user - change but this length must not exceed the buffering ability of the - peer TCP entity since the entire profile must be stored. The exact - makeup of this data structure is presented in Section 4.2. - -4.2 Data Transmission - - Examining the characteristics of a partial order TCP in chronological - fashion, one would start off with the establishment of a connection - as described in Section 4.1. 
After which, although both ends have - acknowledged the acceptability of partial order transport, neither - has actually begun a partial order transmission - in other words, - both the sending-side and the receiving-side are operating in a - normal, ordered-reliable mode. For the subsequent discussion, an - important distinction is made in the terms sending-side and - receiving-side which refer to the data flow from the sender and that - from the receiver, respectively. - - For the partial ordering to commence, the TCP must be made aware of - the acceptable object orderings and reliability for both the send- - side and receive-side of the connection for a given set of objects - (hereafter referred to as a "period"). This information is contained - in the service profile and it is the responsibility of the user - application to define this profile. Unlike standard TCP where - applications implicitly define a reliable, ordered profile; with - partial order TCP, the application must explicitly define a profile. - - The representation of the service profile is one of the concerns for - the transport protocol. It would be useful if the TCP could encode a - partial ordering in as few bits as possible since these bits will be - transmitted to the destination each time the partial order changes. - A matrix representation appears to be well-suited to encoding the - partial order and a vector has been proposed to communicate and - manage the reliability aspects of the service. Temporal values may - be included within the objects themselves or may be defined as a - function of the state of the connection [DS93]. Using these data - structures, the complete service profile would include (1) a partial - order matrix, (2) a reliability vector and (3) an object_sizes vector - which represents the size of the objects in octets (see - [ACCD93a,CAC93] for a discussion on alternative structures for these - variables). 
- - Throughout this section, we use the following service profile as a - running example. Shown here is a partial order matrix and graphical - representation for a simple partial order with 6 objects - - ((1;2)||(3;4)||5);6. In the graphical diagram, arrows (-->) denote - sequential order and objects in parallel can be delivered in either - - - -Connolly, Amer & Conrad [Page 19] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - order. So in this example, object 2 must be delivered after object - 1, object 4 must be delivered after object 3, and object 6 must be - delivered after objects 1 through 5 have all been delivered. Among - the 6 objects, there are 30 valid orderings for this partial order - (each valid ordering is known as a linear extension of the partial - order). - - 1 2 3 4 5 6 - +-------------+ - 1 | - 1 0 0 0 1 | | | | - 2 | - - 0 0 0 1 | |-->1-->|-->2-->| | - 3 | - - - 1 0 1 | | | | - 4 | - - - - 0 1 | |-->3-->|-->4-->|-->6-->| - 5 | - - - - - 1 | | | | - 6 | - - - - - - | |------>5------>| | - +-------------+ | | | - - PO Matrix PO Graph - - - In the matrix, a 1 in row i of column j denotes that object i must be - delivered before object j. Note that if objects are numbered in any - way such that 1,2,3,...,N is a valid ordering, only the upper right - triangle of the transitively closed matrix is needed [ACCD93a]. - Thus, for N objects, the partial order can be encoded in (N*(N-1)/2) - bits. - - The reliability vector for the case where reliability classes are - enumerated types such as {BART-NL=1, BART-L=2, NBART-L = 3} and all - objects are BART-NL would simply be, <1, 1, 1, 1, 1, 1>. Together - with the object_sizes vector, the complete service profile is - described. - - This information must be packaged and communicated to the sending TCP - before the first object is transmitted using a TCP service primitive - or comparable means depending upon the User/TCP interface. 
Once the - service profile has been specified to the TCP, it remains in effect - until the connection is closed or the sending user specifies a new - service profile. In the event that the largest object size can not - be processed by the receiving TCP, the user application is informed - that the connection cannot be maintained and the normal connection - close procedure is followed. - - Typically, as has been described here, the service profile definition - and specification is handled at the sending end of the connection, - but there could be applications (such as the screen refresh) where - the receiving user has this knowledge. Under these circumstances the - receiving user is obliged to transmit the object ordering on the - - - -Connolly, Amer & Conrad [Page 20] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - return side of the connection (e.g., when making the request for a - screen refresh) and have the sender interpret this data to be used on - the send side of the connection. - - Requiring that the sending application specify the service profile is - not an arbitrary choice. To ensure proper object identification, the - receiving application must transmit the new object numbering to the - sending application (not the sending transport layer). Since the - sending application must receive this information in any case, it - simplifies matters greatly to require that the sending application be - the only side that may specify the service profile to the transport - layer. - - Consider now the layered architecture diagram in Figure 8 and assume - that a connection already is established. Let us now say that UserA - specifies the service profile for the sending-side of the connection - via its interface with TCP-A. 
TCP-A places the profile in the header - of one or more data packets (depending upon the size of the service - profile, the profile may require several packets), sets the POC- - service-profile option and passes it to IP for transmission over the - network. This packet must be transmitted reliably, therefore TCP-A - buffers it and starts a normal retransmit timer. Subsequently, the - service profile arrives at the destination node and is handed to - TCP-B (as indicated by the arrows in Figure 8). TCP-B returns an - acknowledgment and immediately adopts the service profile for one - direction of data flow over the connection. When the acknowledgment - arrives back at TCP-A, the cycle is complete and both sides are now - able to use the partial order service. - - +--------+ +----------+ - Service | UserA | | UserB | - Profile +--------+ +----------+ - | | | - | | | - v | | - | +---------+ +-----------+ Service - | | TCP-A | | TCP-B | Profile - | +---------+ +-----------+ ^ - | | | | - | | | | - | | | | - | +---------------------------------------+ | - v | | | - ------>| ---- Service Profile -------------> |-----> - +---------------------------------------+ - - Figure 8. Layered Communication Architecture - - - - -Connolly, Amer & Conrad [Page 21] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Note that one of the TCP entities learns of the profile via its user - interface, while the other TCP entity is informed via its network - interface. - - For the remaining discussions, we will assume that a partial order - profile has been successfully negotiated for a single direction of - the connection (as depicted in Figure 8) and that we may now speak of - a "sending TCP" (TCP-A) and a "receiving TCP" (TCP-B). As such, - TCP-A refers to the partial order data stream as the "send-side" of - the connection, while TCP-B refers to the same data stream as the - "receive-side". 
- - Having established a partial order connection, the communicating TCPs - each have their respective jobs to perform to ensure proper data - delivery. The sending TCP ascertains the object ordering and - reliability from the service profile and uses this information in its - buffering/retransmission policy. The receiver modifications are more - significant, particularly the issues of object deliverability and - reliability. And both sides will need to redefine the notion of - window management. Let us look specifically at how each side of the - TCP connection is managed under this new paradigm. - -4.2.1 Sender - - The sender's concerns are still essentially four-fold - transmitting - data, managing buffer space, processing acknowledgments and - retransmitting after a time-out - however, each takes on a new - meaning in a partial order service. Additionally, the management of - the service profile represents a fifth duty not previously needed. - - Taking a rather simplistic view, normal TCP output processing - involves (1) setting up the header, (2) copying user data into the - outgoing segment, (3) sending the segment, (4) making a copy in a - send buffer for retransmission and (5) starting a retransmission - timer. The only difference with a partial order service is that the - reliability vector must be examined to determine whether or not to - buffer the object and start a timer - if the object is classified as - NBART-L, then steps 4 and 5 are omitted. - - Buffer management at the sending end of a partial order connection is - dependent upon the object reliability class and the object size. - When transmitting NBART-L objects the sender need not store the data - for later possible retransmission since NBART-L objects are never - retransmitted. The details of buffer management - such as whether to - allocate fixed-size pools of memory, or perhaps utilize a dynamic - heap allocation strategy - are left to the particular system - implementer. 
- - - - -Connolly, Amer & Conrad [Page 22] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Acknowledgment processing remains essentially intact - - acknowledgments are cumulative and specify the peer TCP's window - advertisement. However, determination of this advertisement is no - longer a trivial process dependent only upon the available buffer - space (this is discussed further in Section 4.2.2). Moreover, it - should be noted that the introduction of partial ordering and partial - reliability presents several new and interesting alternatives for the - acknowledgment policy. The authors are investigating several of - these strategies through a simulation model and have included a brief - discussion of these issues in Section 6. - - The retransmit function of the TCP is entirely unchanged and is - therefore not discussed further. - - For some applications, it may be possible to maintain the same - partial order for multiple periods (e.g., the application repeats the - same partial order). In the general case, however, the protocol must - be able to change the service profile during an existing connection. - When a change in the service profile is requested, the sending TCP is - obliged to complete the processing of the current partial order - before commencing with a new one. This ensures consistency between - the user applications in the event of a connection failure and - simplifies the protocol (future study is planned to investigate the - performance improvement gained by allowing concurrent different - partial orders). The current partial order is complete when all - sending buffers are free. Then negotiation of the new service - profile is performed in the same manner as with the initial profile. - - Combining these issues, we propose the following simplified state - machine for the protocol (connection establishment and tear down - remains the same and is not shown here). 
- - (1)Send Request (5)Ack Arrival - +------+ +-----------+ - | | | | - | V | | - +----------+ (4) New PO Profile +----------+ | - +---->| |----------------------->| PO |<-----+ - | | ESTAB | | | - (2) | | | | SETUP | - Ack +-----| |<-----------------------| |<-----+ - Arrival +----------+ (7)PO Setup Complete +----------+ | - ^ | | | - | | | | - +------+ +---------+ - (3)Timeout (6)Timeout - - - - - -Connolly, Amer & Conrad [Page 23] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Event (1) - User Makes a Data Send Request - ========= - If Piggyback Timer is set then - cancel piggyback timer - Package and send the object (with ACK for receive-side) - If object type = (BART-L,BART-NL) then - Store the object and start a retransmit timer - If sending window is full then - Block Event (1) - allow no further send requests from user - - Event (2) - ACK Arrives - ========= - If ACKed object(s) is buffered then - Release the buffer(s) and stop the retransmit timer(s) - Extract the peer TCP's window advertisement - If remote TCP's window advertisement > sending window then - Enable Event (1) - If remote TCP's window advertisement <= sending window then - Block Event (1) - allow no further send requests from user - Adjust sending window based on received window advertisement - - Event (3) - Retransmit Timer Expires - ========= - If Piggyback Timer is set then - cancel piggyback timer - Re-transmit the segment (with ACK for receive-side) - Restart the timer - - Event (4) - PO Service Profile Arrives at the User Interface - ========= - Transition to the PO SETUP state - Store the Send-side PO service profile - Package the profile into 1 or more segments, setting the - POC-Service-Profile option on each - If Piggyback Timer is set then - cancel piggyback timer - Send the segment(s) (with ACK for receive-side) - Store the segment(s) and start a retransmit timer - - Event (5) - ACK Arrival - ========= - If ACKed object(s) is buffered then - Release 
the buffer(s) and stop the retransmit timer(s) - Extract the peer TCP's window advertisement - If all objects from previous service profile have been ACKed and - the new service profile has been ACKed then enable Event (7) - - - - - -Connolly, Amer & Conrad [Page 24] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Event (6) - Retransmit Timer Expires - ========= - If Piggyback Timer is set then - cancel piggyback timer - Re-transmit the segment (with ACK for receive-side) - Restart the timer - - Event (7) - PO Setup Completed - ========= - Transition to the ESTAB state and begin processing new service - profile - -4.2.2 Receiver - - The receiving TCP has additional decisions to make involving object - deliverability, reliability and window management. Additionally, the - service profile must be established (and re-established) periodically - and some special processing must be performed at the end of each - period. - - When an object arrives, the question is no longer, "is this the next - deliverable object?", but rather, "is this ONE OF the next - deliverable objects?" Hence, it is convenient to think of a - "Deliverable Set" of objects with a partial order protocol. To - determine the elements of this set and answer the question of - deliverability, the receiver relies upon the partial order matrix - but, unlike the sender, the receiver dynamically updates the matrix - as objects are processed thus making other objects (possibly already - buffered objects) deliverable as well. A check of the object type - also must be performed since BART-NL and BART-L objects require an - ACK to be returned to the sender but NBART-L do not. Consider our - example from the previous section. 
- - 1 2 3 4 5 6 - +-------------+ - 1 | - 1 0 0 0 1 | | | | - 2 | - - 0 0 0 1 | |-->1-->|-->2-->| | - 3 | - - - 1 0 1 | | | | - 4 | - - - - 0 1 | |-->3-->|-->4-->|-->6-->| - 5 | - - - - - 1 | | | | - 6 | - - - - - - | |------>5------>| | - +-------------+ | | | - - PO Matrix PO Graph - - When object 5 arrives, the receiver scans column 5, finds that the - object is deliverable (since there are no 1's in the column) and - immediately delivers the object to the user application. Then, the - - - -Connolly, Amer & Conrad [Page 25] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - matrix is updated to remove the constraint of any object whose - delivery depends on object 5 by clearing all entries of row 5. This - may enable other objects to be delivered (for example, if object 2 is - buffered then the delivery of object 1 will make object 2 - deliverable). This leads us to the next issue - delivery of stored - objects. - - In general, whenever an object is delivered, the buffers must be - examined to see if any other stored object(s) becomes deliverable. - CAC93 describes an efficient algorithm to implement this processing - based on traversing the precedence graph. - - Consideration of object reliability is interesting. The authors have - taken a polling approach wherein a procedure is executed - periodically, say once every 100 milliseconds, to evaluate the - temporal value of outstanding objects on which the destination is - waiting. Those whose temporal value has expired (i.e. which are no - longer useful as defined by the application) are "declared lost" and - treated in much the same manner as delivered objects - the matrix is - updated, and if the object type is BART-L, an ACK is sent. Any - objects from the current period which have not yet been delivered or - declared lost are candidates for the "Terminator" as the procedure is - called. 
The Terminator's criterion is not specifically addressed in - this RFC, but one example might be for the receiving user to - periodically pass a list of no-longer-useful objects to TCP-B. - - Another question which arises is, "How does one calculate the send - and receive windows?" With a partial order service, these windows - are no longer contiguous intervals of objects but rather sets of - objects. In fact, there are three sets which are of interest to the - receiving TCP one of which has already been mentioned - the - Deliverable Set. Additionally, we can think of the Bufferable Set - and the Receivable Set. Some definitions are in order: - - Deliverable Set: objects which can be immediately passed up to - the user. - - Buffered Set: objects stored in a buffer awaiting delivery. - - Bufferable Set: objects which can be stored but not immediately - delivered (due to some ordering constraint). - - Receivable Set: union of the Deliverable Set and the Bufferable - Set (which are disjoint) - intuitively, all objects which - are "receivable" must be either "deliverable" or - "bufferable". - - - - - -Connolly, Amer & Conrad [Page 26] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - The following example will help to illustrate these sets. Consider - our simple service profile from earlier for the case where the size - of each object is 1 MByte and the receiver has only 2 MBytes of - buffer space (enough for 2 objects). Define a boolean vector of - length N (N = number of objects in a period) called the Processed - Vector which is used to indicate which objects from the current - period have been delivered or declared lost. 
Initially, all buffers - are empty and the PO Matrix and Processed Vector are as shown here, - - 1 2 3 4 5 6 - +-------------+ - 1 | - 1 0 0 0 1 | - 2 | - - 0 0 0 1 | - 3 | - - - 1 0 1 | - 4 | - - - - 0 1 | - 5 | - - - - - 1 | [ F F F F F F ] - 6 | - - - - - - | 1 2 3 4 5 6 - +-------------+ - - PO Matrix Processed Vector - - From the PO Matrix, it is clear that the Deliverable Set = - {(1,1),(1,3),(1,5)}, where (1,1) refers to object #1 from period #1, - assuming that the current period is period #1. - - The Bufferable Set, however, depends upon how one defines bufferable - objects. Several approaches are possible. The authors' initial - approach to determining the Bufferable Set can best be explained in - terms of the following rules, - - Rule 1: Remaining space must be allocated for all objects from - period i before any object from period i+1 is buffered - - Rule 2: In the event that there exists enough space to buffer - some but not all objects from a given period, space will - be reserved for the first objects (i.e. 1,2,3,...,k) - - With these rules, the Bufferable Set = {(1,2),(1,4)}, the Buffered - Set is trivially equal to the empty set, { }, and the Receivable Set - = {(1,1),(1,2),(1,3),(1,4),(1,5)}. - - Note that the current acknowledgment scheme uses the min and max - values in the Receivable Set for its window advertisement which is - transmitted in all ACK segments sent along the receive-side of the - connection (from receiver to sender). Moreover, the - "piggyback_delay" timer is still used to couple ACKs with return data - (as utilized in standard TCP). - - - - -Connolly, Amer & Conrad [Page 27] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Returning to our example, let us now assume that object 1 and then 3 - arrive at the receiver and object 2 is lost. 
After processing both - objects, the PO Matrix and Processed Vector will have the following - updated structure, - - 1 2 3 4 5 6 - +-------------+ - 1 | - 0 0 0 0 0 | - 2 | - - 0 0 0 1 | - 3 | - - - 0 0 0 | - 4 | - - - - 0 1 | - 5 | - - - - - 1 | [ T F T F F F ] - 6 | - - - - - - | 1 2 3 4 5 6 - +-------------+ - - PO Matrix Processed Vector - - We can see that the Deliverable Set = {(1,2),(1,4),(1,5)}, but what - should the Bufferable Set consist of? Since only one buffer is - required for the current period's objects, we have 1 Mbyte of - additional space available for "future" objects and therefore include - the first object from period #2 in both the Bufferable and the - Receivable Set, - - Deliverable Set = {(1,2),(1,4),(1,5)} - - Bufferable Set = {(1,6),(2,1)} - - Buffered Set = { } - - Receivable Set = {(1,2),(1,4),(1,5),(1,6),(2,1)} - - In general, the notion of window management takes on new meaning with - a partial order service. One may re-examine the classic window - relations with a partial order service in mind and devise new, less - restrictive relations which may shed further light on the operation - of such a service. - - Two final details: (1) as with the sender, the receiver must - periodically establish or modify the PO service profile and (2) upon - processing the last object in a period, the receiver must re-set the - PO matrix and Processed vector to their initial states. - - - - - - - - - -Connolly, Amer & Conrad [Page 28] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Let us look at the state machine and pseudo-code for the receiver. 
- - (2)Data Segment Arrival (5)PO Profile fragment Arrival - +------+ +-------+ - | | | | - | V (1)First PO Profile | V - +---------+ fragment arrives +---------+(6) Data Segment - +---->| |----------------------->| |<-----+ Arrival - | | ESTAB | | PO |------+ - | | | | | - | | | | SETUP |<-----+ -(3) +-----| |<-----------------------| |------+ -Terminator+---------+ (9)PO Setup complete +---------+(7) Terminator - ^ | | ^ - | | | | - +------+ +------+ - (4)Piggyback Timeout (8)Piggyback Timeout - - - Event 1 - First PO Service Profile fragment arrives at network - ======= interface - Transition to the PO SETUP state - Store the PO service profile (fragment) - Send an Acknowledgement of the PO service profile (fragment) - - Event 2 - Data Segment Arrival - ======= - If object is in Deliverable Set then - Deliver the object - Update PO Matrix and Processed Vector - Check buffers for newly deliverable objects - If all objects from current period have been processed then - Start the next period (re-initialize data structures) - Start piggyback_delay timer to send an ACK - Else if object is in Bufferable Set then - Store the object - Else - Discard object - Start piggyback_delay timer to send an ACK - - Event 3 - Periodic call of the Terminator - ======= - For all unprocessed objects in the current period do - If object is "no longer useful" then - Update PO Matrix and Processed Vector - If object is in a buffer then - Release the buffer - Check buffers for newly deliverable objects - - - -Connolly, Amer & Conrad [Page 29] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - If all objects from current period have been processed - then Start the next period (re-initialize data - structures) - - Event 4 - Piggyback_delay Timer Expires - ======= - Send an ACK - Disable piggyback_delay timer - - Event 5 - PO Service Profile fragment arrives at network interface - ======= - Store the PO service profile (fragment) - Send an Acknowledgement of the PO 
service profile (fragment) - If entire PO Service profile has been received then enable Event - (9) - - Event 6 - Data Segment arrival - ======= - (See event 2) - - Event 7 - Periodic call of the terminator - ======= - (See Event 3) - - Event 8 - Piggyback_delay Timer Expires - ======= - (See Event 4) - - Event 9 - PO Setup Complete - ======= - Transition to the ESTAB state - - Note that, for reasons of clarity, we have used a transitively closed - matrix representation of the partial order. A more efficient - implementation based on an adjacency list representation of a - transitively reduced precedence graph results in a more efficient - running time [CAC93]. - -5. Quantifying and Comparing Partial Order Services - - While ordered, reliable delivery is ideal, the existence of less- - than-ideal underlying networks can cause delays for applications that - need only partial order or partial reliability. By introducing a - partial order service, one may in effect relax the requirements on - order and reliability and presumably expect some savings in terms of - buffer utilization and bandwidth (due to fewer retransmissions) and - shorter overall delays. A practical question to be addressed is, - "what are the expected savings likely to be?" - - - -Connolly, Amer & Conrad [Page 30] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - As mentioned in Section 2, the extent of such savings will depend - largely on the quality of the underlying network - bandwidth, delay, - amount and distribution of loss/duplication/disorder - as well as the - flexibility of the partial order itself - specified by the PO matrix - and reliability vector. If the underlying network has no loss, a - partial order service essentially becomes an ordered service. - Collecting experimental data to ascertain realistic network - conditions is a straightforward task and will help to quantify in - general the value of a partial order service [Bol93]. 
But how can - one quantify and compare the cost of providing specific levels of - service? - - Preliminary research indicates that the number of linear extensions - (orderings) of a partial order in the presence of loss effectively - measures the complexity of that order. The authors have derived - formulae for calculating the number of extensions when a partial - order is series-parallel and have proposed a metric for comparing - partial orders based on this number [ACCD93b]. This metric could be - used as a means for charging for the service, for example. What also - may be interesting is a specific head-to-head comparison between - different partial orders with varying degrees of flexibility. Work - is currently underway on a simulation model aimed at providing this - information. And finally, work is underway on an implementation of - TCP which includes partial order service. - -6. Future Direction - - In addition to the simulation and implementation work the authors are - pursuing several problems related to partial ordering which will be - mentioned briefly. - - An interesting question arises when discussing the acknowledgment - strategy for a partial order service. For classic protocols, a - cumulative ACK of object i confirms all objects "up to and including" - i. But the meaning of "up to and including" with a partial order - service has different implications than with an ordered service. - - Consider our example partial order, ((1;2)||(3;4)||5);6. What - should a cumulative ACK of object 4 confirm? The most logical - definition would say it confirms receipt of object 4 and all objects - that precede 4 in the partial order, in this case, object 3. Nothing - is said about the arrival of objects 1 or 2. With this alternative - interpretation where cumulative ACKs depend on the partial order, the - sender must examine the partial order matrix to determine which - buffers can be released. 
In this example, scanning column 4 of the - matrix reveals that object 3 must come before object 4 and therefore - both object buffers (and any buffers from a previous period) can be - released. - - - -Connolly, Amer & Conrad [Page 31] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - Other partial order acknowledgment policies are possible for a - protocol providing a partial order service including the use of - selective ACKs (which has been proposed in [JB88] and implemented in - the Cray TCP [Chang93]) as well as the current TCP strategy where an - ACK of i also ACKs everything <= i (in a cyclical sequence number - space). The authors are investigating an ACK policy which utilizes a - combination of selective and "partial-order-cumulative" - acknowledgments. This is accomplished by replacing the current TCP - cumulative ACK with one which has the partial order meaning as - described above and augmenting this with intermittent selective ACKs - when needed. - - In another area, the notion of fragmented delivery, mentioned in the - beginning of Section 4, looks like a promising technique for certain - classes of applications which may offer a substantial improvement in - memory utilization. Briefly, the term fragmented delivery refers to - the ability to transfer less-than-complete objects between the - transport layer and the user application (or session layer as the - case may be). For example, a 1Mbyte object could potentially be - delivered in multiple "chunks" as segments arrive thus freeing up - valuable memory and reducing the delay on those pieces of data. The - scenario becomes somewhat more complex when multiple "parallel - streams" are considered where the application could now receive - pieces of multiple objects associated with different streams. 
- - Additional work in the area of implementing a working partial order - protocol is being performed both at the University of Delaware and at - the LAAS du CNRS laboratory in Toulouse, France - particularly in - support of distributed, high-speed, multimedia communication. It will - be interesting to examine the processing requirements for an - implementation of a partial order protocol at key events (such as - object arrival) compared with a non-partial order implementation. - - Finally, the authors are interested in the realization of a network - application utilizing a partial order service. The aim of such work - is threefold: (1) provide further insight into the expected - performance gains, (2) identify new issues unique to partial order - transport and, (3) build a road-map for application designers - interested in using a partial order service. - -7. Summary - - This RFC introduces the concepts of a partial order service and - discusses the practical issues involved with including partial - ordering in a transport protocol. The need for such a service is - motivated by several applications including the vast fields of - distributed databases, and multimedia. The service has been - presented as a backward-compatible extension to TCP to adapt to - - - -Connolly, Amer & Conrad [Page 32] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - applications with different needs specified in terms of QOS - parameters. - - The notion of a partial ordering extends QOS flexibility to include - object delivery, reliability, and temporal value thus allowing the - transport layer to effectively handle a wider range of applications - (i.e., any which might benefit from such mechanisms). The service - profile described in Section 4 accurately characterizes the QOS for a - partial order service (which encompasses the two extremes of total - ordered and unordered transport as well). 
- - Several significant modifications have been proposed and are - summarized here: - - (1) Replacing the requirement for ordered delivery with one for - application-dependent partial ordering - - (2) Allowing unreliable and partially reliable data transport - - (3) Conducting a non-symmetrical connection (not entirely foreign - to TCP, the use of different MSS values for the two sides - of a connection is an example) - - (4) Management of "objects" rather than octets - - (5) Modified acknowledgment strategy - - (6) New definition for the send and receive "windows" - - (7) Extension of the User/TCP interface to include certain - QOS parameters - - (8) Use of new TCP options - - As evidenced by this list, a partial order and partial reliability - service proposes to re-examine several fundamental transport - mechanisms and, in so doing, offers the opportunity for substantial - improvement in the support of existing and new application areas. - - - - - - - - - - - - - -Connolly, Amer & Conrad [Page 33] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - -8. References - - [ACCD93a] Amer, P., Chassot, C., Connolly, T., and M. Diaz, - "Partial Order Transport Service for Multimedia - Applications: Reliable Service", Second International - Symposium on High Performance Distributed Computing - (HPDC-2), Spokane, Washington, July 1993. - - [ACCD93b] Amer, P., Chassot, C., Connolly, T., and M. Diaz, - "Partial Order Transport Service for Multimedia - Applications: Unreliable Service", Proc. INET '93, San - Francisco, August 1993. - - [AH91] Anderson, D., and G. Homsy, "A Continuous Media I/O - Server and its Synchronization Mechanism", IEEE - Computer, 24(10), 51-57, October 1991. - - [AS93] Agrawala, A., and D. Sanghi, "Experimental Assessment - of End-to-End Behavior on Internet," Proc. IEEE INFOCOM - '93, San Francisco, CA, March 1993. - - [BCP93] Claffy, K., Polyzos, G., and H.-W. Braun, "Traffic - Characteristics of the T1 NSFNET", Proc. 
IEEE INFOCOM - '93, San Francisco, CA, March 1993. - - [Bol93] Bolot, J., "End-to-End Packet Delay and Loss Behavior - in the Internet", SIGCOMM '93, Ithaca, NY, September - 1993. - - [CAC93] Conrad, P., Amer, P., and T. Connolly, "Improving - Performance in Transport-Layer Communications Protocols - by using Partial Orders and Partial Reliability", - Work in Progress, December 1993. - - [Chang93] Chang, Y., "High-Speed Transport Protocol Evaluation -- - the Final Report", MCNC Center for Communications - Technical Document, February 1993. - - [Dee89] Deering, S., "Host Extensions for IP Multicasting," STD - 5, RFC 1112 Stanford University, August 1989. - - [DS93] Diaz, M., and P. Senac, "Time Stream Petri Nets: A - Model for Multimedia Synchronization", Proceedings of - Multimedia Modeling '93, Singapore, 1993. - - - - - - - -Connolly, Amer & Conrad [Page 34] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - - [HKN91] Hardt-Kornacki, S., and L. Ness, "Optimization Model - for the Delivery of Interactive Multimedia Documents", - In Proc. Globecom '91, 669-673, Phoenix, Arizona, - December 1991. - - [JB88] Jacobson, V., and R. Braden, "TCP Extensions for - Long-Delay Paths", RFC 1072, LBL, USC/Information - Sciences Institute, October 1988. - - [JBB92] Jacobson, V., Braden, R., and D. Borman, "TCP - Extensions for High Performance", RFC 1323, LBL, Cray - Research, USC/Information Sciences Institute, May 1992. - - [LMKQ89] Leffler, S., McKusick, M., Karels, M., and J. - Quarterman, "4.3 BSD UNIX Operating System", - Addison-Wesley Publishing Company, Reading, MA, 1989. - - [OP91] O'Malley, S., and L. Peterson, "TCP Extensions - Considered Harmful", RFC 1263, University of Arizona, - October 1991. - - [Pos81] Postel, J., "Transmission Control Protocol - DARPA - Internet Program Protocol Specification," STD 7, - RFC 793, DARPA, September 1981. - -Security Considerations - - Security issues are not discussed in this memo. 
- - - - - - - - - - - - - - - - - - - - - - - -Connolly, Amer & Conrad [Page 35] - -RFC 1693 An Extension to TCP: Partial Order Service November 1994 - - -Authors' Addresses - - Tom Connolly - 101C Smith Hall - Department of Computer & Information Sciences - University of Delaware - Newark, DE 19716 - 2586 - - EMail: connolly@udel.edu - - - Paul D. Amer - 101C Smith Hall - Department of Computer & Information Sciences - University of Delaware - Newark, DE 19716 - 2586 - - EMail: amer@udel.edu - - - Phill Conrad - 101C Smith Hall - Department of Computer & Information Sciences - University of Delaware - Newark, DE 19716 - 2586 - - EMail: pconrad@udel.edu - - - - - - - - - - - - - - - - - - - - - - - - -Connolly, Amer & Conrad [Page 36] - diff --git a/kernel/picotcp/RFC/rfc1877.txt b/kernel/picotcp/RFC/rfc1877.txt deleted file mode 100644 index 843c15c..0000000 --- a/kernel/picotcp/RFC/rfc1877.txt +++ /dev/null @@ -1,339 +0,0 @@ - - - - - - -Network Working Group S. Cobb -Request for Comments: 1877 Microsoft -Category: Informational December 1995 - - - PPP Internet Protocol Control Protocol Extensions for - Name Server Addresses - -Status of this Memo - - This memo provides information for the Internet community. This memo - does not specify an Internet standard of any kind. Distribution of - this memo is unlimited. - -Abstract - - The Point-to-Point Protocol (PPP) [1] provides a standard method for - transporting multi-protocol datagrams over point-to-point links. PPP - defines an extensible Link Control Protocol and a family of Network - Control Protocols (NCPs) for establishing and configuring different - network-layer protocols. - - This document extends the NCP for establishing and configuring the - Internet Protocol over PPP [2], defining the negotiation of primary - and secondary Domain Name System (DNS) [3] and NetBIOS Name Server - (NBNS) [4] addresses. - -Table of Contents - - 1. Additional IPCP Configuration options ................. 
1 - 1.1 Primary DNS Server Address .................... 2 - 1.2 Primary NBNS Server Address ................... 3 - 1.3 Secondary DNS Server Address .................. 4 - 1.4 Secondary NBNS Server Address ................. 5 - REFRENCES .................................................... 6 - SECURITY CONSIDERATIONS ...................................... 6 - CHAIR'S ADDRESS .............................................. 6 - AUTHOR'S ADDRESS ............................................. 6 - -1. Additional IPCP Configuration Options - - The four name server address configuration options, 129 to 132, - provide a method of obtaining the addresses of Domain Name System - (DNS) servers and (NetBIOS Name Server (NBNS) nodes on the remote - network. - - - - - - -Cobb Informational [Page 1] - -RFC 1877 PPP IPCP Extensions December 1995 - - - Primary and secondary addresses are negotiated independently. They - serve identical purposes, except that when both are present an - attempt SHOULD be made to resolve names using the primary address - before using the secondary address. - - For implementational convenience, these options are designed to be - identical in format and behavior to option 3 (IP-Address) which is - already present in most IPCP implementations. - - Since the usefulness of name server address information is dependent - on the topology of the remote network and local peer's application, - it is suggested that these options not be included in the list of - "IPCP Recommended Options". - -1.1. Primary DNS Server Address - - Description - - This Configuration Option defines a method for negotiating with - the remote peer the address of the primary DNS server to be used - on the local end of the link. If local peer requests an invalid - server address (which it will typically do intentionally) the - remote peer specifies the address by NAKing this option, and - returning the IP address of a valid DNS server. - - By default, no primary DNS address is provided. 
- - A summary of the Primary DNS Address Configuration Option format is - shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Primary-DNS-Address - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - Primary-DNS-Address (cont) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Type - - 129 - - Length - - 6 - - - - - - -Cobb Informational [Page 2] - -RFC 1877 PPP IPCP Extensions December 1995 - - - Primary-DNS-Address - - The four octet Primary-DNS-Address is the address of the primary - DNS server to be used by the local peer. If all four octets are - set to zero, it indicates an explicit request that the peer - provide the address information in a Config-Nak packet. - - Default - - No address is provided. - -1.2. Primary NBNS Server Address - - Description - - This Configuration Option defines a method for negotiating with - the remote peer the address of the primary NBNS server to be used - on the local end of the link. If local peer requests an invalid - server address (which it will typically do intentionally) the - remote peer specifies the address by NAKing this option, and - returning the IP address of a valid NBNS server. - - By default, no primary NBNS address is provided. - - A summary of the Primary NBNS Address Configuration Option format is - shown below. The fields are transmitted from left to right. 
- - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Primary-NBNS-Address - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - Primary-NBNS-Address (cont) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Type - - 130 - - Length - - 6 - - Primary-NBNS-Address - - The four octet Primary-NBNS-Address is the address of the primary - NBNS server to be used by the local peer. If all four octets are - set to zero, it indicates an explicit request that the peer - - - -Cobb Informational [Page 3] - -RFC 1877 PPP IPCP Extensions December 1995 - - - provide the address information in a Config-Nak packet. - - Default - - No address is provided. - -1.3. Secondary DNS Server Address - - Description - - This Configuration Option defines a method for negotiating with - the remote peer the address of the secondary DNS server to be used - on the local end of the link. If local peer requests an invalid - server address (which it will typically do intentionally) the - remote peer specifies the address by NAKing this option, and - returning the IP address of a valid DNS server. - - By default, no secondary DNS address is provided. - - A summary of the Secondary DNS Address Configuration Option format is - shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Secondary-DNS-Address - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - Secondary-DNS-Address (cont) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Type - - 131 - - Length - - 6 - - Secondary-DNS-Address - - The four octet Secondary-DNS-Address is the address of the primary - NBNS server to be used by the local peer. 
If all four octets are - set to zero, it indicates an explicit request that the peer - provide the address information in a Config-Nak packet. - - Default - - No address is provided. - - - -Cobb Informational [Page 4] - -RFC 1877 PPP IPCP Extensions December 1995 - - -1.4. Secondary NBNS Server Address - - Description - - This Configuration Option defines a method for negotiating with - the remote peer the address of the secondary NBNS server to be - used on the local end of the link. If local peer requests an - invalid server address (which it will typically do intentionally) - the remote peer specifies the address by NAKing this option, and - returning the IP address of a valid NBNS server. - - By default, no secondary NBNS address is provided. - - A summary of the Secondary NBNS Address Configuration Option format - is shown below. The fields are transmitted from left to right. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Secondary-NBNS-Address - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - Secondary-NBNS-Address (cont) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Type - - 132 - - Length - - 6 - - Secondary-NBNS-Address - - The four octet Secondary-NBNS-Address is the address of the - secondary NBNS server to be used by the local peer. If all - four octets are set to zero, it indicates an explicit request - that the peer provide the address information in a Config-Nak - packet. - - Default - - No address is provided. - - - - - - - - -Cobb Informational [Page 5] - -RFC 1877 PPP IPCP Extensions December 1995 - - -References - - [1] Simpson, W., Editor, "The Point-to-Point Protocol (PPP)", STD 51, - RFC 1661, Daydreamer, July 1994. - - [2] McGregor, G., "PPP Internet Control Protocol", RFC 1332, Merit, - May 1992. - - [3] Auerbach, K., and A. 
Aggarwal, "Protocol Standard for a NetBIOS - Service on a TCP/UDP Transport", STD 19, RFCs 1001 and 1002, - March 1987. - - [4] Mockapetris, P., "Domain Names - Concepts and Facilities", STD - 13, RFC 1034, USC/Information Sciences Institute, November 1987. - - [5] Mockapetris, P., "Domain Names - Implementation and - Specification", STD 13, RFC 1035, USC/Information Sciences - Institute, November 1987. - -Security Considerations - - Security issues are not discussed in this memo. - -Chair's Address - - The working group can be contacted via the current chair: - - Fred Baker - Cisco Systems - 519 Lado Drive - Santa Barbara, California 93111 - - EMail: fred@cisco.com - -Author's Address - - Questions about this memo can also be directed to: - - Steve Cobb - Microsoft Corporation - One Microsoft Way - Redmond, WA 98052-6399 - - Phone: (206) 882-8080 - - EMail: stevec@microsoft.com - - - - - -Cobb Informational [Page 6] - diff --git a/kernel/picotcp/RFC/rfc1936.txt b/kernel/picotcp/RFC/rfc1936.txt deleted file mode 100644 index 479fe37..0000000 --- a/kernel/picotcp/RFC/rfc1936.txt +++ /dev/null @@ -1,1179 +0,0 @@ - - - - - - -Network Working Group J. Touch -Request For Comments: 1936 B. Parham -Category: Informational ISI - April 1996 - - - Implementing the Internet Checksum in Hardware - -Status of This Memo - - This memo provides information for the Internet community. This memo - does not specify an Internet standard of any kind. Distribution of - this memo is unlimited. - -Abstract - - This memo presents a techniques for efficiently implementing the - Internet Checksum in hardware. It includes PLD code for programming a - single, low cost part to perform checksumming at 1.26 Gbps. - -Introduction - - The Internet Checksum is used in various Internet protocols to check - for data corruption in headers (e.g., IP) [4] and packet bodies (e.g, - UDP, TCP) [5][6]. Efficient software implementation of this checksum - has been addressed in previous RFCs [1][2][3][7]. 
- - Efficient software implementations of the Internet Checksum algorithm - are often embedded in data copying operations ([1], Section 2). This - copy operation is increasingly being performed by dedicated direct - memory access (DMA) hardware. As a result, DMA hardware designs are - beginning to incorporate dedicated hardware to compute the Internet - Checksum during the data transfer. - - This note presents the architecture of an efficient, pipelined - Internet Checksum mechanism, suitable for inclusion in DMA hardware - [8]. This design can be implemented in a relatively inexpensive - programmable logic device (PLD) (1995 cost of $40), and is capable of - supporting 1.26 Gbps transfer rates, at 26 ns per 32-bit word. - Appendix A provides the pseudocode for such a device. This design has - been implemented in the PC-ATOMIC host interface hardware [8]. We - believe this design is of general use to the Internet community. - - - - - - - - - -Touch & Parham Informational [Page 1] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - - The remainder of this document is organized as follows: - - Review of the Internet Checksum - One's Complement vs. Two's Complement Addition - Interfaces - Summary - Appendix A - PLD source code - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Touch & Parham Informational [Page 2] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -A Review of the Internet Checksum - - The Internet Checksum is used for detecting corruption in a block of - data [1]. It is initialized to zero, and computed as the complement - of the ones-complement sum of the data, taken in 16-bit units. A - subsequent checksum of the data and checksum together should generate - a zero checksum if no errors are detected. 
- - The checksum allows [1]: - - - byte order "independence" - reordered output is equivalent to reordered input - - 16-bit word-order independence - reordering 16-bit words preserves the output - - incremental computation - - deferred carries - - parallel summation - a result of deferred carries, incremental - computation, and 16-bit word order independence - - This note describes an implementation that computes two partial - checksums in parallel, over the odd and even 16-bit half-words of - 32-bit data. The result is a pair of partial checksums (odd and - even), which can be combined, and the result inverted to generate the - true Internet Checksum. This technique is related to the long-word - parallel summation used in efficient software implementations [1]. - - +------------------+ +------------------+ - | high half-word | | low half-word | - | ones-complement | | ones-complement | - | partial checksum | | partial checksum | - +------------------+ +------------------+ - \ / - * (ones-complement sum) - | - +------------------+ - | partial checksum | - +------------------+ - | - * (ones-complement negative) - | - +-------------------+ - | final | - | Internet Checksum | - +-------------------+ - - - - - - -Touch & Parham Informational [Page 3] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -One's Complement vs. Two's Complement Addition - - The Internet Checksum is composed of a ones-complement lookahead - adder and a bit-wise inverter. A ones-complement adder can be built - either using twos-complement components, or natively. - - A twos-complement implementation of a ones-complement adder requires - either two twos-complement adders, or two cycles per add. The sum is - performed, then the high-bit carry-out is propagated to the carry-in, - and a second sum is performed. 
(ones-complement addition is {+1s} and - twos-complement is {+2s}) - - a {+1s} b == (a {+2s} b) + carry(a {+2s} b) - - e.g., - halfword16 a,b; - word32 c; - a {+1s} b == r - such that: - c = a {+2s} b; # sum value - r = (c & 0xFFFF) {+2s} (c >> 16); # sum carry - - Bits of a twos-complement lookahead adder are progressively more - complex in carry lookahead. (OR the contents of each row, where terms - are AND'd or XOR'd {^}) - - 4-bit carry-lookahead 2's complement adder: - a,b : input data - p : carry propagate, where pi = ai*bi = (ai)(bi) - g : carry generate, where gi = ai + bi - - Out0 := a0 ^ b0 ^ ci - - Out1 := a1 ^ b1 ^ (cip0 + g0) - - Out2 := a2 ^ b2 ^ (cip0p1 + g0p1 + g1) - - Out3 := a3 ^ b3 ^ (cip0p1p2 + g0p1p2 + g1p2 + g2) - - Cout := cip0p1p2p3 + g0p1p2p3 + g1p2p3 + g2p3 + g3 - - The true ones-complement lookahead adder recognizes that the carry- - wrap of the twos-complement addition is equivalent to a toroidal - carry-lookahead. Bits of a ones-complement lookahead adder are all - the same complexity, that of the high-bit of a twos-complement - lookahead adder. Thus the ones-complement sum (and thus the Internet - Checksum) is bit-position independent. We replace `ci' with the `co' - expression and reduce. (OR terms in each row pair). - - - -Touch & Parham Informational [Page 4] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - - 4-bit carry-lookahead 1's complement ring adder: - - Out0 = a0 ^ b0 ^ (g3 + g2p3 + g1p2p3 + g0p1p2p3) - - Out1 = a1 ^ b1 ^ (g3p0 + g2p3p0 + g1p2p3p0 + g0) - - Out2 = a2 ^ b2 ^ (g3p0p1 + g2p3p0p1 + g1 + g0p1) - - Out3 = a3 ^ b3 ^ (g3p0p1p2 + g2 + g1p2 + g0p1p2) - - A hardware implementation can use this toroidal design directly, - together with conventional twos-complement fast-adder internal - components, to perform a pipelined ones-complement adder [8]. - - A VLSI implementation could use any full-lookahead adder, adapted to - be toroidal and bit-equivalent, as above. 
In our PLD implementation, - we implement the adders via 2- and 3-bit full-lookahead sub- - components. The adder components are chained in a ring via carry bit - registers. This relies on delayed carry-propagation to implement a - carry pipeline between the fast-adder stages. - - Full-lookahead adders in a toroidal pipeline - - +-+-+-+ +-+-+-+ +-+-+ +-+-+-+ +-+-+-+ +-+-+ - |i|i|i| |i|i|i| |i|i| |i|i|i| |i|i|i| |i|i| - |F|E|D| |C|B|A| |9|8| |7|6|5| |4|3|2| |1|0| - +-+-+-+ +-+-+-+ +-+-+ +-+-+-+ +-+-+-+ +-+-+ - "+" "+" "+" "+" "+" "+" - +-+-+-+ +-+-+-+ +-+-+ +-+-+-+ +-+-+-+ +-+-+ - |s|s|s| |s|s|s| |s|s| |s|s|s| |s|s|s| |s|s| - |F|E|D| |C|B|A| |9|8| |7|6|5| |4|3|2| |1|0| - +-+-+-+ +-+-+-+ +-+-+ +-+-+-+ +-+-+-+ +-+-+ - v | v | v | v | v | v | +--+ - | ^ | ^ | ^ | ^ | ^ | ^ v | - | +-+ +-+ +-+ +-+ +-+ +-+ | - | |c| |c| |c| |c| |c| |c| | - | |5| |4| |3| |2| |1| |0| | - | +-+ +-+ +-+ +-+ +-+ +-+ | - +----------------------------------------------------------+ - - Implementation of fast-adders in PLD hardware is currently limited to - 3-bits, because an i-bit adder requires 4+2^i product terms, and - current PLDs support only 16 product terms. The resulting device - takes at most 5 "idle" clock periods for the carries to propagate - through the accumulation pipeline. - - - - - - -Touch & Parham Informational [Page 5] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -Interfaces - - The above device has been installed in a VL-Bus PC host interface - card [8]. It has a hardware and software interface, defined as - follows. 
- - Hardware Interface - - The Internet Checksum hardware appears as a single-port 32-bit - register, with clock and control signals [8]: - - +----------------------+ - CLR--->| | - OE---->| 32-bit register as | - CLK--->| 2 adjacent 16-bit |<---/---> 32-bit data bus - ICLK-->| ones-complement sums | - ADD--->| | - +----------------------+ - - CLR = zero the register - OE = write the register onto the data bus - CLK = clock to cycle the pipeline operation - ICLK = input data latch clock - ADD = initiating an add of latched input data - - CLR causes the contents of the checksum register and input latch to - be zeroed. There is no explicit load; a CLR followed by a write of - the load value to a dummy location is equivalent. - - The OE causes the register to be written to the data bus, or tri- - stated. - - The CLK causes the pipeline to operate. If no new input data is - latched to be added (via ICLK, ADD), a virtual "zero" is summed into - the register, to permit the pipeline to empty. - - The ICLK (transparently) latches the value on the data bus to be - latched internally, to be summed into the accumulator on the next ADD - signal. The ADD signal causes the latched input data (ICLK) to be - accumulated into the checksum pipeline. ADD and ICLK are commonly - tied together. One 32-bit data value can be latched and accumulated - into the pipeline adder every 26-ns clock, assuming data is stable - when the ADD/ICLK signal occurs. - - The internal 32-bit register is organized as two 16-bit ones- - complement sums, over the even and odd 16-bit words of the data - stream. To compute the Internet Checksum from this quantity, ones- - complement add the halves together, and invert the result. - - - -Touch & Parham Informational [Page 6] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - - Software Interface - - The device is used as a memory-mapped register. The register is read - by performing a read on its equivalent memory location. 
- - The device is controlled via an external memory-mapped register. Bits - in this control register clear the device (set/clear the CLR line), - and enable and disable the device (set/clear the ADD line). The CLR - line can alternatively be mapped to a memory write, e.g., such that - reading the location is a non-destructive read of the checksum - register, and a write of any value clears the checksum register. The - enable/disable control must be stored in an external register. - - The device is designed to operate in background during memory - transfers (either DMA or programmed I/O). Once enabled, all transfers - across that bus are summed into the checksum register. The checksum - is available 5 clocks after the last enabled data accumulation. This - delay is often hidden by memory access mechanisms and bus - arbitration. If required, "stall" instructions can be executed for - the appropriate delay. - - For the following example, we assume that the device is located at - CKSUMLOC. We assume that reading that location reads the checksum - register, and writing any value to that location clears the register. - The control register is located at CTLLOC, and the checksum - enable/disable bit is CKSUMBIT, where 1 is enabled, and 0 is - disabled. 
To perform a checksum, a programmer would clear the - register, (optionally initialize the checksum), initiate a series of - transfers, and use the result: - - /******* initialization *******/ - *(CTLLOC) &= ~((ctlsize)(CKSUMBIT)); /* disable sum */ - (word32)(*(CKSUMLOC)) = 0; /* clear reg */ - *(CTLLOC) |= CKSUMBIT; /* enable sum */ - { (optional) write initial value to a dummy location } - - /***** perform a transfer *****/ - { do one or more DMA or PIO transfers - read or write } - - /***** gather the results *****/ - *(CTLLOC) &= ~((ctlsize)(CKSUMBIT)); /* disable sum */ - sum = (word32)(*(CKSUMLOC)); /* read sum */ - sum = (sum & 0xFFFF) + (sum >> 16); /* fold halves */ - sum = (sum & 0xFFFF) + (sum >> 16); /* add in carry */ - ipcksum = (halfword16)(~(sum & 0xFFFF)); /* 1's negative */ - - - - - - -Touch & Parham Informational [Page 7] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -Summary - - This note describes the design of a hardware Internet Checksum that - can be implemented in an inexpensive PLD, achieving 1.26 Gbps. This - design has been implemented in the PC-ATOMIC host interface hardware - [8]. We believe this design is of general use to the Internet - community. - -Security Considerations - - Security considerations are not addressed here. The Internet Checksum - is not intended as a security measure. - -Acknowledgements - - The authors would like to thank the members of the "High-Performance - Computing and Communications", notably Mike Carlton, and "Advanced - Systems" Divisions at ISI for their assistance in the development of - the hardware, and this memo. - -References - - [1] Braden, R., Borman, D., and Partridge, C., "Computing the - Internet Checksum," Network Working Group RFC-1071, ISI, Cray - Research, and BBN Labs, Sept. 1988. - - [2] Mallory, T., and Kullberg, A., "Incremental Updating of the - Internet Checksum," Network Working Group RFC-1141, BBN Comm., - Jan. 1990. 
- - [3] Plummer, W., "TCP Checksum Function Design," IEN-45, BBN, 1978, - included as an appendix in RFC-1071. - - [4] Postel, Jon, "Internet Protocol," Network Working Group RFC- - 791/STD-5, ISI, Sept. 1981. - - [5] Postel, Jon, "User Datagram Protocol," Network Working Group - RFC-768/STD-6, ISI, Aug. 1980. - - [6] Postel, Jon, "Transmission Control Protocol," Network Working - Group RFC-793/STD-7, ISI, Sept. 1981. - - [7] Rijsinghani, A., "Computation of the Internet Checksum via - Incremental Update," Network Working Group RFC-1624, Digital - Equipment Corp., May 1994. - - [8] Touch, J., "PC-ATOMIC", ISI Tech. Report. SR-95-407, June 1995. - - - - -Touch & Parham Informational [Page 8] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -Authors' Addresses - - Joe Touch - University of Southern California/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292-6695 - USA - Phone: +1 310-822-1511 x151 - Fax: +1 310-823-6714 - URL: http://www.isi.edu/~touch - EMail: touch@isi.edu - - - Bruce Parham - University of Southern California/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292-6695 - USA - Phone: +1 310-822-1511 x101 - Fax: +1 310-823-6714 - EMail: bparham@isi.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Touch & Parham Informational [Page 9] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -Appendix A: PLD source code - -The following is the PLD source code for an AMD MACH-435 PLD. The -MACH-435 is composed of 8 22V10-equivalent PLD blocks, connected by a -configurable internal matrix. - - ----- (PLD source code follows) ---- - -TITLE PC-ATOMIC IP Sum Accelerator - 1-clock 2- and 3-bit 26 ns version -PATTERN ip_sum -REVISION 1.01 -AUTHOR J. Touch & B. 
Parham -COMPANY USC/ISI -DATE 06/21/94 - -CHIP ip_sum MACH435 - -; accumulates in 1 clock (1 level of logic) -; -; resources allocated to reduce fitting time -; -; uses an input register "dl" to latch the data bus values on rising edge -; accumulates a hi/lo ones-complement sum in register "q" -; the input and output are accessed via bidirectional pins "dq" -; -; uses 2 groups of 6 carry bit registers "cy" -; -; use 3-bit full-adders with carry lookahead (settles in 6 clocks) -; group 16 bits as [000102 030405 0607 080910 111213 1415] -; [161718 192021 2223 242526 272829 3031] -; -; locking the pins down speeds up fitting and is designed to force -; 4-bit components into single "segments" of the PLD. -; we could have indicated the same thing via: -; GROUP MACH_SEG_A dq[6..0] -; GROUP MACH_SEG_B dq[14..8] -; GROUP MACH_SEG_C dq[22..16] -; GROUP MACH_SEG_D dq[30..24] - -; -; control pins: -; -PIN 20 clk ; adder clock -PIN 62 ip_add ; add current data to sum -PIN 83 ip_sum_ena ; output current sum -PIN 41 ip_clr ; clear current sum -PIN 23 ip_dclk ; input data latch (tied to clk, or not) - - - -Touch & Parham Informational [Page 10] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; dq are data bus pins -; dl is the input register -; -PIN [9..3] dq[6..0] IPAIR dl[6..0] ; IO port -PIN [18..12] dq[14..8] IPAIR dl[14..8] ; IO port -PIN [30..24] dq[22..16] IPAIR dl[22..16] ; IO port -PIN [39..33] dq[30..24] IPAIR dl[30..24] ; IO port -PIN ? dq[31,23,15,7] IPAIR dl[31,23,15,7] ; IO port - -; -; q is the partial checksum register -; dl is the input register -; dq are the data bus pins -; -NODE ? q[31..0] OPAIR dq[31..0] ; internal data in reg -NODE ? dl[31..0] REG ; input reg - -; -; cy are the carry register bits -; -NODE ? 
cy[31,29,26,23,21,18,15,13,10,7,5,2] REG - ;1-bit internal carry bits - -EQUATIONS - -; -; .trst is the tri-state control, 0 means these are always inputs -; -ip_add.trst = 0 -ip_clr.trst = 0 -ip_sum_ena.trst = 0 - -; -; grab data to the input register on every clock (irrelevant if invalid) -; -dl[31..0].clkf = ip_dclk ; grab data all the time - ; don't use setf, rstf, or trst for dl - ; we want dl to map to input registers, not internal cells - ; besides, input registers don't need setf, rstf, or trst - -; -; control of the checksum register -; -dq[31..0].clkf = clk ; clk clocks everything -dq[31..0].setf = gnd ; never preset registers -dq[31..0].rstf = ip_clr ; clear on reset -dq[31..0].trst = ip_sum_ena ; ena outputs sum - read - - - -Touch & Parham Informational [Page 11] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; control for the carry register -; -cy[31,29,26,23,21,18,15,13,10,7,5,2].clkf = clk -cy[31,29,26,23,21,18,15,13,10,7,5,2].setf = gnd ; never preset -cy[31,29,26,23,21,18,15,13,10,7,5,2].rstf = ip_clr ; clear on reset - -; -; INPUT DATA LATCH -; nothing fancy here - grab all inputs when ip_add signal is high -; i.e., grab data in input register -; -dl[31..0] := dq[31..0] - -; -; COMBINATORIAL ADDER -; -; built as a series of 2-bit and 3-bit (carry-lookahead) full-adders -; with carries sent to the carry register "pipeline" -; -; sum[n] are sum bits -; cy[m] are carry bits -; ":+:" is XOR - -; -; SUM[0] = (A0 :+: B0 :+: CARRY_IN) -; -; CY[0] = ((A0 * B0) + ((A0 :+: B0) * CARRY_IN)) -; -; actually, the latter can be rewritten as -; -; CY[0] = ((A0 * B0) + ((A0 + B0) * CARRY_IN)) -; -; because the XOR won't be invalidated by the AND case, since the -; result is always 1 from the first term then anyway -; this helps reduce the number of XOR terms required, which are -; a limited resource in PLDs -; - - - - - - - - - - - - - -Touch & Parham Informational [Page 12] - -RFC 1936 Implementing the Internet Checksum in Hardware 
April 1996 - - -; SUM THE LOW-ORDER WORD -; - -; -; the first 5 bits [0..4] of the low-order word -; -q[0] := (q[0] :+: (ip_add * dl[0]) :+: cy[15]) - -q[1] := (q[1] :+: (ip_add * dl[1]) :+: - ((ip_add * - (q[0] * dl[0] + - dl[0] * cy[15])) + - (q[0] * cy[15]))) - -q[2] := (q[2] :+: (ip_add * dl[2]) :+: - ((ip_add * - (q[1] * dl[1] + - q[1] * q[0] * dl[0] + - dl[1] * q[0] * dl[0] + - q[1] * dl[0] * cy[15] + - dl[1] * dl[0] * cy[15] + - dl[1] * q[0] * cy[15])) + - (q[1] * q[0] * cy[15]))) - -cy[2] := ((ip_add * - (q[2] * dl[2] + - q[2] * q[1] * dl[1] + - dl[2] * q[1] * dl[1] + - q[2] * q[1] * q[0] * dl[0] + - q[2] * dl[1] * q[0] * dl[0] + - dl[2] * q[1] * q[0] * dl[0] + - dl[2] * dl[1] * q[0] * dl[0] + - q[2] * q[1] * dl[0] * cy[15] + - q[2] * dl[1] * q[0] * cy[15] + - q[2] * dl[1] * dl[0] * cy[15] + - dl[2] * q[1] * q[0] * cy[15] + - dl[2] * q[1] * dl[0] * cy[15] + - dl[2] * dl[1] * q[0] * cy[15] + - dl[2] * dl[1] * dl[0] * cy[15])) + - (q[2] * q[1] * q[0] * cy[15])) - -q[3] := (q[3] :+: (ip_add * dl[3]) :+: cy[2]) - -q[4] := (q[4] :+: (ip_add * dl[4]) :+: - ((ip_add * - (q[3] * dl[3] + - dl[3] * cy[2])) + - (q[3] * cy[2]))) - - - -Touch & Parham Informational [Page 13] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; the next 3 bits [5..7] of the low-order word -; -q[5] := (q[5] :+: (ip_add * dl[5]) :+: - ((ip_add * - (q[4] * dl[4] + - q[4] * q[3] * dl[3] + - dl[4] * q[3] * dl[3] + - q[4] * dl[3] * cy[2] + - dl[4] * dl[3] * cy[2] + - dl[4] * q[3] * cy[2])) + - (q[4] * q[3] * cy[2]))) - -cy[5] := ((ip_add * ( - q[5] * dl[5] + - q[5] * q[4] * dl[4] + - dl[5] * q[4] * dl[4] + - q[5] * q[4] * q[3] * dl[3] + - q[5] * dl[4] * q[3] * dl[3] + - dl[5] * q[4] * q[3] * dl[3] + - dl[5] * dl[4] * q[3] * dl[3] + - q[5] * q[4] * dl[3] * cy[2] + - q[5] * dl[4] * q[3] * cy[2] + - q[5] * dl[4] * dl[3] * cy[2] + - dl[5] * q[4] * q[3] * cy[2] + - dl[5] * q[4] * dl[3] * cy[2] + - dl[5] * dl[4] * q[3] * cy[2] + - dl[5] * dl[4] * dl[3] * cy[2])) + - (q[5] 
* q[4] * q[3] * cy[2])) - -q[6] := (q[6] :+: (ip_add * dl[6]) :+: cy[5]) - -q[7] := (q[7] :+: (ip_add * dl[7]) :+: - ((ip_add * - (q[6] * dl[6] + - dl[6] * cy[5])) + - (q[6] * cy[5]))) - -cy[7] := ((ip_add * - (q[7] * dl[7] + - q[7] * q[6] * dl[6] + - dl[7] * q[6] * dl[6] + - q[7] * dl[6] * cy[5] + - dl[7] * dl[6] * cy[5] + - dl[7] * q[6] * cy[5])) + - (q[7] * q[6] * cy[5])) - - - - - -Touch & Parham Informational [Page 14] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; the next 5 bits [8..12] of the low-order word -; -q[8] := (q[8] :+: (ip_add * dl[8]) :+: cy[7]) - -q[9] := (q[9] :+: (ip_add * dl[9]) :+: - ((ip_add * - (q[8] * dl[8] + - dl[8] * cy[7])) + - (q[8] * cy[7]))) - -q[10] := (q[10] :+: (ip_add * dl[10]) :+: - ((ip_add * - (q[9] * dl[9] + - q[9] * q[8] * dl[8] + - dl[9] * q[8] * dl[8] + - q[9] * dl[8] * cy[7] + - dl[9] * dl[8] * cy[7] + - dl[9] * q[8] * cy[7])) + - (q[9] * q[8] * cy[7]))) - -cy[10] := ((ip_add * - (q[10] * dl[10] + - q[10] * q[9] * dl[9] + - dl[10] * q[9] * dl[9] + - q[10] * q[9] * q[8] * dl[8] + - q[10] * dl[9] * q[8] * dl[8] + - dl[10] * q[9] * q[8] * dl[8] + - dl[10] * dl[9] * q[8] * dl[8] + - q[10] * q[9] * dl[8] * cy[7] + - q[10] * dl[9] * q[8] * cy[7] + - q[10] * dl[9] * dl[8] * cy[7] + - dl[10] * q[9] * q[8] * cy[7] + - dl[10] * q[9] * dl[8] * cy[7] + - dl[10] * dl[9] * q[8] * cy[7] + - dl[10] * dl[9] * dl[8] * cy[7])) + - (q[10] * q[9] * q[8] * cy[7])) - -q[11] := (q[11] :+: (ip_add * dl[11]) :+: cy[10]) - -q[12] := (q[12] :+: (ip_add * dl[12]) :+: - ((ip_add * - (q[11] * dl[11] + - dl[11] * cy[10])) + - (q[11] * cy[10]))) - - - - - - -Touch & Parham Informational [Page 15] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; the final 3 bits [13..15] of the low-order word -; -q[13] := (q[13] :+: (ip_add * dl[13]) :+: - ((ip_add * - (q[12] * dl[12] + - q[12] * q[11] * dl[11] + - dl[12] * q[11] * dl[11] + - q[12] * dl[11] * cy[10] + - dl[12] * dl[11] * cy[10] + - dl[12] * 
q[11] * cy[10])) + - (q[12] * q[11] * cy[10]))) - -cy[13] := ((ip_add * ( - q[13] * dl[13] + - q[13] * q[12] * dl[12] + - dl[13] * q[12] * dl[12] + - q[13] * q[12] * q[11] * dl[11] + - q[13] * dl[12] * q[11] * dl[11] + - dl[13] * q[12] * q[11] * dl[11] + - dl[13] * dl[12] * q[11] * dl[11] + - q[13] * q[12] * dl[11] * cy[10] + - q[13] * dl[12] * q[11] * cy[10] + - q[13] * dl[12] * dl[11] * cy[10] + - dl[13] * q[12] * q[11] * cy[10] + - dl[13] * q[12] * dl[11] * cy[10] + - dl[13] * dl[12] * q[11] * cy[10] + - dl[13] * dl[12] * dl[11] * cy[10])) + - (q[13] * q[12] * q[11] * cy[10])) - -q[14] := (q[14] :+: (ip_add * dl[14]) :+: cy[13]) - -q[15] := (q[15] :+: (ip_add * dl[15]) :+: - ((ip_add * - (q[14] * dl[14] + - dl[14] * cy[13])) + - (q[14] * cy[13]))) - -cy[15] := ((ip_add * - (q[15] * dl[15] + - q[15] * q[14] * dl[14] + - dl[15] * q[14] * dl[14] + - q[15] * dl[14] * cy[13] + - dl[15] * dl[14] * cy[13] + - dl[15] * q[14] * cy[13])) + - (q[15] * q[14] * cy[13])) - - - - - -Touch & Parham Informational [Page 16] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; SUM THE HIGH-ORDER WORD -; - -; -; the first 5 bits [16..20] of the high-order word -; -q[16] := (q[16] :+: (ip_add * dl[16]) :+: cy[31]) - -q[17] := (q[17] :+: (ip_add * dl[17]) :+: - ((ip_add * - (q[16] * dl[16] + - dl[16] * cy[31])) + - (q[16] * cy[31]))) - -q[18] := (q[18] :+: (ip_add * dl[18]) :+: - ((ip_add * - (q[17] * dl[17] + - q[17] * q[16] * dl[16] + - dl[17] * q[16] * dl[16] + - q[17] * dl[16] * cy[31] + - dl[17] * dl[16] * cy[31] + - dl[17] * q[16] * cy[31])) + - (q[17] * q[16] * cy[31]))) - -cy[18] := ((ip_add * - (q[18] * dl[18] + - q[18] * q[17] * dl[17] + - dl[18] * q[17] * dl[17] + - q[18] * q[17] * q[16] * dl[16] + - q[18] * dl[17] * q[16] * dl[16] + - dl[18] * q[17] * q[16] * dl[16] + - dl[18] * dl[17] * q[16] * dl[16] + - q[18] * q[17] * dl[16] * cy[31] + - q[18] * dl[17] * q[16] * cy[31] + - q[18] * dl[17] * dl[16] * cy[31] + - dl[18] * q[17] * q[16] * cy[31] + - 
dl[18] * q[17] * dl[16] * cy[31] + - dl[18] * dl[17] * q[16] * cy[31] + - dl[18] * dl[17] * dl[16] * cy[31])) + - (q[18] * q[17] * q[16] * cy[31])) - -q[19] := (q[19] :+: (ip_add * dl[19]) :+: cy[18]) - -q[20] := (q[20] :+: (ip_add * dl[20]) :+: - ((ip_add * - (q[19] * dl[19] + - dl[19] * cy[18])) + - (q[19] * cy[18]))) - - - -Touch & Parham Informational [Page 17] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; the next 3 bits [21..23] of the high-order word -; -q[21] := (q[21] :+: (ip_add * dl[21]) :+: - ((ip_add * - (q[20] * dl[20] + - q[20] * q[19] * dl[19] + - dl[20] * q[19] * dl[19] + - q[20] * dl[19] * cy[18] + - dl[20] * dl[19] * cy[18] + - dl[20] * q[19] * cy[18])) + - (q[20] * q[19] * cy[18]))) - -cy[21] := ((ip_add * ( - q[21] * dl[21] + - q[21] * q[20] * dl[20] + - dl[21] * q[20] * dl[20] + - q[21] * q[20] * q[19] * dl[19] + - q[21] * dl[20] * q[19] * dl[19] + - dl[21] * q[20] * q[19] * dl[19] + - dl[21] * dl[20] * q[19] * dl[19] + - q[21] * q[20] * dl[19] * cy[18] + - q[21] * dl[20] * q[19] * cy[18] + - q[21] * dl[20] * dl[19] * cy[18] + - dl[21] * q[20] * q[19] * cy[18] + - dl[21] * q[20] * dl[19] * cy[18] + - dl[21] * dl[20] * q[19] * cy[18] + - dl[21] * dl[20] * dl[19] * cy[18])) + - (q[21] * q[20] * q[19] * cy[18])) - -q[22] := (q[22] :+: (ip_add * dl[22]) :+: cy[21]) - -q[23] := (q[23] :+: (ip_add * dl[23]) :+: - ((ip_add * - (q[22] * dl[22] + - dl[22] * cy[21])) + - (q[22] * cy[21]))) - -cy[23] := ((ip_add * - (q[23] * dl[23] + - q[23] * q[22] * dl[22] + - dl[23] * q[22] * dl[22] + - q[23] * dl[22] * cy[21] + - dl[23] * dl[22] * cy[21] + - dl[23] * q[22] * cy[21])) + - (q[23] * q[22] * cy[21])) - - - - - -Touch & Parham Informational [Page 18] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; the next 5 bits [24..28] of the high-order word -; -q[24] := (q[24] :+: (ip_add * dl[24]) :+: cy[23]) - -q[25] := (q[25] :+: (ip_add * dl[25]) :+: - ((ip_add * - (q[24] * dl[24] + - dl[24] * 
cy[23])) + - (q[24] * cy[23]))) - -q[26] := (q[26] :+: (ip_add * dl[26]) :+: - ((ip_add * - (q[25] * dl[25] + - q[25] * q[24] * dl[24] + - dl[25] * q[24] * dl[24] + - q[25] * dl[24] * cy[23] + - dl[25] * dl[24] * cy[23] + - dl[25] * q[24] * cy[23])) + - (q[25] * q[24] * cy[23]))) - -cy[26] := ((ip_add * - (q[26] * dl[26] + - q[26] * q[25] * dl[25] + - dl[26] * q[25] * dl[25] + - q[26] * q[25] * q[24] * dl[24] + - q[26] * dl[25] * q[24] * dl[24] + - dl[26] * q[25] * q[24] * dl[24] + - dl[26] * dl[25] * q[24] * dl[24] + - q[26] * q[25] * dl[24] * cy[23] + - q[26] * dl[25] * q[24] * cy[23] + - q[26] * dl[25] * dl[24] * cy[23] + - dl[26] * q[25] * q[24] * cy[23] + - dl[26] * q[25] * dl[24] * cy[23] + - dl[26] * dl[25] * q[24] * cy[23] + - dl[26] * dl[25] * dl[24] * cy[23])) + - (q[26] * q[25] * q[24] * cy[23])) - -q[27] := (q[27] :+: (ip_add * dl[27]) :+: cy[26]) - -q[28] := (q[28] :+: (ip_add * dl[28]) :+: - ((ip_add * - (q[27] * dl[27] + - dl[27] * cy[26])) + - (q[27] * cy[26]))) - - - - - - -Touch & Parham Informational [Page 19] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; the final 3 bits [29..31] of the high-order word -; -q[29] := (q[29] :+: (ip_add * dl[29]) :+: - ((ip_add * - (q[28] * dl[28] + - q[28] * q[27] * dl[27] + - dl[28] * q[27] * dl[27] + - q[28] * dl[27] * cy[26] + - dl[28] * dl[27] * cy[26] + - dl[28] * q[27] * cy[26])) + - (q[28] * q[27] * cy[26]))) - -cy[29] := ((ip_add * ( - q[29] * dl[29] + - q[29] * q[28] * dl[28] + - dl[29] * q[28] * dl[28] + - q[29] * q[28] * q[27] * dl[27] + - q[29] * dl[28] * q[27] * dl[27] + - dl[29] * q[28] * q[27] * dl[27] + - dl[29] * dl[28] * q[27] * dl[27] + - q[29] * q[28] * dl[27] * cy[26] + - q[29] * dl[28] * q[27] * cy[26] + - q[29] * dl[28] * dl[27] * cy[26] + - dl[29] * q[28] * q[27] * cy[26] + - dl[29] * q[28] * dl[27] * cy[26] + - dl[29] * dl[28] * q[27] * cy[26] + - dl[29] * dl[28] * dl[27] * cy[26])) + - (q[29] * q[28] * q[27] * cy[26])) - -q[30] := (q[30] :+: (ip_add * 
dl[30]) :+: cy[29]) - -q[31] := (q[31] :+: (ip_add * dl[31]) :+: - ((ip_add * - (q[30] * dl[30] + - dl[30] * cy[29])) + - (q[30] * cy[29]))) - -cy[31] := ((ip_add * - (q[31] * dl[31] + - q[31] * q[30] * dl[30] + - dl[31] * q[30] * dl[30] + - q[31] * dl[30] * cy[29] + - dl[31] * dl[30] * cy[29] + - dl[31] * q[30] * cy[29])) + - (q[31] * q[30] * cy[29])) - - - - - -Touch & Parham Informational [Page 20] - -RFC 1936 Implementing the Internet Checksum in Hardware April 1996 - - -; -; output nodes onto output pins (pending enable..) -; -dq[0] := {q[0]} -dq[1] := {q[1]} -dq[2] := {q[2]} -dq[3] := {q[3]} -dq[4] := {q[4]} -dq[5] := {q[5]} -dq[6] := {q[6]} -dq[7] := {q[7]} -dq[8] := {q[8]} -dq[9] := {q[9]} -dq[10] := {q[10]} -dq[11] := {q[11]} -dq[12] := {q[12]} -dq[13] := {q[13]} -dq[14] := {q[14]} -dq[15] := {q[15]} - -dq[16] := {q[16]} -dq[17] := {q[17]} -dq[18] := {q[18]} -dq[19] := {q[19]} -dq[20] := {q[20]} -dq[21] := {q[21]} -dq[22] := {q[22]} -dq[23] := {q[23]} -dq[24] := {q[24]} -dq[25] := {q[25]} -dq[26] := {q[26]} -dq[27] := {q[27]} -dq[28] := {q[28]} -dq[29] := {q[29]} -dq[30] := {q[30]} -dq[31] := {q[31]} - -; -; end. -; - - - - - - - - - - - -Touch & Parham Informational [Page 21] - diff --git a/kernel/picotcp/RFC/rfc1948.txt b/kernel/picotcp/RFC/rfc1948.txt deleted file mode 100644 index f660b4f..0000000 --- a/kernel/picotcp/RFC/rfc1948.txt +++ /dev/null @@ -1,339 +0,0 @@ - - - - - - -Network Working Group S. Bellovin -Request for Comments: 1948 AT&T Research -Category: Informational May 1996 - - - Defending Against Sequence Number Attacks - -Status of This Memo - - This memo provides information for the Internet community. This memo - does not specify an Internet standard of any kind. Distribution of - this memo is unlimited. - -Abstract - - IP spoofing attacks based on sequence number spoofing have become a - serious threat on the Internet (CERT Advisory CA-95:01). 
While - ubiquitous cryptographic authentication is the right answer, we propose - a simple modification to TCP implementations that should be a very - substantial block to the current wave of attacks. - -Overview and Rationale - - In 1985, Morris [1] described a form of attack based on guessing what - sequence numbers TCP [2] will use for new connections. Briefly, the - attacker gags a host trusted by the target, impersonates the IP - address of the trusted host when talking to the target, and completes - the 3-way handshake based on its guess at the next initial sequence - number to be used. An ordinary connection to the target is used to - gather sequence number state information. This entire sequence, - coupled with address-based authentication, allows the attacker to - execute commands on the target host. - - Clearly, the proper solution is cryptographic authentication [3,4]. - But it will be quite a long time before that is deployed. It has - therefore been necessary for many sites to restrict use of protocols - that rely on address-based authentication, such as rlogin and rsh. - Unfortunately, the prevalence of "sniffer attacks" -- network - eavesdropping (CERT Advisory CA-94:01) -- has rendered ordinary - TELNET [5] very dangerous as well. The Internet is thus left without - a safe, secure mechanism for remote login. - - We propose a simple change to TCP implementations that will block - most sequence number guessing attacks. More precisely, such attacks - will remain possible if and only if the Bad Guy already has the - ability to launch even more devastating attacks. - - - - - -Bellovin Informational [Page 1] - -RFC 1948 Sequence Number Attacks May 1996 - - -Details of the Attack - - In order to understand the particular case of sequence number - guessing, one must look at the 3-way handshake used in the TCP open - sequence [2]. Suppose client machine A wants to talk to rsh server - B.
It sends the following message: - - A->B: SYN, ISNa - - That is, it sends a packet with the SYN ("synchronize sequence - number") bit set and an initial sequence number ISNa. - - B replies with - - B->A: SYN, ISNb, ACK(ISNa) - - In addition to sending its own initial sequence number, it - acknowledges A's. Note that the actual numeric value ISNa must - appear in the message. - - A concludes the handshake by sending - - A->B: ACK(ISNb) - - The initial sequence numbers are intended to be more or less random. - More precisely, RFC 793 specifies that the 32-bit counter be - incremented by 1 in the low-order position about every 4 - microseconds. Instead, Berkeley-derived kernels increment it by a - constant every second, and by another constant for each new - connection. Thus, if you open a connection to a machine, you know to - a very high degree of confidence what sequence number it will use for - its next connection. And therein lies the attack. - - The attacker X first opens a real connection to its target B -- say, - to the mail port or the TCP echo port. This gives ISNb. It then - impersonates A and sends - - Ax->B: SYN, ISNx - - where "Ax" denotes a packet sent by X pretending to be A. - - B's response to X's original SYN (so to speak) - - B->A: SYN, ISNb', ACK(ISNx) - - - - - - - -Bellovin Informational [Page 2] - -RFC 1948 Sequence Number Attacks May 1996 - - - goes to the legitimate A, about which more anon. X never sees that - message but can still send - - Ax->B: ACK(ISNb') - - using the predicted value for ISNb'. If the guess is right -- and - usually it will be -- B's rsh server thinks it has a legitimate - connection with A, when in fact X is sending the packets. X can't - see the output from this session, but it can execute commands as more - or less any user -- and in that case, the game is over and X has won. - - There is a minor difficulty here. 
If A sees B's message, it will - realize that B is acknowledging something it never sent, and will - send a RST packet in response to tear down the connection. There are - a variety of ways to prevent this; the easiest is to wait until the - real A is down (possibly as a result of enemy action, of course). In - actual practice, X can gag A by exploiting a very common - implementation bug; this is described below. - -The Fix - - The choice of initial sequence numbers for a connection is not - random. Rather, it must be chosen so as to minimize the probability - of old stale packets being accepted by new incarnations of the same - connection [6, Appendix A]. Furthermore, implementations of TCP - derived from 4.2BSD contain special code to deal with such - reincarnations when the server end of the original connection is - still in TIMEWAIT state [7, pp. 945]. Accordingly, simple - randomization, as suggested in [8], will not work well. - - But duplicate packets, and hence the restrictions on the initial - sequence number for reincarnations, are peculiar to individual - connections. That is, there is no connection, syntactic or semantic, - between the sequence numbers used for two different connections. We - can prevent sequence number guessing attacks by giving each - connection -- that is, each 4-tuple of <localhost, localport, - remotehost, remoteport> -- a separate sequence number space. Within - each space, the initial sequence number is incremented according to - [2]; however, there is no obvious relationship between the numbering - in different spaces. - - The obvious way to do this is to maintain state for dead connections, - and the easiest way to do that is to change the TCP state transition - diagram so that both ends of all connections go to TIMEWAIT state. - That would work, but it's inelegant and consumes storage space. - Instead, we use the current 4 microsecond timer M and set - - ISN = M + F(localhost, localport, remotehost, remoteport).
- - - -Bellovin Informational [Page 3] - -RFC 1948 Sequence Number Attacks May 1996 - - - It is vital that F not be computable from the outside, or an attacker - could still guess at sequence numbers from the initial sequence - number used for some other connection. We therefore suggest that F - be a cryptographic hash function of the connection-id and some secret - data. MD5 [9] is a good choice, since the code is widely available. - The secret data can either be a true random number [10], or it can be - the combination of some per-host secret and the boot time of the - machine. The boot time is included to ensure that the secret is - changed on occasion. Other data, such as the host's IP address and - name, may be included in the hash as well; this eases administration - by permitting a network of workstations to share the same secret data - while still giving them separate sequence number spaces. Our - recommendation, in fact, is to use all three of these items: as - random a number as the hardware can generate, an administratively- - installed pass phrase, and the machine's IP address. This allows for - local choice on how secure the secret is. - - Note that the secret cannot easily be changed on a live machine. - Doing so would change the initial sequence numbers used for - reincarnated connections; to maintain safety, either dead connection - state must be kept or a quiet time observed for two maximum segment - lifetimes after such a change. - -A Common TCP Bug - - As mentioned earlier, attackers using sequence number guessing have - to "gag" the trusted machine first. While a number of strategies are - possible, most of the attacks detected thus far rely on an - implementation bug. - - When SYN packets are received for a connection, the receiving system - creates a new TCB in SYN-RCVD state. To avoid overconsumption of - resources, 4.2BSD-derived systems permit only a limited number of - TCBs in this state per connection. 
Once this limit is reached, - future SYN packets for new connections are discarded; it is assumed - that the client will retransmit them as needed. - - When a packet is received, the first thing that must be done is a - search for the TCB for that connection. If no TCB is found, the - kernel searches for a "wild card" TCB used by servers to accept - connections from all clients. Unfortunately, in many kernels this - code is invoked for any incoming packets, not just for initial SYN - packets. If the SYN-RCVD queue is full for the wildcard TCB, any new - packets specifying just that host and port number will be discarded, - even if they aren't SYN packets. - - - - - - -Bellovin Informational [Page 4] - -RFC 1948 Sequence Number Attacks May 1996 - - - To gag a host, then, the attacker sends a few dozen SYN packets to - the rlogin port from different port numbers on some non-existent - machine. This fills up the SYN-RCVD queue, while the SYN+ACK packets - go off to the bit bucket. The attack on the target machine then - appears to come from the rlogin port on the trusted machine. The - replies -- the SYN+ACKs from the target -- will be perceived as - packets belonging to a full queue, and will be dropped silently. - This could be avoided if the full queue code checked for the ACK bit, - which cannot legally be on for legitimate open requests. If it is - on, RST should be sent in reply. - -Security Considerations - - Good sequence numbers are not a replacement for cryptographic - authentication. At best, they're a palliative measure. - - An eavesdropper who can observe the initial messages for a connection - can determine its sequence number state, and may still be able to - launch sequence number guessing attacks by impersonating that - connection. However, such an eavesdropper can also hijack existing - connections [11], so the incremental threat isn't that high. 
Still, - since the offset between a fake connection and a given real - connection will be more or less constant for the lifetime of the - secret, it is important to ensure that attackers can never capture - such packets. Typical attacks that could disclose them include both - eavesdropping and the variety of routing attacks discussed in [8]. - - If random numbers are used as the sole source of the secret, they - MUST be chosen in accordance with the recommendations given in [10]. - -Acknowledgments - - Matt Blaze and Jim Ellis contributed some crucial ideas to this RFC. - Frank Kastenholz contributed constructive comments to this memo. - -References - - [1] R.T. Morris, "A Weakness in the 4.2BSD UNIX TCP/IP Software", - CSTR 117, 1985, AT&T Bell Laboratories, Murray Hill, NJ. - - [2] Postel, J., "Transmission Control Protocol", STD 7, RFC 793, - September 1981. - - [3] Kohl, J., and C. Neuman, "The Kerberos Network Authentication - Service (V5)", RFC 1510, September 1993. - - [4] Atkinson, R., "Security Architecture for the Internet - Protocol", RFC 1825, August 1995. - - - -Bellovin Informational [Page 5] - -RFC 1948 Sequence Number Attacks May 1996 - - - [5] Postel, J., and J. Reynolds, "Telnet Protocol Specification", - STD 8, RFC 854, May 1983. - - [6] Jacobson, V., Braden, R., and L. Zhang, "TCP Extension for - High-Speed Paths", RFC 1185, October 1990. - - [7] G.R. Wright, W. R. Stevens, "TCP/IP Illustrated, Volume 2", - 1995. Addison-Wesley. - - [8] S. Bellovin, "Security Problems in the TCP/IP Protocol Suite", - April 1989, Computer Communications Review, vol. 19, no. 2, pp. - 32-48. - - [9] Rivest, R., "The MD5 Message-Digest Algorithm", RFC 1321, - April 1992. - - [10] Eastlake, D., Crocker, S., and J. Schiller, "Randomness - Recommendations for Security", RFC 1750, December 1994. - - [11] L. Joncheray, "A Simple Active Attack Against TCP", 1995, Proc. - Fifth Usenix UNIX Security Symposium. - -Author's Address - - Steven M.
Bellovin - AT&T Research - 600 Mountain Avenue - Murray Hill, NJ 07974 - - Phone: (908) 582-5886 - EMail: smb@research.att.com - - - - - - - - - - - - - - - - - - - - -Bellovin Informational [Page 6] - diff --git a/kernel/picotcp/RFC/rfc1994.txt b/kernel/picotcp/RFC/rfc1994.txt deleted file mode 100644 index e4a553e..0000000 --- a/kernel/picotcp/RFC/rfc1994.txt +++ /dev/null @@ -1,732 +0,0 @@ - - - - - - -Network Working Group W. Simpson -Request for Comments: 1994 DayDreamer -Obsoletes: 1334 August 1996 -Category: Standards Track - - - PPP Challenge Handshake Authentication Protocol (CHAP) - - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Abstract - - The Point-to-Point Protocol (PPP) [1] provides a standard method for - transporting multi-protocol datagrams over point-to-point links. - - PPP also defines an extensible Link Control Protocol, which allows - negotiation of an Authentication Protocol for authenticating its peer - before allowing Network Layer protocols to transmit over the link. - - This document defines a method for Authentication using PPP, which - uses a random Challenge, with a cryptographically hashed Response - which depends upon the Challenge and a secret key. - -Table of Contents - - 1. Introduction .......................................... 1 - 1.1 Specification of Requirements ................... 1 - 1.2 Terminology ..................................... 2 - 2. Challenge-Handshake Authentication Protocol ........... 2 - 2.1 Advantages ...................................... 3 - 2.2 Disadvantages ................................... 3 - 2.3 Design Requirements ............................. 4 - 3. 
Configuration Option Format ........................... 5 - 4. Packet Format ......................................... 6 - 4.1 Challenge and Response .......................... 7 - 4.2 Success and Failure ............................. 9 - SECURITY CONSIDERATIONS ...................................... 10 - ACKNOWLEDGEMENTS ............................................. 11 - REFERENCES ................................................... 12 - CONTACTS ..................................................... 12 - - - - -Simpson [Page i] - -RFC 1994 PPP CHAP August 1996 - - -1. Introduction - - In order to establish communications over a point-to-point link, each - end of the PPP link must first send LCP packets to configure the data - link during Link Establishment phase. After the link has been - established, PPP provides for an optional Authentication phase before - proceeding to the Network-Layer Protocol phase. - - By default, authentication is not mandatory. If authentication of - the link is desired, an implementation MUST specify the - Authentication-Protocol Configuration Option during Link - Establishment phase. - - These authentication protocols are intended for use primarily by - hosts and routers that connect to a PPP network server via switched - circuits or dial-up lines, but might be applied to dedicated links as - well. The server can use the identification of the connecting host - or router in the selection of options for network layer negotiations. - - This document defines a PPP authentication protocol. The Link - Establishment and Authentication phases, and the Authentication- - Protocol Configuration Option, are defined in The Point-to-Point - Protocol (PPP) [1]. - - -1.1. Specification of Requirements - - In this document, several words are used to signify the requirements - of the specification. These words are often capitalized. 
- - MUST This word, or the adjective "required", means that the - definition is an absolute requirement of the specification. - - MUST NOT This phrase means that the definition is an absolute - prohibition of the specification. - - SHOULD This word, or the adjective "recommended", means that there - may exist valid reasons in particular circumstances to - ignore this item, but the full implications must be - understood and carefully weighed before choosing a - different course. - - MAY This word, or the adjective "optional", means that this - item is one of an allowed set of alternatives. An - implementation which does not include this option MUST be - prepared to interoperate with another implementation which - does include the option. - - - - -Simpson [Page 1] - -RFC 1994 PPP CHAP August 1996 - - -1.2. Terminology - - This document frequently uses the following terms: - - authenticator - The end of the link requiring the authentication. The - authenticator specifies the authentication protocol to be - used in the Configure-Request during Link Establishment - phase. - - peer The other end of the point-to-point link; the end which is - being authenticated by the authenticator. - - silently discard - This means the implementation discards the packet without - further processing. The implementation SHOULD provide the - capability of logging the error, including the contents of - the silently discarded packet, and SHOULD record the event - in a statistics counter. - - - - -2. Challenge-Handshake Authentication Protocol - - The Challenge-Handshake Authentication Protocol (CHAP) is used to - periodically verify the identity of the peer using a 3-way handshake. - This is done upon initial link establishment, and MAY be repeated - anytime after the link has been established. - - 1. After the Link Establishment phase is complete, the - authenticator sends a "challenge" message to the peer. - - 2. The peer responds with a value calculated using a "one-way - hash" function. 
- - 3. The authenticator checks the response against its own - calculation of the expected hash value. If the values match, - the authentication is acknowledged; otherwise the connection - SHOULD be terminated. - - 4. At random intervals, the authenticator sends a new challenge to - the peer, and repeats steps 1 to 3. - - - - - - - - -Simpson [Page 2] - -RFC 1994 PPP CHAP August 1996 - - -2.1. Advantages - - CHAP provides protection against playback attack by the peer through - the use of an incrementally changing identifier and a variable - challenge value. The use of repeated challenges is intended to limit - the time of exposure to any single attack. The authenticator is in - control of the frequency and timing of the challenges. - - This authentication method depends upon a "secret" known only to the - authenticator and that peer. The secret is not sent over the link. - - Although the authentication is only one-way, by negotiating CHAP in - both directions the same secret set may easily be used for mutual - authentication. - - Since CHAP may be used to authenticate many different systems, name - fields may be used as an index to locate the proper secret in a large - table of secrets. This also makes it possible to support more than - one name/secret pair per system, and to change the secret in use at - any time during the session. - - -2.2. Disadvantages - - CHAP requires that the secret be available in plaintext form. - Irreversibly encrypted password databases commonly available cannot - be used. - - It is not as useful for large installations, since every possible - secret is maintained at both ends of the link. - - Implementation Note: To avoid sending the secret over other links - in the network, it is recommended that the challenge and response - values be examined at a central server, rather than each network - access server. Otherwise, the secret SHOULD be sent to such - servers in a reversibly encrypted form.
Either case requires a - trusted relationship, which is outside the scope of this - specification. - - - - - - - - - - - - - -Simpson [Page 3] - -RFC 1994 PPP CHAP August 1996 - - -2.3. Design Requirements - - The CHAP algorithm requires that the length of the secret MUST be at - least 1 octet. The secret SHOULD be at least as large and - unguessable as a well-chosen password. It is preferred that the - secret be at least the length of the hash value for the hashing - algorithm chosen (16 octets for MD5). This is to ensure a - sufficiently large range for the secret to provide protection against - exhaustive search attacks. - - The one-way hash algorithm is chosen such that it is computationally - infeasible to determine the secret from the known challenge and - response values. - - Each challenge value SHOULD be unique, since repetition of a - challenge value in conjunction with the same secret would permit an - attacker to reply with a previously intercepted response. Since it - is expected that the same secret MAY be used to authenticate with - servers in disparate geographic regions, the challenge SHOULD exhibit - global and temporal uniqueness. - - Each challenge value SHOULD also be unpredictable, lest an attacker - trick a peer into responding to a predicted future challenge, and - then use the response to masquerade as that peer to an authenticator. - - Although protocols such as CHAP are incapable of protecting against - realtime active wiretapping attacks, generation of unique - unpredictable challenges can protect against a wide range of active - attacks. - - A discussion of sources of uniqueness and probability of divergence - is included in the Magic-Number Configuration Option [1]. - - - - - - - - - - - - - - - - - - - -Simpson [Page 4] - -RFC 1994 PPP CHAP August 1996 - - -3.
Configuration Option Format - - A summary of the Authentication-Protocol Configuration Option format - to negotiate the Challenge-Handshake Authentication Protocol is shown - below. The fields are transmitted from left to right. - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Type | Length | Authentication-Protocol | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Algorithm | - +-+-+-+-+-+-+-+-+ - - Type - - 3 - - Length - - 5 - - Authentication-Protocol - - c223 (hex) for Challenge-Handshake Authentication Protocol. - - Algorithm - - The Algorithm field is one octet and indicates the authentication - method to be used. Up-to-date values are specified in the most - recent "Assigned Numbers" [2]. One value is required to be - implemented: - - 5 CHAP with MD5 [3] - - - - - - - - - - - - - - - - - - - -Simpson [Page 5] - -RFC 1994 PPP CHAP August 1996 - - -4. Packet Format - - Exactly one Challenge-Handshake Authentication Protocol packet is - encapsulated in the Information field of a PPP Data Link Layer frame - where the protocol field indicates type hex c223 (Challenge-Handshake - Authentication Protocol). A summary of the CHAP packet format is - shown below. The fields are transmitted from left to right. - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Data ... - +-+-+-+-+ - - Code - - The Code field is one octet and identifies the type of CHAP - packet. CHAP Codes are assigned as follows: - - 1 Challenge - 2 Response - 3 Success - 4 Failure - - Identifier - - The Identifier field is one octet and aids in matching challenges, - responses and replies. - - Length - - The Length field is two octets and indicates the length of the - CHAP packet including the Code, Identifier, Length and Data - fields. 
Octets outside the range of the Length field should be - treated as Data Link Layer padding and should be ignored on - reception. - - Data - - The Data field is zero or more octets. The format of the Data - field is determined by the Code field. - - - - - - - - - - -Simpson [Page 6] - -RFC 1994 PPP CHAP August 1996 - - -4.1. Challenge and Response - - Description - - The Challenge packet is used to begin the Challenge-Handshake - Authentication Protocol. The authenticator MUST transmit a CHAP - packet with the Code field set to 1 (Challenge). Additional - Challenge packets MUST be sent until a valid Response packet is - received, or an optional retry counter expires. - - A Challenge packet MAY also be transmitted at any time during the - Network-Layer Protocol phase to ensure that the connection has not - been altered. - - The peer SHOULD expect Challenge packets during the Authentication - phase and the Network-Layer Protocol phase. Whenever a Challenge - packet is received, the peer MUST transmit a CHAP packet with the - Code field set to 2 (Response). - - Whenever a Response packet is received, the authenticator compares - the Response Value with its own calculation of the expected value. - Based on this comparison, the authenticator MUST send a Success or - Failure packet (described below). - - Implementation Notes: Because the Success might be lost, the - authenticator MUST allow repeated Response packets during the - Network-Layer Protocol phase after completing the - Authentication phase. To prevent discovery of alternative - Names and Secrets, any Response packets received having the - current Challenge Identifier MUST return the same reply Code - previously returned for that specific Challenge (the message - portion MAY be different). Any Response packets received - during any other phase MUST be silently discarded. 
- - When the Failure is lost, and the authenticator terminates the - link, the LCP Terminate-Request and Terminate-Ack provide an - alternative indication that authentication failed. - - - - - - - - - - - - - - -Simpson [Page 7] - -RFC 1994 PPP CHAP August 1996 - - - A summary of the Challenge and Response packet format is shown below. - The fields are transmitted from left to right. - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Value-Size | Value ... - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Name ... - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Code - - 1 for Challenge; - - 2 for Response. - - Identifier - - The Identifier field is one octet. The Identifier field MUST be - changed each time a Challenge is sent. - - The Response Identifier MUST be copied from the Identifier field - of the Challenge which caused the Response. - - Value-Size - - This field is one octet and indicates the length of the Value - field. - - Value - - The Value field is one or more octets. The most significant octet - is transmitted first. - - The Challenge Value is a variable stream of octets. The - importance of the uniqueness of the Challenge Value and its - relationship to the secret is described above. The Challenge - Value MUST be changed each time a Challenge is sent. The length - of the Challenge Value depends upon the method used to generate - the octets, and is independent of the hash algorithm used. - - The Response Value is the one-way hash calculated over a stream of - octets consisting of the Identifier, followed by (concatenated - with) the "secret", followed by (concatenated with) the Challenge - Value. The length of the Response Value depends upon the hash - algorithm used (16 octets for MD5). 
- - - - -Simpson [Page 8] - -RFC 1994 PPP CHAP August 1996 - - - Name - - The Name field is one or more octets representing the - identification of the system transmitting the packet. There are - no limitations on the content of this field. For example, it MAY - contain ASCII character strings or globally unique identifiers in - ASN.1 syntax. The Name should not be NUL or CR/LF terminated. - The size is determined from the Length field. - - -4.2. Success and Failure - - Description - - If the Value received in a Response is equal to the expected - value, then the implementation MUST transmit a CHAP packet with - the Code field set to 3 (Success). - - If the Value received in a Response is not equal to the expected - value, then the implementation MUST transmit a CHAP packet with - the Code field set to 4 (Failure), and SHOULD take action to - terminate the link. - - A summary of the Success and Failure packet format is shown below. - The fields are transmitted from left to right. - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Code | Identifier | Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Message ... - +-+-+-+-+-+-+-+-+-+-+-+-+- - - Code - - 3 for Success; - - 4 for Failure. - - Identifier - - The Identifier field is one octet and aids in matching requests - and replies. The Identifier field MUST be copied from the - Identifier field of the Response which caused this reply. - - - - - - - - -Simpson [Page 9] - -RFC 1994 PPP CHAP August 1996 - - - Message - - The Message field is zero or more octets, and its contents are - implementation dependent. It is intended to be human readable, - and MUST NOT affect operation of the protocol. It is recommended - that the message contain displayable ASCII characters 32 through - 126 decimal. Mechanisms for extension to other character sets are - the topic of future research. The size is determined from the - Length field. 
- - - -Security Considerations - - Security issues are the primary topic of this RFC. - - The interaction of the authentication protocols within PPP are highly - implementation dependent. This is indicated by the use of SHOULD - throughout the document. - - For example, upon failure of authentication, some implementations do - not terminate the link. Instead, the implementation limits the kind - of traffic in the Network-Layer Protocols to a filtered subset, which - in turn allows the user opportunity to update secrets or send mail to - the network administrator indicating a problem. - - There is no provision for re-tries of failed authentication. - However, the LCP state machine can renegotiate the authentication - protocol at any time, thus allowing a new attempt. It is recommended - that any counters used for authentication failure not be reset until - after successful authentication, or subsequent termination of the - failed link. - - There is no requirement that authentication be full duplex or that - the same protocol be used in both directions. It is perfectly - acceptable for different protocols to be used in each direction. - This will, of course, depend on the specific protocols negotiated. - - The secret SHOULD NOT be the same in both directions. This allows an - attacker to replay the peer's challenge, accept the computed - response, and use that response to authenticate. - - In practice, within or associated with each PPP server, there is a - database which associates "user" names with authentication - information ("secrets"). It is not anticipated that a particular - named user would be authenticated by multiple methods. This would - make the user vulnerable to attacks which negotiate the least secure - method from among a set (such as PAP rather than CHAP). If the same - - - -Simpson [Page 10] - -RFC 1994 PPP CHAP August 1996 - - - secret was used, PAP would reveal the secret to be used later with - CHAP. 
- - Instead, for each user name there should be an indication of exactly - one method used to authenticate that user name. If a user needs to - make use of different authentication methods under different - circumstances, then distinct user names SHOULD be employed, each of - which identifies exactly one authentication method. - - Passwords and other secrets should be stored at the respective ends - such that access to them is as limited as possible. Ideally, the - secrets should only be accessible to the process requiring access in - order to perform the authentication. - - The secrets should be distributed with a mechanism that limits the - number of entities that handle (and thus gain knowledge of) the - secret. Ideally, no unauthorized person should ever gain knowledge - of the secrets. Such a mechanism is outside the scope of this - specification. - - -Acknowledgements - - David Kaufman, Frank Heinrich, and Karl Auerbach used a challenge - handshake at SDC when designing one of the protocols for a "secure" - network in the mid-1970s. Tom Bearson built a prototype Sytek - product ("Poloneous"?) on the challenge-response notion in the 1982- - 83 timeframe. Another variant is documented in the various IBM SNA - manuals. Yet another variant was implemented by Karl Auerbach in the - Telebit NetBlazer circa 1991. - - Kim Toms and Barney Wolff provided useful critiques of earlier - versions of this document. - - Special thanks to Dave Balenson, Steve Crocker, James Galvin, and - Steve Kent, for their extensive explanations and suggestions. Now, - if only we could get them to agree with each other. - - - - - - - - - - - - - - -Simpson [Page 11] - -RFC 1994 PPP CHAP August 1996 - - -References - - [1] Simpson, W., Editor, "The Point-to-Point Protocol (PPP)", STD - 51, RFC 1661, DayDreamer, July 1994. - - [2] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC - 1700, USC/Information Sciences Institute, October 1994. - - [3] Rivest, R., and S. 
Dusse, "The MD5 Message-Digest Algorithm", - MIT Laboratory for Computer Science and RSA Data Security, - Inc., RFC 1321, April 1992. - - - -Contacts - - Comments should be submitted to the ietf-ppp@merit.edu mailing list. - - This document was reviewed by the Point-to-Point Protocol Working - Group of the Internet Engineering Task Force (IETF). The working - group can be contacted via the current chair: - - Karl Fox - Ascend Communications - 3518 Riverside Drive, Suite 101 - Columbus, Ohio 43221 - - karl@MorningStar.com - karl@Ascend.com - - - Questions about this memo can also be directed to: - - William Allen Simpson - DayDreamer - Computer Systems Consulting Services - 1384 Fontaine - Madison Heights, Michigan 48071 - - wsimpson@UMich.edu - wsimpson@GreenDragon.com (preferred) - - - - - - - - - - -Simpson [Page 12] - - diff --git a/kernel/picotcp/RFC/rfc2012.txt b/kernel/picotcp/RFC/rfc2012.txt deleted file mode 100644 index b5aee40..0000000 --- a/kernel/picotcp/RFC/rfc2012.txt +++ /dev/null @@ -1,563 +0,0 @@ - - - - - - -Network Working Group K. McCloghrie, Editor -Request for Comments: 2012 Cisco Systems -Updates: 1213 November 1996 -Category: Standards Track - - - SNMPv2 Management Information Base - for the Transmission Control Protocol using SMIv2 - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -IESG Note: - - The IP, UDP, and TCP MIB modules currently support only IPv4. These - three modules use the IpAddress type defined as an OCTET STRING of - length 4 to represent the IPv4 32-bit internet addresses. (See RFC - 1902, SMI for SNMPv2.) They do not support the new 128-bit IPv6 - internet addresses. - -Table of Contents - - 1. 
Introduction ................................................ 1 - 2. Definitions ................................................. 2 - 2.1 The TCP Group .............................................. 3 - 2.2 Conformance Information .................................... 8 - 2.2.1 Compliance Statements .................................... 8 - 2.2.2 Units of Conformance ..................................... 9 - 3. Acknowledgements ............................................ 10 - 4. References .................................................. 10 - 5. Security Considerations ..................................... 10 - 6. Editor's Address ............................................ 10 - -1. Introduction - - A management system contains: several (potentially many) nodes, each - with a processing entity, termed an agent, which has access to - management instrumentation; at least one management station; and, a - management protocol, used to convey management information between - the agents and management stations. Operations of the protocol are - carried out under an administrative framework which defines - authentication, authorization, access control, and privacy policies. - - - - -McCloghrie Standards Track [Page 1] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - - Management stations execute management applications which monitor and - control managed elements. Managed elements are devices such as - hosts, routers, terminal servers, etc., which are monitored and - controlled via access to their management information. - - Management information is viewed as a collection of managed objects, - residing in a virtual information store, termed the Management - Information Base (MIB). Collections of related objects are defined - in MIB modules. These modules are written using a subset of OSI's - Abstract Syntax Notation One (ASN.1) [1], termed the Structure of - Management Information (SMI) [2]. 
- - This document is the MIB module which defines managed objects for - managing implementations of the Transmission Control Protocol (TCP) - [3]. - - The managed objects in this MIB module were originally defined using - the SNMPv1 framework as a part of MIB-II [4]. This document defines - the same objects for TCP using the SNMPv2 framework. - -2. Definitions - -TCP-MIB DEFINITIONS ::= BEGIN - -IMPORTS - MODULE-IDENTITY, OBJECT-TYPE, Integer32, Gauge32, - Counter32, IpAddress, mib-2 FROM SNMPv2-SMI - MODULE-COMPLIANCE, OBJECT-GROUP FROM SNMPv2-CONF; - -tcpMIB MODULE-IDENTITY - LAST-UPDATED "9411010000Z" - ORGANIZATION "IETF SNMPv2 Working Group" - CONTACT-INFO - " Keith McCloghrie - - Postal: Cisco Systems, Inc. - 170 West Tasman Drive - San Jose, CA 95134-1706 - US - - Phone: +1 408 526 5260 - Email: kzm@cisco.com" - - - - - - - - - -McCloghrie Standards Track [Page 2] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - - DESCRIPTION - "The MIB module for managing TCP implementations." - REVISION "9103310000Z" - DESCRIPTION - "The initial revision of this MIB module was part of MIB- - II." - ::= { mib-2 49 } - --- the TCP group - -tcp OBJECT IDENTIFIER ::= { mib-2 6 } - -tcpRtoAlgorithm OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - constant(2), -- a constant rto - rsre(3), -- MIL-STD-1778, Appendix B - vanj(4) -- Van Jacobson's algorithm [5] - } - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The algorithm used to determine the timeout value used for - retransmitting unacknowledged octets." - ::= { tcp 1 } - -tcpRtoMin OBJECT-TYPE - SYNTAX Integer32 - UNITS "milliseconds" - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The minimum value permitted by a TCP implementation for the - retransmission timeout, measured in milliseconds. More - refined semantics for objects of this type depend upon the - algorithm used to determine the retransmission timeout. 
In - particular, when the timeout algorithm is rsre(3), an object - of this type has the semantics of the LBOUND quantity - described in RFC 793." - ::= { tcp 2 } - -tcpRtoMax OBJECT-TYPE - SYNTAX Integer32 - UNITS "milliseconds" - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The maximum value permitted by a TCP implementation for the - - - -McCloghrie Standards Track [Page 3] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - - retransmission timeout, measured in milliseconds. More - refined semantics for objects of this type depend upon the - algorithm used to determine the retransmission timeout. In - particular, when the timeout algorithm is rsre(3), an object - of this type has the semantics of the UBOUND quantity - described in RFC 793." - ::= { tcp 3 } - -tcpMaxConn OBJECT-TYPE - SYNTAX Integer32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The limit on the total number of TCP connections the entity - can support. In entities where the maximum number of - connections is dynamic, this object should contain the value - -1." - ::= { tcp 4 } - -tcpActiveOpens OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times TCP connections have made a direct - transition to the SYN-SENT state from the CLOSED state." - ::= { tcp 5 } - -tcpPassiveOpens OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times TCP connections have made a direct - transition to the SYN-RCVD state from the LISTEN state." - ::= { tcp 6 } - -tcpAttemptFails OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times TCP connections have made a direct - transition to the CLOSED state from either the SYN-SENT - state or the SYN-RCVD state, plus the number of times TCP - connections have made a direct transition to the LISTEN - state from the SYN-RCVD state." 
- ::= { tcp 7 } - - - -McCloghrie Standards Track [Page 4] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - -tcpEstabResets OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times TCP connections have made a direct - transition to the CLOSED state from either the ESTABLISHED - state or the CLOSE-WAIT state." - ::= { tcp 8 } - -tcpCurrEstab OBJECT-TYPE - SYNTAX Gauge32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of TCP connections for which the current state - is either ESTABLISHED or CLOSE- WAIT." - ::= { tcp 9 } - - -tcpInSegs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments received, including those - received in error. This count includes segments received on - currently established connections." - ::= { tcp 10 } - -tcpOutSegs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments sent, including those on - current connections but excluding those containing only - retransmitted octets." - ::= { tcp 11 } - -tcpRetransSegs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments retransmitted - that is, the - number of TCP segments transmitted containing one or more - previously transmitted octets." - - - -McCloghrie Standards Track [Page 5] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - - ::= { tcp 12 } - - --- the TCP Connection table - --- The TCP connection table contains information about this --- entity's existing TCP connections. - -tcpConnTable OBJECT-TYPE - SYNTAX SEQUENCE OF TcpConnEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table containing TCP connection-specific information." 
- ::= { tcp 13 } - -tcpConnEntry OBJECT-TYPE - SYNTAX TcpConnEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A conceptual row of the tcpConnTable containing information - about a particular current TCP connection. Each row of this - table is transient, in that it ceases to exist when (or soon - after) the connection makes the transition to the CLOSED - state." - INDEX { tcpConnLocalAddress, - tcpConnLocalPort, - tcpConnRemAddress, - tcpConnRemPort } - ::= { tcpConnTable 1 } - -TcpConnEntry ::= SEQUENCE { - tcpConnState INTEGER, - tcpConnLocalAddress IpAddress, - tcpConnLocalPort INTEGER, - tcpConnRemAddress IpAddress, - tcpConnRemPort INTEGER - } - -tcpConnState OBJECT-TYPE - SYNTAX INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - - - -McCloghrie Standards Track [Page 6] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11), - deleteTCB(12) - } - MAX-ACCESS read-write - STATUS current - DESCRIPTION - "The state of this TCP connection. - - The only value which may be set by a management station is - deleteTCB(12). Accordingly, it is appropriate for an agent - to return a `badValue' response if a management station - attempts to set this object to any other value. - - If a management station sets this object to the value - deleteTCB(12), then this has the effect of deleting the TCB - (as defined in RFC 793) of the corresponding connection on - the managed node, resulting in immediate termination of the - connection. - - As an implementation-specific option, a RST segment may be - sent from the managed node to the other TCP endpoint (note - however that RST segments are not sent reliably)." - ::= { tcpConnEntry 1 } - -tcpConnLocalAddress OBJECT-TYPE - SYNTAX IpAddress - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The local IP address for this TCP connection. 
In the case - of a connection in the listen state which is willing to - accept connections for any IP interface associated with the - node, the value 0.0.0.0 is used." - ::= { tcpConnEntry 2 } - -tcpConnLocalPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The local port number for this TCP connection." - ::= { tcpConnEntry 3 } - -tcpConnRemAddress OBJECT-TYPE - - - -McCloghrie Standards Track [Page 7] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - - SYNTAX IpAddress - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The remote IP address for this TCP connection." - ::= { tcpConnEntry 4 } - -tcpConnRemPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The remote port number for this TCP connection." - ::= { tcpConnEntry 5 } - -tcpInErrs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments received in error (e.g., bad - TCP checksums)." - ::= { tcp 14 } - -tcpOutRsts OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of TCP segments sent containing the RST flag." - ::= { tcp 15 } - --- conformance information - -tcpMIBConformance OBJECT IDENTIFIER ::= { tcpMIB 2 } - -tcpMIBCompliances OBJECT IDENTIFIER ::= { tcpMIBConformance 1 } -tcpMIBGroups OBJECT IDENTIFIER ::= { tcpMIBConformance 2 } - - --- compliance statements - -tcpMIBCompliance MODULE-COMPLIANCE - STATUS current - DESCRIPTION - "The compliance statement for SNMPv2 entities which - implement TCP." 
- MODULE -- this module - - - -McCloghrie Standards Track [Page 8] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - - MANDATORY-GROUPS { tcpGroup - } - ::= { tcpMIBCompliances 1 } - --- units of conformance - -tcpGroup OBJECT-GROUP - OBJECTS { tcpRtoAlgorithm, tcpRtoMin, tcpRtoMax, - tcpMaxConn, tcpActiveOpens, - tcpPassiveOpens, tcpAttemptFails, - tcpEstabResets, tcpCurrEstab, tcpInSegs, - tcpOutSegs, tcpRetransSegs, tcpConnState, - tcpConnLocalAddress, tcpConnLocalPort, - tcpConnRemAddress, tcpConnRemPort, - tcpInErrs, tcpOutRsts } - STATUS current - DESCRIPTION - "The tcp group of objects providing for management of TCP - entities." - ::= { tcpMIBGroups 1 } - -END - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -McCloghrie Standards Track [Page 9] - -RFC 2012 SNMPv2 MIB for TCP November 1996 - - -3. Acknowledgements - - This document contains a modified subset of RFC 1213. - -4. References - - [1] Information processing systems - Open Systems Interconnection - - Specification of Abstract Syntax Notation One (ASN.1), - International Organization for Standardization. International - Standard 8824, (December, 1987). - - [2] McCloghrie, K., Editor, "Structure of Management Information - for version 2 of the Simple Network Management Protocol - (SNMPv2)", RFC 1902, Cisco Systems, January 1996. - - [3] Postel, J., "Transmission Control Protocol - DARPA Internet - Program Protocol Specification", STD 7, RFC 793, DARPA, - September 1981. - - [4] McCloghrie, K., and M. Rose, "Management Information Base for - Network Management of TCP/IP-based internets: MIB-II", STD 17, - RFC 1213, March 1991. - - [5] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM 1988, - Stanford, California. - -5. Security Considerations - - Security issues are not discussed in this memo. - -6. Editor's Address - - Keith McCloghrie - Cisco Systems, Inc. 
- 170 West Tasman Drive - San Jose, CA 95134-1706 - US - - Phone: +1 408 526 5260 - EMail: kzm@cisco.com - - - - - - - - - - - -McCloghrie Standards Track [Page 10] - diff --git a/kernel/picotcp/RFC/rfc2018.txt b/kernel/picotcp/RFC/rfc2018.txt deleted file mode 100644 index 1d84811..0000000 --- a/kernel/picotcp/RFC/rfc2018.txt +++ /dev/null @@ -1,675 +0,0 @@ - - - - - - -Network Working Group M. Mathis -Request for Comments: 2018 J. Mahdavi -Category: Standards Track PSC - S. Floyd - LBNL - A. Romanow - Sun Microsystems - October 1996 - - - TCP Selective Acknowledgment Options - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Abstract - - TCP may experience poor performance when multiple packets are lost - from one window of data. With the limited information available - from cumulative acknowledgments, a TCP sender can only learn about a - single lost packet per round trip time. An aggressive sender could - choose to retransmit packets early, but such retransmitted segments - may have already been successfully received. - - A Selective Acknowledgment (SACK) mechanism, combined with a - selective repeat retransmission policy, can help to overcome these - limitations. The receiving TCP sends back SACK packets to the sender - informing the sender of data that has been received. The sender can - then retransmit only the missing data segments. - - This memo proposes an implementation of SACK and discusses its - performance and related issues. - -Acknowledgements - - Much of the text in this document is taken directly from RFC1072 "TCP - Extensions for Long-Delay Paths" by Bob Braden and Van Jacobson. 
The - authors would like to thank Kevin Fall (LBNL), Christian Huitema - (INRIA), Van Jacobson (LBNL), Greg Miller (MITRE), Greg Minshall - (Ipsilon), Lixia Zhang (XEROX PARC and UCLA), Dave Borman (BSDI), - Allison Mankin (ISI) and others for their review and constructive - comments. - - - - -Mathis, et. al. Standards Track [Page 1] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - -1. Introduction - - Multiple packet losses from a window of data can have a catastrophic - effect on TCP throughput. TCP [Postel81] uses a cumulative - acknowledgment scheme in which received segments that are not at the - left edge of the receive window are not acknowledged. This forces - the sender to either wait a roundtrip time to find out about each - lost packet, or to unnecessarily retransmit segments which have been - correctly received [Fall95]. With the cumulative acknowledgment - scheme, multiple dropped segments generally cause TCP to lose its - ACK-based clock, reducing overall throughput. - - Selective Acknowledgment (SACK) is a strategy which corrects this - behavior in the face of multiple dropped segments. With selective - acknowledgments, the data receiver can inform the sender about all - segments that have arrived successfully, so the sender need - retransmit only the segments that have actually been lost. - - Several transport protocols, including NETBLT [Clark87], XTP - [Strayer92], RDP [Velten84], NADIR [Huitema81], and VMTP [Cheriton88] - have used selective acknowledgment. There is some empirical evidence - in favor of selective acknowledgments -- simple experiments with RDP - have shown that disabling the selective acknowledgment facility - greatly increases the number of retransmitted segments over a lossy, - high-delay Internet path [Partridge87]. A recent simulation study by - Kevin Fall and Sally Floyd [Fall95], demonstrates the strength of TCP - with SACK over the non-SACK Tahoe and Reno TCP implementations. 
- - RFC1072 [VJ88] describes one possible implementation of SACK options - for TCP. Unfortunately, it has never been deployed in the Internet, - as there was disagreement about how SACK options should be used in - conjunction with the TCP window shift option (initially described - RFC1072 and revised in [Jacobson92]). - - We propose slight modifications to the SACK options as proposed in - RFC1072. Specifically, sending a selective acknowledgment for the - most recently received data reduces the need for long SACK options - [Keshav94, Mathis95]. In addition, the SACK option now carries full - 32 bit sequence numbers. These two modifications represent the only - changes to the proposal in RFC1072. They make SACK easier to - implement and address concerns about robustness. - - The selective acknowledgment extension uses two TCP options. The - first is an enabling option, "SACK-permitted", which may be sent in a - SYN segment to indicate that the SACK option can be used once the - connection is established. The other is the SACK option itself, - which may be sent over an established connection once permission has - been given by SACK-permitted. - - - -Mathis, et. al. Standards Track [Page 2] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - - The SACK option is to be included in a segment sent from a TCP that - is receiving data to the TCP that is sending that data; we will refer - to these TCP's as the data receiver and the data sender, - respectively. We will consider a particular simplex data flow; any - data flowing in the reverse direction over the same connection can be - treated independently. - -2. Sack-Permitted Option - - This two-byte option may be sent in a SYN by a TCP that has been - extended to receive (and presumably process) the SACK option once the - connection has opened. It MUST NOT be sent on non-SYN segments. - - TCP Sack-Permitted Option: - - Kind: 4 - - +---------+---------+ - | Kind=4 | Length=2| - +---------+---------+ - -3. 
Sack Option Format - - The SACK option is to be used to convey extended acknowledgment - information from the receiver to the sender over an established TCP - connection. - - TCP SACK Option: - - Kind: 5 - - Length: Variable - - +--------+--------+ - | Kind=5 | Length | - +--------+--------+--------+--------+ - | Left Edge of 1st Block | - +--------+--------+--------+--------+ - | Right Edge of 1st Block | - +--------+--------+--------+--------+ - | | - / . . . / - | | - +--------+--------+--------+--------+ - | Left Edge of nth Block | - +--------+--------+--------+--------+ - | Right Edge of nth Block | - +--------+--------+--------+--------+ - - - -Mathis, et. al. Standards Track [Page 3] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - - The SACK option is to be sent by a data receiver to inform the data - sender of non-contiguous blocks of data that have been received and - queued. The data receiver awaits the receipt of data (perhaps by - means of retransmissions) to fill the gaps in sequence space between - received blocks. When missing segments are received, the data - receiver acknowledges the data normally by advancing the left window - edge in the Acknowledgement Number Field of the TCP header. The SACK - option does not change the meaning of the Acknowledgement Number - field. - - This option contains a list of some of the blocks of contiguous - sequence space occupied by data that has been received and queued - within the window. - - Each contiguous block of data queued at the data receiver is defined - in the SACK option by two 32-bit unsigned integers in network byte - order: - - * Left Edge of Block - - This is the first sequence number of this block. - - * Right Edge of Block - - This is the sequence number immediately following the last - sequence number of this block. 
- - Each block represents received bytes of data that are contiguous and - isolated; that is, the bytes just below the block, (Left Edge of - Block - 1), and just above the block, (Right Edge of Block), have not - been received. - - A SACK option that specifies n blocks will have a length of 8*n+2 - bytes, so the 40 bytes available for TCP options can specify a - maximum of 4 blocks. It is expected that SACK will often be used in - conjunction with the Timestamp option used for RTTM [Jacobson92], - which takes an additional 10 bytes (plus two bytes of padding); thus - a maximum of 3 SACK blocks will be allowed in this case. - - The SACK option is advisory, in that, while it notifies the data - sender that the data receiver has received the indicated segments, - the data receiver is permitted to later discard data which have been - reported in a SACK option. A discussion appears below in Section 8 - of the consequences of advisory SACK, in particular that the data - receiver may renege, or drop already SACKed data. - - - - - - -Mathis, et. al. Standards Track [Page 4] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - -4. Generating Sack Options: Data Receiver Behavior - - If the data receiver has received a SACK-Permitted option on the SYN - for this connection, the data receiver MAY elect to generate SACK - options as described below. If the data receiver generates SACK - options under any circumstance, it SHOULD generate them under all - permitted circumstances. If the data receiver has not received a - SACK-Permitted option for a given connection, it MUST NOT send SACK - options on that connection. - - If sent at all, SACK options SHOULD be included in all ACKs which do - not ACK the highest sequence number in the data receiver's queue. In - this situation the network has lost or mis-ordered data, such that - the receiver holds non-contiguous data in its queue. 
RFC 1122, - Section 4.2.2.21, discusses the reasons for the receiver to send ACKs - in response to additional segments received in this state. The - receiver SHOULD send an ACK for every valid segment that arrives - containing new data, and each of these "duplicate" ACKs SHOULD bear a - SACK option. - - If the data receiver chooses to send a SACK option, the following - rules apply: - - * The first SACK block (i.e., the one immediately following the - kind and length fields in the option) MUST specify the contiguous - block of data containing the segment which triggered this ACK, - unless that segment advanced the Acknowledgment Number field in - the header. This assures that the ACK with the SACK option - reflects the most recent change in the data receiver's buffer - queue. - - * The data receiver SHOULD include as many distinct SACK blocks as - possible in the SACK option. Note that the maximum available - option space may not be sufficient to report all blocks present in - the receiver's queue. - - * The SACK option SHOULD be filled out by repeating the most - recently reported SACK blocks (based on first SACK blocks in - previous SACK options) that are not subsets of a SACK block - already included in the SACK option being constructed. This - assures that in normal operation, any segment remaining part of a - non-contiguous block of data held by the data receiver is reported - in at least three successive SACK options, even for large-window - TCP implementations [RFC1323]). After the first SACK block, the - following SACK blocks in the SACK option may be listed in - arbitrary order. - - - - - -Mathis, et. al. Standards Track [Page 5] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - - It is very important that the SACK option always reports the block - containing the most recently received segment, because this provides - the sender with the most up-to-date information about the state of - the network and the data receiver's queue. - -5. 
Interpreting the Sack Option and Retransmission Strategy: Data - Sender Behavior - - When receiving an ACK containing a SACK option, the data sender - SHOULD record the selective acknowledgment for future reference. The - data sender is assumed to have a retransmission queue that contains - the segments that have been transmitted but not yet acknowledged, in - sequence-number order. If the data sender performs re-packetization - before retransmission, the block boundaries in a SACK option that it - receives may not fall on boundaries of segments in the retransmission - queue; however, this does not pose a serious difficulty for the - sender. - - One possible implementation of the sender's behavior is as follows. - Let us suppose that for each segment in the retransmission queue - there is a (new) flag bit "SACKed", to be used to indicate that this - particular segment has been reported in a SACK option. - - When an acknowledgment segment arrives containing a SACK option, the - data sender will turn on the SACKed bits for segments that have been - selectively acknowledged. More specifically, for each block in the - SACK option, the data sender will turn on the SACKed flags for all - segments in the retransmission queue that are wholly contained within - that block. This requires straightforward sequence number - comparisons. - - After the SACKed bit is turned on (as the result of processing a - received SACK option), the data sender will skip that segment during - any later retransmission. Any segment that has the SACKed bit turned - off and is less than the highest SACKed segment is available for - retransmission. - - After a retransmit timeout the data sender SHOULD turn off all of the - SACKed bits, since the timeout might indicate that the data receiver - has reneged. The data sender MUST retransmit the segment at the left - edge of the window after a retransmit timeout, whether or not the - SACKed bit is on for that segment. 
A segment will not be dequeued - and its buffer freed until the left window edge is advanced over it. - - - - - - - - -Mathis, et. al. Standards Track [Page 6] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - -5.1 Congestion Control Issues - - This document does not attempt to specify in detail the congestion - control algorithms for implementations of TCP with SACK. However, - the congestion control algorithms present in the de facto standard - TCP implementations MUST be preserved [Stevens94]. In particular, to - preserve robustness in the presence of packets reordered by the - network, recovery is not triggered by a single ACK reporting out-of- - order packets at the receiver. Further, during recovery the data - sender limits the number of segments sent in response to each ACK. - Existing implementations limit the data sender to sending one segment - during Reno-style fast recovery, or to two segments during slow-start - [Jacobson88]. Other aspects of congestion control, such as reducing - the congestion window in response to congestion, must similarly be - preserved. - - The use of time-outs as a fall-back mechanism for detecting dropped - packets is unchanged by the SACK option. Because the data receiver - is allowed to discard SACKed data, when a retransmit timeout occurs - the data sender MUST ignore prior SACK information in determining - which data to retransmit. - - Future research into congestion control algorithms may take advantage - of the additional information provided by SACK. One such area for - future research concerns modifications to TCP for a wireless or - satellite environment where packet loss is not necessarily an - indication of congestion. - -6. Efficiency and Worst Case Behavior - - If the return path carrying ACKs and SACK options were lossless, one - block per SACK option packet would always be sufficient. 
Every - segment arriving while the data receiver holds discontinuous data - would cause the data receiver to send an ACK with a SACK option - containing the one altered block in the receiver's queue. The data - sender is thus able to construct a precise replica of the receiver's - queue by taking the union of all the first SACK blocks. - - - - - - - - - - - - - - -Mathis, et. al. Standards Track [Page 7] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - - Since the return path is not lossless, the SACK option is defined to - include more than one SACK block in a single packet. The redundant - blocks in the SACK option packet increase the robustness of SACK - delivery in the presence of lost ACKs. For a receiver that is also - using the time stamp option [Jacobson92], the SACK option has room to - include three SACK blocks. Thus each SACK block will generally be - repeated at least three times, if necessary, once in each of three - successive ACK packets. However, if all of the ACK packets reporting - a particular SACK block are dropped, then the sender might assume - that the data in that SACK block has not been received, and - unnecessarily retransmit those segments. - - The deployment of other TCP options may reduce the number of - available SACK blocks to 2 or even to 1. This will reduce the - redundancy of SACK delivery in the presence of lost ACKs. Even so, - the exposure of TCP SACK in regard to the unnecessary retransmission - of packets is strictly less than the exposure of current - implementations of TCP. The worst-case conditions necessary for the - sender to needlessly retransmit data is discussed in more detail in a - separate document [Floyd96]. - - Older TCP implementations which do not have the SACK option will not - be unfairly disadvantaged when competing against SACK-capable TCPs. - This issue is discussed in more detail in [Floyd96]. - -7. 
Sack Option Examples - - The following examples attempt to demonstrate the proper behavior of - SACK generation by the data receiver. - - Assume the left window edge is 5000 and that the data transmitter - sends a burst of 8 segments, each containing 500 data bytes. - - Case 1: The first 4 segments are received but the last 4 are - dropped. - - The data receiver will return a normal TCP ACK segment - acknowledging sequence number 7000, with no SACK option. - - - - - - - - - - - - - -Mathis, et. al. Standards Track [Page 8] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - - Case 2: The first segment is dropped but the remaining 7 are - received. - - Upon receiving each of the last seven packets, the data - receiver will return a TCP ACK segment that acknowledges - sequence number 5000 and contains a SACK option specifying - one block of queued data: - - Triggering ACK Left Edge Right Edge - Segment - - 5000 (lost) - 5500 5000 5500 6000 - 6000 5000 5500 6500 - 6500 5000 5500 7000 - 7000 5000 5500 7500 - 7500 5000 5500 8000 - 8000 5000 5500 8500 - 8500 5000 5500 9000 - - - Case 3: The 2nd, 4th, 6th, and 8th (last) segments are - dropped. - - The data receiver ACKs the first packet normally. The - third, fifth, and seventh packets trigger SACK options as - follows: - - Triggering ACK First Block 2nd Block 3rd Block - Segment Left Right Left Right Left Right - Edge Edge Edge Edge Edge Edge - - 5000 5500 - 5500 (lost) - 6000 5500 6000 6500 - 6500 (lost) - 7000 5500 7000 7500 6000 6500 - 7500 (lost) - 8000 5500 8000 8500 7000 7500 6000 6500 - 8500 (lost) - - - - - - - - - - - -Mathis, et. al. Standards Track [Page 9] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - - Suppose at this point, the 4th packet is received out of order. - (This could either be because the data was badly misordered in the - network, or because the 2nd packet was retransmitted and lost, and - then the 4th packet was retransmitted). 
At this point the data - receiver has only two SACK blocks to report. The data receiver - replies with the following Selective Acknowledgment: - - Triggering ACK First Block 2nd Block 3rd Block - Segment Left Right Left Right Left Right - Edge Edge Edge Edge Edge Edge - - 6500 5500 6000 7500 8000 8500 - - Suppose at this point, the 2nd segment is received. The data - receiver then replies with the following Selective Acknowledgment: - - Triggering ACK First Block 2nd Block 3rd Block - Segment Left Right Left Right Left Right - Edge Edge Edge Edge Edge Edge - - 5500 7500 8000 8500 - -8. Data Receiver Reneging - - Note that the data receiver is permitted to discard data in its queue - that has not been acknowledged to the data sender, even if the data - has already been reported in a SACK option. Such discarding of - SACKed packets is discouraged, but may be used if the receiver runs - out of buffer space. - - The data receiver MAY elect not to keep data which it has reported in - a SACK option. In this case, the receiver SACK generation is - additionally qualified: - - * The first SACK block MUST reflect the newest segment. Even if - the newest segment is going to be discarded and the receiver has - already discarded adjacent segments, the first SACK block MUST - report, at a minimum, the left and right edges of the newest - segment. - - * Except for the newest segment, all SACK blocks MUST NOT report - any old data which is no longer actually held by the receiver. - - Since the data receiver may later discard data reported in a SACK - option, the sender MUST NOT discard data before it is acknowledged by - the Acknowledgment Number field in the TCP header. - - - - - -Mathis, et. al. Standards Track [Page 10] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - -9. Security Considerations - - This document neither strengthens nor weakens TCP's current security - properties. - -10. 
References - - [Cheriton88] Cheriton, D., "VMTP: Versatile Message Transaction - Protocol", RFC 1045, Stanford University, February 1988. - - [Clark87] Clark, D., Lambert, M., and L. Zhang, "NETBLT: A Bulk Data - Transfer Protocol", RFC 998, MIT, March 1987. - - [Fall95] Fall, K. and Floyd, S., "Comparisons of Tahoe, Reno, and - Sack TCP", ftp://ftp.ee.lbl.gov/papers/sacks.ps.Z, December 1995. - - [Floyd96] Floyd, S., "Issues of TCP with SACK", - ftp://ftp.ee.lbl.gov/papers/issues_sa.ps.Z, January 1996. - - [Huitema81] Huitema, C., and Valet, I., An Experiment on High Speed - File Transfer using Satellite Links, 7th Data Communication - Symposium, Mexico, October 1981. - - [Jacobson88] Jacobson, V., "Congestion Avoidance and Control", - Proceedings of SIGCOMM '88, Stanford, CA., August 1988. - - [Jacobson88}, Jacobson, V. and R. Braden, "TCP Extensions for Long- - Delay Paths", RFC 1072, October 1988. - - [Jacobson92] Jacobson, V., Braden, R., and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [Keshav94] Keshav, presentation to the Internet End-to-End Research - Group, November 1994. - - [Mathis95] Mathis, M., and Mahdavi, J., TCP Forward Acknowledgment - Option, presentation to the Internet End-to-End Research Group, June - 1995. - - [Partridge87] Partridge, C., "Private Communication", February 1987. - - [Postel81] Postel, J., "Transmission Control Protocol - DARPA - Internet Program Protocol Specification", RFC 793, DARPA, September - 1981. - - [Stevens94] Stevens, W., TCP/IP Illustrated, Volume 1: The Protocols, - Addison-Wesley, 1994. - - - - -Mathis, et. al. Standards Track [Page 11] - -RFC 2018 TCP Selective Acknowledgement Options October 1996 - - - [Strayer92] Strayer, T., Dempsey, B., and Weaver, A., XTP -- the - xpress transfer protocol. Addison-Wesley Publishing Company, 1992. - - [Velten84] Velten, D., Hinden, R., and J. Sax, "Reliable Data - Protocol", RFC 908, BBN, July 1984. - -11. 
Authors' Addresses - - Matt Mathis and Jamshid Mahdavi - Pittsburgh Supercomputing Center - 4400 Fifth Ave - Pittsburgh, PA 15213 - mathis@psc.edu - mahdavi@psc.edu - - Sally Floyd - Lawrence Berkeley National Laboratory - One Cyclotron Road - Berkeley, CA 94720 - floyd@ee.lbl.gov - - Allyn Romanow - Sun Microsystems, Inc. - 2550 Garcia Ave., MPK17-202 - Mountain View, CA 94043 - allyn@eng.sun.com - - - - - - - - - - - - - - - - - - - - - - - - - -Mathis, et. al. Standards Track [Page 12] - diff --git a/kernel/picotcp/RFC/rfc2026.txt b/kernel/picotcp/RFC/rfc2026.txt deleted file mode 100644 index 1c9c59a..0000000 --- a/kernel/picotcp/RFC/rfc2026.txt +++ /dev/null @@ -1,2019 +0,0 @@ - - - - - - -Network Working Group S. Bradner -Request for Comments: 2026 Harvard University -BCP: 9 October 1996 -Obsoletes: 1602 -Category: Best Current Practice - - - The Internet Standards Process -- Revision 3 - - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Abstract - - This memo documents the process used by the Internet community for - the standardization of protocols and procedures. It defines the - stages in the standardization process, the requirements for moving a - document between stages and the types of documents used during this - process. It also addresses the intellectual property rights and - copyright issues associated with the standards process. - -Table of Contents - - 1. INTRODUCTION....................................................2 - 1.1 Internet Standards...........................................3 - 1.2 The Internet Standards Process...............................3 - 1.3 Organization of This Document................................5 - 2. 
INTERNET STANDARDS-RELATED PUBLICATIONS.........................5 - 2.1 Requests for Comments (RFCs).................................5 - 2.2 Internet-Drafts..............................................7 - 3. INTERNET STANDARD SPECIFICATIONS................................8 - 3.1 Technical Specification (TS).................................8 - 3.2 Applicability Statement (AS).................................8 - 3.3 Requirement Levels...........................................9 - 4. THE INTERNET STANDARDS TRACK...................................10 - 4.1 Standards Track Maturity Levels.............................11 - 4.1.1 Proposed Standard.......................................11 - 4.1.2 Draft Standard..........................................12 - 4.1.3 Internet Standard.......................................13 - 4.2 Non-Standards Track Maturity Levels.........................13 - 4.2.1 Experimental............................................13 - 4.2.2 Informational...........................................14 - 4.2.3 Procedures for Experimental and Informational RFCs......14 - 4.2.4 Historic................................................15 - - - -Bradner Best Current Practice [Page 1] - -RFC 2026 Internet Standards Process October 1996 - - - 5. Best Current Practice (BCP) RFCs...............................15 - 5.1 BCP Review Process..........................................16 - 6. 
THE INTERNET STANDARDS PROCESS.................................17 - 6.1 Standards Actions...........................................17 - 6.1.1 Initiation of Action....................................17 - 6.1.2 IESG Review and Approval................................17 - 6.1.3 Publication.............................................18 - 6.2 Advancing in the Standards Track............................19 - 6.3 Revising a Standard.........................................20 - 6.4 Retiring a Standard.........................................20 - 6.5 Conflict Resolution and Appeals.............................21 - 6.5.1 Working Group Disputes...................................21 - 6.5.2 Process Failures.........................................22 - 6.5.3 Questions of Applicable Procedure........................22 - 6.5.4 Appeals Procedure........................................23 - 7. EXTERNAL STANDARDS AND SPECIFICATIONS..........................23 - 7.1 Use of External Specifications..............................24 - 7.1.1 Incorporation of an Open Standard.......................24 - 7.1.2 Incorporation of a Other Specifications.................24 - 7.1.3 Assumption..............................................25 - 8. NOTICES AND RECORD KEEPING......................................25 - 9. VARYING THE PROCESS.............................................26 - 9.1 The Variance Procedure.......................................26 - 9.2 Exclusions...................................................27 - 10. INTELLECTUAL PROPERTY RIGHTS..................................27 - 10.1. General Policy............................................27 - 10.2 Confidentiality Obligations...............................28 - 10.3. Rights and Permissions....................................28 - 10.3.1. All Contributions......................................28 - 10.3.2. 
Standards Track Documents..............................29 - 10.3.3 Determination of Reasonable and - Non-discriminatory Terms................................30 - 10.4. Notices...................................................30 - 11. ACKNOWLEDGMENTS................................................32 - 12. SECURITY CONSIDERATIONS........................................32 - 13. REFERENCES.....................................................33 - 14. DEFINITIONS OF TERMS...........................................33 - 15. AUTHOR'S ADDRESS...............................................34 - APPENDIX A: GLOSSARY OF ACRONYMS...................................35 - - - - - - - - - - - - -Bradner Best Current Practice [Page 2] - -RFC 2026 Internet Standards Process October 1996 - - -1. INTRODUCTION - - This memo documents the process currently used by the Internet - community for the standardization of protocols and procedures. The - Internet Standards process is an activity of the Internet Society - that is organized and managed on behalf of the Internet community by - the Internet Architecture Board (IAB) and the Internet Engineering - Steering Group (IESG). - -1.1 Internet Standards - - The Internet, a loosely-organized international collaboration of - autonomous, interconnected networks, supports host-to-host - communication through voluntary adherence to open protocols and - procedures defined by Internet Standards. There are also many - isolated interconnected networks, which are not connected to the - global Internet but use the Internet Standards. - - The Internet Standards Process described in this document is - concerned with all protocols, procedures, and conventions that are - used in or by the Internet, whether or not they are part of the - TCP/IP protocol suite. 
In the case of protocols developed and/or - standardized by non-Internet organizations, however, the Internet - Standards Process normally applies to the application of the protocol - or procedure in the Internet context, not to the specification of the - protocol itself. - - In general, an Internet Standard is a specification that is stable - and well-understood, is technically competent, has multiple, - independent, and interoperable implementations with substantial - operational experience, enjoys significant public support, and is - recognizably useful in some or all parts of the Internet. - -1.2 The Internet Standards Process - - In outline, the process of creating an Internet Standard is - straightforward: a specification undergoes a period of development - and several iterations of review by the Internet community and - revision based upon experience, is adopted as a Standard by the - appropriate body (see below), and is published. In practice, the - process is more complicated, due to (1) the difficulty of creating - specifications of high technical quality; (2) the need to consider - the interests of all of the affected parties; (3) the importance of - establishing widespread community consensus; and (4) the difficulty - of evaluating the utility of a particular specification for the - Internet community. - - - - - -Bradner Best Current Practice [Page 3] - -RFC 2026 Internet Standards Process October 1996 - - - The goals of the Internet Standards Process are: - o technical excellence; - o prior implementation and testing; - o clear, concise, and easily understood documentation; - o openness and fairness; and - o timeliness. - - The procedures described in this document are designed to be fair, - open, and objective; to reflect existing (proven) practice; and to - be flexible. - - o These procedures are intended to provide a fair, open, and - objective basis for developing, evaluating, and adopting Internet - Standards. 
They provide ample opportunity for participation and - comment by all interested parties. At each stage of the - standardization process, a specification is repeatedly discussed - and its merits debated in open meetings and/or public electronic - mailing lists, and it is made available for review via world-wide - on-line directories. - - o These procedures are explicitly aimed at recognizing and adopting - generally-accepted practices. Thus, a candidate specification - must be implemented and tested for correct operation and - interoperability by multiple independent parties and utilized in - increasingly demanding environments, before it can be adopted as - an Internet Standard. - - o These procedures provide a great deal of flexibility to adapt to - the wide variety of circumstances that occur in the - standardization process. Experience has shown this flexibility to - be vital in achieving the goals listed above. - - The goal of technical competence, the requirement for prior - implementation and testing, and the need to allow all interested - parties to comment all require significant time and effort. On the - other hand, today's rapid development of networking technology - demands timely development of standards. The Internet Standards - Process is intended to balance these conflicting goals. The process - is believed to be as short and simple as possible without sacrificing - technical excellence, thorough testing before adoption of a standard, - or openness and fairness. - - From its inception, the Internet has been, and is expected to remain, - an evolving system whose participants regularly factor new - requirements and technology into its design and implementation. Users - of the Internet and providers of the equipment, software, and - services that support it should anticipate and embrace this evolution - as a major tenet of Internet philosophy. 
- - - -Bradner Best Current Practice [Page 4] - -RFC 2026 Internet Standards Process October 1996 - - - The procedures described in this document are the result of a number - of years of evolution, driven both by the needs of the growing and - increasingly diverse Internet community, and by experience. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bradner Best Current Practice [Page 5] - -RFC 2026 Internet Standards Process October 1996 - - -1.3 Organization of This Document - - Section 2 describes the publications and archives of the Internet - Standards Process. Section 3 describes the types of Internet - standard specifications. Section 4 describes the Internet standards - specifications track. Section 5 describes Best Current Practice - RFCs. Section 6 describes the process and rules for Internet - standardization. Section 7 specifies the way in which externally- - sponsored specifications and practices, developed and controlled by - other standards bodies or by others, are handled within the Internet - Standards Process. Section 8 describes the requirements for notices - and record keeping Section 9 defines a variance process to allow - one-time exceptions to some of the requirements in this document - Section 10 presents the rules that are required to protect - intellectual property rights in the context of the development and - use of Internet Standards. Section 11 includes acknowledgments of - some of the people involved in creation of this document. Section 12 - notes that security issues are not dealt with by this document. - Section 13 contains a list of numbered references. Section 14 - contains definitions of some of the terms used in this document. - Section 15 lists the author's email and postal addresses. Appendix A - contains a list of frequently-used acronyms. - -2. 
INTERNET STANDARDS-RELATED PUBLICATIONS - -2.1 Requests for Comments (RFCs) - - Each distinct version of an Internet standards-related specification - is published as part of the "Request for Comments" (RFC) document - series. This archival series is the official publication channel for - Internet standards documents and other publications of the IESG, IAB, - and Internet community. RFCs can be obtained from a number of - Internet hosts using anonymous FTP, gopher, World Wide Web, and other - Internet document-retrieval systems. - - The RFC series of documents on networking began in 1969 as part of - the original ARPA wide-area networking (ARPANET) project (see - Appendix A for glossary of acronyms). RFCs cover a wide range of - topics in addition to Internet Standards, from early discussion of - new research concepts to status memos about the Internet. RFC - publication is the direct responsibility of the RFC Editor, under the - general direction of the IAB. - - - - - - - - - -Bradner Best Current Practice [Page 6] - -RFC 2026 Internet Standards Process October 1996 - - - The rules for formatting and submitting an RFC are defined in [5]. - Every RFC is available in ASCII text. Some RFCs are also available - in other formats. The other versions of an RFC may contain material - (such as diagrams and figures) that is not present in the ASCII - version, and it may be formatted differently. - - ********************************************************* - * * - * A stricter requirement applies to standards-track * - * specifications: the ASCII text version is the * - * definitive reference, and therefore it must be a * - * complete and accurate specification of the standard, * - * including all necessary diagrams and illustrations. * - * * - ********************************************************* - - The status of Internet protocol and service specifications is - summarized periodically in an RFC entitled "Internet Official - Protocol Standards" [1]. 
This RFC shows the level of maturity and - other helpful information for each Internet protocol or service - specification (see section 3). - - Some RFCs document Internet Standards. These RFCs form the 'STD' - subseries of the RFC series [4]. When a specification has been - adopted as an Internet Standard, it is given the additional label - "STDxxx", but it keeps its RFC number and its place in the RFC - series. (see section 4.1.3) - - Some RFCs standardize the results of community deliberations about - statements of principle or conclusions about what is the best way to - perform some operations or IETF process function. These RFCs form - the specification has been adopted as a BCP, it is given the - additional label "BCPxxx", but it keeps its RFC number and its place - in the RFC series. (see section 5) - - Not all specifications of protocols or services for the Internet - should or will become Internet Standards or BCPs. Such non-standards - track specifications are not subject to the rules for Internet - standardization. Non-standards track specifications may be published - directly as "Experimental" or "Informational" RFCs at the discretion - of the RFC Editor in consultation with the IESG (see section 4.2). - - - - - - - - - - -Bradner Best Current Practice [Page 7] - -RFC 2026 Internet Standards Process October 1996 - - - ******************************************************** - * * - * It is important to remember that not all RFCs * - * are standards track documents, and that not all * - * standards track documents reach the level of * - * Internet Standard. In the same way, not all RFCs * - * which describe current practices have been given * - * the review and approval to become BCPs. See * - * RFC-1796 [6] for further information. 
* - * * - ******************************************************** - -2.2 Internet-Drafts - - During the development of a specification, draft versions of the - document are made available for informal review and comment by - placing them in the IETF's "Internet-Drafts" directory, which is - replicated on a number of Internet hosts. This makes an evolving - working document readily available to a wide audience, facilitating - the process of review and revision. - - An Internet-Draft that is published as an RFC, or that has remained - unchanged in the Internet-Drafts directory for more than six months - without being recommended by the IESG for publication as an RFC, is - simply removed from the Internet-Drafts directory. At any time, an - Internet-Draft may be replaced by a more recent version of the same - specification, restarting the six-month timeout period. - - An Internet-Draft is NOT a means of "publishing" a specification; - specifications are published through the RFC mechanism described in - the previous section. Internet-Drafts have no formal status, and are - subject to change or removal at any time. - - ******************************************************** - * * - * Under no circumstances should an Internet-Draft * - * be referenced by any paper, report, or Request- * - * for-Proposal, nor should a vendor claim compliance * - * with an Internet-Draft. * - * * - ******************************************************** - - - - - - - - - - -Bradner Best Current Practice [Page 8] - -RFC 2026 Internet Standards Process October 1996 - - - Note: It is acceptable to reference a standards-track specification - that may reasonably be expected to be published as an RFC using the - phrase "Work in Progress" without referencing an Internet-Draft. 
- This may also be done in a standards track document itself as long - as the specification in which the reference is made would stand as a - complete and understandable document with or without the reference to - the "Work in Progress". - -3. INTERNET STANDARD SPECIFICATIONS - - Specifications subject to the Internet Standards Process fall into - one of two categories: Technical Specification (TS) and - Applicability Statement (AS). - -3.1 Technical Specification (TS) - - A Technical Specification is any description of a protocol, service, - procedure, convention, or format. It may completely describe all of - the relevant aspects of its subject, or it may leave one or more - parameters or options unspecified. A TS may be completely self- - contained, or it may incorporate material from other specifications - by reference to other documents (which might or might not be Internet - Standards). - - A TS shall include a statement of its scope and the general intent - for its use (domain of applicability). Thus, a TS that is inherently - specific to a particular context shall contain a statement to that - effect. However, a TS does not specify requirements for its use - within the Internet; these requirements, which depend on the - particular context in which the TS is incorporated by different - system configurations, are defined by an Applicability Statement. - -3.2 Applicability Statement (AS) - - An Applicability Statement specifies how, and under what - circumstances, one or more TSs may be applied to support a particular - Internet capability. An AS may specify uses for TSs that are not - Internet Standards, as discussed in Section 7. - - An AS identifies the relevant TSs and the specific way in which they - are to be combined, and may also specify particular values or ranges - of TS parameters or subfunctions of a TS protocol that must be - implemented. 
An AS also specifies the circumstances in which the use - of a particular TS is required, recommended, or elective (see section - 3.3). - - - - - - -Bradner Best Current Practice [Page 9] - -RFC 2026 Internet Standards Process October 1996 - - - An AS may describe particular methods of using a TS in a restricted - "domain of applicability", such as Internet routers, terminal - servers, Internet systems that interface to Ethernets, or datagram- - based database servers. - - The broadest type of AS is a comprehensive conformance specification, - commonly called a "requirements document", for a particular class of - Internet systems, such as Internet routers or Internet hosts. - - An AS may not have a higher maturity level in the standards track - than any standards-track TS on which the AS relies (see section 4.1). - For example, a TS at Draft Standard level may be referenced by an AS - at the Proposed Standard or Draft Standard level, but not by an AS at - the Standard level. - -3.3 Requirement Levels - - An AS shall apply one of the following "requirement levels" to each - of the TSs to which it refers: - - (a) Required: Implementation of the referenced TS, as specified by - the AS, is required to achieve minimal conformance. For example, - IP and ICMP must be implemented by all Internet systems using the - TCP/IP Protocol Suite. - - (b) Recommended: Implementation of the referenced TS is not - required for minimal conformance, but experience and/or generally - accepted technical wisdom suggest its desirability in the domain - of applicability of the AS. Vendors are strongly encouraged to - include the functions, features, and protocols of Recommended TSs - in their products, and should omit them only if the omission is - justified by some special circumstance. For example, the TELNET - protocol should be implemented by all systems that would benefit - from remote access. 
- - (c) Elective: Implementation of the referenced TS is optional - within the domain of applicability of the AS; that is, the AS - creates no explicit necessity to apply the TS. However, a - particular vendor may decide to implement it, or a particular user - may decide that it is a necessity in a specific environment. For - example, the DECNET MIB could be seen as valuable in an - environment where the DECNET protocol is used. - - - - - - - - - -Bradner Best Current Practice [Page 10] - -RFC 2026 Internet Standards Process October 1996 - - - As noted in section 4.1, there are TSs that are not in the - standards track or that have been retired from the standards - track, and are therefore not required, recommended, or elective. - Two additional "requirement level" designations are available for - these TSs: - - (d) Limited Use: The TS is considered to be appropriate for use - only in limited or unique circumstances. For example, the usage - of a protocol with the "Experimental" designation should generally - be limited to those actively involved with the experiment. - - (e) Not Recommended: A TS that is considered to be inappropriate - for general use is labeled "Not Recommended". This may be because - of its limited functionality, specialized nature, or historic - status. - - Although TSs and ASs are conceptually separate, in practice a - standards-track document may combine an AS and one or more related - TSs. For example, Technical Specifications that are developed - specifically and exclusively for some particular domain of - applicability, e.g., for mail server hosts, often contain within a - single specification all of the relevant AS and TS information. In - such cases, no useful purpose would be served by deliberately - distributing the information among several documents just to preserve - the formal AS/TS distinction. 
However, a TS that is likely to apply - to more than one domain of applicability should be developed in a - modular fashion, to facilitate its incorporation by multiple ASs. - - The "Official Protocol Standards" RFC (STD1) lists a general - requirement level for each TS, using the nomenclature defined in this - section. This RFC is updated periodically. In many cases, more - detailed descriptions of the requirement levels of particular - protocols and of individual features of the protocols will be found - in appropriate ASs. - -4. THE INTERNET STANDARDS TRACK - - Specifications that are intended to become Internet Standards evolve - through a set of maturity levels known as the "standards track". - These maturity levels -- "Proposed Standard", "Draft Standard", and - "Standard" -- are defined and discussed in section 4.1. The way in - which specifications move along the standards track is described in - section 6. - - Even after a specification has been adopted as an Internet Standard, - further evolution often occurs based on experience and the - recognition of new requirements. The nomenclature and procedures of - Internet standardization provide for the replacement of old Internet - - - -Bradner Best Current Practice [Page 11] - -RFC 2026 Internet Standards Process October 1996 - - - Standards with new ones, and the assignment of descriptive labels to - indicate the status of "retired" Internet Standards. A set of - maturity levels is defined in section 4.2 to cover these and other - specifications that are not considered to be on the standards track. - -4.1 Standards Track Maturity Levels - - Internet specifications go through stages of development, testing, - and acceptance. Within the Internet Standards Process, these stages - are formally labeled "maturity levels". - - This section describes the maturity levels and the expected - characteristics of specifications at each level. 
- -4.1.1 Proposed Standard - - The entry-level maturity for the standards track is "Proposed - Standard". A specific action by the IESG is required to move a - specification onto the standards track at the "Proposed Standard" - level. - - A Proposed Standard specification is generally stable, has resolved - known design choices, is believed to be well-understood, has received - significant community review, and appears to enjoy enough community - interest to be considered valuable. However, further experience - might result in a change or even retraction of the specification - before it advances. - - Usually, neither implementation nor operational experience is - required for the designation of a specification as a Proposed - Standard. However, such experience is highly desirable, and will - usually represent a strong argument in favor of a Proposed Standard - designation. - - The IESG may require implementation and/or operational experience - prior to granting Proposed Standard status to a specification that - materially affects the core Internet protocols or that specifies - behavior that may have significant operational impact on the - Internet. - - A Proposed Standard should have no known technical omissions with - respect to the requirements placed upon it. However, the IESG may - waive this requirement in order to allow a specification to advance - to the Proposed Standard state when it is considered to be useful and - necessary (and timely) even with known technical omissions. - - - - - - -Bradner Best Current Practice [Page 12] - -RFC 2026 Internet Standards Process October 1996 - - - Implementors should treat Proposed Standards as immature - specifications. It is desirable to implement them in order to gain - experience and to validate, test, and clarify the specification. 
- However, since the content of Proposed Standards may be changed if - problems are found or better solutions are identified, deploying - implementations of such standards into a disruption-sensitive - environment is not recommended. - -4.1.2 Draft Standard - - A specification from which at least two independent and interoperable - implementations from different code bases have been developed, and - for which sufficient successful operational experience has been - obtained, may be elevated to the "Draft Standard" level. For the - purposes of this section, "interoperable" means to be functionally - equivalent or interchangeable components of the system or process in - which they are used. If patented or otherwise controlled technology - is required for implementation, the separate implementations must - also have resulted from separate exercise of the licensing process. - Elevation to Draft Standard is a major advance in status, indicating - a strong belief that the specification is mature and will be useful. - - The requirement for at least two independent and interoperable - implementations applies to all of the options and features of the - specification. In cases in which one or more options or features - have not been demonstrated in at least two interoperable - implementations, the specification may advance to the Draft Standard - level only if those options or features are removed. - - The Working Group chair is responsible for documenting the specific - implementations which qualify the specification for Draft or Internet - Standard status along with documentation about testing of the - interoperation of these implementations. The documentation must - include information about the support of each of the individual - options and features. This documentation should be submitted to the - Area Director with the protocol action request. 
(see Section 6) - - A Draft Standard must be well-understood and known to be quite - stable, both in its semantics and as a basis for developing an - implementation. A Draft Standard may still require additional or - more widespread field experience, since it is possible for - implementations based on Draft Standard specifications to demonstrate - unforeseen behavior when subjected to large-scale use in production - environments. - - - - - - - -Bradner Best Current Practice [Page 13] - -RFC 2026 Internet Standards Process October 1996 - - - A Draft Standard is normally considered to be a final specification, - and changes are likely to be made only to solve specific problems - encountered. In most circumstances, it is reasonable for vendors to - deploy implementations of Draft Standards into a disruption sensitive - environment. - -4.1.3 Internet Standard - - A specification for which significant implementation and successful - operational experience has been obtained may be elevated to the - Internet Standard level. An Internet Standard (which may simply be - referred to as a Standard) is characterized by a high degree of - technical maturity and by a generally held belief that the specified - protocol or service provides significant benefit to the Internet - community. - - A specification that reaches the status of Standard is assigned a - number in the STD series while retaining its RFC number. - -4.2 Non-Standards Track Maturity Levels - - Not every specification is on the standards track. A specification - may not be intended to be an Internet Standard, or it may be intended - for eventual standardization but not yet ready to enter the standards - track. A specification may have been superseded by a more recent - Internet Standard, or have otherwise fallen into disuse or disfavor. - - Specifications that are not on the standards track are labeled with - one of three "off-track" maturity levels: "Experimental", - "Informational", or "Historic". 
The documents bearing these labels - are not Internet Standards in any sense. - -4.2.1 Experimental - - The "Experimental" designation typically denotes a specification that - is part of some research or development effort. Such a specification - is published for the general information of the Internet technical - community and as an archival record of the work, subject only to - editorial considerations and to verification that there has been - adequate coordination with the standards process (see below). An - Experimental specification may be the output of an organized Internet - research effort (e.g., a Research Group of the IRTF), an IETF Working - Group, or it may be an individual contribution. - - - - - - - - -Bradner Best Current Practice [Page 14] - -RFC 2026 Internet Standards Process October 1996 - - -4.2.2 Informational - - An "Informational" specification is published for the general - information of the Internet community, and does not represent an - Internet community consensus or recommendation. The Informational - designation is intended to provide for the timely publication of a - very broad range of responsible informational documents from many - sources, subject only to editorial considerations and to verification - that there has been adequate coordination with the standards process - (see section 4.2.3). - - Specifications that have been prepared outside of the Internet - community and are not incorporated into the Internet Standards - Process by any of the provisions of section 10 may be published as - Informational RFCs, with the permission of the owner and the - concurrence of the RFC Editor. - -4.2.3 Procedures for Experimental and Informational RFCs - - Unless they are the result of IETF Working Group action, documents - intended to be published with Experimental or Informational status - should be submitted directly to the RFC Editor. 
The RFC Editor will - publish any such documents as Internet-Drafts which have not already - been so published. In order to differentiate these Internet-Drafts - they will be labeled or grouped in the I-D directory so they are - easily recognizable. The RFC Editor will wait two weeks after this - publication for comments before proceeding further. The RFC Editor - is expected to exercise his or her judgment concerning the editorial - suitability of a document for publication with Experimental or - Informational status, and may refuse to publish a document which, in - the expert opinion of the RFC Editor, is unrelated to Internet - activity or falls below the technical and/or editorial standard for - RFCs. - - To ensure that the non-standards track Experimental and Informational - designations are not misused to circumvent the Internet Standards - Process, the IESG and the RFC Editor have agreed that the RFC Editor - will refer to the IESG any document submitted for Experimental or - Informational publication which, in the opinion of the RFC Editor, - may be related to work being done, or expected to be done, within the - IETF community. The IESG shall review such a referred document - within a reasonable period of time, and recommend either that it be - published as originally submitted or referred to the IETF as a - contribution to the Internet Standards Process. - - If (a) the IESG recommends that the document be brought within the - IETF and progressed within the IETF context, but the author declines - to do so, or (b) the IESG considers that the document proposes - - - -Bradner Best Current Practice [Page 15] - -RFC 2026 Internet Standards Process October 1996 - - - something that conflicts with, or is actually inimical to, an - established IETF effort, the document may still be published as an - Experimental or Informational RFC. 
In these cases, however, the IESG - may insert appropriate "disclaimer" text into the RFC either in or - immediately following the "Status of this Memo" section in order to - make the circumstances of its publication clear to readers. - - Documents proposed for Experimental and Informational RFCs by IETF - Working Groups go through IESG review. The review is initiated using - the process described in section 6.1.1. - -4.2.4 Historic - - A specification that has been superseded by a more recent - specification or is for any other reason considered to be obsolete is - assigned to the "Historic" level. (Purists have suggested that the - word should be "Historical"; however, at this point the use of - "Historic" is historical.) - - Note: Standards track specifications normally must not depend on - other standards track specifications which are at a lower maturity - level or on non standards track specifications other than referenced - specifications from other standards bodies. (See Section 7.) - -5. BEST CURRENT PRACTICE (BCP) RFCs - - The BCP subseries of the RFC series is designed to be a way to - standardize practices and the results of community deliberations. A - BCP document is subject to the same basic set of procedures as - standards track documents and thus is a vehicle by which the IETF - community can define and ratify the community's best current thinking - on a statement of principle or on what is believed to be the best way - to perform some operations or IETF process function. - - Historically Internet standards have generally been concerned with - the technical specifications for hardware and software required for - computer communication across interconnected networks. However, - since the Internet itself is composed of networks operated by a great - variety of organizations, with diverse goals and rules, good user - service requires that the operators and administrators of the - Internet follow some common guidelines for policies and operations. 
- While these guidelines are generally different in scope and style - from protocol standards, their establishment needs a similar process - for consensus building. - - While it is recognized that entities such as the IAB and IESG are - composed of individuals who may participate, as individuals, in the - technical work of the IETF, it is also recognized that the entities - - - -Bradner Best Current Practice [Page 16] - -RFC 2026 Internet Standards Process October 1996 - - - themselves have an existence as leaders in the community. As leaders - in the Internet technical community, these entities should have an - outlet to propose ideas to stimulate work in a particular area, to - raise the community's sensitivity to a certain issue, to make a - statement of architectural principle, or to communicate their - thoughts on other matters. The BCP subseries creates a smoothly - structured way for these management entities to insert proposals into - the consensus-building machinery of the IETF while gauging the - community's view of that issue. - - Finally, the BCP series may be used to document the operation of the - IETF itself. For example, this document defines the IETF Standards - Process and is published as a BCP. - -5.1 BCP Review Process - - Unlike standards-track documents, the mechanisms described in BCPs - are not well suited to the phased roll-in nature of the three stage - standards track and instead generally only make sense for full and - immediate instantiation. - - The BCP process is similar to that for proposed standards. The BCP - is submitted to the IESG for review, (see section 6.1.1) and the - existing review process applies, including a Last-Call on the IETF - Announce mailing list. However, once the IESG has approved the - document, the process ends and the document is published. The - resulting document is viewed as having the technical approval of the - IETF. 
- - Specifically, a document to be considered for the status of BCP must - undergo the procedures outlined in sections 6.1 and 6.4 of this - document. The BCP process may be appealed according to the procedures - in section 6.5. - - Because BCPs are meant to express community consensus but are arrived - at more quickly than standards, BCPs require particular care. - Specifically, BCPs should not be viewed simply as stronger - Informational RFCs, but rather should be viewed as documents suitable - for a content different from Informational RFCs. - - A specification, or group of specifications, that has, or have, been - approved as a BCP is assigned a number in the BCP series while - retaining its RFC number(s). - - - - - - - - -Bradner Best Current Practice [Page 17] - -RFC 2026 Internet Standards Process October 1996 - - -6. THE INTERNET STANDARDS PROCESS - - The mechanics of the Internet Standards Process involve decisions of - the IESG concerning the elevation of a specification onto the - standards track or the movement of a standards-track specification - from one maturity level to another. Although a number of reasonably - objective criteria (described below and in section 4) are available - to guide the IESG in making a decision to move a specification onto, - along, or off the standards track, there is no algorithmic guarantee - of elevation to or progression along the standards track for any - specification. The experienced collective judgment of the IESG - concerning the technical quality of a specification proposed for - elevation to or advancement in the standards track is an essential - component of the decision-making process. - -6.1 Standards Actions - - A "standards action" -- entering a particular specification into, - advancing it within, or removing it from, the standards track -- must - be approved by the IESG.
- -6.1.1 Initiation of Action - - A specification that is intended to enter or advance in the Internet - standards track shall first be posted as an Internet-Draft (see - section 2.2) unless it has not changed since publication as an RFC. - It shall remain as an Internet-Draft for a period of time, not less - than two weeks, that permits useful community review, after which a - recommendation for action may be initiated. - - A standards action is initiated by a recommendation by the IETF - Working group responsible for a specification to its Area Director, - copied to the IETF Secretariat or, in the case of a specification not - associated with a Working Group, a recommendation by an individual to - the IESG. - -6.1.2 IESG Review and Approval - - The IESG shall determine whether or not a specification submitted to - it according to section 6.1.1 satisfies the applicable criteria for - the recommended action (see sections 4.1 and 4.2), and shall in - addition determine whether or not the technical quality and clarity - of the specification is consistent with that expected for the - maturity level to which the specification is recommended. - - In order to obtain all of the information necessary to make these - determinations, particularly when the specification is considered by - the IESG to be extremely important in terms of its potential impact - - - -Bradner Best Current Practice [Page 18] - -RFC 2026 Internet Standards Process October 1996 - - - on the Internet or on the suite of Internet protocols, the IESG may, - at its discretion, commission an independent technical review of the - specification. - - The IESG will send notice to the IETF of the pending IESG - consideration of the document(s) to permit a final review by the - general Internet community. This "Last-Call" notification shall be - via electronic mail to the IETF Announce mailing list. 
Comments on a - Last-Call shall be accepted from anyone, and should be sent as - directed in the Last-Call announcement. - - The Last-Call period shall be no shorter than two weeks except in - those cases where the proposed standards action was not initiated by - an IETF Working Group, in which case the Last-Call period shall be no - shorter than four weeks. If the IESG believes that the community - interest would be served by allowing more time for comment, it may - decide on a longer Last-Call period or to explicitly lengthen a - current Last-Call period. - - The IESG is not bound by the action recommended when the - specification was submitted. For example, the IESG may decide to - consider the specification for publication in a different category - than that requested. If the IESG determines this before the Last- - Call is issued then the Last-Call should reflect the IESG's view. - The IESG could also decide to change the publication category based - on the response to a Last-Call. If this decision would result in a - specification being published at a "higher" level than the original - Last-Call was for, a new Last-Call should be issued indicating the - IESG recommendation. In addition, the IESG may decide to recommend - the formation of a new Working Group in the case of significant - controversy in response to a Last-Call for a specification not - originating from an IETF Working Group. - - In a timely fashion after the expiration of the Last-Call period, the - IESG shall make its final determination of whether or not to approve - the standards action, and shall notify the IETF of its decision via - electronic mail to the IETF Announce mailing list. - -6.1.3 Publication - - If a standards action is approved, notification is sent to the RFC - Editor and copied to the IETF with instructions to publish the - specification as an RFC. The specification shall at that point be - removed from the Internet-Drafts directory.
- - - - - - - -Bradner Best Current Practice [Page 19] - -RFC 2026 Internet Standards Process October 1996 - - - An official summary of standards actions completed and pending shall - appear in each issue of the Internet Society's newsletter. This - shall constitute the "publication of record" for Internet standards - actions. - - The RFC Editor shall publish periodically an "Internet Official - Protocol Standards" RFC [1], summarizing the status of all Internet - protocol and service specifications. - -6.2 Advancing in the Standards Track - - The procedure described in section 6.1 is followed for each action - that attends the advancement of a specification along the standards - track. - - A specification shall remain at the Proposed Standard level for at - least six (6) months. - - A specification shall remain at the Draft Standard level for at least - four (4) months, or until at least one IETF meeting has occurred, - whichever comes later. - - These minimum periods are intended to ensure adequate opportunity for - community review without severely impacting timeliness. These - intervals shall be measured from the date of publication of the - corresponding RFC(s), or, if the action does not result in RFC - publication, the date of the announcement of the IESG approval of the - action. - - A specification may be (indeed, is likely to be) revised as it - advances through the standards track. At each stage, the IESG shall - determine the scope and significance of the revision to the - specification, and, if necessary and appropriate, modify the - recommended action. Minor revisions are expected, but a significant - revision may require that the specification accumulate more - experience at its current maturity level before progressing. Finally, - if the specification has been changed very significantly, the IESG - may recommend that the revision be treated as a new document, re- - entering the standards track at the beginning. 
- - Change of status shall result in republication of the specification - as an RFC, except in the rare case that there have been no changes at - all in the specification since the last publication. Generally, - desired changes will be "batched" for incorporation at the next level - in the standards track. However, deferral of changes to the next - standards action on the specification will not always be possible or - desirable; for example, an important typographical error, or a - technical error that does not represent a change in overall function - - - -Bradner Best Current Practice [Page 20] - -RFC 2026 Internet Standards Process October 1996 - - - of the specification, may need to be corrected immediately. In such - cases, the IESG or RFC Editor may be asked to republish the RFC (with - a new number) with corrections, and this will not reset the minimum - time-at-level clock. - - When a standards-track specification has not reached the Internet - Standard level but has remained at the same maturity level for - twenty-four (24) months, and every twelve (12) months thereafter - until the status is changed, the IESG shall review the viability of - the standardization effort responsible for that specification and the - usefulness of the technology. Following each such review, the IESG - shall approve termination or continuation of the development effort, - at the same time the IESG shall decide to maintain the specification - at the same maturity level or to move it to Historic status. This - decision shall be communicated to the IETF by electronic mail to the - IETF Announce mailing list to allow the Internet community an - opportunity to comment. This provision is not intended to threaten a - legitimate and active Working Group effort, but rather to provide an - administrative mechanism for terminating a moribund effort. 
- -6.3 Revising a Standard - - A new version of an established Internet Standard must progress - through the full Internet standardization process as if it were a - completely new specification. Once the new version has reached the - Standard level, it will usually replace the previous version, which - will be moved to Historic status. However, in some cases both - versions may remain as Internet Standards to honor the requirements - of an installed base. In this situation, the relationship between - the previous and the new versions must be explicitly stated in the - text of the new version or in another appropriate document (e.g., an - Applicability Statement; see section 3.2). - -6.4 Retiring a Standard - - As the technology changes and matures, it is possible for a new - Standard specification to be so clearly superior technically that one - or more existing standards track specifications for the same function - should be retired. In this case, or when it is felt for some other - reason that an existing standards track specification should be - retired, the IESG shall approve a change of status of the old - specification(s) to Historic. This recommendation shall be issued - with the same Last-Call and notification procedures used for any - other standards action. A request to retire an existing standard can - originate from a Working Group, an Area Director or some other - interested party. - - - - - -Bradner Best Current Practice [Page 21] - -RFC 2026 Internet Standards Process October 1996 - - -6.5 Conflict Resolution and Appeals - - Disputes are possible at various stages during the IETF process. As - much as possible the process is designed so that compromises can be - made, and genuine consensus achieved, however there are times when - even the most reasonable and knowledgeable people are unable to - agree. To achieve the goals of openness and fairness, such conflicts - must be resolved by a process of open review and discussion. 
This - section specifies the procedures that shall be followed to deal with - Internet standards issues that cannot be resolved through the normal - processes whereby IETF Working Groups and other Internet Standards - Process participants ordinarily reach consensus. - -6.5.1 Working Group Disputes - - An individual (whether a participant in the relevant Working Group or - not) may disagree with a Working Group recommendation based on his or - her belief that either (a) his or her own views have not been - adequately considered by the Working Group, or (b) the Working Group - has made an incorrect technical choice which places the quality - and/or integrity of the Working Group's product(s) in significant - jeopardy. The first issue is a difficulty with Working Group - process; the latter is an assertion of technical error. These two - types of disagreement are quite different, but both are handled by - the same process of review. - - A person who disagrees with a Working Group recommendation shall - always first discuss the matter with the Working Group's chair(s), - who may involve other members of the Working Group (or the Working - Group as a whole) in the discussion. - - If the disagreement cannot be resolved in this way, any of the - parties involved may bring it to the attention of the Area - Director(s) for the area in which the Working Group is chartered. - The Area Director(s) shall attempt to resolve the dispute. - - If the disagreement cannot be resolved by the Area Director(s) any of - the parties involved may then appeal to the IESG as a whole. The - IESG shall then review the situation and attempt to resolve it in a - manner of its own choosing. - - If the disagreement is not resolved to the satisfaction of the - parties at the IESG level, any of the parties involved may appeal the - decision to the IAB. The IAB shall then review the situation and - attempt to resolve it in a manner of its own choosing. 
- - - - - - -Bradner Best Current Practice [Page 22] - -RFC 2026 Internet Standards Process October 1996 - - - The IAB decision is final with respect to the question of whether or - not the Internet standards procedures have been followed and with - respect to all questions of technical merit. - -6.5.2 Process Failures - - This document sets forward procedures required to be followed to - ensure openness and fairness of the Internet Standards Process, and - the technical viability of the standards created. The IESG is the - principal agent of the IETF for this purpose, and it is the IESG that - is charged with ensuring that the required procedures have been - followed, and that any necessary prerequisites to a standards action - have been met. - - If an individual should disagree with an action taken by the IESG in - this process, that person should first discuss the issue with the - IESG Chair. If the IESG Chair is unable to satisfy the complainant - then the IESG as a whole should re-examine the action taken, along - with input from the complainant, and determine whether any further - action is needed. The IESG shall issue a report on its review of the - complaint to the IETF. - - Should the complainant not be satisfied with the outcome of the IESG - review, an appeal may be lodged to the IAB. The IAB shall then review - the situation and attempt to resolve it in a manner of its own - choosing and report to the IETF on the outcome of its review. - - If circumstances warrant, the IAB may direct that an IESG decision be - annulled, and the situation shall then be as it was before the IESG - decision was taken. The IAB may also recommend an action to the IESG, - or make such other recommendations as it deems fit. The IAB may not, - however, pre-empt the role of the IESG by issuing a decision which - only the IESG is empowered to make. - - The IAB decision is final with respect to the question of whether or - not the Internet standards procedures have been followed.
- -6.5.3 Questions of Applicable Procedure - - Further recourse is available only in cases in which the procedures - themselves (i.e., the procedures described in this document) are - claimed to be inadequate or insufficient to the protection of the - rights of all parties in a fair and open Internet Standards Process. - Claims on this basis may be made to the Internet Society Board of - Trustees. The President of the Internet Society shall acknowledge - such an appeal within two weeks, and shall at the time of - acknowledgment advise the petitioner of the expected duration of the - Trustees' review of the appeal. The Trustees shall review the - - - -Bradner Best Current Practice [Page 23] - -RFC 2026 Internet Standards Process October 1996 - - - situation in a manner of its own choosing and report to the IETF on - the outcome of its review. - - The Trustees' decision upon completion of their review shall be final - with respect to all aspects of the dispute. - -6.5.4 Appeals Procedure - - All appeals must include a detailed and specific description of the - facts of the dispute. - - All appeals must be initiated within two months of the public - knowledge of the action or decision to be challenged. - - At all stages of the appeals process, the individuals or bodies - responsible for making the decisions have the discretion to define - the specific procedures they will follow in the process of making - their decision. - - In all cases a decision concerning the disposition of the dispute, - and the communication of that decision to the parties involved, must - be accomplished within a reasonable period of time. - - [NOTE: These procedures intentionally and explicitly do not - establish a fixed maximum time period that shall be considered - "reasonable" in all cases. 
The Internet Standards Process places a - premium on consensus and efforts to achieve it, and deliberately - foregoes deterministically swift execution of procedures in favor of - a latitude within which more genuine technical agreements may be - reached.] - -7. EXTERNAL STANDARDS AND SPECIFICATIONS - - Many standards groups other than the IETF create and publish - standards documents for network protocols and services. When these - external specifications play an important role in the Internet, it is - desirable to reach common agreements on their usage -- i.e., to - establish Internet Standards relating to these external - specifications. - - There are two categories of external specifications: - - (1) Open Standards - - Various national and international standards bodies, such as ANSI, - ISO, IEEE, and ITU-T, develop a variety of protocol and service - specifications that are similar to Technical Specifications - defined here. National and international groups also publish - - - -Bradner Best Current Practice [Page 24] - -RFC 2026 Internet Standards Process October 1996 - - - "implementors' agreements" that are analogous to Applicability - Statements, capturing a body of implementation-specific detail - concerned with the practical application of their standards. All - of these are considered to be "open external standards" for the - purposes of the Internet Standards Process. - - (2) Other Specifications - - Other proprietary specifications that have come to be widely used - in the Internet may be treated by the Internet community as if - they were a "standards". Such a specification is not generally - developed in an open fashion, is typically proprietary, and is - controlled by the vendor, vendors, or organization that produced - it. 
- -7.1 Use of External Specifications - - To avoid conflict between competing versions of a specification, the - Internet community will not standardize a specification that is - simply an "Internet version" of an existing external specification - unless an explicit cooperative arrangement to do so has been made. - However, there are several ways in which an external specification - that is important for the operation and/or evolution of the Internet - may be adopted for Internet use. - -7.1.1 Incorporation of an Open Standard - - An Internet Standard TS or AS may incorporate an open external - standard by reference. For example, many Internet Standards - incorporate by reference the ANSI standard character set "ASCII" [2]. - Whenever possible, the referenced specification shall be available - online. - -7.1.2 Incorporation of Other Specifications - - Other proprietary specifications may be incorporated by reference to - a version of the specification as long as the proprietor meets the - requirements of section 10. If the other proprietary specification - is not widely and readily available, the IESG may request that it be - published as an Informational RFC. - - The IESG generally should not favor a particular proprietary - specification over technically equivalent and competing - specification(s) by making any incorporated vendor specification - "required" or "recommended". - - - - - - -Bradner Best Current Practice [Page 25] - -RFC 2026 Internet Standards Process October 1996 - - -7.1.3 Assumption - - An IETF Working Group may start from an external specification and - develop it into an Internet specification. This is acceptable if (1) - the specification is provided to the Working Group in compliance with - the requirements of section 10, and (2) change control has been - conveyed to IETF by the original developer of the specification for - the specification or for specifications derived from the original - specification. - -8. 
NOTICES AND RECORD KEEPING - - Each of the organizations involved in the development and approval of - Internet Standards shall publicly announce, and shall maintain a - publicly accessible record of, every activity in which it engages, to - the extent that the activity represents the prosecution of any part - of the Internet Standards Process. For purposes of this section, the - organizations involved in the development and approval of Internet - Standards includes the IETF, the IESG, the IAB, all IETF Working - Groups, and the Internet Society Board of Trustees. - - For IETF and Working Group meetings announcements shall be made by - electronic mail to the IETF Announce mailing list and shall be made - sufficiently far in advance of the activity to permit all interested - parties to effectively participate. The announcement shall contain - (or provide pointers to) all of the information that is necessary to - support the participation of any interested individual. In the case - of a meeting, for example, the announcement shall include an agenda - that specifies the standards- related issues that will be discussed. - - The formal record of an organization's standards-related activity - shall include at least the following: - - o the charter of the organization (or a defining document equivalent - to a charter); - o complete and accurate minutes of meetings; - o the archives of Working Group electronic mail mailing lists; and - o all written contributions from participants that pertain to the - organization's standards-related activity. - - As a practical matter, the formal record of all Internet Standards - Process activities is maintained by the IETF Secretariat, and is the - responsibility of the IETF Secretariat except that each IETF Working - Group is expected to maintain their own email list archive and must - make a best effort to ensure that all traffic is captured and - included in the archives. 
Also, the Working Group chair is - responsible for providing the IETF Secretariat with complete and - accurate minutes of all Working Group meetings. Internet-Drafts that - - - -Bradner Best Current Practice [Page 26] - -RFC 2026 Internet Standards Process October 1996 - - - have been removed (for any reason) from the Internet-Drafts - directories shall be archived by the IETF Secretariat for the sole - purpose of preserving an historical record of Internet standards - activity and thus are not retrievable except in special - circumstances. - -9. VARYING THE PROCESS - - This document, which sets out the rules and procedures by which - Internet Standards and related documents are made is itself a product - of the Internet Standards Process (as a BCP, as described in section - 5). It replaces a previous version, and in time, is likely itself to - be replaced. - - While, when published, this document represents the community's view - of the proper and correct process to follow, and requirements to be - met, to allow for the best possible Internet Standards and BCPs, it - cannot be assumed that this will always remain the case. From time to - time there may be a desire to update it, by replacing it with a new - version. Updating this document uses the same open procedures as are - used for any other BCP. - - In addition, there may be situations where following the procedures - leads to a deadlock about a specific specification, or there may be - situations where the procedures provide no guidance. In these cases - it may be appropriate to invoke the variance procedure described - below. - -9.1 The Variance Procedure - - Upon the recommendation of the responsible IETF Working Group (or, if - no Working Group is constituted, upon the recommendation of an ad hoc - committee), the IESG may enter a particular specification into, or - advance it within, the standards track even though some of the - requirements of this document have not or will not be met. 
The IESG - may approve such a variance, however, only if it first determines - that the likely benefits to the Internet community are likely to - outweigh any costs to the Internet community that result from - noncompliance with the requirements in this document. In exercising - this discretion, the IESG shall at least consider (a) the technical - merit of the specification, (b) the possibility of achieving the - goals of the Internet Standards Process without granting a variance, - (c) alternatives to the granting of a variance, (d) the collateral - and precedential effects of granting a variance, and (e) the IESG's - ability to craft a variance that is as narrow as possible. In - determining whether to approve a variance, the IESG has discretion to - limit the scope of the variance to particular parts of this document - and to impose such additional restrictions or limitations as it - - - -Bradner Best Current Practice [Page 27] - -RFC 2026 Internet Standards Process October 1996 - - - determines appropriate to protect the interests of the Internet - community. - - The proposed variance must detail the problem perceived, explain the - precise provision of this document which is causing the need for a - variance, and the results of the IESG's considerations including - consideration of points (a) through (d) in the previous paragraph. - The proposed variance shall be issued as an Internet Draft. The IESG - shall then issue an extended Last-Call, of no less than 4 weeks, to - allow for community comment upon the proposal. - - In a timely fashion after the expiration of the Last-Call period, the - IESG shall make its final determination of whether or not to approve - the proposed variance, and shall notify the IETF of its decision via - electronic mail to the IETF Announce mailing list. If the variance - is approved it shall be forwarded to the RFC Editor with a request - that it be published as a BCP. 
- - This variance procedure is for use when a one-time waiving of some - provision of this document is felt to be required. Permanent changes - to this document shall be accomplished through the normal BCP - process. - - The appeals process in section 6.5 applies to this process. - -9.2 Exclusions - - No use of this procedure may lower any specified delays, nor exempt - any proposal from the requirements of openness, fairness, or - consensus, nor from the need to keep proper records of the meetings - and mailing list discussions. - - Specifically, the following sections of this document must not be - subject of a variance: 5.1, 6.1, 6.1.1 (first paragraph), 6.1.2, 6.3 - (first sentence), 6.5 and 9. - -10. INTELLECTUAL PROPERTY RIGHTS - -10.1. General Policy - - In all matters of intellectual property rights and procedures, the - intention is to benefit the Internet community and the public at - large, while respecting the legitimate rights of others. - - - - - - - - -Bradner Best Current Practice [Page 28] - -RFC 2026 Internet Standards Process October 1996 - - -10.2 Confidentiality Obligations - - No contribution that is subject to any requirement of confidentiality - or any restriction on its dissemination may be considered in any part - of the Internet Standards Process, and there must be no assumption of - any confidentiality obligation with respect to any such contribution. - -10.3. Rights and Permissions - - In the course of standards work, the IETF receives contributions in - various forms and from many persons. To best facilitate the - dissemination of these contributions, it is necessary to understand - any intellectual property rights (IPR) relating to the contributions. - -10.3.1. 
All Contributions - - By submission of a contribution, each person actually submitting the - contribution is deemed to agree to the following terms and conditions - on his own behalf, on behalf of the organization (if any) he - represents and on behalf of the owners of any proprietary rights in the - contribution. Where a submission identifies contributors in - addition to the contributor(s) who provide the actual submission, the - actual submitter(s) represent that each other named contributor was - made aware of and agreed to accept the same terms and conditions on - his own behalf, on behalf of any organization he may represent and - any known owner of any proprietary rights in the contribution. - - 1. Some works (e.g. works of the U.S. Government) are not subject to - copyright. However, to the extent that the submission is or may - be subject to copyright, the contributor, the organization he - represents (if any) and the owners of any proprietary rights in - the contribution, grant an unlimited perpetual, non-exclusive, - royalty-free, world-wide right and license to the ISOC and the - IETF under any copyrights in the contribution. This license - includes the right to copy, publish and distribute the - contribution in any way, and to prepare derivative works that are - based on or incorporate all or part of the contribution, the - license to such derivative works to be of the same scope as the - license of the original contribution. - - 2. The contributor acknowledges that the ISOC and IETF have no duty - to publish or otherwise use or disseminate any contribution. - - 3. The contributor grants permission to reference the name(s) and - address(es) of the contributor(s) and of the organization(s) he - represents (if any). - - - - - -Bradner Best Current Practice [Page 29] - -RFC 2026 Internet Standards Process October 1996 - - - 4. The contributor represents that the contribution properly acknowledges - major contributors. - - 5. 
The contributor, the organization (if any) he represents and the - owners of any proprietary rights in the contribution, agree that - no information in the contribution is confidential and that the - ISOC and its affiliated organizations may freely disclose any - information in the contribution. - - 6. The contributor represents that he has disclosed the existence of - any proprietary or intellectual property rights in the - contribution that are reasonably and personally known to the - contributor. The contributor does not represent that he - personally knows of all potentially pertinent proprietary and - intellectual property rights owned or claimed by the organization - he represents (if any) or third parties. - - 7. The contributor represents that there are no limits to the - contributor's ability to make the grants, acknowledgments and - agreements above that are reasonably and personally known to the - contributor. - - By ratifying this description of the IETF process the Internet - Society warrants that it will not inhibit the traditional open and - free access to IETF documents for which license and right have - been assigned according to the procedures set forth in this - section, including Internet-Drafts and RFCs. This warrant is - perpetual and will not be revoked by the Internet Society or its - successors or assigns. - -10.3.2. Standards Track Documents - - (A) Where any patents, patent applications, or other proprietary - rights are known, or claimed, with respect to any specification on - the standards track, and brought to the attention of the IESG, the - IESG shall not advance the specification without including in the - document a note indicating the existence of such rights, or - claimed rights. 
Where implementations are required before - advancement of a specification, only implementations that have, by - statement of the implementors, taken adequate steps to comply with - any such rights, or claimed rights, shall be considered for the - purpose of showing the adequacy of the specification. - (B) The IESG disclaims any responsibility for identifying the - existence of or for evaluating the applicability of any claimed - copyrights, patents, patent applications, or other rights in the - fulfilling of its obligations under (A), and will take no - position on the validity or scope of any such rights. - - - - -Bradner Best Current Practice [Page 30] - -RFC 2026 Internet Standards Process October 1996 - - - (C) Where the IESG knows of rights, or claimed rights under (A), the - IETF Executive Director shall attempt to obtain from the claimant - of such rights, a written assurance that upon approval by the IESG - of the relevant Internet standards track specification(s), any - party will be able to obtain the right to implement, use and - distribute the technology or works when implementing, using or - distributing technology based upon the specific specification(s) - under openly specified, reasonable, non-discriminatory terms. - The Working Group proposing the use of the technology with respect - to which the proprietary rights are claimed may assist the IETF - Executive Director in this effort. The results of this procedure - shall not affect advancement of a specification along the - standards track, except that the IESG may defer approval where a - delay may facilitate the obtaining of such assurances. The - results will, however, be recorded by the IETF Executive Director, - and made available. The IESG may also direct that a summary of - the results be included in any RFC published containing the - specification. 
- -10.3.3 Determination of Reasonable and Non-discriminatory Terms - - The IESG will not make any explicit determination that the assurance - of reasonable and non-discriminatory terms for the use of a - technology has been fulfilled in practice. It will instead use the - normal requirements for the advancement of Internet Standards to - verify that the terms for use are reasonable. If the two unrelated - implementations of the specification that are required to advance - from Proposed Standard to Draft Standard have been produced by - different organizations or individuals or if the "significant - implementation and successful operational experience" required to - advance from Draft Standard to Standard has been achieved the - assumption is that the terms must be reasonable and to some degree, - non-discriminatory. This assumption may be challenged during the - Last-Call period. - -10.4. Notices - - (A) Standards track documents shall include the following notice: - - "The IETF takes no position regarding the validity or scope of - any intellectual property or other rights that might be claimed - to pertain to the implementation or use of the technology - described in this document or the extent to which any license - under such rights might or might not be available; neither does - it represent that it has made any effort to identify any such - rights. Information on the IETF's procedures with respect to - rights in standards-track and standards-related documentation - can be found in BCP-11. Copies of claims of rights made - - - -Bradner Best Current Practice [Page 31] - -RFC 2026 Internet Standards Process October 1996 - - - available for publication and any assurances of licenses to - be made available, or the result of an attempt made - to obtain a general license or permission for the use of such - proprietary rights by implementors or users of this - specification can be obtained from the IETF Secretariat." 
- - (B) The IETF encourages all interested parties to bring to its - attention, at the earliest possible time, the existence of any - intellectual property rights pertaining to Internet Standards. - For this purpose, each standards document shall include the - following invitation: - - "The IETF invites any interested party to bring to its - attention any copyrights, patents or patent applications, or - other proprietary rights which may cover technology that may be - required to practice this standard. Please address the - information to the IETF Executive Director." - - (C) The following copyright notice and disclaimer shall be included - in all ISOC standards-related documentation: - - "Copyright (C) The Internet Society (date). All Rights - Reserved. - - This document and translations of it may be copied and - furnished to others, and derivative works that comment on or - otherwise explain it or assist in its implementation may be - prepared, copied, published and distributed, in whole or in - part, without restriction of any kind, provided that the above - copyright notice and this paragraph are included on all such - copies and derivative works. However, this document itself may - not be modified in any way, such as by removing the copyright - notice or references to the Internet Society or other Internet - organizations, except as needed for the purpose of developing - Internet standards in which case the procedures for copyrights - defined in the Internet Standards process must be followed, or - as required to translate it into languages other than English. - - The limited permissions granted above are perpetual and will - not be revoked by the Internet Society or its successors or - assigns. 
- - - - - - - - - - -Bradner Best Current Practice [Page 32] - -RFC 2026 Internet Standards Process October 1996 - - - This document and the information contained herein is provided - on an "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE - OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY - IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A - PARTICULAR PURPOSE." - - (D) Where the IESG is aware at the time of publication of - proprietary rights claimed with respect to a standards track - document, or the technology described or referenced therein, such - document shall contain the following notice: - - "The IETF has been notified of intellectual property rights - claimed in regard to some or all of the specification contained - in this document. For more information consult the online list - of claimed rights." - -11. ACKNOWLEDGMENTS - - There have been a number of people involved with the development of - the documents defining the IETF Standards Process over the years. - The process was first described in RFC 1310 then revised in RFC 1602 - before the current effort (which relies heavily on its predecessors). - Specific acknowledgments must be extended to Lyman Chapin, Phill - Gross and Christian Huitema as the editors of the previous versions, - to Jon Postel and Dave Crocker for their inputs to those versions, to - Andy Ireland, Geoff Stewart, Jim Lampert, and Dick Holleman for their - reviews of the legal aspects of the procedures described herein, and - to John Stewart, Robert Elz and Steve Coya for their extensive input - on the final version. - - In addition much of the credit for the refinement of the details of - the IETF processes belongs to the many members of the various - incarnations of the POISED Working Group. - -12. SECURITY CONSIDERATIONS - - Security issues are not discussed in this memo. 
- - - - - - - - - - - - -Bradner Best Current Practice [Page 33] - -RFC 2026 Internet Standards Process October 1996 - - -13. REFERENCES - - [1] Postel, J., "Internet Official Protocol Standards", STD 1, - USC/Information Sciences Institute, March 1996. - - [2] ANSI, Coded Character Set -- 7-Bit American Standard Code for - Information Interchange, ANSI X3.4-1986. - - [3] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, - USC/Information Sciences Institute, October 1994. - - [4] Postel, J., "Introduction to the STD Notes", RFC 1311, - USC/Information Sciences Institute, March 1992. - - [5] Postel, J., "Instructions to RFC Authors", RFC 1543, - USC/Information Sciences Institute, October 1993. - - [6] Huitema, C., J. Postel, and S. Crocker "Not All RFCs are - Standards", RFC 1796, April 1995. - -14. DEFINITIONS OF TERMS - - IETF Area - A management division within the IETF. An Area consists - of Working Groups related to a general topic such as routing. An - Area is managed by one or two Area Directors. - Area Director - The manager of an IETF Area. The Area Directors - along with the IETF Chair comprise the Internet Engineering - Steering Group (IESG). - File Transfer Protocol (FTP) - An Internet application used to - transfer files in a TCP/IP network. - gopher - An Internet application used to interactively select and - retrieve files in a TCP/IP network. - Internet Architecture Board (IAB) - An appointed group that assists - in the management of the IETF standards process. - Internet Engineering Steering Group (IESG) - A group comprised of the - IETF Area Directors and the IETF Chair. The IESG is responsible - for the management, along with the IAB, of the IETF and is the - standards approval board for the IETF. - interoperable - For the purposes of this document, "interoperable" - means to be able to interoperate over a data communications path. 
- Last-Call - A public comment period used to gage the level of - consensus about the reasonableness of a proposed standards action. - (see section 6.1.2) - - - - - - - - -Bradner Best Current Practice [Page 34] - -RFC 2026 Internet Standards Process October 1996 - - - online - Relating to information made available over the Internet. - When referenced in this document material is said to be online - when it is retrievable without restriction or undue fee using - standard Internet applications such as anonymous FTP, gopher or - the WWW. - Working Group - A group chartered by the IESG and IAB to work on a - specific specification, set of specifications or topic. - -15. AUTHOR'S ADDRESS - - Scott O. Bradner - Harvard University - Holyoke Center, Room 813 - 1350 Mass. Ave. - Cambridge, MA 02138 - USA - - Phone: +1 617 495 3864 - EMail: sob@harvard.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bradner Best Current Practice [Page 35] - -RFC 2026 Internet Standards Process October 1996 - - -APPENDIX A: GLOSSARY OF ACRONYMS - - ANSI: American National Standards Institute - ARPA: (U.S.) Advanced Research Projects Agency - AS: Applicability Statement - FTP: File Transfer Protocol - ASCII: American Standard Code for Information Interchange - ITU-T: Telecommunications Standardization sector of the - International Telecommunication Union (ITU), a UN - treaty organization; ITU-T was formerly called CCITT. 
- IAB: Internet Architecture Board - IANA: Internet Assigned Numbers Authority - IEEE: Institute of Electrical and Electronics Engineers - ICMP: Internet Control Message Protocol - IESG: Internet Engineering Steering Group - IETF: Internet Engineering Task Force - IP: Internet Protocol - IRSG Internet Research Steering Group - IRTF: Internet Research Task Force - ISO: International Organization for Standardization - ISOC: Internet Society - MIB: Management Information Base - OSI: Open Systems Interconnection - RFC: Request for Comments - TCP: Transmission Control Protocol - TS: Technical Specification - WWW: World Wide Web - - - - - - - - - - - - - - - - - - - - - - - - -Bradner Best Current Practice [Page 36] - diff --git a/kernel/picotcp/RFC/rfc2131.txt b/kernel/picotcp/RFC/rfc2131.txt deleted file mode 100644 index f45d9b8..0000000 --- a/kernel/picotcp/RFC/rfc2131.txt +++ /dev/null @@ -1,2523 +0,0 @@ - - - - - - -Network Working Group R. Droms -Request for Comments: 2131 Bucknell University -Obsoletes: 1541 March 1997 -Category: Standards Track - - Dynamic Host Configuration Protocol - -Status of this memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Abstract - - The Dynamic Host Configuration Protocol (DHCP) provides a framework - for passing configuration information to hosts on a TCPIP network. - DHCP is based on the Bootstrap Protocol (BOOTP) [7], adding the - capability of automatic allocation of reusable network addresses and - additional configuration options [19]. DHCP captures the behavior of - BOOTP relay agents [7, 21], and DHCP participants can interoperate - with BOOTP participants [9]. - -Table of Contents - - 1. Introduction. . . . . . . . . . . . 
. . . . . . . . . . . . . 2 - 1.1 Changes to RFC1541. . . . . . . . . . . . . . . . . . . . . . 3 - 1.2 Related Work. . . . . . . . . . . . . . . . . . . . . . . . . 4 - 1.3 Problem definition and issues . . . . . . . . . . . . . . . . 4 - 1.4 Requirements. . . . . . . . . . . . . . . . . . . . . . . . . 5 - 1.5 Terminology . . . . . . . . . . . . . . . . . . . . . . . . . 6 - 1.6 Design goals. . . . . . . . . . . . . . . . . . . . . . . . . 6 - 2. Protocol Summary. . . . . . . . . . . . . . . . . . . . . . . 8 - 2.1 Configuration parameters repository . . . . . . . . . . . . . 11 - 2.2 Dynamic allocation of network addresses . . . . . . . . . . . 12 - 3. The Client-Server Protocol. . . . . . . . . . . . . . . . . . 13 - 3.1 Client-server interaction - allocating a network address. . . 13 - 3.2 Client-server interaction - reusing a previously allocated - network address . . . . . . . . . . . . . . . . . . . . . . . 17 - 3.3 Interpretation and representation of time values. . . . . . . 20 - 3.4 Obtaining parameters with externally configured network - address . . . . . . . . . . . . . . . . . . . . . . . . . . . 20 - 3.5 Client parameters in DHCP . . . . . . . . . . . . . . . . . . 21 - 3.6 Use of DHCP in clients with multiple interfaces . . . . . . . 22 - 3.7 When clients should use DHCP. . . . . . . . . . . . . . . . . 22 - 4. Specification of the DHCP client-server protocol. . . . . . . 22 - - - -Droms Standards Track [Page 1] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - 4.1 Constructing and sending DHCP messages. . . . . . . . . . . . 22 - 4.2 DHCP server administrative controls . . . . . . . . . . . . . 25 - 4.3 DHCP server behavior. . . . . . . . . . . . . . . . . . . . . 26 - 4.4 DHCP client behavior. . . . . . . . . . . . . . . . . . . . . 34 - 5. Acknowledgments. . . . . . . . . . . . . . . . . . . . . . . .42 - 6. References . . . . . . . . . . . . . . . . . . . . . . . . . .42 - 7. Security Considerations. . . . . . . . . . . . . . . . 
. . . .43 - 8. Author's Address . . . . . . . . . . . . . . . . . . . . . . .44 - A. Host Configuration Parameters . . . . . . . . . . . . . . . .45 -List of Figures - 1. Format of a DHCP message . . . . . . . . . . . . . . . . . . . 9 - 2. Format of the 'flags' field. . . . . . . . . . . . . . . . . . 11 - 3. Timeline diagram of messages exchanged between DHCP client and - servers when allocating a new network address. . . . . . . . . 15 - 4. Timeline diagram of messages exchanged between DHCP client and - servers when reusing a previously allocated network address. . 18 - 5. State-transition diagram for DHCP clients. . . . . . . . . . . 34 -List of Tables - 1. Description of fields in a DHCP message. . . . . . . . . . . . 10 - 2. DHCP messages. . . . . . . . . . . . . . . . . . . . . . . . . 14 - 3. Fields and options used by DHCP servers. . . . . . . . . . . . 28 - 4. Client messages from various states. . . . . . . . . . . . . . 33 - 5. Fields and options used by DHCP clients. . . . . . . . . . . . 37 - -1. Introduction - - The Dynamic Host Configuration Protocol (DHCP) provides configuration - parameters to Internet hosts. DHCP consists of two components: a - protocol for delivering host-specific configuration parameters from a - DHCP server to a host and a mechanism for allocation of network - addresses to hosts. - - DHCP is built on a client-server model, where designated DHCP server - hosts allocate network addresses and deliver configuration parameters - to dynamically configured hosts. Throughout the remainder of this - document, the term "server" refers to a host providing initialization - parameters through DHCP, and the term "client" refers to a host - requesting initialization parameters from a DHCP server. - - A host should not act as a DHCP server unless explicitly configured - to do so by a system administrator. 
The diversity of hardware and - protocol implementations in the Internet would preclude reliable - operation if random hosts were allowed to respond to DHCP requests. - For example, IP requires the setting of many parameters within the - protocol implementation software. Because IP can be used on many - dissimilar kinds of network hardware, values for those parameters - cannot be guessed or assumed to have correct defaults. Also, - distributed address allocation schemes depend on a polling/defense - - - -Droms Standards Track [Page 2] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - mechanism for discovery of addresses that are already in use. IP - hosts may not always be able to defend their network addresses, so - that such a distributed address allocation scheme cannot be - guaranteed to avoid allocation of duplicate network addresses. - - DHCP supports three mechanisms for IP address allocation. In - "automatic allocation", DHCP assigns a permanent IP address to a - client. In "dynamic allocation", DHCP assigns an IP address to a - client for a limited period of time (or until the client explicitly - relinquishes the address). In "manual allocation", a client's IP - address is assigned by the network administrator, and DHCP is used - simply to convey the assigned address to the client. A particular - network will use one or more of these mechanisms, depending on the - policies of the network administrator. - - Dynamic allocation is the only one of the three mechanisms that - allows automatic reuse of an address that is no longer needed by the - client to which it was assigned. Thus, dynamic allocation is - particularly useful for assigning an address to a client that will be - connected to the network only temporarily or for sharing a limited - pool of IP addresses among a group of clients that do not need - permanent IP addresses. 
Dynamic allocation may also be a good choice - for assigning an IP address to a new client being permanently - connected to a network where IP addresses are sufficiently scarce - that it is important to reclaim them when old clients are retired. - Manual allocation allows DHCP to be used to eliminate the error-prone - process of manually configuring hosts with IP addresses in - environments where (for whatever reasons) it is desirable to manage - IP address assignment outside of the DHCP mechanisms. - - The format of DHCP messages is based on the format of BOOTP messages, - to capture the BOOTP relay agent behavior described as part of the - BOOTP specification [7, 21] and to allow interoperability of existing - BOOTP clients with DHCP servers. Using BOOTP relay agents eliminates - the necessity of having a DHCP server on each physical network - segment. - -1.1 Changes to RFC 1541 - - This document updates the DHCP protocol specification that appears in - RFC1541. A new DHCP message type, DHCPINFORM, has been added; see - section 3.4, 4.3 and 4.4 for details. The classing mechanism for - identifying DHCP clients to DHCP servers has been extended to include - "vendor" classes as defined in sections 4.2 and 4.3. The minimum - lease time restriction has been removed. Finally, many editorial - changes have been made to clarify the text as a result of experience - gained in DHCP interoperability tests. - - - - -Droms Standards Track [Page 3] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -1.2 Related Work - - There are several Internet protocols and related mechanisms that - address some parts of the dynamic host configuration problem. The - Reverse Address Resolution Protocol (RARP) [10] (through the - extensions defined in the Dynamic RARP (DRARP) [5]) explicitly - addresses the problem of network address discovery, and includes an - automatic IP address assignment mechanism. 
The Trivial File Transfer - Protocol (TFTP) [20] provides for transport of a boot image from a - boot server. The Internet Control Message Protocol (ICMP) [16] - provides for informing hosts of additional routers via "ICMP - redirect" messages. ICMP also can provide subnet mask information - through the "ICMP mask request" message and other information through - the (obsolete) "ICMP information request" message. Hosts can locate - routers through the ICMP router discovery mechanism [8]. - - BOOTP is a transport mechanism for a collection of configuration - information. BOOTP is also extensible, and official extensions [17] - have been defined for several configuration parameters. Morgan has - proposed extensions to BOOTP for dynamic IP address assignment [15]. - The Network Information Protocol (NIP), used by the Athena project at - MIT, is a distributed mechanism for dynamic IP address assignment - [19]. The Resource Location Protocol RLP [1] provides for location - of higher level services. Sun Microsystems diskless workstations use - a boot procedure that employs RARP, TFTP and an RPC mechanism called - "bootparams" to deliver configuration information and operating - system code to diskless hosts. (Sun Microsystems, Sun Workstation - and SunOS are trademarks of Sun Microsystems, Inc.) Some Sun - networks also use DRARP and an auto-installation mechanism to - automate the configuration of new hosts in an existing network. - - In other related work, the path minimum transmission unit (MTU) - discovery algorithm can determine the MTU of an arbitrary internet - path [14]. The Address Resolution Protocol (ARP) has been proposed - as a transport protocol for resource location and selection [6]. - Finally, the Host Requirements RFCs [3, 4] mention specific - requirements for host reconfiguration and suggest a scenario for - initial configuration of diskless hosts. 
- -1.3 Problem definition and issues - - DHCP is designed to supply DHCP clients with the configuration - parameters defined in the Host Requirements RFCs. After obtaining - parameters via DHCP, a DHCP client should be able to exchange packets - with any other host in the Internet. The TCP/IP stack parameters - supplied by DHCP are listed in Appendix A. - - - - - -Droms Standards Track [Page 4] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - Not all of these parameters are required for a newly initialized - client. A client and server may negotiate for the transmission of - only those parameters required by the client or specific to a - particular subnet. - - DHCP allows but does not require the configuration of client - parameters not directly related to the IP protocol. DHCP also does - not address registration of newly configured clients with the Domain - Name System (DNS) [12, 13]. - - DHCP is not intended for use in configuring routers. - -1.4 Requirements - - Throughout this document, the words that are used to define the - significance of particular requirements are capitalized. These words - are: - - o "MUST" - - This word or the adjective "REQUIRED" means that the - item is an absolute requirement of this specification. - - o "MUST NOT" - - This phrase means that the item is an absolute prohibition - of this specification. - - o "SHOULD" - - This word or the adjective "RECOMMENDED" means that there - may exist valid reasons in particular circumstances to ignore - this item, but the full implications should be understood and - the case carefully weighed before choosing a different course. - - o "SHOULD NOT" - - This phrase means that there may exist valid reasons in - particular circumstances when the listed behavior is acceptable - or even useful, but the full implications should be understood - and the case carefully weighed before implementing any behavior - described with this label. 
- - - - - - - - - -Droms Standards Track [Page 5] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - o "MAY" - - This word or the adjective "OPTIONAL" means that this item is - truly optional. One vendor may choose to include the item - because a particular marketplace requires it or because it - enhances the product, for example; another vendor may omit the - same item. - -1.5 Terminology - - This document uses the following terms: - - o "DHCP client" - - A DHCP client is an Internet host using DHCP to obtain - configuration parameters such as a network address. - - o "DHCP server" - - A DHCP server is an Internet host that returns configuration - parameters to DHCP clients. - - o "BOOTP relay agent" - - A BOOTP relay agent or relay agent is an Internet host or router - that passes DHCP messages between DHCP clients and DHCP servers. - DHCP is designed to use the same relay agent behavior as specified - in the BOOTP protocol specification. - - o "binding" - - A binding is a collection of configuration parameters, including - at least an IP address, associated with or "bound to" a DHCP - client. Bindings are managed by DHCP servers. - -1.6 Design goals - - The following list gives general design goals for DHCP. - - o DHCP should be a mechanism rather than a policy. DHCP must - allow local system administrators control over configuration - parameters where desired; e.g., local system administrators - should be able to enforce local policies concerning allocation - and access to local resources where desired. - - - - - - - -Droms Standards Track [Page 6] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - o Clients should require no manual configuration. Each client - should be able to discover appropriate local configuration - parameters without user intervention and incorporate those - parameters into its own configuration. - - o Networks should require no manual configuration for individual - clients. 
Under normal circumstances, the network manager - should not have to enter any per-client configuration - parameters. - - o DHCP should not require a server on each subnet. To allow for - scale and economy, DHCP must work across routers or through the - intervention of BOOTP relay agents. - - o A DHCP client must be prepared to receive multiple responses - to a request for configuration parameters. Some installations - may include multiple, overlapping DHCP servers to enhance - reliability and increase performance. - - o DHCP must coexist with statically configured, non-participating - hosts and with existing network protocol implementations. - - o DHCP must interoperate with the BOOTP relay agent behavior as - described by RFC 951 and by RFC 1542 [21]. - - o DHCP must provide service to existing BOOTP clients. - - The following list gives design goals specific to the transmission of - the network layer parameters. DHCP must: - - o Guarantee that any specific network address will not be in - use by more than one DHCP client at a time, - - o Retain DHCP client configuration across DHCP client reboot. A - DHCP client should, whenever possible, be assigned the same - configuration parameters (e.g., network address) in response - to each request, - - o Retain DHCP client configuration across server reboots, and, - whenever possible, a DHCP client should be assigned the same - configuration parameters despite restarts of the DHCP mechanism, - - o Allow automated assignment of configuration parameters to new - clients to avoid hand configuration for new clients, - - o Support fixed or permanent allocation of configuration - parameters to specific clients. - - - - -Droms Standards Track [Page 7] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -2. Protocol Summary - - From the client's point of view, DHCP is an extension of the BOOTP - mechanism. 
This behavior allows existing BOOTP clients to - interoperate with DHCP servers without requiring any change to the - clients' initialization software. RFC 1542 [2] details the - interactions between BOOTP and DHCP clients and servers [9]. There - are some new, optional transactions that optimize the interaction - between DHCP clients and servers that are described in sections 3 and - 4. - - Figure 1 gives the format of a DHCP message and table 1 describes - each of the fields in the DHCP message. The numbers in parentheses - indicate the size of each field in octets. The names for the fields - given in the figure will be used throughout this document to refer to - the fields in DHCP messages. - - There are two primary differences between DHCP and BOOTP. First, - DHCP defines mechanisms through which clients can be assigned a - network address for a finite lease, allowing for serial reassignment - of network addresses to different clients. Second, DHCP provides the - mechanism for a client to acquire all of the IP configuration - parameters that it needs in order to operate. - - DHCP introduces a small change in terminology intended to clarify the - meaning of one of the fields. What was the "vendor extensions" field - in BOOTP has been re-named the "options" field in DHCP. Similarly, - the tagged data items that were used inside the BOOTP "vendor - extensions" field, which were formerly referred to as "vendor - extensions," are now termed simply "options." 
- - - - - - - - - - - - - - - - - - - - - -Droms Standards Track [Page 8] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | op (1) | htype (1) | hlen (1) | hops (1) | - +---------------+---------------+---------------+---------------+ - | xid (4) | - +-------------------------------+-------------------------------+ - | secs (2) | flags (2) | - +-------------------------------+-------------------------------+ - | ciaddr (4) | - +---------------------------------------------------------------+ - | yiaddr (4) | - +---------------------------------------------------------------+ - | siaddr (4) | - +---------------------------------------------------------------+ - | giaddr (4) | - +---------------------------------------------------------------+ - | | - | chaddr (16) | - | | - | | - +---------------------------------------------------------------+ - | | - | sname (64) | - +---------------------------------------------------------------+ - | | - | file (128) | - +---------------------------------------------------------------+ - | | - | options (variable) | - +---------------------------------------------------------------+ - - Figure 1: Format of a DHCP message - - DHCP defines a new 'client identifier' option that is used to pass an - explicit client identifier to a DHCP server. This change eliminates - the overloading of the 'chaddr' field in BOOTP messages, where - 'chaddr' is used both as a hardware address for transmission of BOOTP - reply messages and as a client identifier. The 'client identifier' - is an opaque key, not to be interpreted by the server; for example, - the 'client identifier' may contain a hardware address, identical to - the contents of the 'chaddr' field, or it may contain another type of - identifier, such as a DNS name. 
The 'client identifier' chosen by a - DHCP client MUST be unique to that client within the subnet to which - the client is attached. If the client uses a 'client identifier' in - one message, it MUST use that same identifier in all subsequent - messages, to ensure that all servers correctly identify the client. - - - - -Droms Standards Track [Page 9] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - DHCP clarifies the interpretation of the 'siaddr' field as the - address of the server to use in the next step of the client's - bootstrap process. A DHCP server may return its own address in the - 'siaddr' field, if the server is prepared to supply the next - bootstrap service (e.g., delivery of an operating system executable - image). A DHCP server always returns its own address in the 'server - identifier' option. - - FIELD OCTETS DESCRIPTION - ----- ------ ----------- - - op 1 Message op code / message type. - 1 = BOOTREQUEST, 2 = BOOTREPLY - htype 1 Hardware address type, see ARP section in "Assigned - Numbers" RFC; e.g., '1' = 10mb ethernet. - hlen 1 Hardware address length (e.g. '6' for 10mb - ethernet). - hops 1 Client sets to zero, optionally used by relay agents - when booting via a relay agent. - xid 4 Transaction ID, a random number chosen by the - client, used by the client and server to associate - messages and responses between a client and a - server. - secs 2 Filled in by client, seconds elapsed since client - began address acquisition or renewal process. - flags 2 Flags (see figure 2). - ciaddr 4 Client IP address; only filled in if client is in - BOUND, RENEW or REBINDING state and can respond - to ARP requests. - yiaddr 4 'your' (client) IP address. - siaddr 4 IP address of next server to use in bootstrap; - returned in DHCPOFFER, DHCPACK by server. - giaddr 4 Relay agent IP address, used in booting via a - relay agent. - chaddr 16 Client hardware address. - sname 64 Optional server host name, null terminated string. 
- file 128 Boot file name, null terminated string; "generic" - name or null in DHCPDISCOVER, fully qualified - directory-path name in DHCPOFFER. - options var Optional parameters field. See the options - documents for a list of defined options. - - Table 1: Description of fields in a DHCP message - - The 'options' field is now variable length. A DHCP client must be - prepared to receive DHCP messages with an 'options' field of at least - length 312 octets. This requirement implies that a DHCP client must - be prepared to receive a message of up to 576 octets, the minimum IP - - - -Droms Standards Track [Page 10] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - datagram size an IP host must be prepared to accept [3]. DHCP - clients may negotiate the use of larger DHCP messages through the - 'maximum DHCP message size' option. The options field may be further - extended into the 'file' and 'sname' fields. - - In the case of a client using DHCP for initial configuration (before - the client's TCP/IP software has been completely configured), DHCP - requires creative use of the client's TCP/IP software and liberal - interpretation of RFC 1122. The TCP/IP software SHOULD accept and - forward to the IP layer any IP packets delivered to the client's - hardware address before the IP address is configured; DHCP servers - and BOOTP relay agents may not be able to deliver DHCP messages to - clients that cannot accept hardware unicast datagrams before the - TCP/IP software is configured. - - To work around some clients that cannot accept IP unicast datagrams - before the TCP/IP software is configured as discussed in the previous - paragraph, DHCP uses the 'flags' field [21]. The leftmost bit is - defined as the BROADCAST (B) flag. The semantics of this flag are - discussed in section 4.1 of this document. The remaining bits of the - flags field are reserved for future use. They MUST be set to zero by - clients and ignored by servers and relay agents. 
Figure 2 gives the - format of the 'flags' field. - - 1 1 1 1 1 1 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |B| MBZ | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - B: BROADCAST flag - - MBZ: MUST BE ZERO (reserved for future use) - - Figure 2: Format of the 'flags' field - -2.1 Configuration parameters repository - - The first service provided by DHCP is to provide persistent storage - of network parameters for network clients. The model of DHCP - persistent storage is that the DHCP service stores a key-value entry - for each client, where the key is some unique identifier (for - example, an IP subnet number and a unique identifier within the - subnet) and the value contains the configuration parameters for the - client. - - For example, the key might be the pair (IP-subnet-number, hardware- - address) (note that the "hardware-address" should be typed by the - - - -Droms Standards Track [Page 11] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - type of hardware to accommodate possible duplication of hardware - addresses resulting from bit-ordering problems in a mixed-media, - bridged network) allowing for serial or concurrent reuse of a - hardware address on different subnets, and for hardware addresses - that may not be globally unique. Alternately, the key might be the - pair (IP-subnet-number, hostname), allowing the server to assign - parameters intelligently to a DHCP client that has been moved to a - different subnet or has changed hardware addresses (perhaps because - the network interface failed and was replaced). The protocol defines - that the key will be (IP-subnet-number, hardware-address) unless the - client explicitly supplies an identifier using the 'client - identifier' option. A client can query the DHCP service to - retrieve its configuration parameters. 
The client interface to the - configuration parameters repository consists of protocol messages to - request configuration parameters and responses from the server - carrying the configuration parameters. - -2.2 Dynamic allocation of network addresses - - The second service provided by DHCP is the allocation of temporary or - permanent network (IP) addresses to clients. The basic mechanism for - the dynamic allocation of network addresses is simple: a client - requests the use of an address for some period of time. The - allocation mechanism (the collection of DHCP servers) guarantees not - to reallocate that address within the requested time and attempts to - return the same network address each time the client requests an - address. In this document, the period over which a network address - is allocated to a client is referred to as a "lease" [11]. The - client may extend its lease with subsequent requests. The client may - issue a message to release the address back to the server when the - client no longer needs the address. The client may ask for a - permanent assignment by asking for an infinite lease. Even when - assigning "permanent" addresses, a server may choose to give out - lengthy but non-infinite leases to allow detection of the fact that - the client has been retired. - - In some environments it will be necessary to reassign network - addresses due to exhaustion of available addresses. In such - environments, the allocation mechanism will reuse addresses whose - lease has expired. The server should use whatever information is - available in the configuration information repository to choose an - address to reuse. For example, the server may choose the least - recently assigned address. As a consistency check, the allocating - server SHOULD probe the reused address before allocating the address, - e.g., with an ICMP echo request, and the client SHOULD probe the - newly received address, e.g., with ARP. 
- - - - - -Droms Standards Track [Page 12] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -3. The Client-Server Protocol - - DHCP uses the BOOTP message format defined in RFC 951 and given in - table 1 and figure 1. The 'op' field of each DHCP message sent from - a client to a server contains BOOTREQUEST. BOOTREPLY is used in the - 'op' field of each DHCP message sent from a server to a client. - - The first four octets of the 'options' field of the DHCP message - contain the (decimal) values 99, 130, 83 and 99, respectively (this - is the same magic cookie as is defined in RFC 1497 [17]). The - remainder of the 'options' field consists of a list of tagged - parameters that are called "options". All of the "vendor extensions" - listed in RFC 1497 are also DHCP options. RFC 1533 gives the - complete set of options defined for use with DHCP. - - Several options have been defined so far. One particular option - - the "DHCP message type" option - must be included in every DHCP - message. This option defines the "type" of the DHCP message. - Additional options may be allowed, required, or not allowed, - depending on the DHCP message type. - - Throughout this document, DHCP messages that include a 'DHCP message - type' option will be referred to by the type of the message; e.g., a - DHCP message with 'DHCP message type' option type 1 will be referred - to as a "DHCPDISCOVER" message. - -3.1 Client-server interaction - allocating a network address - - The following summary of the protocol exchanges between clients and - servers refers to the DHCP messages described in table 2. The - timeline diagram in figure 3 shows the timing relationships in a - typical client-server interaction. If the client already knows its - address, some steps may be omitted; this abbreviated interaction is - described in section 3.2. - - 1. The client broadcasts a DHCPDISCOVER message on its local physical - subnet. 
The DHCPDISCOVER message MAY include options that suggest - values for the network address and lease duration. BOOTP relay - agents may pass the message on to DHCP servers not on the same - physical subnet. - - 2. Each server may respond with a DHCPOFFER message that includes an - available network address in the 'yiaddr' field (and other - configuration parameters in DHCP options). Servers need not - reserve the offered network address, although the protocol will - work more efficiently if the server avoids allocating the offered - network address to another client. When allocating a new address, - servers SHOULD check that the offered network address is not - - - -Droms Standards Track [Page 13] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - already in use; e.g., the server may probe the offered address - with an ICMP Echo Request. Servers SHOULD be implemented so that - network administrators MAY choose to disable probes of newly - allocated addresses. The server transmits the DHCPOFFER message - to the client, using the BOOTP relay agent if necessary. - - Message Use - ------- --- - - DHCPDISCOVER - Client broadcast to locate available servers. - - DHCPOFFER - Server to client in response to DHCPDISCOVER with - offer of configuration parameters. - - DHCPREQUEST - Client message to servers either (a) requesting - offered parameters from one server and implicitly - declining offers from all others, (b) confirming - correctness of previously allocated address after, - e.g., system reboot, or (c) extending the lease on a - particular network address. - - DHCPACK - Server to client with configuration parameters, - including committed network address. - - DHCPNAK - Server to client indicating client's notion of network - address is incorrect (e.g., client has moved to new - subnet) or client's lease has expired - - DHCPDECLINE - Client to server indicating network address is already - in use. 
- - DHCPRELEASE - Client to server relinquishing network address and - cancelling remaining lease. - - DHCPINFORM - Client to server, asking only for local configuration - parameters; client already has externally configured - network address. - - Table 2: DHCP messages - - - - - - - - - - - - -Droms Standards Track [Page 14] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - Server Client Server - (not selected) (selected) - - v v v - | | | - | Begins initialization | - | | | - | _____________/|\____________ | - |/DHCPDISCOVER | DHCPDISCOVER \| - | | | - Determines | Determines - configuration | configuration - | | | - |\ | ____________/ | - | \________ | /DHCPOFFER | - | DHCPOFFER\ |/ | - | \ | | - | Collects replies | - | \| | - | Selects configuration | - | | | - | _____________/|\____________ | - |/ DHCPREQUEST | DHCPREQUEST\ | - | | | - | | Commits configuration - | | | - | | _____________/| - | |/ DHCPACK | - | | | - | Initialization complete | - | | | - . . . - . . . - | | | - | Graceful shutdown | - | | | - | |\ ____________ | - | | DHCPRELEASE \| - | | | - | | Discards lease - | | | - v v v - Figure 3: Timeline diagram of messages exchanged between DHCP - client and servers when allocating a new network address - - - - - - - -Droms Standards Track [Page 15] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - 3. The client receives one or more DHCPOFFER messages from one or more - servers. The client may choose to wait for multiple responses. - The client chooses one server from which to request configuration - parameters, based on the configuration parameters offered in the - DHCPOFFER messages. The client broadcasts a DHCPREQUEST message - that MUST include the 'server identifier' option to indicate which - server it has selected, and that MAY include other options - specifying desired configuration values. The 'requested IP - address' option MUST be set to the value of 'yiaddr' in the - DHCPOFFER message from the server. 
This DHCPREQUEST message is - broadcast and relayed through DHCP/BOOTP relay agents. To help - ensure that any BOOTP relay agents forward the DHCPREQUEST message - to the same set of DHCP servers that received the original - DHCPDISCOVER message, the DHCPREQUEST message MUST use the same - value in the DHCP message header's 'secs' field and be sent to the - same IP broadcast address as the original DHCPDISCOVER message. - The client times out and retransmits the DHCPDISCOVER message if - the client receives no DHCPOFFER messages. - - 4. The servers receive the DHCPREQUEST broadcast from the client. - Those servers not selected by the DHCPREQUEST message use the - message as notification that the client has declined that server's - offer. The server selected in the DHCPREQUEST message commits the - binding for the client to persistent storage and responds with a - DHCPACK message containing the configuration parameters for the - requesting client. The combination of 'client identifier' or - 'chaddr' and assigned network address constitute a unique - identifier for the client's lease and are used by both the client - and server to identify a lease referred to in any DHCP messages. - Any configuration parameters in the DHCPACK message SHOULD NOT - conflict with those in the earlier DHCPOFFER message to which the - client is responding. The server SHOULD NOT check the offered - network address at this point. The 'yiaddr' field in the DHCPACK - messages is filled in with the selected network address. - - If the selected server is unable to satisfy the DHCPREQUEST message - (e.g., the requested network address has been allocated), the - server SHOULD respond with a DHCPNAK message. - - A server MAY choose to mark addresses offered to clients in - DHCPOFFER messages as unavailable. The server SHOULD mark an - address offered to a client in a DHCPOFFER message as available if - the server receives no DHCPREQUEST message from that client. - - 5. 
The client receives the DHCPACK message with configuration - parameters. The client SHOULD perform a final check on the - parameters (e.g., ARP for allocated network address), and notes the - duration of the lease specified in the DHCPACK message. At this - - - -Droms Standards Track [Page 16] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - point, the client is configured. If the client detects that the - address is already in use (e.g., through the use of ARP), the - client MUST send a DHCPDECLINE message to the server and restarts - the configuration process. The client SHOULD wait a minimum of ten - seconds before restarting the configuration process to avoid - excessive network traffic in case of looping. - - If the client receives a DHCPNAK message, the client restarts the - configuration process. - - The client times out and retransmits the DHCPREQUEST message if the - client receives neither a DHCPACK nor a DHCPNAK message. The client - retransmits the DHCPREQUEST according to the retransmission - algorithm in section 4.1. The client should choose to retransmit - the DHCPREQUEST enough times to give adequate probability of - contacting the server without causing the client (and the user of - that client) to wait overly long before giving up; e.g., a client - retransmitting as described in section 4.1 might retransmit the - DHCPREQUEST message four times, for a total delay of 60 seconds, - before restarting the initialization procedure. If the client - receives neither a DHCPACK nor a DHCPNAK message after employing the - retransmission algorithm, the client reverts to INIT state and - restarts the initialization process. The client SHOULD notify the - user that the initialization process has failed and is restarting. - - 6. The client may choose to relinquish its lease on a network address - by sending a DHCPRELEASE message to the server. 
The client - identifies the lease to be released with its 'client identifier', - or 'chaddr' and network address in the DHCPRELEASE message. If the - client used a 'client identifier' when it obtained the lease, it - MUST use the same 'client identifier' in the DHCPRELEASE message. - -3.2 Client-server interaction - reusing a previously allocated network - address - - If a client remembers and wishes to reuse a previously allocated - network address, a client may choose to omit some of the steps - described in the previous section. The timeline diagram in figure 4 - shows the timing relationships in a typical client-server interaction - for a client reusing a previously allocated network address. - - - - - - - - - - - -Droms Standards Track [Page 17] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - 1. The client broadcasts a DHCPREQUEST message on its local subnet. - The message includes the client's network address in the - 'requested IP address' option. As the client has not received its - network address, it MUST NOT fill in the 'ciaddr' field. BOOTP - relay agents pass the message on to DHCP servers not on the same - subnet. If the client used a 'client identifier' to obtain its - address, the client MUST use the same 'client identifier' in the - DHCPREQUEST message. - - 2. Servers with knowledge of the client's configuration parameters - respond with a DHCPACK message to the client. Servers SHOULD NOT - check that the client's network address is already in use; the - client may respond to ICMP Echo Request messages at this point. 
- - Server Client Server - - v v v - | | | - | Begins | - | initialization | - | | | - | /|\ | - | _________ __/ | \__________ | - | /DHCPREQU EST | DHCPREQUEST\ | - |/ | \| - | | | - Locates | Locates - configuration | configuration - | | | - |\ | /| - | \ | ___________/ | - | \ | / DHCPACK | - | \ _______ |/ | - | DHCPACK\ | | - | Initialization | - | complete | - | \| | - | | | - | (Subsequent | - | DHCPACKS | - | ignored) | - | | | - | | | - v v v - - Figure 4: Timeline diagram of messages exchanged between DHCP - client and servers when reusing a previously allocated - network address - - - -Droms Standards Track [Page 18] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - If the client's request is invalid (e.g., the client has moved - to a new subnet), servers SHOULD respond with a DHCPNAK message to - the client. Servers SHOULD NOT respond if their information is not - guaranteed to be accurate. For example, a server that identifies a - request for an expired binding that is owned by another server SHOULD - NOT respond with a DHCPNAK unless the servers are using an explicit - mechanism to maintain coherency among the servers. - - If 'giaddr' is 0x0 in the DHCPREQUEST message, the client is on - the same subnet as the server. The server MUST - broadcast the DHCPNAK message to the 0xffffffff broadcast address - because the client may not have a correct network address or subnet - mask, and the client may not be answering ARP requests. - Otherwise, the server MUST send the DHCPNAK message to the IP - address of the BOOTP relay agent, as recorded in 'giaddr'. The - relay agent will, in turn, forward the message directly to the - client's hardware address, so that the DHCPNAK can be delivered even - if the client has moved to a new network. - - 3. The client receives the DHCPACK message with configuration - parameters. 
The client performs a final check on the parameters - (as in section 3.1), and notes the duration of the lease specified - in the DHCPACK message. The specific lease is implicitly identified - by the 'client identifier' or 'chaddr' and the network address. At - this point, the client is configured. - - If the client detects that the IP address in the DHCPACK message - is already in use, the client MUST send a DHCPDECLINE message to the - server and restarts the configuration process by requesting a - new network address. This action corresponds to the client - moving to the INIT state in the DHCP state diagram, which is - described in section 4.4. - - If the client receives a DHCPNAK message, it cannot reuse its - remembered network address. It must instead request a new - address by restarting the configuration process, this time - using the (non-abbreviated) procedure described in section - 3.1. This action also corresponds to the client moving to - the INIT state in the DHCP state diagram. - - The client times out and retransmits the DHCPREQUEST message if - the client receives neither a DHCPACK nor a DHCPNAK message. The - client retransmits the DHCPREQUEST according to the retransmission - algorithm in section 4.1. The client should choose to retransmit - the DHCPREQUEST enough times to give adequate probability of - contacting the server without causing the client (and the user of - that client) to wait overly long before giving up; e.g., a client - retransmitting as described in section 4.1 might retransmit the - - - -Droms Standards Track [Page 19] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - DHCPREQUEST message four times, for a total delay of 60 seconds, - before restarting the initialization procedure. 
If the client - receives neither a DHCPACK nor a DHCPNAK message after employing - the retransmission algorithm, the client MAY choose to use the - previously allocated network address and configuration parameters - for the remainder of the unexpired lease. This corresponds to - moving to BOUND state in the client state transition diagram shown - in figure 5. - - 4. The client may choose to relinquish its lease on a network - address by sending a DHCPRELEASE message to the server. The - client identifies the lease to be released with its - 'client identifier', or 'chaddr' and network address in the - DHCPRELEASE message. - - Note that in this case, where the client retains its network - address locally, the client will not normally relinquish its - lease during a graceful shutdown. Only in the case where the - client explicitly needs to relinquish its lease, e.g., the client - is about to be moved to a different subnet, will the client send - a DHCPRELEASE message. - -3.3 Interpretation and representation of time values - - A client acquires a lease for a network address for a fixed period of - time (which may be infinite). Throughout the protocol, times are to - be represented in units of seconds. The time value of 0xffffffff is - reserved to represent "infinity". - - As clients and servers may not have synchronized clocks, times are - represented in DHCP messages as relative times, to be interpreted - with respect to the client's local clock. Representing relative - times in units of seconds in an unsigned 32 bit word gives a range of - relative times from 0 to approximately 100 years, which is sufficient - for the relative times to be measured using DHCP. - - The algorithm for lease duration interpretation given in the previous - paragraph assumes that client and server clocks are stable relative - to each other. If there is drift between the two clocks, the server - may consider the lease expired before the client does.
To - compensate, the server may return a shorter lease duration to the - client than the server commits to its local database of client - information. - -3.4 Obtaining parameters with externally configured network address - - If a client has obtained a network address through some other means - (e.g., manual configuration), it may use a DHCPINFORM request message - - - -Droms Standards Track [Page 20] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - to obtain other local configuration parameters. Servers receiving a - DHCPINFORM message construct a DHCPACK message with any local - configuration parameters appropriate for the client without: - allocating a new address, checking for an existing binding, filling - in 'yiaddr' or including lease time parameters. The servers SHOULD - unicast the DHCPACK reply to the address given in the 'ciaddr' field - of the DHCPINFORM message. - - The server SHOULD check the network address in a DHCPINFORM message - for consistency, but MUST NOT check for an existing lease. The - server forms a DHCPACK message containing the configuration - parameters for the requesting client and sends the DHCPACK message - directly to the client. - -3.5 Client parameters in DHCP - - Not all clients require initialization of all parameters listed in - Appendix A. Two techniques are used to reduce the number of - parameters transmitted from the server to the client. First, most of - the parameters have defaults defined in the Host Requirements RFCs; - if the client receives no parameters from the server that override - the defaults, a client uses those default values. Second, in its - initial DHCPDISCOVER or DHCPREQUEST message, a client may provide the - server with a list of specific parameters the client is interested - in. If the client includes a list of parameters in a DHCPDISCOVER - message, it MUST include that list in any subsequent DHCPREQUEST - messages. 
- - The client SHOULD include the 'maximum DHCP message size' option to - let the server know how large the server may make its DHCP messages. - The parameters returned to a client may still exceed the space - allocated to options in a DHCP message. In this case, two additional - options flags (which must appear in the 'options' field of the - message) indicate that the 'file' and 'sname' fields are to be used - for options. - - The client can inform the server which configuration parameters the - client is interested in by including the 'parameter request list' - option. The data portion of this option explicitly lists the options - requested by tag number. - - In addition, the client may suggest values for the network address - and lease time in the DHCPDISCOVER message. The client may include - the 'requested IP address' option to suggest that a particular IP - address be assigned, and may include the 'IP address lease time' - option to suggest the lease time it would like. Other options - representing "hints" at configuration parameters are allowed in a - DHCPDISCOVER or DHCPREQUEST message. However, additional options may - - - -Droms Standards Track [Page 21] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - be ignored by servers, and multiple servers may, therefore, not - return identical values for some options. The 'requested IP address' - option is to be filled in only in a DHCPREQUEST message when the - client is verifying network parameters obtained previously. The - client fills in the 'ciaddr' field only when correctly configured - with an IP address in BOUND, RENEWING or REBINDING state. - - If a server receives a DHCPREQUEST message with an invalid 'requested - IP address', the server SHOULD respond to the client with a DHCPNAK - message and may choose to report the problem to the system - administrator. The server may include an error message in the - 'message' option. 
- -3.6 Use of DHCP in clients with multiple interfaces - - A client with multiple network interfaces must use DHCP through each - interface independently to obtain configuration information - parameters for those separate interfaces. - -3.7 When clients should use DHCP - - A client SHOULD use DHCP to reacquire or verify its IP address and - network parameters whenever the local network parameters may have - changed; e.g., at system boot time or after a disconnection from the - local network, as the local network configuration may change without - the client's or user's knowledge. - - If a client has knowledge of a previous network address and is unable - to contact a local DHCP server, the client may continue to use the - previous network address until the lease for that address expires. - If the lease expires before the client can contact a DHCP server, the - client must immediately discontinue use of the previous network - address and may inform local users of the problem. - -4. Specification of the DHCP client-server protocol - - In this section, we assume that a DHCP server has a block of network - addresses from which it can satisfy requests for new addresses. Each - server also maintains a database of allocated addresses and leases in - local permanent storage. - -4.1 Constructing and sending DHCP messages - - DHCP clients and servers both construct DHCP messages by filling in - fields in the fixed format section of the message and appending - tagged data items in the variable length option area. The options - area includes first a four-octet 'magic cookie' (which was described - in section 3), followed by the options. The last option must always - - - -Droms Standards Track [Page 22] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - be the 'end' option. - - DHCP uses UDP as its transport protocol. 
DHCP messages from a client - to a server are sent to the 'DHCP server' port (67), and DHCP - messages from a server to a client are sent to the 'DHCP client' port - (68). A server with multiple network addresses (e.g., a multi-homed - host) MAY use any of its network addresses in outgoing DHCP messages. - - The 'server identifier' field is used both to identify a DHCP server - in a DHCP message and as a destination address from clients to - servers. A server with multiple network addresses MUST be prepared - to accept any of its network addresses as identifying that server - in a DHCP message. To accommodate potentially incomplete network - connectivity, a server MUST choose an address as a 'server - identifier' that, to the best of the server's knowledge, is reachable - from the client. For example, if the DHCP server and the DHCP client - are connected to the same subnet (i.e., the 'giaddr' field in the - message from the client is zero), the server SHOULD select the IP - address the server is using for communication on that subnet as the - 'server identifier'. If the server is using multiple IP addresses on - that subnet, any such address may be used. If the server has - received a message through a DHCP relay agent, the server SHOULD - choose an address from the interface on which the message was - received as the 'server identifier' (unless the server has other, - better information on which to make its choice). DHCP clients MUST - use the IP address provided in the 'server identifier' option for any - unicast requests to the DHCP server. - - DHCP messages broadcast by a client prior to that client obtaining - its IP address must have the source address field in the IP header - set to 0. - - If the 'giaddr' field in a DHCP message from a client is non-zero, - the server sends any return messages to the 'DHCP server' port on the - BOOTP relay agent whose address appears in 'giaddr'.
If the 'giaddr' - field is zero and the 'ciaddr' field is nonzero, then the server - unicasts DHCPOFFER and DHCPACK messages to the address in 'ciaddr'. - If 'giaddr' is zero and 'ciaddr' is zero, and the broadcast bit is - set, then the server broadcasts DHCPOFFER and DHCPACK messages to - 0xffffffff. If the broadcast bit is not set and 'giaddr' is zero and - 'ciaddr' is zero, then the server unicasts DHCPOFFER and DHCPACK - messages to the client's hardware address and 'yiaddr' address. In - all cases, when 'giaddr' is zero, the server broadcasts any DHCPNAK - messages to 0xffffffff. - - If the options in a DHCP message extend into the 'sname' and 'file' - fields, the 'option overload' option MUST appear in the 'options' - field, with value 1, 2 or 3, as specified in RFC 1533. If the - - - -Droms Standards Track [Page 23] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - 'option overload' option is present in the 'options' field, the - options in the 'options' field MUST be terminated by an 'end' option, - and MAY contain one or more 'pad' options to fill the options field. - The options in the 'sname' and 'file' fields (if in use as indicated - by the 'options overload' option) MUST begin with the first octet of - the field, MUST be terminated by an 'end' option, and MUST be - followed by 'pad' options to fill the remainder of the field. Any - individual option in the 'options', 'sname' and 'file' fields MUST be - entirely contained in that field. The options in the 'options' field - MUST be interpreted first, so that any 'option overload' options may - be interpreted. The 'file' field MUST be interpreted next (if the - 'option overload' option indicates that the 'file' field contains - DHCP options), followed by the 'sname' field. - - The values to be passed in an 'option' tag may be too long to fit in - the 255 octets available to a single option (e.g., a list of routers - in a 'router' option [21]). 
Options may appear only once, unless - otherwise specified in the options document. The client concatenates - the values of multiple instances of the same option into a single - parameter list for configuration. - - DHCP clients are responsible for all message retransmission. The - client MUST adopt a retransmission strategy that incorporates a - randomized exponential backoff algorithm to determine the delay - between retransmissions. The delay between retransmissions SHOULD be - chosen to allow sufficient time for replies from the server to be - delivered based on the characteristics of the internetwork between - the client and the server. For example, in a 10Mb/sec Ethernet - internetwork, the delay before the first retransmission SHOULD be 4 - seconds randomized by the value of a uniform random number chosen - from the range -1 to +1. Clients with clocks that provide resolution - granularity of less than one second may choose a non-integer - randomization value. The delay before the next retransmission SHOULD - be 8 seconds randomized by the value of a uniform number chosen from - the range -1 to +1. The retransmission delay SHOULD be doubled with - subsequent retransmissions up to a maximum of 64 seconds. The client - MAY provide an indication of retransmission attempts to the user as - an indication of the progress of the configuration process. - - The 'xid' field is used by the client to match incoming DHCP messages - with pending requests. A DHCP client MUST choose 'xid's in such a - way as to minimize the chance of using an 'xid' identical to one used - by another client. For example, a client may choose a different, - random initial 'xid' each time the client is rebooted, and - subsequently use sequential 'xid's until the next reboot. Selecting - a new 'xid' for each retransmission is an implementation decision. A - client may choose to reuse the same 'xid' or select a new 'xid' for - each retransmitted message. 
- - - -Droms Standards Track [Page 24] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - Normally, DHCP servers and BOOTP relay agents attempt to deliver - DHCPOFFER, DHCPACK and DHCPNAK messages directly to the client using - unicast delivery. The IP destination address (in the IP header) is - set to the DHCP 'yiaddr' address and the link-layer destination - address is set to the DHCP 'chaddr' address. Unfortunately, some - client implementations are unable to receive such unicast IP - datagrams until the implementation has been configured with a valid - IP address (leading to a deadlock in which the client's IP address - cannot be delivered until the client has been configured with an IP - address). - - A client that cannot receive unicast IP datagrams until its protocol - software has been configured with an IP address SHOULD set the - BROADCAST bit in the 'flags' field to 1 in any DHCPDISCOVER or - DHCPREQUEST messages that client sends. The BROADCAST bit will - provide a hint to the DHCP server and BOOTP relay agent to broadcast - any messages to the client on the client's subnet. A client that can - receive unicast IP datagrams before its protocol software has been - configured SHOULD clear the BROADCAST bit to 0. The BOOTP - clarifications document discusses the ramifications of the use of the - BROADCAST bit [21]. - - A server or relay agent sending or relaying a DHCP message directly - to a DHCP client (i.e., not to a relay agent specified in the - 'giaddr' field) SHOULD examine the BROADCAST bit in the 'flags' - field. If this bit is set to 1, the DHCP message SHOULD be sent as - an IP broadcast using an IP broadcast address (preferably 0xffffffff) - as the IP destination address and the link-layer broadcast address as - the link-layer destination address.
If the BROADCAST bit is cleared - to 0, the message SHOULD be sent as an IP unicast to the IP address - specified in the 'yiaddr' field and the link-layer address specified - in the 'chaddr' field. If unicasting is not possible, the message - MAY be sent as an IP broadcast using an IP broadcast address - (preferably 0xffffffff) as the IP destination address and the link- - layer broadcast address as the link-layer destination address. - -4.2 DHCP server administrative controls - - DHCP servers are not required to respond to every DHCPDISCOVER and - DHCPREQUEST message they receive. For example, a network - administrator, to retain stringent control over the clients attached - to the network, may choose to configure DHCP servers to respond only - to clients that have been previously registered through some external - mechanism. The DHCP specification describes only the interactions - between clients and servers when the clients and servers choose to - interact; it is beyond the scope of the DHCP specification to - describe all of the administrative controls that system - administrators might want to use. Specific DHCP server - - - -Droms Standards Track [Page 25] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - implementations may incorporate any controls or policies desired by a - network administrator. - - In some environments, a DHCP server will have to consider the values - of the vendor class options included in DHCPDISCOVER or DHCPREQUEST - messages when determining the correct parameters for a particular - client. - - A DHCP server needs to use some unique identifier to associate a - client with its lease. The client MAY choose to explicitly provide - the identifier through the 'client identifier' option. If the client - supplies a 'client identifier', the client MUST use the same 'client - identifier' in all subsequent messages, and the server MUST use that - identifier to identify the client. 
If the client does not provide a - 'client identifier' option, the server MUST use the contents of the - 'chaddr' field to identify the client. It is crucial for a DHCP - client to use an identifier unique within the subnet to which the - client is attached in the 'client identifier' option. Use of - 'chaddr' as the client's unique identifier may cause unexpected - results, as that identifier may be associated with a hardware - interface that could be moved to a new client. Some sites may choose - to use a manufacturer's serial number as the 'client identifier', to - avoid unexpected changes in a client's network address due to transfer - of hardware interfaces among computers. Sites may also choose to use - a DNS name as the 'client identifier', causing address leases to be - associated with the DNS name rather than a specific hardware box. - - DHCP clients are free to use any strategy in selecting a DHCP server - among those from which the client receives a DHCPOFFER message. The - client implementation of DHCP SHOULD provide a mechanism for the user - to select directly the 'vendor class identifier' values. - -4.3 DHCP server behavior - - A DHCP server processes incoming DHCP messages from a client based on - the current state of the binding for that client. A DHCP server can - receive the following messages from a client: - - o DHCPDISCOVER - - o DHCPREQUEST - - o DHCPDECLINE - - o DHCPRELEASE - - o DHCPINFORM - - - - -Droms Standards Track [Page 26] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - Table 3 gives the use of the fields and options in a DHCP message by - a server. The remainder of this section describes the action of the - DHCP server for each possible incoming message. - -4.3.1 DHCPDISCOVER message - - When a server receives a DHCPDISCOVER message from a client, the - server chooses a network address for the requesting client. If no - address is available, the server may choose to report the problem to - the system administrator.
If an address is available, the new address - SHOULD be chosen as follows: - - o The client's current address as recorded in the client's current - binding, ELSE - - o The client's previous address as recorded in the client's (now - expired or released) binding, if that address is in the server's - pool of available addresses and not already allocated, ELSE - - o The address requested in the 'Requested IP Address' option, if that - address is valid and not already allocated, ELSE - - o A new address allocated from the server's pool of available - addresses; the address is selected based on the subnet from which - the message was received (if 'giaddr' is 0) or on the address of - the relay agent that forwarded the message ('giaddr' when not 0). - - As described in section 4.2, a server MAY, for administrative - reasons, assign an address other than the one requested, or may - refuse to allocate an address to a particular client even though free - addresses are available. - - Note that, in some network architectures (e.g., internets with more - than one IP subnet assigned to a physical network segment), it may be - the case that the DHCP client should be assigned an address from a - different subnet than the address recorded in 'giaddr'. Thus, DHCP - does not require that the client be assigned an address from the - subnet in 'giaddr'. A server is free to choose some other subnet, - and it is beyond the scope of the DHCP specification to describe ways - in which the assigned IP address might be chosen. - - While not required for correct operation of DHCP, the server SHOULD - NOT reuse the selected network address before the client responds to - the server's DHCPOFFER message. The server may choose to record the - address as offered to the client.
- - The server must also choose an expiration time for the lease, as - follows: - - - -Droms Standards Track [Page 27] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - o IF the client has not requested a specific lease in the - DHCPDISCOVER message and the client already has an assigned network - address, the server returns the lease expiration time previously - assigned to that address (note that the client must explicitly - request a specific lease to extend the expiration time on a - previously assigned address), ELSE - - o IF the client has not requested a specific lease in the - DHCPDISCOVER message and the client does not have an assigned - network address, the server assigns a locally configured default - lease time, ELSE - - o IF the client has requested a specific lease in the DHCPDISCOVER - message (regardless of whether the client has an assigned network - address), the server may choose either to return the requested - lease (if the lease is acceptable to local policy) or select - another lease. 
- -Field DHCPOFFER DHCPACK DHCPNAK ------ --------- ------- ------- -'op' BOOTREPLY BOOTREPLY BOOTREPLY -'htype' (From "Assigned Numbers" RFC) -'hlen' (Hardware address length in octets) -'hops' 0 0 0 -'xid' 'xid' from client 'xid' from client 'xid' from client - DHCPDISCOVER DHCPREQUEST DHCPREQUEST - message message message -'secs' 0 0 0 -'ciaddr' 0 'ciaddr' from 0 - DHCPREQUEST or 0 -'yiaddr' IP address offered IP address 0 - to client assigned to client -'siaddr' IP address of next IP address of next 0 - bootstrap server bootstrap server -'flags' 'flags' from 'flags' from 'flags' from - client DHCPDISCOVER client DHCPREQUEST client DHCPREQUEST - message message message -'giaddr' 'giaddr' from 'giaddr' from 'giaddr' from - client DHCPDISCOVER client DHCPREQUEST client DHCPREQUEST - message message message -'chaddr' 'chaddr' from 'chaddr' from 'chaddr' from - client DHCPDISCOVER client DHCPREQUEST client DHCPREQUEST - message message message -'sname' Server host name Server host name (unused) - or options or options -'file' Client boot file Client boot file (unused) - name or options name or options -'options' options options - - - -Droms Standards Track [Page 28] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -Option DHCPOFFER DHCPACK DHCPNAK ------- --------- ------- ------- -Requested IP address MUST NOT MUST NOT MUST NOT -IP address lease time MUST MUST (DHCPREQUEST) MUST NOT - MUST NOT (DHCPINFORM) -Use 'file'/'sname' fields MAY MAY MUST NOT -DHCP message type DHCPOFFER DHCPACK DHCPNAK -Parameter request list MUST NOT MUST NOT MUST NOT -Message SHOULD SHOULD SHOULD -Client identifier MUST NOT MUST NOT MAY -Vendor class identifier MAY MAY MAY -Server identifier MUST MUST MUST -Maximum message size MUST NOT MUST NOT MUST NOT -All others MAY MAY MUST NOT - - Table 3: Fields and options used by DHCP servers - - Once the network address and lease have been determined, the server - constructs a DHCPOFFER message with the offered configuration - 
parameters. It is important for all DHCP servers to return the same - parameters (with the possible exception of a newly allocated network - address) to ensure predictable client behavior regardless of which - server the client selects. The configuration parameters MUST be - selected by applying the following rules in the order given below. - The network administrator is responsible for configuring multiple - DHCP servers to ensure uniform responses from those servers. The - server MUST return to the client: - - o The client's network address, as determined by the rules given - earlier in this section, - - o The expiration time for the client's lease, as determined by the - rules given earlier in this section, - - o Parameters requested by the client, according to the following - rules: - - -- IF the server has been explicitly configured with a default - value for the parameter, the server MUST include that value - in an appropriate option in the 'option' field, ELSE - - -- IF the server recognizes the parameter as a parameter - defined in the Host Requirements Document, the server MUST - include the default value for that parameter as given in the - Host Requirements Document in an appropriate option in the - 'option' field, ELSE - - -- The server MUST NOT return a value for that parameter, - - - -Droms Standards Track [Page 29] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - The server MUST supply as many of the requested parameters as - possible and MUST omit any parameters it cannot provide. The - server MUST include each requested parameter only once unless - explicitly allowed in the DHCP Options and BOOTP Vendor - Extensions document. 
- - o Any parameters from the existing binding that differ from the Host - Requirements Document defaults, - - o Any parameters specific to this client (as identified by - the contents of 'chaddr' or 'client identifier' in the DHCPDISCOVER - or DHCPREQUEST message), e.g., as configured by the network - administrator, - - o Any parameters specific to this client's class (as identified - by the contents of the 'vendor class identifier' - option in the DHCPDISCOVER or DHCPREQUEST message), - e.g., as configured by the network administrator; the parameters - MUST be identified by an exact match between the client's vendor - class identifiers and the client's classes identified in the - server, - - o Parameters with non-default values on the client's subnet. - - The server MAY choose to return the 'vendor class identifier' used to - determine the parameters in the DHCPOFFER message to assist the - client in selecting which DHCPOFFER to accept. The server inserts - the 'xid' field from the DHCPDISCOVER message into the 'xid' field of - the DHCPOFFER message and sends the DHCPOFFER message to the - requesting client. - -4.3.2 DHCPREQUEST message - - A DHCPREQUEST message may come from a client responding to a - DHCPOFFER message from a server, from a client verifying a previously - allocated IP address or from a client extending the lease on a - network address. If the DHCPREQUEST message contains a 'server - identifier' option, the message is in response to a DHCPOFFER - message. Otherwise, the message is a request to verify or extend an - existing lease. If the client uses a 'client identifier' in a - DHCPREQUEST message, it MUST use that same 'client identifier' in all - subsequent messages. If the client included a list of requested - parameters in a DHCPDISCOVER message, it MUST include that list in - all subsequent messages. 
- - - - - - - -Droms Standards Track [Page 30] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - Any configuration parameters in the DHCPACK message SHOULD NOT - conflict with those in the earlier DHCPOFFER message to which the - client is responding. The client SHOULD use the parameters in the - DHCPACK message for configuration. - - Clients send DHCPREQUEST messages as follows: - - o DHCPREQUEST generated during SELECTING state: - - Client inserts the address of the selected server in 'server - identifier', 'ciaddr' MUST be zero, 'requested IP address' MUST be - filled in with the yiaddr value from the chosen DHCPOFFER. - - Note that the client may choose to collect several DHCPOFFER - messages and select the "best" offer. The client indicates its - selection by identifying the offering server in the DHCPREQUEST - message. If the client receives no acceptable offers, the client - may choose to try another DHCPDISCOVER message. Therefore, the - servers may not receive a specific DHCPREQUEST from which they can - decide whether or not the client has accepted the offer. Because - the servers have not committed any network address assignments on - the basis of a DHCPOFFER, servers are free to reuse offered - network addresses in response to subsequent requests. As an - implementation detail, servers SHOULD NOT reuse offered addresses - and may use an implementation-specific timeout mechanism to decide - when to reuse an offered address. - - o DHCPREQUEST generated during INIT-REBOOT state: - - 'server identifier' MUST NOT be filled in, 'requested IP address' - option MUST be filled in with client's notion of its previously - assigned address. 'ciaddr' MUST be zero. The client is seeking to - verify a previously allocated, cached configuration. Server SHOULD - send a DHCPNAK message to the client if the 'requested IP address' - is incorrect, or is on the wrong network. 
- - Determining whether a client in the INIT-REBOOT state is on the - correct network is done by examining the contents of 'giaddr', the - 'requested IP address' option, and a database lookup. If the DHCP - server detects that the client is on the wrong net (i.e., the - result of applying the local subnet mask or remote subnet mask (if - 'giaddr' is not zero) to 'requested IP address' option value - doesn't match reality), then the server SHOULD send a DHCPNAK - message to the client. - - - - - - - -Droms Standards Track [Page 31] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - If the network is correct, then the DHCP server should check if - the client's notion of its IP address is correct. If not, then the - server SHOULD send a DHCPNAK message to the client. If the DHCP - server has no record of this client, then it MUST remain silent, - and MAY output a warning to the network administrator. This - behavior is necessary for peaceful coexistence of non- - communicating DHCP servers on the same wire. - - If 'giaddr' is 0x0 in the DHCPREQUEST message, the client is on - the same subnet as the server. The server MUST broadcast the - DHCPNAK message to the 0xffffffff broadcast address because the - client may not have a correct network address or subnet mask, and - the client may not be answering ARP requests. - - If 'giaddr' is set in the DHCPREQUEST message, the client is on a - different subnet. The server MUST set the broadcast bit in the - DHCPNAK, so that the relay agent will broadcast the DHCPNAK to the - client, because the client may not have a correct network address - or subnet mask, and the client may not be answering ARP requests. - - o DHCPREQUEST generated during RENEWING state: - - 'server identifier' MUST NOT be filled in, 'requested IP address' - option MUST NOT be filled in, 'ciaddr' MUST be filled in with - client's IP address. In this situation, the client is completely - configured, and is trying to extend its lease. 
This message will - be unicast, so no relay agents will be involved in its - transmission. Because 'giaddr' is therefore not filled in, the - DHCP server will trust the value in 'ciaddr', and use it when - replying to the client. - - A client MAY choose to renew or extend its lease prior to T1. The - server may choose not to extend the lease (as a policy decision by - the network administrator), but should return a DHCPACK message - regardless. - - o DHCPREQUEST generated during REBINDING state: - - 'server identifier' MUST NOT be filled in, 'requested IP address' - option MUST NOT be filled in, 'ciaddr' MUST be filled in with - client's IP address. In this situation, the client is completely - configured, and is trying to extend its lease. This message MUST - be broadcast to the 0xffffffff IP broadcast address. The DHCP - server SHOULD check 'ciaddr' for correctness before replying to - the DHCPREQUEST. - - - - - - -Droms Standards Track [Page 32] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - The DHCPREQUEST from a REBINDING client is intended to accommodate - sites that have multiple DHCP servers and a mechanism for - maintaining consistency among leases managed by multiple servers. - A DHCP server MAY extend a client's lease only if it has local - administrative authority to do so. - -4.3.3 DHCPDECLINE message - - If the server receives a DHCPDECLINE message, the client has - discovered through some other means that the suggested network - address is already in use. The server MUST mark the network address - as not available and SHOULD notify the local system administrator of - a possible configuration problem. - -4.3.4 DHCPRELEASE message - - Upon receipt of a DHCPRELEASE message, the server marks the network - address as not allocated. The server SHOULD retain a record of the - client's initialization parameters for possible reuse in response to - subsequent requests from the client. 
- -4.3.5 DHCPINFORM message - - The server responds to a DHCPINFORM message by sending a DHCPACK - message directly to the address given in the 'ciaddr' field of the - DHCPINFORM message. The server MUST NOT send a lease expiration time - to the client and SHOULD NOT fill in 'yiaddr'. The server includes - other parameters in the DHCPACK message as defined in section 4.3.1. - -4.3.6 Client messages - - Table 4 details the differences between messages from clients in - various states. - - --------------------------------------------------------------------- - | |INIT-REBOOT |SELECTING |RENEWING |REBINDING | - --------------------------------------------------------------------- - |broad/unicast |broadcast |broadcast |unicast |broadcast | - |server-ip |MUST NOT |MUST |MUST NOT |MUST NOT | - |requested-ip |MUST |MUST |MUST NOT |MUST NOT | - |ciaddr |zero |zero |IP address |IP address| - --------------------------------------------------------------------- - - Table 4: Client messages from different states - - - - - - - -Droms Standards Track [Page 33] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -4.4 DHCP client behavior - - Figure 5 gives a state-transition diagram for a DHCP client. A - client can receive the following messages from a server: - - o DHCPOFFER - - o DHCPACK - - o DHCPNAK - - The DHCPINFORM message is not shown in figure 5. A client simply - sends the DHCPINFORM and waits for DHCPACK messages. Once the client - has selected its parameters, it has completed the configuration - process. - - Table 5 gives the use of the fields and options in a DHCP message by - a client. The remainder of this section describes the action of the - DHCP client for each possible incoming message. The description in - the following section corresponds to the full configuration procedure - previously described in section 3.1, and the text in the subsequent - section corresponds to the abbreviated configuration procedure - described in section 3.2. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -Droms Standards Track [Page 34] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - -------- ------- -| | +-------------------------->| |<-------------------+ -| INIT- | | +-------------------->| INIT | | -| REBOOT |DHCPNAK/ +---------->| |<---+ | -| |Restart| | ------- | | - -------- | DHCPNAK/ | | | - | Discard offer | -/Send DHCPDISCOVER | --/Send DHCPREQUEST | | | - | | | DHCPACK v | | - ----------- | (not accept.)/ ----------- | | -| | | Send DHCPDECLINE | | | -| REBOOTING | | | | SELECTING |<----+ | -| | | / | | |DHCPOFFER/ | - ----------- | / ----------- | |Collect | - | | / | | | replies | -DHCPACK/ | / +----------------+ +-------+ | -Record lease, set| | v Select offer/ | -timers T1, T2 ------------ send DHCPREQUEST | | - | +----->| | DHCPNAK, Lease expired/ | - | | | REQUESTING | Halt network | - DHCPOFFER/ | | | | - Discard ------------ | | - | | | | ----------- | - | +--------+ DHCPACK/ | | | - | Record lease, set -----| REBINDING | | - | timers T1, T2 / | | | - | | DHCPACK/ ----------- | - | v Record lease, set ^ | - +----------------> ------- /timers T1,T2 | | - +----->| |<---+ | | - | | BOUND |<---+ | | - DHCPOFFER, DHCPACK, | | | T2 expires/ DHCPNAK/ - DHCPNAK/Discard ------- | Broadcast Halt network - | | | | DHCPREQUEST | - +-------+ | DHCPACK/ | | - T1 expires/ Record lease, set | | - Send DHCPREQUEST timers T1, T2 | | - to leasing server | | | - | ---------- | | - | | |------------+ | - +->| RENEWING | | - | |----------------------------+ - ---------- - Figure 5: State-transition diagram for DHCP clients - - - - - - - -Droms Standards Track [Page 35] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -4.4.1 Initialization and allocation of network address - - The client begins in INIT state and forms a DHCPDISCOVER message. - The client SHOULD wait a random time between one and ten seconds to - desynchronize the use of DHCP at startup. 
The client sets 'ciaddr' - to 0x00000000. The client MAY request specific parameters by - including the 'parameter request list' option. The client MAY - suggest a network address and/or lease time by including the - 'requested IP address' and 'IP address lease time' options. The - client MUST include its hardware address in the 'chaddr' field, if - necessary for delivery of DHCP reply messages. The client MAY - include a different unique identifier in the 'client identifier' - option, as discussed in section 4.2. If the client included a list - of requested parameters in a DHCPDISCOVER message, it MUST include - that list in all subsequent messages. - - The client generates and records a random transaction identifier and - inserts that identifier into the 'xid' field. The client records its - own local time for later use in computing the lease expiration. The - client then broadcasts the DHCPDISCOVER on the local hardware - broadcast address to the 0xffffffff IP broadcast address and 'DHCP - server' UDP port. - - If the 'xid' of an arriving DHCPOFFER message does not match the - 'xid' of the most recent DHCPDISCOVER message, the DHCPOFFER message - must be silently discarded. Any arriving DHCPACK messages must be - silently discarded. - - The client collects DHCPOFFER messages over a period of time, selects - one DHCPOFFER message from the (possibly many) incoming DHCPOFFER - messages (e.g., the first DHCPOFFER message or the DHCPOFFER message - from the previously used server) and extracts the server address from - the 'server identifier' option in the DHCPOFFER message. The time - over which the client collects messages and the mechanism used to - select one DHCPOFFER are implementation dependent. 
- - - - - - - - - - - - - - - - -Droms Standards Track [Page 36] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -Field DHCPDISCOVER DHCPREQUEST DHCPDECLINE, - DHCPINFORM DHCPRELEASE ------ ------------ ----------- ----------- -'op' BOOTREQUEST BOOTREQUEST BOOTREQUEST -'htype' (From "Assigned Numbers" RFC) -'hlen' (Hardware address length in octets) -'hops' 0 0 0 -'xid' selected by client 'xid' from server selected by - DHCPOFFER message client -'secs' 0 or seconds since 0 or seconds since 0 - DHCP process started DHCP process started -'flags' Set 'BROADCAST' Set 'BROADCAST' 0 - flag if client flag if client - requires broadcast requires broadcast - reply reply -'ciaddr' 0 (DHCPDISCOVER) 0 or client's 0 (DHCPDECLINE) - client's network address client's network - network address (BOUND/RENEW/REBIND) address - (DHCPINFORM) (DHCPRELEASE) -'yiaddr' 0 0 0 -'siaddr' 0 0 0 -'giaddr' 0 0 0 -'chaddr' client's hardware client's hardware client's hardware - address address address -'sname' options, if options, if (unused) - indicated in indicated in - 'sname/file' 'sname/file' - option; otherwise option; otherwise - unused unused -'file' options, if options, if (unused) - indicated in indicated in - 'sname/file' 'sname/file' - option; otherwise option; otherwise - unused unused -'options' options options (unused) - - - - - - - - - - - - - - - - -Droms Standards Track [Page 37] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -Option DHCPDISCOVER DHCPREQUEST DHCPDECLINE, - DHCPINFORM DHCPRELEASE ------- ------------ ----------- ----------- -Requested IP address MAY MUST (in MUST - (DISCOVER) SELECTING or (DHCPDECLINE), - MUST NOT INIT-REBOOT) MUST NOT - (INFORM) MUST NOT (in (DHCPRELEASE) - BOUND or - RENEWING) -IP address lease time MAY MAY MUST NOT - (DISCOVER) - MUST NOT - (INFORM) -Use 'file'/'sname' fields MAY MAY MAY -DHCP message type DHCPDISCOVER/ DHCPREQUEST DHCPDECLINE/ - DHCPINFORM DHCPRELEASE -Client identifier MAY MAY MAY -Vendor class 
identifier MAY MAY MUST NOT -Server identifier MUST NOT MUST (after MUST - SELECTING) - MUST NOT (after - INIT-REBOOT, - BOUND, RENEWING - or REBINDING) -Parameter request list MAY MAY MUST NOT -Maximum message size MAY MAY MUST NOT -Message SHOULD NOT SHOULD NOT SHOULD -Site-specific MAY MAY MUST NOT -All others MAY MAY MUST NOT - - Table 5: Fields and options used by DHCP clients - - If the parameters are acceptable, the client records the address of - the server that supplied the parameters from the 'server identifier' - field and sends that address in the 'server identifier' field of a - DHCPREQUEST broadcast message. Once the DHCPACK message from the - server arrives, the client is initialized and moves to BOUND state. - The DHCPREQUEST message contains the same 'xid' as the DHCPOFFER - message. The client records the lease expiration time as the sum of - the time at which the original request was sent and the duration of - the lease from the DHCPACK message. The client SHOULD perform a - check on the suggested address to ensure that the address is not - already in use. For example, if the client is on a network that - supports ARP, the client may issue an ARP request for the suggested - request. When broadcasting an ARP request for the suggested address, - the client must fill in its own hardware address as the sender's - hardware address, and 0 as the sender's IP address, to avoid - confusing ARP caches in other hosts on the same subnet. If the - - - -Droms Standards Track [Page 38] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - network address appears to be in use, the client MUST send a - DHCPDECLINE message to the server. The client SHOULD broadcast an ARP - reply to announce the client's new IP address and clear any outdated - ARP cache entries in hosts on the client's subnet. - -4.4.2 Initialization with known network address - - The client begins in INIT-REBOOT state and sends a DHCPREQUEST - message. 
The client MUST insert its known network address as a - 'requested IP address' option in the DHCPREQUEST message. The client - may request specific configuration parameters by including the - 'parameter request list' option. The client generates and records a - random transaction identifier and inserts that identifier into the - 'xid' field. The client records its own local time for later use in - computing the lease expiration. The client MUST NOT include a - 'server identifier' in the DHCPREQUEST message. The client then - broadcasts the DHCPREQUEST on the local hardware broadcast address to - the 'DHCP server' UDP port. - - Once a DHCPACK message with an 'xid' field matching that in the - client's DHCPREQUEST message arrives from any server, the client is - initialized and moves to BOUND state. The client records the lease - expiration time as the sum of the time at which the DHCPREQUEST - message was sent and the duration of the lease from the DHCPACK - message. - -4.4.3 Initialization with an externally assigned network address - - The client sends a DHCPINFORM message. The client may request - specific configuration parameters by including the 'parameter request - list' option. The client generates and records a random transaction - identifier and inserts that identifier into the 'xid' field. The - client places its own network address in the 'ciaddr' field. The - client SHOULD NOT request lease time parameters. - - The client then unicasts the DHCPINFORM to the DHCP server if it - knows the server's address, otherwise it broadcasts the message to - the limited (all 1s) broadcast address. DHCPINFORM messages MUST be - directed to the 'DHCP server' UDP port. - - Once a DHCPACK message with an 'xid' field matching that in the - client's DHCPINFORM message arrives from any server, the client is - initialized. 
- - If the client does not receive a DHCPACK within a reasonable period - of time (60 seconds or 4 tries if using timeout suggested in section - 4.1), then it SHOULD display a message informing the user of the - problem, and then SHOULD begin network processing using suitable - - - -Droms Standards Track [Page 39] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - defaults as per Appendix A. - -4.4.4 Use of broadcast and unicast - - The DHCP client broadcasts DHCPDISCOVER, DHCPREQUEST and DHCPINFORM - messages, unless the client knows the address of a DHCP server. The - client unicasts DHCPRELEASE messages to the server. Because the - client is declining the use of the IP address supplied by the server, - the client broadcasts DHCPDECLINE messages. - - When the DHCP client knows the address of a DHCP server, in either - INIT or REBOOTING state, the client may use that address in the - DHCPDISCOVER or DHCPREQUEST rather than the IP broadcast address. - The client may also use unicast to send DHCPINFORM messages to a - known DHCP server. If the client receives no response to DHCP - messages sent to the IP address of a known DHCP server, the DHCP - client reverts to using the IP broadcast address. - -4.4.5 Reacquisition and expiration - - The client maintains two times, T1 and T2, that specify the times at - which the client tries to extend its lease on its network address. - T1 is the time at which the client enters the RENEWING state and - attempts to contact the server that originally issued the client's - network address. T2 is the time at which the client enters the - REBINDING state and attempts to contact any server. T1 MUST be - earlier than T2, which, in turn, MUST be earlier than the time at - which the client's lease will expire. - - To avoid the need for synchronized clocks, T1 and T2 are expressed in - options as relative times [2]. 
- - At time T1 the client moves to RENEWING state and sends (via unicast) - a DHCPREQUEST message to the server to extend its lease. The client - sets the 'ciaddr' field in the DHCPREQUEST to its current network - address. The client records the local time at which the DHCPREQUEST - message is sent for computation of the lease expiration time. The - client MUST NOT include a 'server identifier' in the DHCPREQUEST - message. - - Any DHCPACK messages that arrive with an 'xid' that does not match - the 'xid' of the client's DHCPREQUEST message are silently discarded. - When the client receives a DHCPACK from the server, the client - computes the lease expiration time as the sum of the time at which - the client sent the DHCPREQUEST message and the duration of the lease - in the DHCPACK message. The client has successfully reacquired its - network address, returns to BOUND state and may continue network - processing. - - - -Droms Standards Track [Page 40] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - If no DHCPACK arrives before time T2, the client moves to REBINDING - state and sends (via broadcast) a DHCPREQUEST message to extend its - lease. The client sets the 'ciaddr' field in the DHCPREQUEST to its - current network address. The client MUST NOT include a 'server - identifier' in the DHCPREQUEST message. - - Times T1 and T2 are configurable by the server through options. T1 - defaults to (0.5 * duration_of_lease). T2 defaults to (0.875 * - duration_of_lease). Times T1 and T2 SHOULD be chosen with some - random "fuzz" around a fixed value, to avoid synchronization of - client reacquisition. - - A client MAY choose to renew or extend its lease prior to T1. The - server MAY choose to extend the client's lease according to policy - set by the network administrator. The server SHOULD return T1 and - T2, and their values SHOULD be adjusted from their original values to - take account of the time remaining on the lease. 
- - In both RENEWING and REBINDING states, if the client receives no - response to its DHCPREQUEST message, the client SHOULD wait one-half - of the remaining time until T2 (in RENEWING state) and one-half of - the remaining lease time (in REBINDING state), down to a minimum of - 60 seconds, before retransmitting the DHCPREQUEST message. - - If the lease expires before the client receives a DHCPACK, the client - moves to INIT state, MUST immediately stop any other network - processing and requests network initialization parameters as if the - client were uninitialized. If the client then receives a DHCPACK - allocating that client its previous network address, the client - SHOULD continue network processing. If the client is given a new - network address, it MUST NOT continue using the previous network - address and SHOULD notify the local users of the problem. - -4.4.6 DHCPRELEASE - - If the client no longer requires use of its assigned network address - (e.g., the client is gracefully shut down), the client sends a - DHCPRELEASE message to the server. Note that the correct operation - of DHCP does not depend on the transmission of DHCPRELEASE messages. - - - - - - - - - - - - -Droms Standards Track [Page 41] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -5. Acknowledgments - - The author thanks the many (and too numerous to mention!) members of - the DHC WG for their tireless and ongoing efforts in the development - of DHCP and this document. - - The efforts of J Allard, Mike Carney, Dave Lapp, Fred Lien and John - Mendonca in organizing DHCP interoperability testing sessions are - gratefully acknowledged. - - The development of this document was supported in part by grants from - the Corporation for National Research Initiatives (CNRI), Bucknell - University and Sun Microsystems. - -6. References - - [1] Acetta, M., "Resource Location Protocol", RFC 887, CMU, December - 1983. - - [2] Alexander, S., and R. 
Droms, "DHCP Options and BOOTP Vendor - Extensions", RFC 1533, Lachman Technology, Inc., Bucknell - University, October 1993. - - [3] Braden, R., Editor, "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, USC/Information Sciences - Institute, October 1989. - - [4] Braden, R., Editor, "Requirements for Internet Hosts -- - Application and Support, STD 3, RFC 1123, USC/Information - Sciences Institute, October 1989. - - [5] Brownell, D, "Dynamic Reverse Address Resolution Protocol - (DRARP)", Work in Progress. - - [6] Comer, D., and R. Droms, "Uniform Access to Internet Directory - Services", Proc. of ACM SIGCOMM '90 (Special issue of Computer - Communications Review), 20(4):50--59, 1990. - - [7] Croft, B., and J. Gilmore, "Bootstrap Protocol (BOOTP)", RFC 951, - Stanford and SUN Microsystems, September 1985. - - [8] Deering, S., "ICMP Router Discovery Messages", RFC 1256, Xerox - PARC, September 1991. - - [9] Droms, D., "Interoperation between DHCP and BOOTP", RFC 1534, - Bucknell University, October 1993. - - - - - -Droms Standards Track [Page 42] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - [10] Finlayson, R., Mann, T., Mogul, J., and M. Theimer, "A Reverse - Address Resolution Protocol", RFC 903, Stanford, June 1984. - - [11] Gray C., and D. Cheriton, "Leases: An Efficient Fault-Tolerant - Mechanism for Distributed File Cache Consistency", In Proc. of - the Twelfth ACM Symposium on Operating Systems Design, 1989. - - [12] Mockapetris, P., "Domain Names -- Concepts and Facilities", STD - 13, RFC 1034, USC/Information Sciences Institute, November 1987. - - [13] Mockapetris, P., "Domain Names -- Implementation and - Specification", STD 13, RFC 1035, USC/Information Sciences - Institute, November 1987. - - [14] Mogul J., and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - [15] Morgan, R., "Dynamic IP Address Assignment for Ethernet Attached - Hosts", Work in Progress. 
- - [16] Postel, J., "Internet Control Message Protocol", STD 5, RFC 792, - USC/Information Sciences Institute, September 1981. - - [17] Reynolds, J., "BOOTP Vendor Information Extensions", RFC 1497, - USC/Information Sciences Institute, August 1993. - - [18] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1700, - USC/Information Sciences Institute, October 1994. - - [19] Jeffrey Schiller and Mark Rosenstein. A Protocol for the Dynamic - Assignment of IP Addresses for use on an Ethernet. (Available - from the Athena Project, MIT), 1989. - - [20] Sollins, K., "The TFTP Protocol (Revision 2)", RFC 783, NIC, - June 1981. - - [21] Wimer, W., "Clarifications and Extensions for the Bootstrap - Protocol", RFC 1542, Carnegie Mellon University, October 1993. - -7. Security Considerations - - DHCP is built directly on UDP and IP which are as yet inherently - insecure. Furthermore, DHCP is generally intended to make - maintenance of remote and/or diskless hosts easier. While perhaps - not impossible, configuring such hosts with passwords or keys may be - difficult and inconvenient. Therefore, DHCP in its current form is - quite insecure. - - - - -Droms Standards Track [Page 43] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - - Unauthorized DHCP servers may be easily set up. Such servers can - then send false and potentially disruptive information to clients - such as incorrect or duplicate IP addresses, incorrect routing - information (including spoof routers, etc.), incorrect domain - nameserver addresses (such as spoof nameservers), and so on. - Clearly, once this seed information is in place, an attacker can - further compromise affected systems. - - Malicious DHCP clients could masquerade as legitimate clients and - retrieve information intended for those legitimate clients. Where - dynamic allocation of resources is used, a malicious client could - claim all resources for itself, thereby denying resources to - legitimate clients. - -8. 
Author's Address - - Ralph Droms - Computer Science Department - 323 Dana Engineering - Bucknell University - Lewisburg, PA 17837 - - Phone: (717) 524-1145 - EMail: droms@bucknell.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - -Droms Standards Track [Page 44] - -RFC 2131 Dynamic Host Configuration Protocol March 1997 - - -A. Host Configuration Parameters - - IP-layer_parameters,_per_host:_ - - Be a router on/off HRC 3.1 - Non-local source routing on/off HRC 3.3.5 - Policy filters for - non-local source routing (list) HRC 3.3.5 - Maximum reassembly size integer HRC 3.3.2 - Default TTL integer HRC 3.2.1.7 - PMTU aging timeout integer MTU 6.6 - MTU plateau table (list) MTU 7 - IP-layer_parameters,_per_interface:_ - IP address (address) HRC 3.3.1.6 - Subnet mask (address mask) HRC 3.3.1.6 - MTU integer HRC 3.3.3 - All-subnets-MTU on/off HRC 3.3.3 - Broadcast address flavor 0x00000000/0xffffffff HRC 3.3.6 - Perform mask discovery on/off HRC 3.2.2.9 - Be a mask supplier on/off HRC 3.2.2.9 - Perform router discovery on/off RD 5.1 - Router solicitation address (address) RD 5.1 - Default routers, list of: - router address (address) HRC 3.3.1.6 - preference level integer HRC 3.3.1.6 - Static routes, list of: - destination (host/subnet/net) HRC 3.3.1.2 - destination mask (address mask) HRC 3.3.1.2 - type-of-service integer HRC 3.3.1.2 - first-hop router (address) HRC 3.3.1.2 - ignore redirects on/off HRC 3.3.1.2 - PMTU integer MTU 6.6 - perform PMTU discovery on/off MTU 6.6 - - Link-layer_parameters,_per_interface:_ - Trailers on/off HRC 2.3.1 - ARP cache timeout integer HRC 2.3.2.1 - Ethernet encapsulation (RFC 894/RFC 1042) HRC 2.3.3 - - TCP_parameters,_per_host:_ - TTL integer HRC 4.2.2.19 - Keep-alive interval integer HRC 4.2.3.6 - Keep-alive data size 0/1 HRC 4.2.3.6 - -Key: - - MTU = Path MTU Discovery (RFC 1191, Proposed Standard) - RD = Router Discovery (RFC 1256, Proposed Standard) - - - -Droms Standards Track [Page 45] - diff --git 
a/kernel/picotcp/RFC/rfc2132.txt b/kernel/picotcp/RFC/rfc2132.txt deleted file mode 100644 index e9c4f4b..0000000 --- a/kernel/picotcp/RFC/rfc2132.txt +++ /dev/null @@ -1,1907 +0,0 @@ - - - - - - -Network Working Group S. Alexander -Request for Comments: 2132 Silicon Graphics, Inc. -Obsoletes: 1533 R. Droms -Category: Standards Track Bucknell University - March 1997 - - DHCP Options and BOOTP Vendor Extensions - -Status of this memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Abstract - - The Dynamic Host Configuration Protocol (DHCP) [1] provides a - framework for passing configuration information to hosts on a TCP/IP - network. Configuration parameters and other control information are - carried in tagged data items that are stored in the 'options' field - of the DHCP message. The data items themselves are also called - "options." - - This document specifies the current set of DHCP options. Future - options will be specified in separate RFCs. The current list of - valid options is also available in ftp://ftp.isi.edu/in- - notes/iana/assignments [22]. - - All of the vendor information extensions defined in RFC 1497 [2] may - be used as DHCP options. The definitions given in RFC 1497 are - included in this document, which supersedes RFC 1497. All of the - DHCP options defined in this document, except for those specific to - DHCP as defined in section 9, may be used as BOOTP vendor information - extensions. - -Table of Contents - - 1. Introduction .............................................. 2 - 2. BOOTP Extension/DHCP Option Field Format .................. 4 - 3. RFC 1497 Vendor Extensions ................................ 5 - 4. 
IP Layer Parameters per Host .............................. 11 - 5. IP Layer Parameters per Interface ........................ 13 - 6. Link Layer Parameters per Interface ....................... 16 - 7. TCP Parameters ............................................ 17 - 8. Application and Service Parameters ........................ 18 - 9. DHCP Extensions ........................................... 25 - - - -Alexander & Droms Standards Track [Page 1] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - 10. Defining new extensions ................................... 31 - 11. Acknowledgements .......................................... 31 - 12. References ................................................ 32 - 13. Security Considerations ................................... 33 - 14. Authors' Addresses ........................................ 34 - -1. Introduction - - This document specifies options for use with both the Dynamic Host - Configuration Protocol and the Bootstrap Protocol. - - The full description of DHCP packet formats may be found in the DHCP - specification document [1], and the full description of BOOTP packet - formats may be found in the BOOTP specification document [3]. This - document defines the format of information in the last field of DHCP - packets ('options') and of BOOTP packets ('vend'). The remainder of - this section defines a generalized use of this area for giving - information useful to a wide class of machines, operating systems and - configurations. Sites with a single DHCP or BOOTP server that is - shared among heterogeneous clients may choose to define other, site- - specific formats for the use of the 'options' field. - - Section 2 of this memo describes the formats of DHCP options and - BOOTP vendor extensions. Section 3 describes options defined in - previous documents for use with BOOTP (all may also be used with - DHCP). Sections 4-8 define new options intended for use with both - DHCP and BOOTP. 
Section 9 defines options used only in DHCP. - - References further describing most of the options defined in sections - 2-6 can be found in section 12. The use of the options defined in - section 9 is described in the DHCP specification [1]. - - Information on registering new options is contained in section 10. - - This document updates the definition of DHCP/BOOTP options that - appears in RFC1533. The classing mechanism has been extended to - include vendor classes as described in section 8.4 and 9.13. The new - procedure for defining new DHCP/BOOTP options is described in section - 10. Several new options, including NIS+ domain and servers, Mobile - IP home agent, SMTP server, TFTP server and Bootfile server, have - been added. Text giving definitions used throughout the document has - been added in section 1.1. Text emphasizing the need for uniqueness - of client-identifiers has been added to section 9.14. - - - - - - - - -Alexander & Droms Standards Track [Page 2] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -1.1 Requirements - - Throughout this document, the words that are used to define the - significance of particular requirements are capitalized. These words - are: - - o "MUST" - - This word or the adjective "REQUIRED" means that the item is an - absolute requirement of this specification. - - o "MUST NOT" - - This phrase means that the item is an absolute prohibition of - this specification. - - o "SHOULD" - - This word or the adjective "RECOMMENDED" means that there may - exist valid reasons in particular circumstances to ignore this - item, but the full implications should be understood and the case - carefully weighed before choosing a different course.
- - o "SHOULD NOT" - - This phrase means that there may exist valid reasons in - particular circumstances when the listed behavior is acceptable - or even useful, but the full implications should be understood - and the case carefully weighed before implementing any behavior - described with this label. - - o "MAY" - - This word or the adjective "OPTIONAL" means that this item is - truly optional. One vendor may choose to include the item - because a particular marketplace requires it or because it - enhances the product, for example; another vendor may omit the - same item. - -1.2 Terminology - - This document uses the following terms: - - o "DHCP client" - - A DHCP client or "client" is an Internet host using DHCP to - obtain configuration parameters such as a network address. - - - - -Alexander & Droms Standards Track [Page 3] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - o "DHCP server" - - A DHCP server or "server" is an Internet host that returns - configuration parameters to DHCP clients. - - o "binding" - - A binding is a collection of configuration parameters, including - at least an IP address, associated with or "bound to" a DHCP - client. Bindings are managed by DHCP servers. - -2. BOOTP Extension/DHCP Option Field Format - - - DHCP options have the same format as the BOOTP 'vendor extensions' - defined in RFC 1497 [2]. Options may be fixed length or variable - length. All options begin with a tag octet, which uniquely - identifies the option. Fixed-length options without data consist of - only a tag octet. Only options 0 and 255 are fixed length. All - other options are variable-length with a length octet following the - tag octet. The value of the length octet does not include the two - octets specifying the tag and length. The length octet is followed - by "length" octets of data.
Options containing NVT ASCII data SHOULD - NOT include a trailing NULL; however, the receiver of such options - MUST be prepared to delete trailing nulls if they exist. The - receiver MUST NOT require that a trailing null be included in the - data. In the case of some variable-length options the length field - is a constant but must still be specified. - - Any options defined subsequent to this document MUST contain a length - octet even if the length is fixed or zero. - - All multi-octet quantities are in network byte-order. - - When used with BOOTP, the first four octets of the vendor information - field have been assigned to the "magic cookie" (as suggested in RFC - 951). This field identifies the mode in which the succeeding data is - to be interpreted. The value of the magic cookie is the 4 octet - dotted decimal 99.130.83.99 (or hexadecimal number 63.82.53.63) in - network byte order. - - All of the "vendor extensions" defined in RFC 1497 are also DHCP - options. - - Option codes 128 to 254 (decimal) are reserved for site-specific - options. - - - - - -Alexander & Droms Standards Track [Page 4] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - Except for the options in section 9, all options may be used with - either DHCP or BOOTP. - - Many of these options have their default values specified in other - documents. In particular, RFC 1122 [4] specifies default values for - most IP and TCP configuration parameters. - - Many options supply one or more 32-bit IP address. Use of IP - addresses rather than fully-qualified Domain Names (FQDNs) may make - future renumbering of IP hosts more difficult. Use of these - addresses is discouraged at sites that may require renumbering. - -3. RFC 1497 Vendor Extensions - - This section lists the vendor extensions as defined in RFC 1497. - They are defined here for completeness. - -3.1. Pad Option - - The pad option can be used to cause subsequent fields to align on - word boundaries. 
- - The code for the pad option is 0, and its length is 1 octet. - - Code - +-----+ - | 0 | - +-----+ - -3.2. End Option - - The end option marks the end of valid information in the vendor - field. Subsequent octets should be filled with pad options. - - The code for the end option is 255, and its length is 1 octet. - - Code - +-----+ - | 255 | - +-----+ - -3.3. Subnet Mask - - The subnet mask option specifies the client's subnet mask as per RFC - 950 [5]. - - If both the subnet mask and the router option are specified in a DHCP - reply, the subnet mask option MUST be first. - - - -Alexander & Droms Standards Track [Page 5] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for the subnet mask option is 1, and its length is 4 octets. - - Code Len Subnet Mask - +-----+-----+-----+-----+-----+-----+ - | 1 | 4 | m1 | m2 | m3 | m4 | - +-----+-----+-----+-----+-----+-----+ - -3.4. Time Offset - - The time offset field specifies the offset of the client's subnet in - seconds from Coordinated Universal Time (UTC). The offset is - expressed as a two's complement 32-bit integer. A positive offset - indicates a location east of the zero meridian and a negative offset - indicates a location west of the zero meridian. - - The code for the time offset option is 2, and its length is 4 octets. - - Code Len Time Offset - +-----+-----+-----+-----+-----+-----+ - | 2 | 4 | n1 | n2 | n3 | n4 | - +-----+-----+-----+-----+-----+-----+ - -3.5. Router Option - - The router option specifies a list of IP addresses for routers on the - client's subnet. Routers SHOULD be listed in order of preference. - - The code for the router option is 3. The minimum length for the - router option is 4 octets, and the length MUST always be a multiple - of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 3 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.6. 
Time Server Option - - The time server option specifies a list of RFC 868 [6] time servers - available to the client. Servers SHOULD be listed in order of - preference. - - The code for the time server option is 4. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - - - - - -Alexander & Droms Standards Track [Page 6] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 4 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.7. Name Server Option - - The name server option specifies a list of IEN 116 [7] name servers - available to the client. Servers SHOULD be listed in order of - preference. - - The code for the name server option is 5. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 5 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.8. Domain Name Server Option - - The domain name server option specifies a list of Domain Name System - (STD 13, RFC 1035 [8]) name servers available to the client. Servers - SHOULD be listed in order of preference. - - The code for the domain name server option is 6. The minimum length - for this option is 4 octets, and the length MUST always be a multiple - of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 6 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.9. Log Server Option - - The log server option specifies a list of MIT-LCS UDP log servers - available to the client. Servers SHOULD be listed in order of - preference. - - The code for the log server option is 7. 
The minimum length for this - option is 4 octets, and the length MUST always be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 7 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - - - -Alexander & Droms Standards Track [Page 7] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -3.10. Cookie Server Option - - The cookie server option specifies a list of RFC 865 [9] cookie - servers available to the client. Servers SHOULD be listed in order - of preference. - - The code for the cookie server option is 8. The minimum length for this - option is 4 octets, and the length MUST always be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 8 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.11. LPR Server Option - - The LPR server option specifies a list of RFC 1179 [10] line printer - servers available to the client. Servers SHOULD be listed in order - of preference. - - The code for the LPR server option is 9. The minimum length for this - option is 4 octets, and the length MUST always be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 9 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.12. Impress Server Option - - The Impress server option specifies a list of Imagen Impress servers - available to the client. Servers SHOULD be listed in order of - preference. - - The code for the Impress server option is 10. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 10 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.13.
Resource Location Server Option - - This option specifies a list of RFC 887 [11] Resource Location - servers available to the client. Servers SHOULD be listed in order - of preference. - - - -Alexander & Droms Standards Track [Page 8] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for this option is 11. The minimum length for this option - is 4 octets, and the length MUST always be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 11 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.14. Host Name Option - - This option specifies the name of the client. The name may or may - not be qualified with the local domain name (see section 3.17 for the - preferred way to retrieve the domain name). See RFC 1035 for - character set restrictions. - - The code for this option is 12, and its minimum length is 1. - - Code Len Host Name - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 12 | n | h1 | h2 | h3 | h4 | h5 | h6 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -3.15. Boot File Size Option - - This option specifies the length in 512-octet blocks of the default - boot image for the client. The file length is specified as an - unsigned 16-bit integer. - - The code for this option is 13, and its length is 2. - - Code Len File Size - +-----+-----+-----+-----+ - | 13 | 2 | l1 | l2 | - +-----+-----+-----+-----+ - -3.16. Merit Dump File - - This option specifies the path-name of a file to which the client's - core image should be dumped in the event the client crashes. The - path is formatted as a character string consisting of characters from - the NVT ASCII character set. - - The code for this option is 14. Its minimum length is 1. - - Code Len Dump File Pathname - +-----+-----+-----+-----+-----+-----+--- - | 14 | n | n1 | n2 | n3 | n4 | ... 
- +-----+-----+-----+-----+-----+-----+--- - - - -Alexander & Droms Standards Track [Page 9] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -3.17. Domain Name - - This option specifies the domain name that client should use when - resolving hostnames via the Domain Name System. - - The code for this option is 15. Its minimum length is 1. - - Code Len Domain Name - +-----+-----+-----+-----+-----+-----+-- - | 15 | n | d1 | d2 | d3 | d4 | ... - +-----+-----+-----+-----+-----+-----+-- - -3.18. Swap Server - - This specifies the IP address of the client's swap server. - - The code for this option is 16 and its length is 4. - - Code Len Swap Server Address - +-----+-----+-----+-----+-----+-----+ - | 16 | n | a1 | a2 | a3 | a4 | - +-----+-----+-----+-----+-----+-----+ - -3.19. Root Path - - This option specifies the path-name that contains the client's root - disk. The path is formatted as a character string consisting of - characters from the NVT ASCII character set. - - The code for this option is 17. Its minimum length is 1. - - Code Len Root Disk Pathname - +-----+-----+-----+-----+-----+-----+--- - | 17 | n | n1 | n2 | n3 | n4 | ... - +-----+-----+-----+-----+-----+-----+--- - -3.20. Extensions Path - - A string to specify a file, retrievable via TFTP, which contains - information which can be interpreted in the same way as the 64-octet - vendor-extension field within the BOOTP response, with the following - exceptions: - - - the length of the file is unconstrained; - - all references to Tag 18 (i.e., instances of the - BOOTP Extensions Path field) within the file are - ignored. - - - - -Alexander & Droms Standards Track [Page 10] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for this option is 18. Its minimum length is 1. - - Code Len Extensions Pathname - +-----+-----+-----+-----+-----+-----+--- - | 18 | n | n1 | n2 | n3 | n4 | ... - +-----+-----+-----+-----+-----+-----+--- - -4. 
IP Layer Parameters per Host - - This section details the options that affect the operation of the IP - layer on a per-host basis. - -4.1. IP Forwarding Enable/Disable Option - - This option specifies whether the client should configure its IP - layer for packet forwarding. A value of 0 means disable IP - forwarding, and a value of 1 means enable IP forwarding. - - The code for this option is 19, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 19 | 1 | 0/1 | - +-----+-----+-----+ - -4.2. Non-Local Source Routing Enable/Disable Option - - This option specifies whether the client should configure its IP - layer to allow forwarding of datagrams with non-local source routes - (see Section 3.3.5 of [4] for a discussion of this topic). A value - of 0 means disallow forwarding of such datagrams, and a value of 1 - means allow forwarding. - - The code for this option is 20, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 20 | 1 | 0/1 | - +-----+-----+-----+ - -4.3. Policy Filter Option - - This option specifies policy filters for non-local source routing. - The filters consist of a list of IP addresses and masks which specify - destination/mask pairs with which to filter incoming source routes. - - Any source routed datagram whose next-hop address does not match one - of the filters should be discarded by the client. - - - -Alexander & Droms Standards Track [Page 11] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - See [4] for further information. - - The code for this option is 21. The minimum length of this option is - 8, and the length MUST be a multiple of 8. - - Code Len Address 1 Mask 1 - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ - | 21 | n | a1 | a2 | a3 | a4 | m1 | m2 | m3 | m4 | - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ - Address 2 Mask 2 - +-----+-----+-----+-----+-----+-----+-----+-----+--- - | a1 | a2 | a3 | a4 | m1 | m2 | m3 | m4 | ... 
- +-----+-----+-----+-----+-----+-----+-----+-----+--- - -4.4. Maximum Datagram Reassembly Size - - This option specifies the maximum size datagram that the client - should be prepared to reassemble. The size is specified as a 16-bit - unsigned integer. The minimum legal value is 576. - - The code for this option is 22, and its length is 2. - - Code Len Size - +-----+-----+-----+-----+ - | 22 | 2 | s1 | s2 | - +-----+-----+-----+-----+ - -4.5. Default IP Time-to-live - - This option specifies the default time-to-live that the client should - use on outgoing datagrams. The TTL is specified as an octet with a - value between 1 and 255. - - The code for this option is 23, and its length is 1. - - Code Len TTL - +-----+-----+-----+ - | 23 | 1 | ttl | - +-----+-----+-----+ - -4.6. Path MTU Aging Timeout Option - - This option specifies the timeout (in seconds) to use when aging Path - MTU values discovered by the mechanism defined in RFC 1191 [12]. The - timeout is specified as a 32-bit unsigned integer. - - The code for this option is 24, and its length is 4. - - - - -Alexander & Droms Standards Track [Page 12] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - Code Len Timeout - +-----+-----+-----+-----+-----+-----+ - | 24 | 4 | t1 | t2 | t3 | t4 | - +-----+-----+-----+-----+-----+-----+ - -4.7. Path MTU Plateau Table Option - - This option specifies a table of MTU sizes to use when performing - Path MTU Discovery as defined in RFC 1191. The table is formatted as - a list of 16-bit unsigned integers, ordered from smallest to largest. - The minimum MTU value cannot be smaller than 68. - - The code for this option is 25. Its minimum length is 2, and the - length MUST be a multiple of 2. - - Code Len Size 1 Size 2 - +-----+-----+-----+-----+-----+-----+--- - | 25 | n | s1 | s2 | s1 | s2 | ... - +-----+-----+-----+-----+-----+-----+--- - -5.
IP Layer Parameters per Interface - - This section details the options that affect the operation of the IP - layer on a per-interface basis. It is expected that a client can - issue multiple requests, one per interface, in order to configure - interfaces with their specific parameters. - -5.1. Interface MTU Option - - This option specifies the MTU to use on this interface. The MTU is - specified as a 16-bit unsigned integer. The minimum legal value for - the MTU is 68. - - The code for this option is 26, and its length is 2. - - Code Len MTU - +-----+-----+-----+-----+ - | 26 | 2 | m1 | m2 | - +-----+-----+-----+-----+ - -5.2. All Subnets are Local Option - - This option specifies whether or not the client may assume that all - subnets of the IP network to which the client is connected use the - same MTU as the subnet of that network to which the client is - directly connected. A value of 1 indicates that all subnets share - the same MTU. A value of 0 means that the client should assume that - some subnets of the directly connected network may have smaller MTUs. - - - -Alexander & Droms Standards Track [Page 13] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for this option is 27, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 27 | 1 | 0/1 | - +-----+-----+-----+ - -5.3. Broadcast Address Option - - This option specifies the broadcast address in use on the client's - subnet. Legal values for broadcast addresses are specified in - section 3.2.1.3 of [4]. - - The code for this option is 28, and its length is 4. - - Code Len Broadcast Address - +-----+-----+-----+-----+-----+-----+ - | 28 | 4 | b1 | b2 | b3 | b4 | - +-----+-----+-----+-----+-----+-----+ - -5.4. Perform Mask Discovery Option - - This option specifies whether or not the client should perform subnet - mask discovery using ICMP. A value of 0 indicates that the client - should not perform mask discovery. 
A value of 1 means that the - client should perform mask discovery. - - The code for this option is 29, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 29 | 1 | 0/1 | - +-----+-----+-----+ - -5.5. Mask Supplier Option - - This option specifies whether or not the client should respond to - subnet mask requests using ICMP. A value of 0 indicates that the - client should not respond. A value of 1 means that the client should - respond. - - The code for this option is 30, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 30 | 1 | 0/1 | - +-----+-----+-----+ - - - - -Alexander & Droms Standards Track [Page 14] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -5.6. Perform Router Discovery Option - - This option specifies whether or not the client should solicit - routers using the Router Discovery mechanism defined in RFC 1256 - [13]. A value of 0 indicates that the client should not perform - router discovery. A value of 1 means that the client should perform - router discovery. - - The code for this option is 31, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 31 | 1 | 0/1 | - +-----+-----+-----+ - -5.7. Router Solicitation Address Option - - This option specifies the address to which the client should transmit - router solicitation requests. - - The code for this option is 32, and its length is 4. - - Code Len Address - +-----+-----+-----+-----+-----+-----+ - | 32 | 4 | a1 | a2 | a3 | a4 | - +-----+-----+-----+-----+-----+-----+ - -5.8. Static Route Option - - This option specifies a list of static routes that the client should - install in its routing cache. If multiple routes to the same - destination are specified, they are listed in descending order of - priority. - - The routes consist of a list of IP address pairs. The first address - is the destination address, and the second address is the router for - the destination. 
- - The default route (0.0.0.0) is an illegal destination for a static - route. See section 3.5 for information about the router option. - - The code for this option is 33. The minimum length of this option is - 8, and the length MUST be a multiple of 8. - - - - - - - - -Alexander & Droms Standards Track [Page 15] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - Code Len Destination 1 Router 1 - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ - | 33 | n | d1 | d2 | d3 | d4 | r1 | r2 | r3 | r4 | - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ - Destination 2 Router 2 - +-----+-----+-----+-----+-----+-----+-----+-----+--- - | d1 | d2 | d3 | d4 | r1 | r2 | r3 | r4 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+--- - -6. Link Layer Parameters per Interface - - This section lists the options that affect the operation of the data - link layer on a per-interface basis. - -6.1. Trailer Encapsulation Option - - This option specifies whether or not the client should negotiate the - use of trailers (RFC 893 [14]) when using the ARP protocol. A value - of 0 indicates that the client should not attempt to use trailers. A - value of 1 means that the client should attempt to use trailers. - - The code for this option is 34, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 34 | 1 | 0/1 | - +-----+-----+-----+ - -6.2. ARP Cache Timeout Option - - This option specifies the timeout in seconds for ARP cache entries. - The time is specified as a 32-bit unsigned integer. - - The code for this option is 35, and its length is 4. - - Code Len Time - +-----+-----+-----+-----+-----+-----+ - | 35 | 4 | t1 | t2 | t3 | t4 | - +-----+-----+-----+-----+-----+-----+ - -6.3. Ethernet Encapsulation Option - - This option specifies whether or not the client should use Ethernet - Version 2 (RFC 894 [15]) or IEEE 802.3 (RFC 1042 [16]) encapsulation - if the interface is an Ethernet. 
A value of 0 indicates that the - client should use RFC 894 encapsulation. A value of 1 means that the - client should use RFC 1042 encapsulation. - - - - -Alexander & Droms Standards Track [Page 16] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for this option is 36, and its length is 1. - - Code Len Value - +-----+-----+-----+ - | 36 | 1 | 0/1 | - +-----+-----+-----+ - -7. TCP Parameters - - This section lists the options that affect the operation of the TCP - layer on a per-interface basis. - -7.1. TCP Default TTL Option - - This option specifies the default TTL that the client should use when - sending TCP segments. The value is represented as an 8-bit unsigned - integer. The minimum value is 1. - - The code for this option is 37, and its length is 1. - - Code Len TTL - +-----+-----+-----+ - | 37 | 1 | n | - +-----+-----+-----+ - -7.2. TCP Keepalive Interval Option - - This option specifies the interval (in seconds) that the client TCP - should wait before sending a keepalive message on a TCP connection. - The time is specified as a 32-bit unsigned integer. A value of zero - indicates that the client should not generate keepalive messages on - connections unless specifically requested by an application. - - The code for this option is 38, and its length is 4. - - Code Len Time - +-----+-----+-----+-----+-----+-----+ - | 38 | 4 | t1 | t2 | t3 | t4 | - +-----+-----+-----+-----+-----+-----+ - -7.3. TCP Keepalive Garbage Option - - This option specifies whether or not the client should send TCP - keepalive messages with an octet of garbage for compatibility with - older implementations. A value of 0 indicates that a garbage octet - should not be sent. A value of 1 indicates that a garbage octet - should be sent. - - - - -Alexander & Droms Standards Track [Page 17] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for this option is 39, and its length is 1.
- - Code Len Value - +-----+-----+-----+ - | 39 | 1 | 0/1 | - +-----+-----+-----+ - -8. Application and Service Parameters - - This section details some miscellaneous options used to configure - miscellaneous applications and services. - -8.1. Network Information Service Domain Option - - This option specifies the name of the client's NIS [17] domain. The - domain is formatted as a character string consisting of characters - from the NVT ASCII character set. - - The code for this option is 40. Its minimum length is 1. - - Code Len NIS Domain Name - +-----+-----+-----+-----+-----+-----+--- - | 40 | n | n1 | n2 | n3 | n4 | ... - +-----+-----+-----+-----+-----+-----+--- - -8.2. Network Information Servers Option - - This option specifies a list of IP addresses indicating NIS servers - available to the client. Servers SHOULD be listed in order of - preference. - - The code for this option is 41. Its minimum length is 4, and the - length MUST be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 41 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.3. Network Time Protocol Servers Option - - This option specifies a list of IP addresses indicating NTP [18] - servers available to the client. Servers SHOULD be listed in order - of preference. - - The code for this option is 42. Its minimum length is 4, and the - length MUST be a multiple of 4. - - - - -Alexander & Droms Standards Track [Page 18] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 42 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.4. Vendor Specific Information - - This option is used by clients and servers to exchange vendor- - specific information. 
The information is an opaque object of n - octets, presumably interpreted by vendor-specific code on the clients - and servers. The definition of this information is vendor specific. - The vendor is indicated in the vendor class identifier option. - Servers not equipped to interpret the vendor-specific information - sent by a client MUST ignore it (although it may be reported). - Clients which do not receive desired vendor-specific information - SHOULD make an attempt to operate without it, although they may do so - (and announce they are doing so) in a degraded mode. - - If a vendor potentially encodes more than one item of information in - this option, then the vendor SHOULD encode the option using - "Encapsulated vendor-specific options" as described below: - - The Encapsulated vendor-specific options field SHOULD be encoded as a - sequence of code/length/value fields of identical syntax to the DHCP - options field with the following exceptions: - - 1) There SHOULD NOT be a "magic cookie" field in the encapsulated - vendor-specific extensions field. - - 2) Codes other than 0 or 255 MAY be redefined by the vendor within - the encapsulated vendor-specific extensions field, but SHOULD - conform to the tag-length-value syntax defined in section 2. - - 3) Code 255 (END), if present, signifies the end of the - encapsulated vendor extensions, not the end of the vendor - extensions field. If no code 255 is present, then the end of - the enclosing vendor-specific information field is taken as the - end of the encapsulated vendor-specific extensions field. - - The code for this option is 43 and its minimum length is 1. - - Code Len Vendor-specific information - +-----+-----+-----+-----+--- - | 43 | n | i1 | i2 | ... 
- +-----+-----+-----+-----+--- - - - - - - -Alexander & Droms Standards Track [Page 19] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - When encapsulated vendor-specific extensions are used, the - information bytes 1-n have the following format: - - Code Len Data item Code Len Data item Code - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ - | T1 | n | d1 | d2 | ... | T2 | n | D1 | D2 | ... | ... | - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+ - -8.5. NetBIOS over TCP/IP Name Server Option - - The NetBIOS name server (NBNS) option specifies a list of RFC - 1001/1002 [19] [20] NBNS name servers listed in order of preference. - - The code for this option is 44. The minimum length of the option is - 4 octets, and the length must always be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---- - | 44 | n | a1 | a2 | a3 | a4 | b1 | b2 | b3 | b4 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---- - -8.6. NetBIOS over TCP/IP Datagram Distribution Server Option - - The NetBIOS datagram distribution server (NBDD) option specifies a - list of RFC 1001/1002 NBDD servers listed in order of preference. The - code for this option is 45. The minimum length of the option is 4 - octets, and the length must always be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---- - | 45 | n | a1 | a2 | a3 | a4 | b1 | b2 | b3 | b4 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---- - -8.7. NetBIOS over TCP/IP Node Type Option - - The NetBIOS node type option allows NetBIOS over TCP/IP clients which - are configurable to be configured as described in RFC 1001/1002. 
The - value is specified as a single octet which identifies the client type - as follows: - - Value Node Type - ----- --------- - 0x1 B-node - 0x2 P-node - 0x4 M-node - 0x8 H-node - - - - - -Alexander & Droms Standards Track [Page 20] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - In the above chart, the notation '0x' indicates a number in base-16 - (hexadecimal). - - The code for this option is 46. The length of this option is always - 1. - - Code Len Node Type - +-----+-----+-----------+ - | 46 | 1 | see above | - +-----+-----+-----------+ - -8.8. NetBIOS over TCP/IP Scope Option - - The NetBIOS scope option specifies the NetBIOS over TCP/IP scope - parameter for the client as specified in RFC 1001/1002. See [19], - [20], and [8] for character-set restrictions. - - The code for this option is 47. The minimum length of this option is - 1. - - Code Len NetBIOS Scope - +-----+-----+-----+-----+-----+-----+---- - | 47 | n | s1 | s2 | s3 | s4 | ... - +-----+-----+-----+-----+-----+-----+---- - -8.9. X Window System Font Server Option - - This option specifies a list of X Window System [21] Font servers - available to the client. Servers SHOULD be listed in order of - preference. - - The code for this option is 48. The minimum length of this option is - 4 octets, and the length MUST be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+--- - | 48 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+--- - -8.10. X Window System Display Manager Option - - This option specifies a list of IP addresses of systems that are - running the X Window System Display Manager and are available to the - client. - - Addresses SHOULD be listed in order of preference. - - - - - -Alexander & Droms Standards Track [Page 21] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for this option is 49.
The minimum length of this option - is 4, and the length MUST be a multiple of 4. - - Code Len Address 1 Address 2 - - +-----+-----+-----+-----+-----+-----+-----+-----+--- - | 49 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+--- - -8.11. Network Information Service+ Domain Option - - This option specifies the name of the client's NIS+ [17] domain. The - domain is formatted as a character string consisting of characters - from the NVT ASCII character set. - - The code for this option is 64. Its minimum length is 1. - - Code Len NIS Client Domain Name - +-----+-----+-----+-----+-----+-----+--- - | 64 | n | n1 | n2 | n3 | n4 | ... - +-----+-----+-----+-----+-----+-----+--- - -8.12. Network Information Service+ Servers Option - - This option specifies a list of IP addresses indicating NIS+ servers - available to the client. Servers SHOULD be listed in order of - preference. - - The code for this option is 65. Its minimum length is 4, and the - length MUST be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 65 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.13. Mobile IP Home Agent option - - This option specifies a list of IP addresses indicating mobile IP - home agents available to the client. Agents SHOULD be listed in - order of preference. - - The code for this option is 68. Its minimum length is 0 (indicating - no home agents are available) and the length MUST be a multiple of 4. - It is expected that the usual length will be four octets, containing - a single home agent's address. - - - - - -Alexander & Droms Standards Track [Page 22] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - Code Len Home Agent Addresses (zero or more) - +-----+-----+-----+-----+-----+-----+-- - | 68 | n | a1 | a2 | a3 | a4 | ... - +-----+-----+-----+-----+-----+-----+-- - -8.14. 
Simple Mail Transport Protocol (SMTP) Server Option - - The SMTP server option specifies a list of SMTP servers available to - the client. Servers SHOULD be listed in order of preference. - - The code for the SMTP server option is 69. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 69 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.15. Post Office Protocol (POP3) Server Option - - The POP3 server option specifies a list of POP3 servers available to the - client. Servers SHOULD be listed in order of preference. - - The code for the POP3 server option is 70. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 70 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.16. Network News Transport Protocol (NNTP) Server Option - - The NNTP server option specifies a list of NNTP servers available to the - client. Servers SHOULD be listed in order of preference. - - The code for the NNTP server option is 71. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 71 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - - - - - -Alexander & Droms Standards Track [Page 23] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -8.17. Default World Wide Web (WWW) Server Option - - The WWW server option specifies a list of WWW servers available to the - client. Servers SHOULD be listed in order of preference. - - The code for the WWW server option is 72.
The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 72 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.18. Default Finger Server Option - - The Finger server option specifies a list of Finger servers available to the - client. Servers SHOULD be listed in order of preference. - - The code for the Finger server option is 73. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 73 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.19. Default Internet Relay Chat (IRC) Server Option - - The IRC server option specifies a list of IRC servers available to the - client. Servers SHOULD be listed in order of preference. - - The code for the IRC server option is 74. The minimum length for - this option is 4 octets, and the length MUST always be a multiple of - 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 74 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.20. StreetTalk Server Option - - The StreetTalk server option specifies a list of StreetTalk servers - available to the client. Servers SHOULD be listed in order of - preference. - - - - -Alexander & Droms Standards Track [Page 24] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for the StreetTalk server option is 75. The minimum length - for this option is 4 octets, and the length MUST always be a multiple - of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 75 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -8.21.
StreetTalk Directory Assistance (STDA) Server Option - - The StreetTalk Directory Assistance (STDA) server option specifies a - list of STDA servers available to the client. Servers SHOULD be - listed in order of preference. - - The code for the StreetTalk Directory Assistance server option is 76. - The minimum length for this option is 4 octets, and the length MUST - always be a multiple of 4. - - Code Len Address 1 Address 2 - +-----+-----+-----+-----+-----+-----+-----+-----+-- - | 76 | n | a1 | a2 | a3 | a4 | a1 | a2 | ... - +-----+-----+-----+-----+-----+-----+-----+-----+-- - -9. DHCP Extensions - - This section details the options that are specific to DHCP. - -9.1. Requested IP Address - - This option is used in a client request (DHCPDISCOVER) to allow the - client to request that a particular IP address be assigned. - - The code for this option is 50, and its length is 4. - - Code Len Address - +-----+-----+-----+-----+-----+-----+ - | 50 | 4 | a1 | a2 | a3 | a4 | - +-----+-----+-----+-----+-----+-----+ - -9.2. IP Address Lease Time - - This option is used in a client request (DHCPDISCOVER or DHCPREQUEST) - to allow the client to request a lease time for the IP address. In a - server reply (DHCPOFFER), a DHCP server uses this option to specify - the lease time it is willing to offer. - - - - - -Alexander & Droms Standards Track [Page 25] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The time is in units of seconds, and is specified as a 32-bit - unsigned integer. - - The code for this option is 51, and its length is 4. - - Code Len Lease Time - +-----+-----+-----+-----+-----+-----+ - | 51 | 4 | t1 | t2 | t3 | t4 | - +-----+-----+-----+-----+-----+-----+ - -9.3. Option Overload - - This option is used to indicate that the DHCP 'sname' or 'file' - fields are being overloaded by using them to carry DHCP options. A - DHCP server inserts this option if the returned parameters will - exceed the usual space allotted for options. 
- - If this option is present, the client interprets the specified - additional fields after it concludes interpretation of the standard - option fields. - - The code for this option is 52, and its length is 1. Legal values - for this option are: - - Value Meaning - ----- -------- - 1 the 'file' field is used to hold options - 2 the 'sname' field is used to hold options - 3 both fields are used to hold options - - Code Len Value - +-----+-----+-----+ - | 52 | 1 |1/2/3| - +-----+-----+-----+ - -9.4 TFTP server name - - This option is used to identify a TFTP server when the 'sname' field - in the DHCP header has been used for DHCP options. - - The code for this option is 66, and its minimum length is 1. - - Code Len TFTP server - +-----+-----+-----+-----+-----+--- - | 66 | n | c1 | c2 | c3 | ... - +-----+-----+-----+-----+-----+--- - - - - - -Alexander & Droms Standards Track [Page 26] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -9.5 Bootfile name - - This option is used to identify a bootfile when the 'file' field in - the DHCP header has been used for DHCP options. - - The code for this option is 67, and its minimum length is 1. - - Code Len Bootfile name - +-----+-----+-----+-----+-----+--- - | 67 | n | c1 | c2 | c3 | ... - +-----+-----+-----+-----+-----+--- - -9.6. DHCP Message Type - - This option is used to convey the type of the DHCP message. The code - for this option is 53, and its length is 1. Legal values for this - option are: - - Value Message Type - ----- ------------ - 1 DHCPDISCOVER - 2 DHCPOFFER - 3 DHCPREQUEST - 4 DHCPDECLINE - 5 DHCPACK - 6 DHCPNAK - 7 DHCPRELEASE - 8 DHCPINFORM - - Code Len Type - +-----+-----+-----+ - | 53 | 1 | 1-9 | - +-----+-----+-----+ - -9.7. Server Identifier - - This option is used in DHCPOFFER and DHCPREQUEST messages, and may - optionally be included in the DHCPACK and DHCPNAK messages. 
DHCP - servers include this option in the DHCPOFFER in order to allow the - client to distinguish between lease offers. DHCP clients use the - contents of the 'server identifier' field as the destination address - for any DHCP messages unicast to the DHCP server. DHCP clients also - indicate which of several lease offers is being accepted by including - this option in a DHCPREQUEST message. - - The identifier is the IP address of the selected server. - - The code for this option is 54, and its length is 4. - - - -Alexander & Droms Standards Track [Page 27] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - Code Len Address - +-----+-----+-----+-----+-----+-----+ - | 54 | 4 | a1 | a2 | a3 | a4 | - +-----+-----+-----+-----+-----+-----+ - -9.8. Parameter Request List - - This option is used by a DHCP client to request values for specified - configuration parameters. The list of requested parameters is - specified as n octets, where each octet is a valid DHCP option code - as defined in this document. - - The client MAY list the options in order of preference. The DHCP - server is not required to return the options in the requested order, - but MUST try to insert the requested options in the order requested - by the client. - - The code for this option is 55. Its minimum length is 1. - - Code Len Option Codes - +-----+-----+-----+-----+--- - | 55 | n | c1 | c2 | ... - +-----+-----+-----+-----+--- - -9.9. Message - - This option is used by a DHCP server to provide an error message to a - DHCP client in a DHCPNAK message in the event of a failure. A client - may use this option in a DHCPDECLINE message to indicate why the - client declined the offered parameters. The message consists of n - octets of NVT ASCII text, which the client may display on an - available output device. - - The code for this option is 56 and its minimum length is 1. - - Code Len Text - +-----+-----+-----+-----+--- - | 56 | n | c1 | c2 | ...
- +-----+-----+-----+-----+--- - -9.10. Maximum DHCP Message Size - - This option specifies the maximum length DHCP message that it is - willing to accept. The length is specified as an unsigned 16-bit - integer. A client may use the maximum DHCP message size option in - DHCPDISCOVER or DHCPREQUEST messages, but should not use the option - in DHCPDECLINE messages. - - - - -Alexander & Droms Standards Track [Page 28] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The code for this option is 57, and its length is 2. The minimum - legal value is 576 octets. - - Code Len Length - +-----+-----+-----+-----+ - | 57 | 2 | l1 | l2 | - +-----+-----+-----+-----+ - -9.11. Renewal (T1) Time Value - - This option specifies the time interval from address assignment until - the client transitions to the RENEWING state. - - The value is in units of seconds, and is specified as a 32-bit - unsigned integer. - - The code for this option is 58, and its length is 4. - - Code Len T1 Interval - +-----+-----+-----+-----+-----+-----+ - | 58 | 4 | t1 | t2 | t3 | t4 | - +-----+-----+-----+-----+-----+-----+ - -9.12. Rebinding (T2) Time Value - - This option specifies the time interval from address assignment until - the client transitions to the REBINDING state. - - The value is in units of seconds, and is specified as a 32-bit - unsigned integer. - - The code for this option is 59, and its length is 4. - - Code Len T2 Interval - +-----+-----+-----+-----+-----+-----+ - | 59 | 4 | t1 | t2 | t3 | t4 | - +-----+-----+-----+-----+-----+-----+ - -9.13. Vendor class identifier - - This option is used by DHCP clients to optionally identify the vendor - type and configuration of a DHCP client. The information is a string - of n octets, interpreted by servers. Vendors may choose to define - specific vendor class identifiers to convey particular configuration - or other identification information about a client. 
For example, the - identifier may encode the client's hardware configuration. Servers - not equipped to interpret the class-specific information sent by a - client MUST ignore it (although it may be reported). Servers that - - - -Alexander & Droms Standards Track [Page 29] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - respond SHOULD only use option 43 to return the vendor-specific - information to the client. - - The code for this option is 60, and its minimum length is 1. - - Code Len Vendor class Identifier - +-----+-----+-----+-----+--- - | 60 | n | i1 | i2 | ... - +-----+-----+-----+-----+--- - -9.14. Client-identifier - - This option is used by DHCP clients to specify their unique - identifier. DHCP servers use this value to index their database of - address bindings. This value is expected to be unique for all - clients in an administrative domain. - - Identifiers SHOULD be treated as opaque objects by DHCP servers. - - The client identifier MAY consist of type-value pairs similar to the - 'htype'/'chaddr' fields defined in [3]. For instance, it MAY consist - of a hardware type and hardware address. In this case the type field - SHOULD be one of the ARP hardware types defined in STD2 [22]. A - hardware type of 0 (zero) should be used when the value field - contains an identifier other than a hardware address (e.g. a fully - qualified domain name). - - For correct identification of clients, each client's client- - identifier MUST be unique among the client-identifiers used on the - subnet to which the client is attached. Vendors and system - administrators are responsible for choosing client-identifiers that - meet this requirement for uniqueness. - - The code for this option is 61, and its minimum length is 2. - - Code Len Type Client-Identifier - +-----+-----+-----+-----+-----+--- - | 61 | n | t1 | i1 | i2 | ... 
- +-----+-----+-----+-----+-----+--- - - - - - - - - - - - - -Alexander & Droms Standards Track [Page 30] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -10. Defining new extensions - - The author of a new DHCP option will follow these steps to obtain - acceptance of the option as a part of the DHCP Internet Standard: - - 1. The author devises the new option. - 2. The author requests a number for the new option from IANA by - contacting: - Internet Assigned Numbers Authority (IANA) - USC/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, California 90292-6695 - - or by email as: iana@iana.org - - 3. The author documents the new option, using the newly obtained - option number, as an Internet Draft. - 4. The author submits the Internet Draft for review through the IETF - standards process as defined in "Internet Official Protocol - Standards" (STD 1). The new option will be submitted for eventual - acceptance as an Internet Standard. - 5. The new option progresses through the IETF standards process; the - new option will be reviewed by the Dynamic Host Configuration - Working Group (if that group still exists), or as an Internet - Draft not submitted by an IETF working group. - 6. If the new option fails to gain acceptance as an Internet - Standard, the assigned option number will be returned to IANA for - reassignment. - - This procedure for defining new extensions will ensure that: - - * allocation of new option numbers is coordinated from a single - authority, - * new options are reviewed for technical correctness and - appropriateness, and - * documentation for new options is complete and published. - -11. Acknowledgements - - The author thanks the many (and too numerous to mention!) members of - the DHC WG for their tireless and ongoing efforts in the development - of DHCP and this document. 
- - The efforts of J Allard, Mike Carney, Dave Lapp, Fred Lien and John - Mendonca in organizing DHCP interoperability testing sessions are - gratefully acknowledged. - - - - - -Alexander & Droms Standards Track [Page 31] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - The development of this document was supported in part by grants from - the Corporation for National Research Initiatives (CNRI), Bucknell - University and Sun Microsystems. - -12. References - - [1] Droms, R., "Dynamic Host Configuration Protocol", RFC 2131, - Bucknell University, March 1997. - - [2] Reynolds, J., "BOOTP Vendor Information Extensions", RFC 1497, - USC/Information Sciences Institute, August 1993. - - [3] Croft, W., and J. Gilmore, "Bootstrap Protocol", RFC 951, - Stanford University and Sun Microsystems, September 1985. - - [4] Braden, R., Editor, "Requirements for Internet Hosts - - Communication Layers", STD 3, RFC 1122, USC/Information Sciences - Institute, October 1989. - - [5] Mogul, J., and J. Postel, "Internet Standard Subnetting - Procedure", STD 5, RFC 950, USC/Information Sciences Institute, - August 1985. - - [6] Postel, J., and K. Harrenstien, "Time Protocol", STD 26, RFC - 868, USC/Information Sciences Institute, SRI, May 1983. - - [7] Postel, J., "Name Server", IEN 116, USC/Information Sciences - Institute, August 1979. - - [8] Mockapetris, P., "Domain Names - Implementation and - Specification", STD 13, RFC 1035, USC/Information Sciences - Institute, November 1987. - - [9] Postel, J., "Quote of the Day Protocol", STD 23, RFC 865, - USC/Information Sciences Institute, May 1983. - - [10] McLaughlin, L., "Line Printer Daemon Protocol", RFC 1179, The - Wollongong Group, August 1990. - - [11] Accetta, M., "Resource Location Protocol", RFC 887, CMU, - December 1983. - - [12] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, - DECWRL, Stanford University, November 1990. 
- - [13] Deering, S., "ICMP Router Discovery Messages", RFC 1256, - Xerox PARC, September 1991. - - - - -Alexander & Droms Standards Track [Page 32] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - - [14] Leffler, S. and M. Karels, "Trailer Encapsulations", RFC 893, - U. C. Berkeley, April 1984. - - [15] Hornig, C., "Standard for the Transmission of IP Datagrams over - Ethernet Networks", RFC 894, Symbolics, April 1984. - - [16] Postel, J. and J. Reynolds, "Standard for the Transmission of - IP Datagrams Over IEEE 802 Networks", RFC 1042, USC/Information - Sciences Institute, February 1988. - - [17] Sun Microsystems, "System and Network Administration", March - 1990. - - [18] Mills, D., "Internet Time Synchronization: The Network Time - Protocol", RFC 1305, UDEL, March 1992. - - [19] NetBIOS Working Group, "Protocol Standard for a NetBIOS Service - on a TCP/UDP transport: Concepts and Methods", STD 19, RFC 1001, - March 1987. - - [20] NetBIOS Working Group, "Protocol Standard for a NetBIOS Service - on a TCP/UDP transport: Detailed Specifications", STD 19, RFC - 1002, March 1987. - - [21] Scheifler, R., "FYI On the X Window System", FYI 6, RFC 1198, - MIT Laboratory for Computer Science, January 1991. - - [22] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, RFC 1700, - USC/Information Sciences Institute, July 1992. - -13. Security Considerations - - Security issues are not discussed in this memo. - - - - - - - - - - - - - - - - - - -Alexander & Droms Standards Track [Page 33] - -RFC 2132 DHCP Options and BOOTP Vendor Extensions March 1997 - - -14. Authors' Addresses - - Steve Alexander - Silicon Graphics, Inc. - 2011 N. 
Shoreline Boulevard - Mailstop 510 - Mountain View, CA 94043-1389 - - Phone: (415) 933-6172 - EMail: sca@engr.sgi.com - - - Ralph Droms - Bucknell University - Lewisburg, PA 17837 - - Phone: (717) 524-1145 - EMail: droms@bucknell.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Alexander & Droms Standards Track [Page 34] - diff --git a/kernel/picotcp/RFC/rfc2140.txt b/kernel/picotcp/RFC/rfc2140.txt deleted file mode 100644 index 200671c..0000000 --- a/kernel/picotcp/RFC/rfc2140.txt +++ /dev/null @@ -1,619 +0,0 @@ - - - - - - -Network Working Group J. Touch -Request for Comments: 2140 ISI -Category: Informational April 1997 - - - TCP Control Block Interdependence - -Status of this Memo - - This memo provides information for the Internet community. This memo - does not specify an Internet standard of any kind. Distribution of - this memo is unlimited. - - -Abstract - - This memo makes the case for interdependent TCP control blocks, where - part of the TCP state is shared among similar concurrent connections, - or across similar connection instances. TCP state includes a - combination of parameters, such as connection state, current round- - trip time estimates, congestion control information, and process - information. This state is currently maintained on a per-connection - basis in the TCP control block, but should be shared across - connections to the same host. The goal is to improve transient - transport performance, while maintaining backward-compatibility with - existing implementations. - - This document is a product of the LSAM project at ISI. - - -Introduction - - TCP is a connection-oriented reliable transport protocol layered over - IP [9]. Each TCP connection maintains state, usually in a data - structure called the TCP Control Block (TCB). The TCB contains - information about the connection state, its associated local process, - and feedback parameters about the connection's transmission - properties. 
As originally specified and usually implemented, the TCB - is maintained on a per-connection basis. This document discusses the - implications of that decision, and argues for an alternate - implementation that shares some of this state across similar - connection instances and among similar simultaneous connections. The - resulting implementation can have better transient performance, - especially for numerous short-lived and simultaneous connections, as - often used in the World-Wide Web [1]. These changes affect only the - TCB initialization, and so have no effect on the long-term behavior - of TCP after a connection has been established. - - - - -Touch Informational [Page 1] - -RFC 2140 TCP Control Block Interdependence April 1997 - - -The TCP Control Block (TCB) - - A TCB is associated with each connection, i.e., with each association - of a pair of applications across the network. The TCB can be - summarized as containing [9]: - - - Local process state - - pointers to send and receive buffers - pointers to retransmission queue and current segment - pointers to Internet Protocol (IP) PCB - - Per-connection shared state - - macro-state - - connection state - timers - flags - local and remote host numbers and ports - - micro-state - - send and receive window state (size*, current number) - round-trip time and variance - cong. window size* - cong. window size threshold* - max windows seen* - MSS# - round-trip time and variance# - - - The per-connection information is shown as split into macro-state and - micro-state, terminology borrowed from [5]. Macro-state describes the - finite state machine; we include the endpoint numbers and components - (timers, flags) used to help maintain that state. This includes the - protocol for establishing and maintaining shared state about the - connection. Micro-state describes the protocol after a connection has - been established, to maintain the reliability and congestion control - of the data transferred in the connection. 
- - We further distinguish two other classes of shared micro-state that - are associated more with host-pairs than with application pairs. One - class is clearly host-pair dependent (#, e.g., MSS, RTT), and the - other is host-pair dependent in its aggregate (*, e.g., cong. window - info., curr. window sizes). - - - - -Touch Informational [Page 2] - -RFC 2140 TCP Control Block Interdependence April 1997 - - -TCB Interdependence - - The observation that some TCB state is host-pair specific rather than - application-pair dependent is not new, and is a common engineering - decision in layered protocol implementations. A discussion of sharing - RTT information among protocols layered over IP, including UDP and - TCP, occurred in [8]. T/TCP uses caches to maintain TCB information - across instances, e.g., smoothed RTT, RTT variance, congestion - avoidance threshold, and MSS [3]. These values are in addition to - connection counts used by T/TCP to accelerate data delivery prior to - the full three-way handshake during an OPEN. The goal is to aggregate - TCB components where they reflect one association - that of the - host-pair, rather than artificially separating those components by - connection. - - At least one current T/TCP implementation saves the MSS and - aggregates the RTT parameters across multiple connections, but omits - caching the congestion window information [4], as originally - specified in [2]. There may be other values that may be cached, such - as current window size, to permit new connections full access to - accumulated channel resources. - - We observe that there are two cases of TCB interdependence. Temporal - sharing occurs when the TCB of an earlier (now CLOSED) connection to - a host is used to initialize some parameters of a new connection to - that same host. Ensemble sharing occurs when a currently active - connection to a host is used to initialize another (concurrent) - connection to that host. 
T/TCP documents considered the temporal - case; we consider both. - -An Example of Temporal Sharing - - Temporal sharing of cached TCB data has been implemented in the SunOS - 4.1.3 T/TCP extensions [4] and the FreeBSD port of same [7]. As - mentioned before, only the MSS and RTT parameters are cached, as - originally specified in [2]. Later discussion of T/TCP suggested - including congestion control parameters in this cache [3]. - - The cache is accessed in two ways: it is read to initialize new TCBs, - and written when more current per-host state is available. New TCBs - are initialized as follows; snd_cwnd reuse is not yet implemented, - although discussed in the T/TCP concepts [2]: - - - - - - - - - -Touch Informational [Page 3] - -RFC 2140 TCP Control Block Interdependence April 1997 - - - TEMPORAL SHARING - TCB Initialization - - Cached TCB New TCB - ---------------------------------------- - old-MSS old-MSS - - old-RTT old-RTT - - old-RTTvar old-RTTvar - - old-snd_cwnd old-snd_cwnd (not yet impl.) - - - Most cached TCB values are updated when a connection closes. An - exception is MSS, which is updated whenever the MSS option is - received in a TCP header. - - - TEMPORAL SHARING - Cache Updates - - Cached TCB Current TCB when? New Cached TCB - --------------------------------------------------------------- - old-MSS curr-MSS MSSopt curr-MSS - - old-RTT curr-RTT CLOSE old += (curr - old) >> 2 - - old-RTTvar curr-RTTvar CLOSE old += (curr - old) >> 2 - - old-snd_cwnd curr-snd_cwnd CLOSE curr-snd_cwnd (not yet impl.) - - MSS caching is trivial; reported values are cached, and the most - recent value is used. The cache is updated when the MSS option is - received, so the cache always has the most recent MSS value from any - connection. The cache is consulted only at connection establishment, - and not otherwise updated, which means that MSS options do not affect - current connections. 
The default MSS is never saved; only reported - MSS values update the cache, so an explicit override is required to - reduce the MSS. - - RTT values are updated by a more complicated mechanism [3], [8]. - Dynamic RTT estimation requires a sequence of RTT measurements, even - though a single T/TCP transaction may not accumulate enough samples. - As a result, the cached RTT (and its variance) is an average of its - previous value with the contents of the currently active TCB for that - host, when a TCB is closed. RTT values are updated only when a - connection is closed. Further, the method for averaging the RTT - values is not the same as the method for computing the RTT values - within a connection, so that the cached value may not be appropriate. - - - -Touch Informational [Page 4] - -RFC 2140 TCP Control Block Interdependence April 1997 - - - For temporal sharing, the cache requires updating only when a - connection closes, because the cached values will not yet be used to - initialize a new TCB. For the ensemble sharing, this is not the case, - as discussed below. - - Other TCB variables may also be cached between sequential instances, - such as the congestion control window information. Old cache values - can be overwritten with the current TCB estimates, or a MAX or MIN - function can be used to merge the results, depending on the optimism - or pessimism of the reused values. For example, the congestion window - can be reused if there are no concurrent connections. - -An Example of Ensemble Sharing - - Sharing cached TCB data across concurrent connections requires - attention to the aggregate nature of some of the shared state. - Although MSS and RTT values can be shared by copying, it may not be - appropriate to copy congestion window information. 
At this point, we - present only the MSS and RTT rules: - - - ENSEMBLE SHARING - TCB Initialization - - Cached TCB New TCB - ---------------------------------- - old-MSS old-MSS - - old-RTT old-RTT - - old-RTTvar old-RTTvar - - - - ENSEMBLE SHARING - Cache Updates - - Cached TCB Current TCB when? New Cached TCB - ----------------------------------------------------------- - old-MSS curr-MSS MSSopt curr-MSS - - old-RTT curr-RTT update rtt_update(old,curr) - - old-RTTvar curr-RTTvar update rtt_update(old,curr) - - - For ensemble sharing, TCB information should be cached as early as - possible, sometimes before a connection is closed. Otherwise, opening - multiple concurrent connections may not result in TCB data sharing if - no connection closes before others open. An optimistic solution would - - - -Touch Informational [Page 5] - -RFC 2140 TCP Control Block Interdependence April 1997 - - - be to update cached data as early as possible, rather than only when - a connection is closing. Some T/TCP implementations do this for MSS - when the TCP MSS header option is received [4], although it is not - addressed specifically in the concepts or functional specification - [2][3]. - - In current T/TCP, RTT values are updated only after a CLOSE, which - does not benefit concurrent sessions. As mentioned in the temporal - case, averaging values between concurrent connections requires - incorporating new RTT measurements. The amount of work involved in - updating the aggregate average should be minimized, but the resulting - value should be equivalent to having all values measured within a - single connection. The function "rtt_update" in the ensemble sharing - table indicates this operation, which occurs whenever the RTT would - have been updated in the individual TCP connection. As a result, the - cache contains the shared RTT variables, which no longer need to - reside in the TCB [8]. - - Congestion window size aggregation is more complicated in the - concurrent case. 
When there is an ensemble of connections, we need - to decide how that ensemble would have shared the congestion window, - in order to derive initial values for new TCBs. Because concurrent - connections between two hosts share network paths (usually), they - also share whatever capacity exists along that path. With regard to - congestion, the set of connections might behave as if it were - multiplexed prior to TCP, as if all data were part of a single - connection. As a result, the current window sizes would maintain a - constant sum, presuming sufficient offered load. This would go beyond - caching to truly sharing state, as in the RTT case. - - We pause to note that any assumption of this sharing can be - incorrect, including this one. In current implementations, new - congestion windows are set at an initial value of one segment, so - that the sum of the current windows is increased for any new - connection. This can have detrimental consequences where several - connections share a highly congested link, such as in trans-Atlantic - Web access. - - There are several ways to initialize the congestion window in a new - TCB among an ensemble of current connections to a host, as shown - below. Current TCP implementations initialize it to one segment [9], - and T/TCP hinted that it should be initialized to the old window size - [3]. In the former, the assumption is that new connections should - behave as conservatively as possible. In the latter, no accommodation - is made to concurrent aggregate behavior. - - In either case, the sum of window sizes can increase, rather than - remain constant. Another solution is to give each pending connection - - - -Touch Informational [Page 6] - -RFC 2140 TCP Control Block Interdependence April 1997 - - - its "fair share" of the available congestion window, and let the - connections balance from there. 
The assumption we make here is that - new connections are implicit requests for an equal share of available - link bandwidth which should be granted at the expense of current - connections. This may or may not be the appropriate function; we - propose that it be examined further. - - - ENSEMBLE SHARING - TCB Initialization - Some Options for Sharing Window-size - - Cached TCB New TCB - ----------------------------------------------------------------- - old-snd_cwnd (current) one segment - - (T/TCP hint) old-snd_cwnd - - (proposed) old-snd_cwnd/(N+1) - subtract old-snd_cwnd/(N+1)/N - from each concurrent - - - ENSEMBLE SHARING - Cache Updates - - Cached TCB Current TCB when? New Cached TCB - ---------------------------------------------------------------- - old-snd_cwnd curr-snd_cwnd update (adjust sum as appropriate) - - -Compatibility Issues - - Current TCP implementations do not use TCB caching, with the - exception of T/TCP variants [4][7]. New connections use the default - initial values of all non-instantiated TCB variables. As a result, - each connection calculates its own RTT measurements, MSS value, and - congestion information. Eventually these values are updated for each - connection. - - For the congestion and current window information, the initial values - may not be consistent with the long-term aggregate behavior of a set - of concurrent connections. If a single connection has a window of 4 - segments, new connections assume initial windows of 1 segment (the - minimum), although the current connection's window doesn't decrease - to accommodate this additional load. As a result, connections can - mutually interfere. One example of this has been seen on trans- - Atlantic links, where concurrent connections supporting Web traffic - can collide because their initial windows are too large, even when - set at one segment. 
- - - -Touch Informational [Page 7] - -RFC 2140 TCP Control Block Interdependence April 1997 - - - Because this proposal attempts to anticipate the aggregate steady- - state values of TCB state among a group or over time, it should avoid - the transient effects of new connections. In addition, because it - considers the ensemble and temporal properties of those aggregates, - it should also prevent the transients of short-lived or multiple - concurrent connections from adversely affecting the overall network - performance. We are performing analysis and experiments to validate - these assumptions. - -Performance Considerations - - Here we attempt to optimize transient behavior of TCP without - modifying its long-term properties. The predominant expense is in - maintaining the cached values, or in using per-host state rather than - per-connection state. In cases where performance is affected, - however, we note that the per-host information can be kept in per- - connection copies (as done now), because with higher performance - should come less interference between concurrent connections. - - Sharing TCB state can occur only at connection establishment and - close (to update the cache), to minimize overhead, optimize transient - behavior, and minimize the effect on the steady-state. It is possible - that sharing state during a connection, as in the RTT or window-size - variables, may be of benefit, provided its implementation cost is not - high. - -Implications - - There are several implications to incorporating TCB interdependence - in TCP implementations. First, it may prevent the need for - application-layer multiplexing for performance enhancement [6]. - Protocols like persistent-HTTP avoid connection reestablishment costs - by serializing or multiplexing a set of per-host connections across a - single TCP connection. This avoids TCP's per-connection OPEN - handshake, and also avoids recomputing MSS, RTT, and congestion - windows. 
By avoiding the so-called, "slow-start restart," performance - can be optimized. Our proposal provides the MSS, RTT, and OPEN - handshake avoidance of T/TCP, and the "slow-start restart avoidance" - of multiplexing, without requiring a multiplexing mechanism at the - application layer. This multiplexing will be complicated when - quality-of-service mechanisms (e.g., "integrated services - scheduling") are provided later. - - Second, we are attempting to push some of the TCP implementation from - the traditional transport layer (in the ISO model [10]), to the - network layer. This acknowledges that some state currently maintained - as per-connection is in fact per-path, which we simplify as per- - host-pair. Transport protocols typically manage per-application-pair - - - -Touch Informational [Page 8] - -RFC 2140 TCP Control Block Interdependence April 1997 - - - associations (per stream), and network protocols manage per-path - associations (routing). Round-trip time, MSS, and congestion - information is more appropriately handled in a network-layer fashion, - aggregated among concurrent connections, and shared across connection - instances. - - An earlier version of RTT sharing suggested implementing RTT state at - the IP layer, rather than at the TCP layer [8]. Our observations are - for sharing state among TCP connections, which avoids some of the - difficulties in an IP-layer solution. One such problem is determining - the associated prior outgoing packet for an incoming packet, to infer - RTT from the exchange. Because RTTs are still determined inside the - TCP layer, this is simpler than at the IP layer. This is a case where - information should be computed at the transport layer, but shared at - the network layer. - - We also note that per-host-pair associations are not the limit of - these techniques. It is possible that TCBs could be similarly shared - between hosts on a LAN, because the predominant path can be LAN-LAN, - rather than host-host. 
- - There may be other information that can be shared between concurrent - connections. For example, knowing that another connection has just - tried to expand its window size and failed, a connection may not - attempt to do the same for some period. The idea is that existing TCP - implementations infer the behavior of all competing connections, - including those within the same host or LAN. One possible - optimization is to make that implicit feedback explicit, via extended - information in the per-host TCP area. - -Security Considerations - - These suggested implementation enhancements do not have additional - ramifications for direct attacks. These enhancements may be - susceptible to denial-of-service attacks if not otherwise secured. - For example, an application can open a connection and set its window - size to 0, denying service to any other subsequent connection between - those hosts. - - TCB sharing may be susceptible to denial-of-service attacks, wherever - the TCB is shared, between connections in a single host, or between - hosts if TCB sharing is implemented on the LAN (see Implications - section). Some shared TCB parameters are used only to create new - TCBs, others are shared among the TCBs of ongoing connections. New - connections can join the ongoing set, e.g., to optimize send window - size among a set of connections to the same host. - - - - - -Touch Informational [Page 9] - -RFC 2140 TCP Control Block Interdependence April 1997 - - - Attacks on parameters used only for initialization affect only the - transient performance of a TCP connection. For short connections, - the performance ramification can approach that of a denial-of-service - attack. E.g., if an application changes its TCB to have a false and - small window size, subsequent connections would experience - performance degradation until their window grew appropriately. - - The solution is to limit the effect of compromised TCB values. 
TCBs - are compromised when they are modified directly by an application or - transmitted between hosts via unauthenticated means (e.g., by using a - dirty flag). TCBs that are not compromised by application - modification do not have any unique security ramifications. Note that - the proposed parameters for TCB sharing are not currently modifiable - by an application. - - All shared TCBs MUST be validated against default minimum parameters - before used for new connections. This validation would not impact - performance, because it occurs only at TCB initialization. This - limits the effect of attacks on new connections, to reducing the - benefit of TCB sharing, resulting in the current default TCP - performance. For ongoing connections, the effect of incoming packets - on shared information should be both limited and validated against - constraints before use. This is a beneficial precaution for existing - TCP implementations as well. - - TCBs modified by an application SHOULD not be shared, unless the new - connection sharing the compromised information has been given - explicit permission to use such information by the connection API. No - mechanism for that indication currently exists, but it could be - supported by an augmented API. This sharing restriction SHOULD be - implemented in both the host and the LAN. Sharing on a LAN SHOULD - utilize authentication to prevent undetected tampering of shared TCB - parameters. These restrictions limit the security impact of modified - TCBs both for connection initialization and for ongoing connections. - - Finally, shared values MUST be limited to performance factors only. - Other information, such as TCP sequence numbers, when shared, are - already known to compromise security. 
- -Acknowledgements - - The author would like to thank the members of the High-Performance - Computing and Communications Division at ISI, notably Bill Manning, - Bob Braden, Jon Postel, Ted Faber, and Cliff Neuman for their - assistance in the development of this memo. - - - - - - -Touch Informational [Page 10] - -RFC 2140 TCP Control Block Interdependence April 1997 - - -References - - [1] Berners-Lee, T., et al., "The World-Wide Web," Communications of - the ACM, V37, Aug. 1994, pp. 76-82. - - [2] Braden, R., "Transaction TCP -- Concepts," RFC-1379, - USC/Information Sciences Institute, September 1992. - - [3] Braden, R., "T/TCP -- TCP Extensions for Transactions Functional - Specification," RFC-1644, USC/Information Sciences Institute, - July 1994. - - [4] Braden, B., "T/TCP -- Transaction TCP: Source Changes for Sun OS - 4.1.3,", Release 1.0, USC/ISI, September 14, 1994. - - [5] Comer, D., and Stevens, D., Internetworking with TCP/IP, V2, - Prentice-Hall, NJ, 1991. - - [6] Fielding, R., et al., "Hypertext Transfer Protocol -- HTTP/1.1," - Work in Progress. - - [7] FreeBSD source code, Release 2.10, . - - [8] Jacobson, V., (mail to public list "tcp-ip", no archive found), - 1986. - - [9] Postel, Jon, "Transmission Control Protocol," Network Working - Group RFC-793/STD-7, ISI, Sept. 1981. - - [10] Tannenbaum, A., Computer Networks, Prentice-Hall, NJ, 1988. - -Author's Address - - Joe Touch - University of Southern California/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292-6695 - USA - Phone: +1 310-822-1511 x151 - Fax: +1 310-823-6714 - URL: http://www.isi.edu/~touch - Email: touch@isi.edu - - - - - - - - - -Touch Informational [Page 11] - diff --git a/kernel/picotcp/RFC/rfc2347.txt b/kernel/picotcp/RFC/rfc2347.txt deleted file mode 100644 index dbc0f8c..0000000 --- a/kernel/picotcp/RFC/rfc2347.txt +++ /dev/null @@ -1,395 +0,0 @@ - - - - - - -Network Working Group G. 
Malkin -Request for Commments: 2347 Bay Networks -Updates: 1350 A. Harkin -Obsoletes: 1782 Hewlett Packard Co. -Category: Standards Track May 1998 - - - TFTP Option Extension - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -Abstract - - The Trivial File Transfer Protocol [1] is a simple, lock-step, file - transfer protocol which allows a client to get or put a file onto a - remote host. This document describes a simple extension to TFTP to - allow option negotiation prior to the file transfer. - -Introduction - - The option negotiation mechanism proposed in this document is a - backward-compatible extension to the TFTP protocol. It allows file - transfer options to be negotiated prior to the transfer using a - mechanism which is consistent with TFTP's Request Packet format. The - mechanism is kept simple by enforcing a request-respond-acknowledge - sequence, similar to the lock-step approach taken by TFTP itself. - - While the option negotiation mechanism is general purpose, in that - many types of options may be negotiated, it was created to support - the Blocksize option defined in [2]. Additional options are defined - in [3]. - -Packet Formats - - TFTP options are appended to the Read Request and Write Request - packets. A new type of TFTP packet, the Option Acknowledgment - (OACK), is used to acknowledge a client's option negotiation request. - A new error code, 8, is hereby defined to indicate that a transfer - - - -Malkin & Harkin Standards Track [Page 1] - -RFC 2347 TFTP Option Extension May 1998 - - - should be terminated due to option negotiation. 
- - Options are appended to a TFTP Read Request or Write Request packet - as follows: - - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+--> - | opc |filename| 0 | mode | 0 | opt1 | 0 | value1 | 0 | < - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+--> - - >-------+---+---~~---+---+ - < optN | 0 | valueN | 0 | - >-------+---+---~~---+---+ - - opc - The opcode field contains either a 1, for Read Requests, or 2, - for Write Requests, as defined in [1]. - - filename - The name of the file to be read or written, as defined in [1]. - This is a NULL-terminated field. - - mode - The mode of the file transfer: "netascii", "octet", or "mail", - as defined in [1]. This is a NULL-terminated field. - - opt1 - The first option, in case-insensitive ASCII (e.g., blksize). - This is a NULL-terminated field. - - value1 - The value associated with the first option, in case- - insensitive ASCII. This is a NULL-terminated field. - - optN, valueN - The final option/value pair. Each NULL-terminated field is - specified in case-insensitive ASCII. - - The options and values are all NULL-terminated, in keeping with the - original request format. If multiple options are to be negotiated, - they are appended to each other. The order in which options are - specified is not significant. The maximum size of a request packet - is 512 octets. - - The OACK packet has the following format: - - - - - - - -Malkin & Harkin Standards Track [Page 2] - -RFC 2347 TFTP Option Extension May 1998 - - - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+ - | opc | opt1 | 0 | value1 | 0 | optN | 0 | valueN | 0 | - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+ - - opc - The opcode field contains a 6, for Option Acknowledgment. - - opt1 - The first option acknowledgment, copied from the original - request. - - value1 - The acknowledged value associated with the first option. 
If - and how this value may differ from the original request is - detailed in the specification for the option. - - optN, valueN - The final option/value acknowledgment pair. - -Negotiation Protocol - - The client appends options at the end of the Read Request or Write - request packet, as shown above. Any number of options may be - specified; however, an option may only be specified once. The order - of the options is not significant. - - If the server supports option negotiation, and it recognizes one or - more of the options specified in the request packet, the server may - respond with an Options Acknowledgment (OACK). Each option the - server recognizes, and accepts the value for, is included in the - OACK. Some options may allow alternate values to be proposed, but - this is an option specific feature. The server must not include in - the OACK any option which had not been specifically requested by the - client; that is, only the client may initiate option negotiation. - Options which the server does not support should be omitted from the - OACK; they should not cause an ERROR packet to be generated. If the - value of a supported option is invalid, the specification for that - option will indicate whether the server should simply omit the option - from the OACK, respond with an alternate value, or send an ERROR - packet, with error code 8, to terminate the transfer. - - An option not acknowledged by the server must be ignored by the - client and server as if it were never requested. If multiple options - were requested, the client must use those options which were - acknowledged by the server and must not use those options which were - not acknowledged by the server. 
- - - - - -Malkin & Harkin Standards Track [Page 3] - -RFC 2347 TFTP Option Extension May 1998 - - - When the client appends options to the end of a Read Request packet, - three possible responses may be returned by the server: - - OACK - acknowledge of Read Request and the options; - - DATA - acknowledge of Read Request, but not the options; - - ERROR - the request has been denied. - - When the client appends options to the end of a Write Request packet, - three possible responses may be returned by the server: - - OACK - acknowledge of Write Request and the options; - - ACK - acknowledge of Write Request, but not the options; - - ERROR - the request has been denied. - - If a server implementation does not support option negotiation, it - will likely ignore any options appended to the client's request. In - this case, the server will return a DATA packet for a Read Request - and an ACK packet for a Write Request establishing normal TFTP data - transfer. In the event that a server returns an error for a request - which carries an option, the client may attempt to repeat the request - without appending any options. This implementation option would - handle servers which consider extraneous data in the request packet - to be erroneous. - - Depending on the original transfer request there are two ways for a - client to confirm acceptance of a server's OACK. If the transfer was - initiated with a Read Request, then an ACK (with the data block - number set to 0) is sent by the client to confirm the values in the - server's OACK packet. If the transfer was initiated with a Write - Request, then the client begins the transfer with the first DATA - packet, using the negotiated values. If the client rejects the OACK, - then it sends an ERROR packet, with error code 8, to the server and - the transfer is terminated. 
- - Once a client acknowledges an OACK, with an appropriate non-error - response, that client has agreed to use only the options and values - returned by the server. Remember that the server cannot request an - option; it can only respond to them. If the client receives an OACK - containing an unrequested option, it should respond with an ERROR - packet, with error code 8, and terminate the transfer. - - - - - - - -Malkin & Harkin Standards Track [Page 4] - -RFC 2347 TFTP Option Extension May 1998 - - -Examples - - Read Request - - client server - ------------------------------------------------------- - |1|foofile|0|octet|0|blksize|0|1432|0| --> RRQ - <-- |6|blksize|0|1432|0| OACK - |4|0| --> ACK - <-- |3|1| 1432 octets of data | DATA - |4|1| --> ACK - <-- |3|2| 1432 octets of data | DATA - |4|2| --> ACK - <-- |3|3|<1432 octets of data | DATA - |4|3| --> ACK - - Write Request - - client server - ------------------------------------------------------- - |2|barfile|0|octet|0|blksize|0|2048|0| --> RRQ - <-- |6|blksize|0|2048|0| OACK - |3|1| 2048 octets of data | --> DATA - <-- |4|1| ACK - |3|2| 2048 octets of data | --> DATA - <-- |4|2| ACK - |3|3|<2048 octets of data | --> DATA - <-- |4|3| ACK - -Security Considerations - - The basic TFTP protocol has no security mechanism. This is why it - has no rename, delete, or file overwrite capabilities. This document - does not add any security to TFTP; however, the specified extensions - do not add any additional security risks. - -References - - [1] Sollins, K., "The TFTP Protocol (Revision 2)", STD 33, RFC 1350, - October 1992. - - [2] Malkin, G., and A. Harkin, "TFTP Blocksize Option", RFC 2348, - May 1998. - - [3] Malkin, G., and A. Harkin, "TFTP Timeout Interval and Transfer - Size Options", RFC 2349, May 1998. 
- - - - - -Malkin & Harkin Standards Track [Page 5] - -RFC 2347 TFTP Option Extension May 1998 - - -Authors' Addresses - - Gary Scott Malkin - Bay Networks - 8 Federal Street - Billerica, MA 01821 - - Phone: (978) 916-4237 - EMail: gmalkin@baynetworks.com - - - Art Harkin - Internet Services Project - Information Networks Division - 19420 Homestead Road MS 43LN - Cupertino, CA 95014 - - Phone: (408) 447-3755 - EMail: ash@cup.hp.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Malkin & Harkin Standards Track [Page 6] - -RFC 2347 TFTP Option Extension May 1998 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Malkin & Harkin Standards Track [Page 7] - diff --git a/kernel/picotcp/RFC/rfc2349.txt b/kernel/picotcp/RFC/rfc2349.txt deleted file mode 100644 index 31abb3e..0000000 --- a/kernel/picotcp/RFC/rfc2349.txt +++ /dev/null @@ -1,283 +0,0 @@ - - - - - - -Network Working Group G. Malkin -Request for Commments: 2349 Bay Networks -Updates: 1350 A. Harkin -Obsoletes: 1784 Hewlett Packard Co. -Category: Standards Track May 1998 - - - TFTP Timeout Interval and Transfer Size Options - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -Abstract - - The Trivial File Transfer Protocol [1] is a simple, lock-step, file - transfer protocol which allows a client to get or put a file onto a - remote host. - - This document describes two TFTP options. The first allows the client - and server to negotiate the Timeout Interval. The second allows the - side receiving the file to determine the ultimate size of the - transfer before it begins. The TFTP Option Extension mechanism is - described in [2]. 
- -Timeout Interval Option Specification - - The TFTP Read Request or Write Request packet is modified to include - the timeout option as follows: - - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+ - | opc |filename| 0 | mode | 0 | timeout| 0 | #secs | 0 | - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+ - - opc - The opcode field contains either a 1, for Read Requests, or 2, - for Write Requests, as defined in [1]. - - - - - - -Malkin & Harkin Standards Track [Page 1] - -RFC 2349 TFTP Timeout Interval and Transfer Size Options May 1998 - - - filename - The name of the file to be read or written, as defined in [1]. - This is a NULL-terminated field. - - mode - The mode of the file transfer: "netascii", "octet", or "mail", - as defined in [1]. This is a NULL-terminated field. - - timeout - The Timeout Interval option, "timeout" (case in-sensitive). - This is a NULL-terminated field. - - #secs - The number of seconds to wait before retransmitting, specified - in ASCII. Valid values range between "1" and "255" seconds, - inclusive. This is a NULL-terminated field. - - For example: - - +-------+--------+---+--------+---+--------+---+-------+---+ - | 1 | foobar | 0 | octet | 0 | timeout| 0 | 1 | 0 | - +-------+--------+---+--------+---+--------+---+-------+---+ - - is a Read Request, for the file named "foobar", in octet (binary) - transfer mode, with a timeout interval of 1 second. - - If the server is willing to accept the timeout option, it sends an - Option Acknowledgment (OACK) to the client. The specified timeout - value must match the value specified by the client. 
- -Transfer Size Option Specification - - The TFTP Read Request or Write Request packet is modified to include - the tsize option as follows: - - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+ - | opc |filename| 0 | mode | 0 | tsize | 0 | size | 0 | - +-------+---~~---+---+---~~---+---+---~~---+---+---~~---+---+ - - opc - The opcode field contains either a 1, for Read Requests, or 2, - for Write Requests, as defined in [1]. - - filename - The name of the file to be read or written, as defined in [1]. - This is a NULL-terminated field. - - - - - -Malkin & Harkin Standards Track [Page 2] - -RFC 2349 TFTP Timeout Interval and Transfer Size Options May 1998 - - - mode - The mode of the file transfer: "netascii", "octet", or "mail", - as defined in [1]. This is a NULL-terminated field. - - tsize - The Transfer Size option, "tsize" (case in-sensitive). This is - a NULL-terminated field. - - size - The size of the file to be transfered. This is a NULL- - terminated field. - - For example: - - +-------+--------+---+--------+---+--------+---+--------+---+ - | 2 | foobar | 0 | octet | 0 | tsize | 0 | 673312 | 0 | - +-------+--------+---+--------+---+--------+---+--------+---+ - - is a Write Request, with the 673312-octet file named "foobar", in - octet (binary) transfer mode. - - In Read Request packets, a size of "0" is specified in the request - and the size of the file, in octets, is returned in the OACK. If the - file is too large for the client to handle, it may abort the transfer - with an Error packet (error code 3). In Write Request packets, the - size of the file, in octets, is specified in the request and echoed - back in the OACK. If the file is too large for the server to handle, - it may abort the transfer with an Error packet (error code 3). - -Security Considerations - - The basic TFTP protocol has no security mechanism. This is why it - has no rename, delete, or file overwrite capabilities. 
This document - does not add any security to TFTP; however, the specified extensions - do not add any additional security risks. - -References - - [1] Sollins, K., "The TFTP Protocol (Revision 2)", STD 33, RFC 1350, - October 92. - - [2] Malkin, G., and A. Harkin, "TFTP Option Extension", RFC 2347, - May 1998. - - - - - - - - -Malkin & Harkin Standards Track [Page 3] - -RFC 2349 TFTP Timeout Interval and Transfer Size Options May 1998 - - -Authors' Addresses - - Gary Scott Malkin - Bay Networks - 8 Federal Street - Billerica, MA 01821 - - Phone: (978) 916-4237 - EMail: gmalkin@baynetworks.com - - - Art Harkin - Internet Services Project - Information Networks Division - 19420 Homestead Road MS 43LN - Cupertino, CA 95014 - - Phone: (408) 447-3755 - EMail: ash@cup.hp.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Malkin & Harkin Standards Track [Page 4] - -RFC 2349 TFTP Timeout Interval and Transfer Size Options May 1998 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. 
- - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Malkin & Harkin Standards Track [Page 5] - diff --git a/kernel/picotcp/RFC/rfc2385.txt b/kernel/picotcp/RFC/rfc2385.txt deleted file mode 100644 index deeb0de..0000000 --- a/kernel/picotcp/RFC/rfc2385.txt +++ /dev/null @@ -1,339 +0,0 @@ - - - - - - -Network Working Group A. Heffernan -Request for Comments: 2385 cisco Systems -Category: Standards Track August 1998 - - - Protection of BGP Sessions via the TCP MD5 Signature Option - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -IESG Note - - This document describes currrent existing practice for securing BGP - against certain simple attacks. It is understood to have security - weaknesses against concerted attacks. - -Abstract - - This memo describes a TCP extension to enhance security for BGP. It - defines a new TCP option for carrying an MD5 [RFC1321] digest in a - TCP segment. This digest acts like a signature for that segment, - incorporating information known only to the connection end points. 
- Since BGP uses TCP as its transport, using this option in the way - described in this paper significantly reduces the danger from certain - security attacks on BGP. - -1.0 Introduction - - The primary motivation for this option is to allow BGP to protect - itself against the introduction of spoofed TCP segments into the - connection stream. Of particular concern are TCP resets. - - To spoof a connection using the scheme described in this paper, an - attacker would not only have to guess TCP sequence numbers, but would - also have had to obtain the password included in the MD5 digest. - This password never appears in the connection stream, and the actual - form of the password is up to the application. It could even change - - - - - -Heffernan Standards Track [Page 1] - -RFC 2385 TCP MD5 Signature Option August 1998 - - - during the lifetime of a particular connection so long as this change - was synchronized on both ends (although retransmission can become - problematical in some TCP implementations with changing passwords). - - Finally, there is no negotiation for the use of this option in a - connection, rather it is purely a matter of site policy whether or - not its connections use the option. - -2.0 Proposal - - Every segment sent on a TCP connection to be protected against - spoofing will contain the 16-byte MD5 digest produced by applying the - MD5 algorithm to these items in the following order: - - 1. the TCP pseudo-header (in the order: source IP address, - destination IP address, zero-padded protocol number, and - segment length) - 2. the TCP header, excluding options, and assuming a checksum of - zero - 3. the TCP segment data (if any) - 4. an independently-specified key or password, known to both TCPs - and presumably connection-specific - - The header and pseudo-header are in network byte order. The nature - of the key is deliberately left unspecified, but it must be known by - both ends of the connection. 
A particular TCP implementation will - determine what the application may specify as the key. - - Upon receiving a signed segment, the receiver must validate it by - calculating its own digest from the same data (using its own key) and - comparing the two digest. A failing comparison must result in the - segment being dropped and must not produce any response back to the - sender. Logging the failure is probably advisable. - - Unlike other TCP extensions (e.g., the Window Scale option - [RFC1323]), the absence of the option in the SYN,ACK segment must not - cause the sender to disable its sending of signatures. This - negotiation is typically done to prevent some TCP implementations - from misbehaving upon receiving options in non-SYN segments. This is - not a problem for this option, since the SYN,ACK sent during - connection negotiation will not be signed and will thus be ignored. - The connection will never be made, and non-SYN segments with options - will never be sent. More importantly, the sending of signatures must - be under the complete control of the application, not at the mercy of - the remote host not understanding the option. - - - - - - -Heffernan Standards Track [Page 2] - -RFC 2385 TCP MD5 Signature Option August 1998 - - -3.0 Syntax - - The proposed option has the following format: - - +---------+---------+-------------------+ - | Kind=19 |Length=18| MD5 digest... | - +---------+---------+-------------------+ - | | - +---------------------------------------+ - | | - +---------------------------------------+ - | | - +-------------------+-------------------+ - | | - +-------------------+ - - The MD5 digest is always 16 bytes in length, and the option would - appear in every segment of a connection. - -4.0 Some Implications - -4.1 Connectionless Resets - - A connectionless reset will be ignored by the receiver of the reset, - since the originator of that reset does not know the key, and so - cannot generate the proper signature for the segment. 
This means, - for example, that connection attempts by a TCP which is generating - signatures to a port with no listener will time out instead of being - refused. Similarly, resets generated by a TCP in response to - segments sent on a stale connection will also be ignored. - Operationally this can be a problem since resets help BGP recover - quickly from peer crashes. - -4.2 Performance - - The performance hit in calculating digests may inhibit the use of - this option. Some measurements of a sample implementation showed - that on a 100 MHz R4600, generating a signature for simple ACK - segment took an average of 0.0268 ms, while generating a signature - for a data segment carrying 4096 bytes of data took 0.8776 ms on - average. These times would be applied to both the input and output - paths, with the input path also bearing the cost of a 16-byte - compare. - - - - - - - - -Heffernan Standards Track [Page 3] - -RFC 2385 TCP MD5 Signature Option August 1998 - - -4.3 TCP Header Size - - As with other options that are added to every segment, the size of - the MD5 option must be factored into the MSS offered to the other - side during connection negotiation. Specifically, the size of the - header to subtract from the MTU (whether it is the MTU of the - outgoing interface or IP's minimal MTU of 576 bytes) is now at least - 18 bytes larger. - - The total header size is also an issue. The TCP header specifies - where segment data starts with a 4-bit field which gives the total - size of the header (including options) in 32-byte words. This means - that the total size of the header plus option must be less than or - equal to 60 bytes -- this leaves 40 bytes for options. - - As a concrete example, 4.4BSD defaults to sending window-scaling and - timestamp information for connections it initiates. The most loaded - segment will be the initial SYN packet to start the connection. 
With - MD5 signatures, the SYN packet will contain the following: - - -- 4 bytes MSS option - -- 4 bytes window scale option (3 bytes padded to 4 in 4.4BSD) - -- 12 bytes for timestamp (4.4BSD pads the option as recommended - in RFC 1323 Appendix A) - -- 18 bytes for MD5 digest - -- 2 bytes for end-of-option-list, to pad to a 32-bit boundary. - - This sums to 40 bytes, which just makes it. - -4.4 MD5 as a Hashing Algorithm - - Since this memo was first issued (under a different title), the MD5 - algorithm has been found to be vulnerable to collision search attacks - [Dobb], and is considered by some to be insufficiently strong for - this type of application. - - This memo still specifies the MD5 algorithm, however, since the - option has already been deployed operationally, and there was no - "algorithm type" field defined to allow an upgrade using the same - option number. The original document did not specify a type field - since this would require at least one more byte, and it was felt at - the time that taking 19 bytes for the complete option (which would - probably be padded to 20 bytes in TCP implementations) would be too - much of a waste of the already limited option space. - - - - - - - -Heffernan Standards Track [Page 4] - -RFC 2385 TCP MD5 Signature Option August 1998 - - - This does not prevent the deployment of another similar option which - uses another hashing algorithm (like SHA-1). Also, if most - implementations pad the 18 byte option as defined to 20 bytes anyway, - it would be just as well to define a new option which contains an - algorithm type field. - - This would need to be addressed in another document, however. - -4.5 Key configuration - - It should be noted that the key configuration mechanism of routers - may restrict the possible keys that may be used between peers. 
It is - strongly recommended that an implementation be able to support at - minimum a key composed of a string of printable ASCII of 80 bytes or - less, as this is current practice. - -5.0 Security Considerations - - This document defines a weak but currently practiced security - mechanism for BGP. It is anticipated that future work will provide - different stronger mechanisms for dealing with these issues. - -6.0 References - - [RFC1321] Rivest, R., "The MD5 Message-Digest Algorithm," RFC 1321, - April 1992. - - [RFC1323] Jacobson, V., Braden, R., and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [Dobb] H. Dobbertin, "The Status of MD5 After a Recent Attack", RSA - Labs' CryptoBytes, Vol. 2 No. 2, Summer 1996. - http://www.rsa.com/rsalabs/pubs/cryptobytes.html - -Author's Address - - Andy Heffernan - cisco Systems - 170 West Tasman Drive - San Jose, CA 95134 USA - - Phone: +1 408 526-8115 - EMail: ahh@cisco.com - - - - - - - - -Heffernan Standards Track [Page 5] - -RFC 2385 TCP MD5 Signature Option August 1998 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. 
- - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Heffernan Standards Track [Page 6] - diff --git a/kernel/picotcp/RFC/rfc2398.txt b/kernel/picotcp/RFC/rfc2398.txt deleted file mode 100644 index ece738a..0000000 --- a/kernel/picotcp/RFC/rfc2398.txt +++ /dev/null @@ -1,843 +0,0 @@ - - - - - - -Network Working Group S. Parker -Request for Comments: 2398 C. Schmechel -FYI: 33 Sun Microsystems, Inc. -Category: Informational August 1998 - - - Some Testing Tools for TCP Implementors - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -1. Introduction - - Available tools for testing TCP implementations are catalogued by - this memo. Hopefully disseminating this information will encourage - those responsible for building and maintaining TCP to make the best - use of available tests. The type of testing the tool provides, the - type of tests it is capable of doing, and its availability is - enumerated. This document lists only tools which can evaluate one or - more TCP implementations, or which can privde some specific results - which describe or evaluate the TCP being tested. A number of these - tools produce time-sequence plots, see - - Tim Shepard's thesis [She91] for a general discussion of these plots. 
- - Each tools is defined as follows: - - Name - - The name associated with the testing tool. - - Category - - One or more categories of tests which the tools are capable of - providing. Categories used are: functional correctness, performance, - stress. Functional correctness tests how stringent a TCP - implementation is to the RFC specifications. Performance tests how - - - - - - - -Parker & Schmechel Informational [Page 1] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - - quickly a TCP implementation can send and receive data, etc. Stress - tests how a TCP implementation is effected under high load - conditions. - - Description - - A description of the tools construction, and the implementation - methodology of the tests. - - Automation - - What steps are required to complete the test? What human - intervention is required? - - Availability - - How do you retrieve this tool and get more information about it? - - Required Environment - - Compilers, OS version, etc. required to build and/or run the - associated tool. - - References - - A list of publications relating to the tool, if any. - -2. Tools - -2.1. Dbs - - Author - Yukio Murayama - - Category - Performance / Stress - - Description - Dbs is a tool which allows multiple data transfers to be coordinated, - and the resulting TCP behavior to be reviewed. Results are presented - as ASCII log files. - - Automation - Command of execution is driven by a script file. - - - - - - - -Parker & Schmechel Informational [Page 2] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - - Availability - See http://www.ai3.net/products/dbs for details of precise OS - versions supported, and for download of the source code. Current - implementation supports BSDI BSD/OS, Linux, mkLinux, SunOS, IRIX, - Ultrix, NEWS OS, HP-UX. Other environments are likely easy to add. - - Required Environment - C language compiler, UNIX-style socket API support. - -2.2. 
Dummynet - - Author - Luigi Rizzo - - Category - Functional Correctness / Performance - - Description - Dummynet is a tool which simulates the presence of finite size - queues, bandwidth limitations, and communication delays. Dummynet - inserts between two layers of the protocol stack (in the current - implementation between TCP and IP), simulating the above effects in - an operational system. This way experiments can be done using real - protocol implementations and real applications, even running on the - same host (dummynet also intercepts communications on the loopback - interface). Reconfiguration of dummynet parameters (delay, queue - size, bandwidth) can be done on the fly by using a sysctl call. The - overhead of dummynet is extremely low. - - Automation - Requires merging diff files with kernel source code. Command-line - driven through the sysctl command to modify kernel variables. - - Availability - See http://www.iet.unipi.it/~luigi/research.html or e-mail Luigi - Rizzo (l.rizzo@iet.unipi.it). Source code is available for FreeBSD - 2.1 and FreeBSD 2.2 (easily adaptable to other BSD-derived systems). - - Required Environment - C language compiler, BSD-derived system, kernel source code. - - References - [Riz97] - - - - - - - - -Parker & Schmechel Informational [Page 3] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - -2.3. Netperf - - Author - Rick Jones - - Category - Performance - - Description - Single connection bandwidth or latency tests for TCP, UDP, and DLPI. - Includes provisions for CPU utilization measurement. - - Automation - Requires compilation (K&R C sufficient for all but-DHISTOGRAM, may - require ANSI C in the future) if starting from source. Execution as - child of inetd requires editing of /etc/services and /etc/inetd.conf. - Scripts are provided for a quick look (snapshot_script), bulk - throughput of TCP and UDP, and latency for TCP and UDP. It is - command-line driven. 
- - Availability - See http://www.cup.hp.com/netperf/NetperfPage.html or e-mail Rick - Jones (raj@cup.hp.com). Binaries are available here for HP/UX Irix, - Solaris, and Win32. - - Required Environment - C language compiler, POSIX.1, sockets. - -2.4. NIST Net - - Author - Mark Carson - - Category - Functional Correctness / Performance - - Description - NIST Net is a network emulator. The tool is packaged as a Linux - kernel patch, a kernel module, a set of programming APIs, and - command-line and X-based user interfaces. - - NIST Net works by turning the system into a "selectively bad" router - - incoming packets may be delayed, dropped, duplicated, bandwidth- - constrained, etc. Packet delays may be fixed or randomly - distributed, with loadable probability distributions. Packet loss - may be uniformly distributed (constant loss probability) or - congestion-dependent (probability of loss increases with packet queue - lengths). Explicit congestion notifications may optionally be sent - - - -Parker & Schmechel Informational [Page 4] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - - in place of congestion-dependent loss. - - Automation - To control the operation of the emulator, there is an interactive - user interface, a non-interactive command-line interface, and a set - of APIs. Any or all of these may be used in concert. The - interactive interface is suitable for simple, spur-of-the-moment - testing, while the command-line or APIs may be used to create - scripted, non-interactive tests. - - Availability - NIST Net is available for public download from the NIST Net web site, - http://www.antd.nist.gov/itg/nistnet/. The web site also has - installation instructions and documentation. - - Required Environment - NIST Net requires a Linux installtion, with kernel version 2.0.27 - - 2.0.33. A kernel source tree and build tools are required to build - and install the NIST Net components. 
Building the X interface - requires a version of XFree86 (Current Version is 3.3.2). An - Athena-replacement widget set such as neXtaw - (http://www.inf.ufrgs.br/~kojima/nextaw/) is also desirable for an - improved user interface. - - NIST Net should run on any i386-compatible machine capable of running - Linux, with one or more interfaces. - -2.5. Orchestra - - Author - Scott Dawson, Farnam Jahanian, and Todd Mitton - - Category - Functional Correctness / Performance - - Description - This tool is a library which provides the user with an ability to - build a protocol layer capable of performing fault injection on - protocols. Several fault injection layers have been built using this - library, one of which has been used to test different vendor - implementations of TCP. This is accomplished by probing the vendor - implementation from one machine containing a protocol stack that has - been instrumented with Orchestra. A connection is opened from the - vendor TCP implementation to the machine which has been instrumented. - Faults may then be injected at the Orchestra side of the connection - and the vendor TCP's response may be monitored. The most recent - version of Orchestra runs inside the X-kernel protocol stack on the - OSF MK operating system. - - - -Parker & Schmechel Informational [Page 5] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - - When using Orchestra to test a protocol, the fault injection layer is - placed below the target protocol in the protocol stack. This can - either be done on one machine on the network, if protocol stacks on - the other machines cannot be modified (as in the case of testing - TCP), or can be done on all machines on the network (as in the case - of testing a protocol under development). Once the fault injection - layer is in the protocol stack, all messages sent by and destined for - the target protocol pass through it on their way to/from the network. 
- The Orchestra fault injection layer can manipulate these messages. - In particular, it can drop, delay, re-order, duplicate, or modify - messages. It can also introduce new messages into the system if - desired. - - The actions of the Orchestra fault injection layer on each message - are determined by a script, written in Tcl. This script is - interpreted by the fault injection layer when the message enters the - layer. The script has access to the header information about the - message, and can make decisions based on header values. It can also - keep information about previous messages, counters, or any other data - which the script writer deems useful. Users of Orchestra may also - define their own actions to be taken on messages, written in C, that - may be called from the fault injection scripts. - - Automation - Scripts can be specified either using a graphical user interface - which generates Tcl, or by writing Tcl directly. At this time, - post-analysis of the results of the test must also be performed by - the user. Essentially this consists of looking at a packet trace - that Orchestra generates for (in)correct behavior. Must compile and - link fault generated layer with the protocol stack. - - Availability - See http://www.eecs.umich.edu/RTCL/projects/orchestra/ or e-mail - Scott Dawson (sdawson@eecs.umich.edu). - - Required Environment OSF MK operating system, or X-kernel like network - architecture, or adapted to network stack. - - References - [DJ94], [DJM96a], [DJM96b] - - - - - - - - - - - -Parker & Schmechel Informational [Page 6] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - -2.6. Packet Shell - - Author - Steve Parker and Chris Schmechel - - Category - Functional Correctness / Performance - - Description - An extensible Tcl/Tk based software toolset for protocol development - and testing. Tcl (Tool Command Language) is an embeddable scripting - language and Tk is a graphical user interface toolkit based on Tcl. 
- The Packet Shell creates Tcl commands that allow you to create, - modify, send, and receive packets on networks. The operations for - each protocol are supplied by a dynamic linked library called a - protocol library. These libraries are silently linked in from a - special directory when the Packet Shell begins execution. The current - protocol libraries are: IP, IPv6, IPv6 extensions, ICMP, ICMPv6, - Ethernet layer, data layer, file layer (snoop and tcpdump support), - socket layer, TCP, TLI. - - It includes harness, which is a Tk based graphical user interface for - creating test scripts within the Packet Shell. It includes tests for - no initial slow start, and retain out of sequence data as TCP test - cases mentioned in [PADHV98]. - - It includes tcpgraph, which is used with a snoop or tcpdump capture - file to produce a TCP time-sequence plot using xplot. - - Automation - Command-line driven through Tcl commands, or graphical user interface - models are available through the harness format. - - Availability - See http://playground.sun.com/psh/ or e-mail owner-packet- - shell@sunroof.eng.sun.com. - - Required Environment - - Solaris 2.4 or higher. Porting required for other operating systems. - - - - - - - - - - - -Parker & Schmechel Informational [Page 7] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - -2.7. Tcpanaly - - Author - Vern Paxson - - Category - Functional Correctness / Performance - - Description - This is a tool for automatically analyzing a TCP implementation's - behavior by inspecting packet traces of the TCP's activity. It does - so through packet filter traces produced by tcpdump. It has coded - within it knowledge of a large number of TCP implementations. Using - this, it can determine whether a given trace appears consistent with - a given implementation, and, if so, exactly why the TCP chose to - transmit each packet at the time it did. 
If a trace is found - inconsistent with a TCP, tcpanaly either diagnoses a likely - measurement error present in the trace, or indicates exactly whether - the activity in the trace deviates from that of the TCP, which can - greatly aid in determining how the traced implementation behaves. - - Tcpanaly's category is somewhat difficult to classify, since it - attempts to profile the behavior of an implementation, rather than to - explicitly test specific correctness or performance issues. However, - this profile identifies correctness and performance problems. - - Adding new implementations of TCP behavior is possible with tcpanaly - through the use of C++ classes. - - Automation - Command-line driven and only the traces of the TCP sending and - receiving bulk data transfers are needed as input. - - Availability - Contact Vern Paxson (vern@ee.lbl.gov). - - Required Environment - C++ compiler. - - References - [Pax97a] - - - - - - - - - - -Parker & Schmechel Informational [Page 8] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - -2.8. Tcptrace - - Author - Shawn Ostermann - - Category - Functional Correctness / Performance - - Description - This is a TCP trace file analysis tool. It reads output trace files - in the formats of : tcpdump, snoop, etherpeek, and netm. - - For each connection, it keeps track of elapsed time, bytes/segments - sent and received, retransmissions, round trip times, window - advertisements, throughput, etc from simple to very detailed output. - - It can also produce three different types of graphs: - - Time Sequence Graph (shows the segments sent and ACKs returned as a - function of time) - - Instantaneous Throughput (shows the instantaneous, averaged over a - few segments, throughput of the connection as a function of time). - - Round Trip Times (shows the round trip times for the ACKs as a - function of time) - - Automation - Command-line driven, and uses the xplot program to view the graphs. 
- - Availability - Source code is available, and Solaris binary along with sample - traces. See http://jarok.cs.ohiou.edu/software/tcptrace/tcptrace.html - or e-mail Shawn Ostermann (ostermann@cs.ohiou.edu). - - Required Environment - C compiler, Solaris, FreeBSD, NetBSD, HPUX, Linux. - - - - - - - - - - - - - - -Parker & Schmechel Informational [Page 9] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - -2.9. Tracelook - - Author - Greg Minshall - - Category - Functional Correctness / Performance - - Description - This is a Tcl/Tk program for graphically viewing the contents of - tcpdump trace files. When plotting a connection, a user can select - various variables to be plotted. In each direction of the connection, - the user can plot the advertised window in each packet, the highest - sequence number in each packet, the lowest sequence number in each - packet, and the acknowledgement number in each packet. - - Automation - Command-line driven with a graphical user interface for the graph. - - Availability - See http://www.ipsilon.com/~minshall/sw/tracelook/tracelook.html or - e-mail Greg Minshall (minshall@ipsilon.com). - - Required Environment - A modern version of awk, and Tcl/Tk (Tk version 3.6 or higher). The - program xgraph is required to view the graphs under X11. - -2.10. TReno - - Author - Matt Mathis and Jamshid Mahdavi - - Category - Performance - - Description - This is a TCP throughput measurement tool based on sending UDP or - ICMP packets in patterns that are controlled at the user-level so - that their timing reflects what would be sent by a TCP that observes - proper congestion control (and implements SACK). This allows it to - measure throughput independent of the TCP implementation of end hosts - and serve as a useful platform for prototyping TCP changes. - - Automation - Command-line driven. No "server" is required, and it only requires a - single argument of the machine to run the test to. 
- - - - - -Parker & Schmechel Informational [Page 10] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - - Availability - See http://www.psc.edu/networking/treno_info.html or e-mail Matt - Mathis (mathis@psc.edu) or Jamshid Mahdavi (mahdavi@psc.edu). - - Required Environment - C compiler, POSIX.1, raw sockets. - -2.11. Ttcp - - Author - Unknown - - Category - Performance - - Description - Originally written to move files around, ttcp became the classic - throughput benchmark or load generator, with the addition of support - for sourcing to/from memory. It can also be used as a traffic - absorber. It has spawned many variants, recent ones include support - for UDP, data pattern generation, page alignment, and even alignment - offset control. - - Automation - Command-line driven. - - Availability - See ftp://ftp.arl.mil/pub/ttcp/ or e-mail ARL (ftp@arl.mil) which - includes the most common variants available. - - Required Environment - C compiler, BSD sockets. - -2.12. Xplot - - Author - Tim Shepard - - Category - Functional Correctness / Performance - - Description - This is a fairly conventional graphing/plotting tool (xplot itself), - a script to turn tcpdump output into xplot input, and some sample - code to generate xplot commands to plot the TCP time-sequence graph). - - Automation - Command-line driven with a graphical user interface for the plot. - - - -Parker & Schmechel Informational [Page 11] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - - Availability - See ftp://mercury.lcs.mit.edu/pub/shep/xplot.tar.gz or e-mail Tim - Shepard (shep@lcs.mit.edu). - - Required Environment - C compiler, X11. - - References - [She91] - -3. Summary - - This memo lists all TCP tests and testing tools reported to the - authors as part of TCP Implementer's working group and is not - exhaustive. These tools have been verified as available by the - authors. - -4. 
Security Considerations - - Network analysis tools are improving at a steady pace. The - continuing improvement in these tools such as the ones described make - security concerns significant. - - Some of the tools could be used to create rogue packets or denial- - of-service attacks against other hosts. Also, some of the tools - require changes to the kernel (foreign code) and might require root - privileges to execute. So you are trusting code that you have - fetched from some perhaps untrustworthy remote site. This code could - contain malicious code that could present any kind of attack. - - None of the listed tools evaluate security in any way or form. - - There are privacy concerns when grabbing packets from the network in - that you are now able to read other people's mail, files, etc. This - impacts more than just the host running the tool but all traffic - crossing the host's physical network. - -5. References - - [DJ94] Scott Dawson and Farnam Jahanian, "Probing and Fault - Injection of Distributed Protocol Implementations", - University of Michigan Technical Report CSE-TR-217-94, EECS - Department. - - [DJM96a] Scott Dawson, Farnam Jahanian, and Todd Mitton, "ORCHESTRA: - A Fault Injection Environment for Distributed Systems", - University of Michigan Technical Report CSE-TR-318-96, EECS - Department. - - - -Parker & Schmechel Informational [Page 12] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - - [DJM96b] Scott Dawson, Farnam Jahanian, and Todd Mitton, - "Experiments on Six Commercial TCP Implementations Using a - Software Fault Injection Tool", University of Michigan - Technical Report CSE-TR-298-96, EECS Department. - - [Pax97a] Vern Paxson, "Automated Packet Trace Analysis of TCP - Implementations", ACM SIGCOMM '97, September 1997, Cannes, - France. - - [PADHV98] Paxson, V., Allman, M., Dawson, S., Heavens, I., and B. - Volz, "Known TCP Implementation Problems", Work In - Progress. 
- - [Riz97] Luigi Rizzo, "Dummynet: a simple approach to the evaluation - of network protocols", ACM Computer Communication Review, - Vol. 27, N. 1, January 1997, pp. 31-41. - - [She91] Tim Shepard, "TCP Packet Trace Analysis", MIT Laboratory - for Computer Science MIT-LCS-TR-494, February, 1991. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Parker & Schmechel Informational [Page 13] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - -6. Authors' Addresses - - Steve Parker - Sun Microsystems, Inc. - 901 San Antonio Road, UMPK17-202 - Palo Alto, CA 94043 - USA - - Phone: (650) 786-5176 - EMail: sparker@eng.sun.com - - - Chris Schmechel - Sun Microsystems, Inc. - 901 San Antonio Road, UMPK17-202 - Palo Alto, CA, 94043 - USA - - Phone: (650) 786-4053 - EMail: cschmec@eng.sun.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Parker & Schmechel Informational [Page 14] - -RFC 2398 Some Testing Tools for TCP Implementors August 1998 - - -7. Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. 
- - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Parker & Schmechel Informational [Page 15] - diff --git a/kernel/picotcp/RFC/rfc2415.txt b/kernel/picotcp/RFC/rfc2415.txt deleted file mode 100644 index a5506ee..0000000 --- a/kernel/picotcp/RFC/rfc2415.txt +++ /dev/null @@ -1,619 +0,0 @@ - - - - - - -Network Working Group K. Poduri -Request for Comments: 2415 K. Nichols -Category: Informational Bay Networks - September 1998 - - - Simulation Studies of Increased Initial TCP Window Size - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -Abstract - - An increase in the permissible initial window size of a TCP - connection, from one segment to three or four segments, has been - under discussion in the tcp-impl working group. This document covers - some simulation studies of the effects of increasing the initial - window size of TCP. Both long-lived TCP connections (file transfers) - and short-lived web-browsing style connections were modeled. The - simulations were performed using the publicly available ns-2 - simulator and our custom models and files are also available. - -1. Introduction - - We present results from a set of simulations with increased TCP - initial window (IW). 
The main objectives were to explore the - conditions under which the larger IW was a "win" and to determine the - effects, if any, the larger IW might have on other traffic flows - using an IW of one segment. - - This study was inspired by discussions at the Munich IETF tcp-impl - and tcp-sat meetings. A proposal to increase the IW size to about 4K - bytes (4380 bytes in the case of 1460 byte segments) was discussed. - Concerns about both the utility of the increase and its effect on - other traffic were raised. Some studies were presented showing the - positive effects of increased IW on individual connections, but no - studies were shown with a wide variety of simultaneous traffic flows. - It appeared that some of the questions being raised could be - addressed in an ns-2 simulation. Early results from our simulations - were previously posted to the tcp-impl mailing list and presented at - the tcp-impl WG meeting at the December 1997 IETF. - - - -Poduri & Nichols Informational [Page 1] - -RFC 2415 TCP Window Size September 1998 - - -2. Model and Assumptions - - We simulated a network topology with a bottleneck link as shown: - - 10Mb, 10Mb, - (all 4 links) (all 4 links) - - C n2_________ ______ n6 S - l n3_________\ /______ n7 e - i \\ 1.5Mb, 50ms // r - e n0 ------------------------ n1 v - n n4__________// \ \_____ n8 e - t n5__________/ \______ n9 r - s s - - URLs --> <--- FTP & Web data - - File downloading and web-browsing clients are attached to the nodes - (n2-n5) on the left-hand side. These clients are served by the FTP - and Web servers attached to the nodes (n6-n9) on the right-hand side. - The links to and from those nodes are at 10 Mbps. The bottleneck link - is between n1 and n0. All links are bi-directional, but only ACKs, - SYNs, FINs, and URLs are flowing from left to right. Some simulations - were also performed with data traffic flowing from right to left - simultaneously, but it had no effect on the results. 
- - In the simulations we assumed that all ftps transferred 1-MB files - and that all web pages had exactly three embedded URLs. The web - clients are browsing quite aggressively, requesting a new page after - a random delay uniformly distributed between 1 and 5 seconds. This is - not meant to realistically model a single user's web-browsing - pattern, but to create a reasonably heavy traffic load whose - individual tcp connections accurately reflect real web traffic. Some - discussion of these models as used in earlier studies is available in - references [3] and [4]. - - The maximum tcp window was set to 11 packets, maximum packet (or - segment) size to 1460 bytes, and buffer sizes were set at 25 packets. - (The ns-2 TCPs require setting window sizes and buffer sizes in - number of packets. In our tcp-full code some of the internal - parameters have been set to be byte-oriented, but external values - must still be set in number of packets.) In our simulations, we - varied the number of data segments sent into a new TCP connection (or - initial window) from one to four, keeping all segments at 1460 bytes. - A dropped packet causes a restart window of one segment to be used, - just as in current practice. - - - - - -Poduri & Nichols Informational [Page 2] - -RFC 2415 TCP Window Size September 1998 - - - For ns-2 users: The tcp-full code was modified to use an - "application" class and three application client-server pairs were - written: a simple file transfer (ftp), a model of http1.0 style web - connection and a very rough model of http1.1 style web connection. - The required files and scripts for these simulations are available - under the contributed code section on the ns-simulator web page at - the sites ftp://ftp.ee.lbl.gov/IW.{tar, tar.Z} or http://www- - nrg.ee.lbl.gov/floyd/tcp_init_win.html. - - Simulations were run with 8, 16, 32 web clients and a number of ftp - clients ranging from 0 to 3. 
The IW was varied from 1 to 4, though - the 4-packet case lies beyond what is currently recommended. The - figures of merit used were goodput, the median page delay seen by the - web clients and the median file transfer delay seen by the ftp - clients. The simulated run time was rather large, 360 seconds, to - ensure an adequate sample. (Median values remained the same for - simulations with larger run times and can be considered stable) - -3. Results - - In our simulations, we varied the number of file transfer clients in - order to change the congestion of the link. Recall that our ftp - clients continuously request 1 Mbyte transfers, so the link - utilization is over 90% when even a single ftp client is present. - When three file transfer clients are running simultaneously, the - resultant congestion is somewhat pathological, making the values - recorded stable. Though all connections use the same initial window, - the effect of increasing the IW on a 1 Mbyte file transfer is not - detectable, thus we focus on the web browsing connections. (In the - tables, we use "webs" to indicate number of web clients and "ftps" to - indicate the number of file transfer clients attached.) Table 1 shows - the median delays experienced by the web transfers with an increase - in the TCP IW. There is clearly an improvement in transfer delays - for the web connections with increase in the IW, in many cases on the - order of 30%. The steepness of the performance improvement going - from an IW of 1 to an IW of 2 is mainly due to the distribution of - files fetched by each URL (see references [1] and [2]); the median - size of both primary and in-line URLs fits completely into two - packets. If file distributions change, the shape of this curve may - also change. - - - - - - - - - - - -Poduri & Nichols Informational [Page 3] - -RFC 2415 TCP Window Size September 1998 - - - Table 1. 
Median web page delay - - #Webs #FTPs IW=1 IW=2 IW=3 IW=4 - (s) (% decrease) - ---------------------------------------------- - 8 0 0.56 14.3 17.9 16.1 - 8 1 1.06 18.9 25.5 32.1 - 8 2 1.18 16.1 17.1 28.9 - 8 3 1.26 11.9 19.0 27.0 - 16 0 0.64 11.0 15.6 18.8 - 16 1 1.04 17.3 24.0 35.6 - 16 2 1.22 17.2 20.5 25.4 - 16 3 1.31 10.7 21.4 22.1 - 32 0 0.92 17.6 28.6 21.0 - 32 1 1.19 19.6 25.0 26.1 - 32 2 1.43 23.8 35.0 33.6 - 32 3 1.56 19.2 29.5 33.3 - - Table 2 shows the bottleneck link utilization and packet drop - percentage of the same experiment. Packet drop rates did increase - with IW, but in all cases except that of the single most pathological - overload, the increase in drop percentage was less than 1%. A - decrease in packet drop percentage is observed in some overloaded - situations, specifically when ftp transfers consumed most of the link - bandwidth and a large number of web transfers shared the remaining - bandwidth of the link. In this case, the web transfers experience - severe packet loss and some of the IW=4 web clients suffer multiple - packet losses from the same window, resulting in longer recovery - times than when there is a single packet loss in a window. During the - recovery time, the connections are inactive which alleviates - congestion and thus results in a decrease in the packet drop - percentage. It should be noted that such observations were made only - in extremely overloaded scenarios. - - - - - - - - - - - - - - - - - - -Poduri & Nichols Informational [Page 4] - -RFC 2415 TCP Window Size September 1998 - - -Table 2. 
Link utilization and packet drop rates - - Percentage Link Utilization | Packet drop rate -#Webs #FTPs IW=1 IW=2 IW=3 IW=4 |IW=1 IW=2 IW=3 IW=4 ------------------------------------------------------------------------ - 8 0 34 37 38 39 | 0.0 0.0 0.0 0.0 - 8 1 95 92 93 92 | 0.6 1.2 1.4 1.3 - 8 2 98 97 97 96 | 1.8 2.3 2.3 2.7 - 8 3 98 98 98 98 | 2.6 3.0 3.5 3.5 ------------------------------------------------------------------------ - 16 0 67 69 69 67 | 0.1 0.5 0.8 1.0 - 16 1 96 95 93 92 | 2.1 2.6 2.9 2.9 - 16 2 98 98 97 96 | 3.5 3.6 4.2 4.5 - 16 3 99 99 98 98 | 4.5 4.7 5.2 4.9 ------------------------------------------------------------------------ - 32 0 92 87 85 84 | 0.1 0.5 0.8 1.0 - 32 1 98 97 96 96 | 2.1 2.6 2.9 2.9 - 32 2 99 99 98 98 | 3.5 3.6 4.2 4.5 - 32 3 100 99 99 98 | 9.3 8.4 7.7 7.6 - - To get a more complete picture of performance, we computed the - network power, goodput divided by median delay (in Mbytes/ms), and - plotted it against IW for all scenarios. (Each scenario is uniquely - identified by its number of webs and number of file transfers.) We - plot these values in Figure 1 (in the pdf version), illustrating a - general advantage to increasing IW. When a large number of web - clients is combined with ftps, particularly multiple ftps, - pathological cases result from the extreme congestion. In these - cases, there appears to be no particular trend to the results of - increasing the IW, in fact simulation results are not particularly - stable. - - To get a clearer picture of what is happening across all the tested - scenarios, we normalized the network power values for the non- - pathological scenario by the network power for that scenario at IW of - one. These results are plotted in Figure 2. As IW is increased from - one to four, network power increased by at least 15%, even in a - congested scenario dominated by bulk transfer traffic. 
In simulations - where web traffic has a dominant share of the available bandwidth, - the increase in network power was up to 60%. - - The increase in network power at higher initial window sizes is due - to an increase in throughput and a decrease in the delay. Since the - (slightly) increased drop rates were accompanied by better - performance, drop rate is clearly not an indicator of user level - performance. - - - - - -Poduri & Nichols Informational [Page 5] - -RFC 2415 TCP Window Size September 1998 - - - The gains in performance seen by the web clients need to be balanced - against the performance the file transfers are seeing. We computed - ftp network power and show this in Table 3. It appears that the - improvement in network power seen by the web connections has - negligible effect on the concurrent file transfers. It can be - observed from the table that there is a small variation in the - network power of file transfers with an increase in the size of IW - but no particular trend can be seen. It can be concluded that the - network power of file transfers essentially remained the same. - However, it should be noted that a larger IW does allow web transfers - to gain slightly more bandwidth than with a smaller IW. This could - mean fewer bytes transferred for FTP applications or a slight - decrease in network power as computed by us. - - Table 3. Network power of file transfers with an increase in the TCP - IW size - - #Webs #FTPs IW=1 IW=2 IW=3 IW=4 - -------------------------------------------- - 8 1 4.7 4.2 4.2 4.2 - 8 2 3.0 2.8 3.0 2.8 - 8 3 2.2 2.2 2.2 2.2 - 16 1 2.3 2.4 2.4 2.5 - 16 2 1.8 2.0 1.8 1.9 - 16 3 1.4 1.6 1.5 1.7 - 32 1 0.7 0.9 1.3 0.9 - 32 2 0.8 1.0 1.3 1.1 - 32 3 0.7 1.0 1.2 1.0 - - The above simulations all used http1.0 style web connections, thus, a - natural question is to ask how results are affected by migration to - http1.1. 
A rough model of this behavior was simulated by using one - connection to send all of the information from both the primary URL - and the three embedded, or in-line, URLs. Since the transfer size is - now made up of four web files, the steep improvement in performance - between an IW of 1 and an IW of two, noted in the previous results, - has been smoothed. Results are shown in Tables 4 & 5 and Figs. 3 & 4. - Occasionally an increase in IW from 3 to 4 decreases the network - power owing to a non-increase or a slight decrease in the throughput. - TCP connections opening up with a higher window size into a very - congested network might experience some packet drops and consequently - a slight decrease in the throughput. This indicates that increase of - the initial window sizes to further higher values (>4) may not always - result in a favorable network performance. This can be seen clearly - in Figure 4 where the network power shows a decrease for the two - highly congested cases. - - - - - -Poduri & Nichols Informational [Page 6] - -RFC 2415 TCP Window Size September 1998 - - - Table 4. Median web page delay for http1.1 - - #Webs #FTPs IW=1 IW=2 IW=3 IW=4 - (s) (% decrease) - ---------------------------------------------- - 8 0 0.47 14.9 19.1 21.3 - 8 1 0.84 17.9 19.0 25.0 - 8 2 0.99 11.5 17.3 23.0 - 8 3 1.04 12.1 20.2 28.3 - 16 0 0.54 07.4 14.8 20.4 - 16 1 0.89 14.6 21.3 27.0 - 16 2 1.02 14.7 19.6 25.5 - 16 3 1.11 09.0 17.0 18.9 - 32 0 0.94 16.0 29.8 36.2 - 32 1 1.23 12.2 28.5 21.1 - 32 2 1.39 06.5 13.7 12.2 - 32 3 1.46 04.0 11.0 15.0 - - - Table 5. 
Network power of file transfers with an increase in the - TCP IW size - - #Webs #FTPs IW=1 IW=2 IW=3 IW=4 - -------------------------------------------- - 8 1 4.2 4.2 4.2 3.7 - 8 2 2.7 2.5 2.6 2.3 - 8 3 2.1 1.9 2.0 2.0 - 16 1 1.8 1.8 1.5 1.4 - 16 2 1.5 1.2 1.1 1.5 - 16 3 1.0 1.0 1.0 1.0 - 32 1 0.3 0.3 0.5 0.3 - 32 2 0.4 0.3 0.4 0.4 - 32 3 0.4 0.3 0.4 0.5 - - For further insight, we returned to the http1.0 model and mixed some - web-browsing connections with IWs of one with those using IWs of - three. In this experiment, we first simulated a total of 16 web- - browsing connections, all using IW of one. Then the clients were - split into two groups of 8 each, one of which uses IW=1 and the other - used IW=3. - - We repeated the simulations for a total of 32 and 64 web-browsing - clients, splitting those into groups of 16 and 32 respectively. Table - 6 shows these results. We report the goodput (in Mbytes), the web - page delays (in milli seconds), the percent utilization of the link - and the percent of packets dropped. - - - - - -Poduri & Nichols Informational [Page 7] - -RFC 2415 TCP Window Size September 1998 - - -Table 6. Results for half-and-half scenario - -Median Page Delays and Goodput (MB) | Link Utilization (%) & Drops (%) -#Webs IW=1 | IW=3 | IW=1 | IW=3 - G.put dly | G.put dly | L.util Drops| L.util Drops -------------------|-------------------|---------------|--------------- -16 35.5 0.64| 36.4 0.54 | 67 0.1 | 69 0.7 -8/8 16.9 0.67| 18.9 0.52 | 68 0.5 | -------------------|-------------------|---------------|--------------- -32 48.9 0.91| 44.7 0.68 | 92 3.5 | 85 4.3 -16/16 22.8 0.94| 22.9 0.71 | 89 4.6 | -------------------|-------------------|---------------|---------------- -64 51.9 1.50| 47.6 0.86 | 98 13.0 | 91 8.6 -32/32 29.0 1.40| 22.0 1.20 | 98 12.0 | - - Unsurprisingly, the non-split experiments are consistent with our - earlier results, clients with IW=3 outperform clients with IW=1. 
The - results of running the 8/8 and 16/16 splits show that running a - mixture of IW=3 and IW=1 has no negative effect on the IW=1 - conversations, while IW=3 conversations maintain their performance. - However, the 32/32 split shows that web-browsing connections with - IW=3 are adversely affected. We believe this is due to the - pathological dynamics of this extremely congested scenario. Since - embedded URLs open their connections simultaneously, very large - number of TCP connections are arriving at the bottleneck link - resulting in multiple packet losses for the IW=3 conversations. The - myriad problems of this simultaneous opening strategy is, of course, - part of the motivation for the development of http1.1. - -4. Discussion - - The indications from these results are that increasing the initial - window size to 3 packets (or 4380 bytes) helps to improve perceived - performance. Many further variations on these simulation scenarios - are possible and we've made our simulation models and scripts - available in order to facilitate others' experiments. - - We also used the RED queue management included with ns-2 to perform - some other simulation studies. We have not reported on those results - here since we don't consider the studies complete. We found that by - adding RED to the bottleneck link, we achieved similar performance - gains (with an IW of 1) to those we found with increased IWs without - RED. Others may wish to investigate this further. - - Although the simulation sets were run for a T1 link, several - scenarios with varying levels of congestion and varying number of web - and ftp clients were analyzed. It is reasonable to expect that the - results would scale for links with higher bandwidth. However, - - - -Poduri & Nichols Informational [Page 8] - -RFC 2415 TCP Window Size September 1998 - - - interested readers could investigate this aspect further. 
- - We also used the RED queue management included with ns-2 to perform - some other simulation studies. We have not reported on those results - here since we don't consider the studies complete. We found that by - adding RED to the bottleneck link, we achieved similar performance - gains (with an IW of 1) to those we found with increased IWs without - RED. Others may wish to investigate this further. - -5. References - - [1] B. Mah, "An Empirical Model of HTTP Network Traffic", Proceedings - of INFOCOM '97, Kobe, Japan, April 7-11, 1997. - - [2] C.R. Cunha, A. Bestavros, M.E. Crovella, "Characteristics of WWW - Client-based Traces", Boston University Computer Science - Technical Report BU-CS-95-010, July 18, 1995. - - [3] K.M. Nichols and M. Laubach, "Tiers of Service for Data Access in - a HFC Architecture", Proceedings of SCTE Convergence Conference, - January, 1997. - - [4] K.M. Nichols, "Improving Network Simulation with Feedback", - available from knichols@baynetworks.com - -6. Acknowledgements - - This work benefited from discussions with and comments from Van - Jacobson. - -7. Security Considerations - - This document discusses a simulation study of the effects of a - proposed change to TCP. Consequently, there are no security - considerations directly related to the document. There are also no - known security considerations associated with the proposed change. - - - - - - - - - - - - - - - -Poduri & Nichols Informational [Page 9] - -RFC 2415 TCP Window Size September 1998 - - -8. 
Authors' Addresses - - Kedarnath Poduri - Bay Networks - 4401 Great America Parkway - SC01-04 - Santa Clara, CA 95052-8185 - - Phone: +1-408-495-2463 - Fax: +1-408-495-1299 - EMail: kpoduri@Baynetworks.com - - - Kathleen Nichols - Bay Networks - 4401 Great America Parkway - SC01-04 - Santa Clara, CA 95052-8185 - - EMail: knichols@baynetworks.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Poduri & Nichols Informational [Page 10] - -RFC 2415 TCP Window Size September 1998 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- - - - - - - - - - - - - - - - - - - - - - - - -Poduri & Nichols Informational [Page 11] - diff --git a/kernel/picotcp/RFC/rfc2416.txt b/kernel/picotcp/RFC/rfc2416.txt deleted file mode 100644 index b5e3a90..0000000 --- a/kernel/picotcp/RFC/rfc2416.txt +++ /dev/null @@ -1,395 +0,0 @@ - - - - - - -Network Working Group T. Shepard -Request for Comments: 2416 C. Partridge -Category: Informational BBN Technologies - September 1998 - - - When TCP Starts Up With Four Packets Into Only Three Buffers - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -Abstract - - This memo is to document a simple experiment. The experiment showed - that in the case of a TCP receiver behind a 9600 bps modem link at - the edge of a fast Internet where there are only 3 buffers before the - modem (and the fourth packet of a four-packet start will surely be - dropped), no significant degradation in performance is experienced by - a TCP sending with a four-packet start when compared with a normal - slow start (which starts with just one packet). - -Background - - Sally Floyd has proposed that TCPs start their initial slow start by - sending as many as four packets (instead of the usual one packet) as - a means of getting TCP up-to-speed faster. (Slow starts instigated - due to timeouts would still start with just one packet.) Starting - with more than one packet might reduce the start-up latency over - long-fat pipes by two round-trip times. This proposal is documented - further in [1], [2], and in [3] and we assume the reader is familiar - with the details of this proposal. 
- - On the end2end-interest mailing list, concern was raised that in the - (allegedly common) case where a slow modem is served by a router - which only allocates three buffers per modem (one buffer being - transmitted while two packets are waiting), that starting with four - packets would not be good because the fourth packet is sure to be - dropped. - - - - - - -Shepard & Partridge Informational [Page 1] - -RFC 2416 TCP with Four Packets into Three Buffers September 1998 - - - Vern Paxson replied with the comment (among other things) that the - four-packet start is no worse than what happens after two round trip - times in normal slow start, hence no new problem is introduced by - starting with as many as four packets. If there is a problem with a - four-packet start, then the problem already exists in a normal slow- - start startup after two round trip times when the slow-start - algorithm will release into the net four closely spaced packets. - - The experiment reported here confirmed Vern Paxson's reasoning. - -Scenario and experimental setup - - -+--------+ 100 Mbps +---+ 1.5 Mbps +---+ 9600 bps +----------+ -| source +------------+ R +-------------+ R +--------------+ receiver | -+--------+ no delay +---+ 25 ms delay +---+ 150 ms delay +----------+ - - | | - | | - (we spy here) (this router has only 3 buffers - to hold packets going into the - 9600 bps link) - - The scenario studied and simulated consists of three links between - the source and sink. The first link is a 100 Mbps link with no - delay. It connects the sender to a router. (It was included to have - a means of logging the returning ACKs at the time they would be seen - by the sender.) The second link is a 1.5 Mbps link with a 25 ms - one-way delay. (This link was included to roughly model traversing - an un-congested, intra-continental piece of the terrestrial - Internet.) The third link is a 9600 bps link with a 150 ms one-way - delay. 
It connects the edge of the net to a receiver which is behind - the 9600 bps link. - - The queue limits for the queues at each end of the first two links - were set to 100 (a value sufficiently large that this limit was never - a factor). The queue limits at each end of the 9600 bps link were - set to 3 packets (which can hold at most two packets while one is - being sent). - - Version 1.2a2 of the the NS simulator (available from LBL) was used - to simulate both one-packet and four-packet starts for each of the - available TCP algorithms (tahoe, reno, sack, fack) and the conclusion - reported here is independent of which TCP algorithm is used (in - general, we believe). In this memo, the "tahoe" module will be used - to illustrate what happens. In the 4-packet start cases, the - "window-init" variable was set to 4, and the TCP implementations were - modified to use the value of the window-init variable only on - - - -Shepard & Partridge Informational [Page 2] - -RFC 2416 TCP with Four Packets into Three Buffers September 1998 - - - connection start, but to set cwnd to 1 on other instances of a slow- - start. (The tcp.cc module as shipped with ns-1.2a2 would use the - window-init value in all cases.) - - The packets in simulation are 1024 bytes long for purposes of - determining the time it takes to transmit them through the links. - (The TCP modules included with the LBL NS simulator do not simulate - the TCP sequence number mechanisms. They use just packet numbers.) - - Observations are made of all packets and acknowledgements crossing - the 100 Mbps no-delay link, near the sender. (All descriptions below - are from this point of view.) - -What happens with normal slow start - - At time 0.0 packet number 1 is sent. - - At time 1.222 an ack is received covering packet number 1, and - packets 2 and 3 are sent. - - At time 2.444 an ack is received covering packet number 2, and - packets 4 and 5 are sent. 
- - At time 3.278 an ack is received covering packet number 3, and - packets 6 and 7 are sent. - - At time 4.111 an ack is received covering packet number 4, and - packets 8 and 9 are sent. - - At time 4.944 an ack is received covering packet number 5, and - packets 10 and 11 are sent. - - At time 5.778 an ack is received covering packet number 6, and - packets 12 and 13 are sent. - - At time 6.111 a duplicate ack is recieved (covering packet number 6). - - At time 7.444 another duplicate ack is received (covering packet - number 6). - - At time 8.278 a third duplicate ack is received (covering packet - number 6) and packet number 7 is retransmitted. - - (And the trace continues...) - -What happens with a four-packet start - - At time 0.0, packets 1, 2, 3, and 4 are sent. - - - -Shepard & Partridge Informational [Page 3] - -RFC 2416 TCP with Four Packets into Three Buffers September 1998 - - - At time 1.222 an ack is received covering packet number 1, and - packets 5 and 6 are sent. - - At time 2.055 an ack is received covering packet number 2, and - packets 7 and 8 are sent. - - At time 2.889 an ack is received covering packet number 3, and - packets 9 and 10 are sent. - - At time 3.722 a duplicate ack is received (covering packet number 3). - - At time 4.555 another duplicate ack is received (covering packet - number 3). - - At time 5.389 a third duplicate ack is received (covering packet - number 3) and packet number 4 is retransmitted. - - (And the trace continues...) - -Discussion - - At the point left off in the two traces above, the two different - systems are in almost identical states. The two traces from that - point on are almost the same, modulo a shift in time of (8.278 - - 5.389) = 2.889 seconds and a shift of three packets. If the normal - TCP (with the one-packet start) will deliver packet N at time T, then - the TCP with the four-packet start will deliver packet N - 3 at time - T - 2.889 (seconds). 
- - Note that the time to send three 1024-byte TCP segments through a - 9600 bps modem is 2.66 seconds. So at what time does the four- - packet-start TCP deliver packet N? At time T - 2.889 + 2.66 = T - - 0.229 in most cases, and in some cases earlier, in some cases later, - because different packets (by number) experience loss in the two - traces. - - Thus the four-packet-start TCP is in some sense 0.229 seconds (or - about one fifth of a packet) ahead of where the one-packet-start TCP - would be. (This is due to the extra time the modem sits idle while - waiting for the dally timer to go off in the receiver in the case of - the one-packet-start TCP.) - - The states of the two systems are not exactly identical. They differ - slightly in the round-trip-time estimators because the behavior at - the start is not identical. (The observed round trip times may differ - by a small amount due to dally timers and due to that the one-packet - start experiences more round trip times before the first loss.) In - the cases where a retransmit timer did later go off, the additional - - - -Shepard & Partridge Informational [Page 4] - -RFC 2416 TCP with Four Packets into Three Buffers September 1998 - - - difference in timing was much smaller than the 0.229 second - difference discribed above. - -Conclusion - - In this particular case, the four-packet start is not harmful. - -Non-conclusions, opinions, and future work - - A four-packet start would be very helpful in situations where a - long-delay link is involved (as it would reduce transfer times for - moderately-sized transfers by as much as two round-trip times). But - it remains (in the authors' opinions at this time) an open question - whether or not the four-packet start would be safe for the network. - - It would be nice to see if this result could be duplicated with real - TCPs, real modems, and real three-buffer limits. 
- -Security Considerations - - This document discusses a simulation study of the effects of a - proposed change to TCP. Consequently, there are no security - considerations directly related to the document. There are also no - known security considerations associated with the proposed change. - -References - - 1. S. Floyd, Increasing TCP's Initial Window (January 29, 1997). - URL ftp://ftp.ee.lbl.gov/papers/draft.jan29. - - 2. S. Floyd and M. Allman, Increasing TCP's Initial Window (July, - 1997). URL http://gigahertz.lerc.nasa.gov/~mallman/share/draft- - ss.txt - - 3. Allman, M., Floyd, S., and C. Partridge, "Increasing TCP's - Initial Window", RFC 2414, September 1998. - - - - - - - - - - - - - - - -Shepard & Partridge Informational [Page 5] - -RFC 2416 TCP with Four Packets into Three Buffers September 1998 - - -Authors' Addresses - - Tim Shepard - BBN Technologies - 10 Moulton Street - Cambridge, MA 02138 - - EMail: shep@alum.mit.edu - - - Craig Partridge - BBN Technologies - 10 Moulton Street - Cambridge, MA 02138 - - EMail: craig@bbn.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Shepard & Partridge Informational [Page 6] - -RFC 2416 TCP with Four Packets into Three Buffers September 1998 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Shepard & Partridge Informational [Page 7] - diff --git a/kernel/picotcp/RFC/rfc2452.txt b/kernel/picotcp/RFC/rfc2452.txt deleted file mode 100644 index dcbcf26..0000000 --- a/kernel/picotcp/RFC/rfc2452.txt +++ /dev/null @@ -1,563 +0,0 @@ - - - - - - -Network Working Group M. Daniele -Request for Comments: 2452 Compaq Computer Corporation -Category: Standards Track December 1998 - - - IP Version 6 Management Information Base - for the Transmission Control Protocol - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. 
- -Abstract - - This document is one in the series of documents that define various - MIB objects for IPv6. Specifically, this document is the MIB module - which defines managed objects for implementations of the Transmission - Control Protocol (TCP) over IP Version 6 (IPv6). - - This document also recommends a specific policy with respect to the - applicability of RFC 2012 for implementations of IPv6. Namely, that - most of managed objects defined in RFC 2012 are independent of which - IP versions underlie TCP, and only the TCP connection information is - IP version-specific. - - This memo defines an experimental portion of the Management - Information Base (MIB) for use with network management protocols in - IPv6-based internets. - -1. Introduction - - A management system contains: several (potentially many) nodes, each - with a processing entity, termed an agent, which has access to - management instrumentation; at least one management station; and, a - management protocol, used to convey management information between - the agents and management stations. Operations of the protocol are - carried out under an administrative framework which defines - authentication, authorization, access control, and privacy policies. - - - - - -Daniele Standards Track [Page 1] - -RFC 2452 TCP MIB for IPv6 December 1998 - - - Management stations execute management applications which monitor and - control managed elements. Managed elements are devices such as - hosts, routers, terminal servers, etc., which are monitored and - controlled via access to their management information. - - Management information is viewed as a collection of managed objects, - residing in a virtual information store, termed the Management - Information Base (MIB). Collections of related objects are defined - in MIB modules. These modules are written using a subset of OSI's - Abstract Syntax Notation One (ASN.1) [1], termed the Structure of - Management Information (SMI) [2]. - -2. 
Overview - - This document is one in the series of documents that define various - MIB objects, and statements of conformance, for IPv6. This document - defines the required instrumentation for implementations of TCP over - IPv6. - -3. Transparency of IP versions to TCP - - The fact that a particular TCP connection uses IPv6 as opposed to - IPv4, is largely invisible to a TCP implementation. A "TCPng" did - not need to be defined, implementations simply need to support IPv6 - addresses. - - As such, the managed objects already defined in [TCP MIB] are - sufficient for managing TCP in the presence of IPv6. These objects - are equally applicable whether the managed node supports IPv4 only, - IPv6 only, or both IPv4 and IPv6. - - For example, tcpActiveOpens counts "The number of times TCP - connections have made a direct transition to the SYN-SENT state from - the CLOSED state", regardless of which version of IP is used between - the connection endpoints. - - Stated differently, TCP implementations don't need separate counters - for IPv4 and for IPv6. - -4. Representing TCP Connections - - The exception to the statements in section 3 is the tcpConnTable. - Since IPv6 addresses cannot be represented with the IpAddress syntax, - not all TCP connections can be represented in the tcpConnTable - defined in [TCP MIB]. - - - - - - -Daniele Standards Track [Page 2] - -RFC 2452 TCP MIB for IPv6 December 1998 - - - This memo defines a new, separate table to represent only those TCP - connections between IPv6 endpoints. TCP connections between IPv4 - endpoints continue to be represented in tcpConnTable [TCP MIB]. (It - is not possible to establish a TCP connection between an IPv4 - endpoint and an IPv6 endpoint.) - - A different approach would have been to define a new table to - represent all TCP connections regardless of IP version. This would - require changes to [TCP MIB] and hence to existing (IPv4-only) TCP - implementations. 
The approach suggested in this memo has the - advantage of leaving IPv4-only implementations intact. - - It is assumed that the objects defined in this memo will eventually - be defined in an update to [TCP MIB]. For this reason, the module - identity is assigned under the experimental portion of the MIB. - -5. Conformance - - This memo contains conformance statements to define conformance to - this MIB for TCP over IPv6 implementations. - -6. Definitions - -IPV6-TCP-MIB DEFINITIONS ::= BEGIN - -IMPORTS - MODULE-COMPLIANCE, OBJECT-GROUP FROM SNMPv2-CONF - MODULE-IDENTITY, OBJECT-TYPE, - mib-2, experimental FROM SNMPv2-SMI - Ipv6Address, Ipv6IfIndexOrZero FROM IPV6-TC; - -ipv6TcpMIB MODULE-IDENTITY - LAST-UPDATED "9801290000Z" - ORGANIZATION "IETF IPv6 MIB Working Group" - CONTACT-INFO - " Mike Daniele - - Postal: Compaq Computer Corporation - 110 Spitbrook Rd - Nashua, NH 03062. - US - - Phone: +1 603 884 1423 - Email: daniele@zk3.dec.com" - DESCRIPTION - "The MIB module for entities implementing TCP over IPv6." - ::= { experimental 86 } - - - - -Daniele Standards Track [Page 3] - -RFC 2452 TCP MIB for IPv6 December 1998 - - --- objects specific to TCP for IPv6 - -tcp OBJECT IDENTIFIER ::= { mib-2 6 } - --- the TCP over IPv6 Connection table - --- This connection table contains information about this --- entity's existing TCP connections between IPv6 endpoints. --- Only connections between IPv6 addresses are contained in --- this table. This entity's connections between IPv4 --- endpoints are contained in tcpConnTable. - -ipv6TcpConnTable OBJECT-TYPE - SYNTAX SEQUENCE OF Ipv6TcpConnEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table containing TCP connection-specific information, - for only those connections whose endpoints are IPv6 addresses." 
- ::= { tcp 16 } - -ipv6TcpConnEntry OBJECT-TYPE - SYNTAX Ipv6TcpConnEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A conceptual row of the ipv6TcpConnTable containing - information about a particular current TCP connection. - Each row of this table is transient, in that it ceases to - exist when (or soon after) the connection makes the transition - to the CLOSED state. - - Note that conceptual rows in this table require an additional - index object compared to tcpConnTable, since IPv6 addresses - are not guaranteed to be unique on the managed node." - INDEX { ipv6TcpConnLocalAddress, - ipv6TcpConnLocalPort, - ipv6TcpConnRemAddress, - ipv6TcpConnRemPort, - ipv6TcpConnIfIndex } - ::= { ipv6TcpConnTable 1 } - -Ipv6TcpConnEntry ::= - SEQUENCE { ipv6TcpConnLocalAddress Ipv6Address, - ipv6TcpConnLocalPort INTEGER (0..65535), - ipv6TcpConnRemAddress Ipv6Address, - ipv6TcpConnRemPort INTEGER (0..65535), - ipv6TcpConnIfIndex Ipv6IfIndexOrZero, - - - -Daniele Standards Track [Page 4] - -RFC 2452 TCP MIB for IPv6 December 1998 - - - ipv6TcpConnState INTEGER } - -ipv6TcpConnLocalAddress OBJECT-TYPE - SYNTAX Ipv6Address - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The local IPv6 address for this TCP connection. In - the case of a connection in the listen state which - is willing to accept connections for any IPv6 - address associated with the managed node, the value - ::0 is used." - ::= { ipv6TcpConnEntry 1 } - -ipv6TcpConnLocalPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The local port number for this TCP connection." - ::= { ipv6TcpConnEntry 2 } - -ipv6TcpConnRemAddress OBJECT-TYPE - SYNTAX Ipv6Address - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The remote IPv6 address for this TCP connection." 
- ::= { ipv6TcpConnEntry 3 } - -ipv6TcpConnRemPort OBJECT-TYPE - SYNTAX INTEGER (0..65535) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The remote port number for this TCP connection." - ::= { ipv6TcpConnEntry 4 } - -ipv6TcpConnIfIndex OBJECT-TYPE - SYNTAX Ipv6IfIndexOrZero - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "An index object used to disambiguate conceptual rows in - the table, since the connection 4-tuple may not be unique. - - If the connection's remote address (ipv6TcpConnRemAddress) - is a link-local address and the connection's local address - - - -Daniele Standards Track [Page 5] - -RFC 2452 TCP MIB for IPv6 December 1998 - - - (ipv6TcpConnLocalAddress) is not a link-local address, this - object identifies a local interface on the same link as - the connection's remote link-local address. - - Otherwise, this object identifies the local interface that - is associated with the ipv6TcpConnLocalAddress for this - TCP connection. If such a local interface cannot be determined, - this object should take on the value 0. (A possible example - of this would be if the value of ipv6TcpConnLocalAddress is ::0.) - - The interface identified by a particular non-0 value of this - index is the same interface as identified by the same value - of ipv6IfIndex. - - The value of this object must remain constant during the life - of the TCP connection." - ::= { ipv6TcpConnEntry 5 } - -ipv6TcpConnState OBJECT-TYPE - SYNTAX INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11), - deleteTCB(12) } - MAX-ACCESS read-write - STATUS current - DESCRIPTION - "The state of this TCP connection. - - The only value which may be set by a management station is - deleteTCB(12). 
Accordingly, it is appropriate for an agent - to return an error response (`badValue' for SNMPv1, 'wrongValue' - for SNMPv2) if a management station attempts to set this - object to any other value. - - If a management station sets this object to the value - deleteTCB(12), then this has the effect of deleting the TCB - (as defined in RFC 793) of the corresponding connection on - the managed node, resulting in immediate termination of the - connection. - - - -Daniele Standards Track [Page 6] - -RFC 2452 TCP MIB for IPv6 December 1998 - - - As an implementation-specific option, a RST segment may be - sent from the managed node to the other TCP endpoint (note - however that RST segments are not sent reliably)." - ::= { ipv6TcpConnEntry 6 } - --- --- conformance information --- - -ipv6TcpConformance OBJECT IDENTIFIER ::= { ipv6TcpMIB 2 } - -ipv6TcpCompliances OBJECT IDENTIFIER ::= { ipv6TcpConformance 1 } -ipv6TcpGroups OBJECT IDENTIFIER ::= { ipv6TcpConformance 2 } - --- compliance statements - -ipv6TcpCompliance MODULE-COMPLIANCE - STATUS current - DESCRIPTION - "The compliance statement for SNMPv2 entities which - implement TCP over IPv6." - MODULE -- this module - MANDATORY-GROUPS { ipv6TcpGroup } - ::= { ipv6TcpCompliances 1 } - -ipv6TcpGroup OBJECT-GROUP - OBJECTS { -- these are defined in this module - -- ipv6TcpConnLocalAddress (not-accessible) - -- ipv6TcpConnLocalPort (not-accessible) - -- ipv6TcpConnRemAddress (not-accessible) - -- ipv6TcpConnRemPort (not-accessible) - -- ipv6TcpConnIfIndex (not-accessible) - ipv6TcpConnState } - STATUS current - DESCRIPTION - "The group of objects providing management of - TCP over IPv6." - ::= { ipv6TcpGroups 1 } - -END - - - - - - - - - - - -Daniele Standards Track [Page 7] - -RFC 2452 TCP MIB for IPv6 December 1998 - - -7. 
Acknowledgments - - This memo is a product of the IPng work group, and benefited - especially from the contributions of the following working group - members: - - Dimitry Haskin Bay Networks - Margaret Forsythe Epilogue - Tim Hartrick Mentat - Frank Solensky FTP - Jack McCann DEC - -8. References - - [1] Information processing systems - Open Systems - Interconnection - Specification of Abstract Syntax - Notation One (ASN.1), International Organization for - Standardization. International Standard 8824, - (December, 1987). - - [2] McCloghrie, K., Editor, "Structure of Management - Information for version 2 of the Simple Network - Management Protocol (SNMPv2)", RFC 1902, January 1996. - - [TCP MIB] SNMPv2 Working Group, McCloghrie, K., Editor, "SNMPv2 - Management Information Base for the Transmission - Control Protocol using SMIv2", RFC 2012, November 1996. - - [IPV6 MIB TC] Haskin, D., and S. Onishi, "Management Information - Base for IP Version 6: Textual Conventions and General - Group", RFC 2465, December 1998. - - [IPV6] Deering, S., and R. Hinden, "Internet Protocol, Version - 6 (IPv6) Specification", RFC 2460, December 1998. - - [RFC2274] Blumenthal, U., and B. Wijnen, "The User-Based Security - Model for Version 3 of the Simple Network Management - Protocol (SNMPv3)", RFC 2274, January 1998. - - [RFC2275] Wijnen, B., Presuhn, R., and K. McCloghrie, "View-based - Access Control Model for the Simple Network Management - Protocol (SNMP)", RFC 2275, January 1998. - -9. Security Considerations - - This MIB contains a management object that has a MAX-ACCESS clause of - read-write and/or read-create. In particular, it is possible to - delete individual TCP control blocks (i.e., connections). - - - -Daniele Standards Track [Page 8] - -RFC 2452 TCP MIB for IPv6 December 1998 - - - Consequently, anyone having the ability to issue a SET on this object - can impact the operation of the node. 
- - There are a number of managed objects in this MIB that may be - considered to contain sensitive information in some environments. - For example, the MIB identifies the active TCP connections on the - node. Although this information might be considered sensitive in - some environments (i.e., to identify ports on which to launch - denial-of-service or other attacks), there are already other ways of - obtaining similar information. For example, sending a random TCP - packet to an unused port prompts the generation of a TCP reset - message. - - Therefore, it may be important in some environments to control read - and/or write access to these objects and possibly to even encrypt the - values of these object when sending them over the network via SNMP. - Not all versions of SNMP provide features for such a secure - environment. SNMPv1 by itself does not provide encryption or strong - authentication. - - It is recommended that the implementors consider the security - features as provided by the SNMPv3 framework. Specifically, the use - of the User-based Security Model [RFC2274] and the View-based Access - Control Model [RFC2275] is recommended. - - It is then a customer/user responsibility to ensure that the SNMP - entity giving access to an instance of this MIB, is properly - configured to give access to those objects only to those principals - (users) that have legitimate rights to access them. - -10. Author's Address - - Mike Daniele - Compaq Computer Corporation - 110 Spit Brook Rd - Nashua, NH 03062 - - Phone: +1-603-884-1423 - EMail: daniele@zk3.dec.com - - - - - - - - - - - - -Daniele Standards Track [Page 9] - -RFC 2452 TCP MIB for IPv6 December 1998 - - -11. Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. 
- - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Daniele Standards Track [Page 10] - diff --git a/kernel/picotcp/RFC/rfc2460.txt b/kernel/picotcp/RFC/rfc2460.txt deleted file mode 100644 index de7b7fa..0000000 --- a/kernel/picotcp/RFC/rfc2460.txt +++ /dev/null @@ -1,2187 +0,0 @@ - - - - - - -Network Working Group S. Deering -Request for Comments: 2460 Cisco -Obsoletes: 1883 R. 
Hinden -Category: Standards Track Nokia - December 1998 - - - Internet Protocol, Version 6 (IPv6) - Specification - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -Abstract - - This document specifies version 6 of the Internet Protocol (IPv6), - also sometimes referred to as IP Next Generation or IPng. - -Table of Contents - - 1. Introduction..................................................2 - 2. Terminology...................................................3 - 3. IPv6 Header Format............................................4 - 4. IPv6 Extension Headers........................................6 - 4.1 Extension Header Order...................................7 - 4.2 Options..................................................9 - 4.3 Hop-by-Hop Options Header...............................11 - 4.4 Routing Header..........................................12 - 4.5 Fragment Header.........................................18 - 4.6 Destination Options Header..............................23 - 4.7 No Next Header..........................................24 - 5. Packet Size Issues...........................................24 - 6. Flow Labels..................................................25 - 7. Traffic Classes..............................................25 - 8. 
Upper-Layer Protocol Issues..................................27 - 8.1 Upper-Layer Checksums...................................27 - 8.2 Maximum Packet Lifetime.................................28 - 8.3 Maximum Upper-Layer Payload Size........................28 - 8.4 Responding to Packets Carrying Routing Headers..........29 - - - -Deering & Hinden Standards Track [Page 1] - -RFC 2460 IPv6 Specification December 1998 - - - Appendix A. Semantics and Usage of the Flow Label Field.........30 - Appendix B. Formatting Guidelines for Options...................32 - Security Considerations.........................................35 - Acknowledgments.................................................35 - Authors' Addresses..............................................35 - References......................................................35 - Changes Since RFC-1883..........................................36 - Full Copyright Statement........................................39 - -1. Introduction - - IP version 6 (IPv6) is a new version of the Internet Protocol, - designed as the successor to IP version 4 (IPv4) [RFC-791]. The - changes from IPv4 to IPv6 fall primarily into the following - categories: - - o Expanded Addressing Capabilities - - IPv6 increases the IP address size from 32 bits to 128 bits, to - support more levels of addressing hierarchy, a much greater - number of addressable nodes, and simpler auto-configuration of - addresses. The scalability of multicast routing is improved by - adding a "scope" field to multicast addresses. And a new type - of address called an "anycast address" is defined, used to send - a packet to any one of a group of nodes. - - o Header Format Simplification - - Some IPv4 header fields have been dropped or made optional, to - reduce the common-case processing cost of packet handling and - to limit the bandwidth cost of the IPv6 header. 
- - o Improved Support for Extensions and Options - - Changes in the way IP header options are encoded allows for - more efficient forwarding, less stringent limits on the length - of options, and greater flexibility for introducing new options - in the future. - - o Flow Labeling Capability - - A new capability is added to enable the labeling of packets - belonging to particular traffic "flows" for which the sender - requests special handling, such as non-default quality of - service or "real-time" service. - - - - - - -Deering & Hinden Standards Track [Page 2] - -RFC 2460 IPv6 Specification December 1998 - - - o Authentication and Privacy Capabilities - - Extensions to support authentication, data integrity, and - (optional) data confidentiality are specified for IPv6. - - This document specifies the basic IPv6 header and the initially- - defined IPv6 extension headers and options. It also discusses packet - size issues, the semantics of flow labels and traffic classes, and - the effects of IPv6 on upper-layer protocols. The format and - semantics of IPv6 addresses are specified separately in [ADDRARCH]. - The IPv6 version of ICMP, which all IPv6 implementations are required - to include, is specified in [ICMPv6]. - -2. Terminology - - node - a device that implements IPv6. - - router - a node that forwards IPv6 packets not explicitly - addressed to itself. [See Note below]. - - host - any node that is not a router. [See Note below]. - - upper layer - a protocol layer immediately above IPv6. Examples are - transport protocols such as TCP and UDP, control - protocols such as ICMP, routing protocols such as OSPF, - and internet or lower-layer protocols being "tunneled" - over (i.e., encapsulated in) IPv6 such as IPX, - AppleTalk, or IPv6 itself. - - link - a communication facility or medium over which nodes can - communicate at the link layer, i.e., the layer - immediately below IPv6. 
Examples are Ethernets (simple - or bridged); PPP links; X.25, Frame Relay, or ATM - networks; and internet (or higher) layer "tunnels", - such as tunnels over IPv4 or IPv6 itself. - - neighbors - nodes attached to the same link. - - interface - a node's attachment to a link. - - address - an IPv6-layer identifier for an interface or a set of - interfaces. - - packet - an IPv6 header plus payload. - - link MTU - the maximum transmission unit, i.e., maximum packet - size in octets, that can be conveyed over a link. - - - - -Deering & Hinden Standards Track [Page 3] - -RFC 2460 IPv6 Specification December 1998 - - - path MTU - the minimum link MTU of all the links in a path between - a source node and a destination node. - - Note: it is possible, though unusual, for a device with multiple - interfaces to be configured to forward non-self-destined packets - arriving from some set (fewer than all) of its interfaces, and to - discard non-self-destined packets arriving from its other interfaces. - Such a device must obey the protocol requirements for routers when - receiving packets from, and interacting with neighbors over, the - former (forwarding) interfaces. It must obey the protocol - requirements for hosts when receiving packets from, and interacting - with neighbors over, the latter (non-forwarding) interfaces. - -3. IPv6 Header Format - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |Version| Traffic Class | Flow Label | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Payload Length | Next Header | Hop Limit | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + + - | | - + Source Address + - | | - + + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + + - | | - + Destination Address + - | | - + + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Version 4-bit Internet Protocol version number = 6. 
- - Traffic Class 8-bit traffic class field. See section 7. - - Flow Label 20-bit flow label. See section 6. - - Payload Length 16-bit unsigned integer. Length of the IPv6 - payload, i.e., the rest of the packet following - this IPv6 header, in octets. (Note that any - - - - - -Deering & Hinden Standards Track [Page 4] - -RFC 2460 IPv6 Specification December 1998 - - - extension headers [section 4] present are - considered part of the payload, i.e., included - in the length count.) - - Next Header 8-bit selector. Identifies the type of header - immediately following the IPv6 header. Uses the - same values as the IPv4 Protocol field [RFC-1700 - et seq.]. - - Hop Limit 8-bit unsigned integer. Decremented by 1 by - each node that forwards the packet. The packet - is discarded if Hop Limit is decremented to - zero. - - Source Address 128-bit address of the originator of the packet. - See [ADDRARCH]. - - Destination Address 128-bit address of the intended recipient of the - packet (possibly not the ultimate recipient, if - a Routing header is present). See [ADDRARCH] - and section 4.4. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 5] - -RFC 2460 IPv6 Specification December 1998 - - -4. IPv6 Extension Headers - - In IPv6, optional internet-layer information is encoded in separate - headers that may be placed between the IPv6 header and the upper- - layer header in a packet. There are a small number of such extension - headers, each identified by a distinct Next Header value. 
As - illustrated in these examples, an IPv6 packet may carry zero, one, or - more extension headers, each identified by the Next Header field of - the preceding header: - - +---------------+------------------------ - | IPv6 header | TCP header + data - | | - | Next Header = | - | TCP | - +---------------+------------------------ - - - +---------------+----------------+------------------------ - | IPv6 header | Routing header | TCP header + data - | | | - | Next Header = | Next Header = | - | Routing | TCP | - +---------------+----------------+------------------------ - - - +---------------+----------------+-----------------+----------------- - | IPv6 header | Routing header | Fragment header | fragment of TCP - | | | | header + data - | Next Header = | Next Header = | Next Header = | - | Routing | Fragment | TCP | - +---------------+----------------+-----------------+----------------- - - With one exception, extension headers are not examined or processed - by any node along a packet's delivery path, until the packet reaches - the node (or each of the set of nodes, in the case of multicast) - identified in the Destination Address field of the IPv6 header. - There, normal demultiplexing on the Next Header field of the IPv6 - header invokes the module to process the first extension header, or - the upper-layer header if no extension header is present. The - contents and semantics of each extension header determine whether or - not to proceed to the next header. Therefore, extension headers must - be processed strictly in the order they appear in the packet; a - receiver must not, for example, scan through a packet looking for a - particular kind of extension header and process that header prior to - processing all preceding ones. 
- - - - - -Deering & Hinden Standards Track [Page 6] - -RFC 2460 IPv6 Specification December 1998 - - - The exception referred to in the preceding paragraph is the Hop-by- - Hop Options header, which carries information that must be examined - and processed by every node along a packet's delivery path, including - the source and destination nodes. The Hop-by-Hop Options header, - when present, must immediately follow the IPv6 header. Its presence - is indicated by the value zero in the Next Header field of the IPv6 - header. - - If, as a result of processing a header, a node is required to proceed - to the next header but the Next Header value in the current header is - unrecognized by the node, it should discard the packet and send an - ICMP Parameter Problem message to the source of the packet, with an - ICMP Code value of 1 ("unrecognized Next Header type encountered") - and the ICMP Pointer field containing the offset of the unrecognized - value within the original packet. The same action should be taken if - a node encounters a Next Header value of zero in any header other - than an IPv6 header. - - Each extension header is an integer multiple of 8 octets long, in - order to retain 8-octet alignment for subsequent headers. Multi- - octet fields within each extension header are aligned on their - natural boundaries, i.e., fields of width n octets are placed at an - integer multiple of n octets from the start of the header, for n = 1, - 2, 4, or 8. - - A full implementation of IPv6 includes implementation of the - following extension headers: - - Hop-by-Hop Options - Routing (Type 0) - Fragment - Destination Options - Authentication - Encapsulating Security Payload - - The first four are specified in this document; the last two are - specified in [RFC-2402] and [RFC-2406], respectively. 
- -4.1 Extension Header Order - - When more than one extension header is used in the same packet, it is - recommended that those headers appear in the following order: - - IPv6 header - Hop-by-Hop Options header - Destination Options header (note 1) - Routing header - Fragment header - - - -Deering & Hinden Standards Track [Page 7] - -RFC 2460 IPv6 Specification December 1998 - - - Authentication header (note 2) - Encapsulating Security Payload header (note 2) - Destination Options header (note 3) - upper-layer header - - note 1: for options to be processed by the first destination - that appears in the IPv6 Destination Address field - plus subsequent destinations listed in the Routing - header. - - note 2: additional recommendations regarding the relative - order of the Authentication and Encapsulating - Security Payload headers are given in [RFC-2406]. - - note 3: for options to be processed only by the final - destination of the packet. - - Each extension header should occur at most once, except for the - Destination Options header which should occur at most twice (once - before a Routing header and once before the upper-layer header). - - If the upper-layer header is another IPv6 header (in the case of IPv6 - being tunneled over or encapsulated in IPv6), it may be followed by - its own extension headers, which are separately subject to the same - ordering recommendations. - - If and when other extension headers are defined, their ordering - constraints relative to the above listed headers must be specified. - - IPv6 nodes must accept and attempt to process extension headers in - any order and occurring any number of times in the same packet, - except for the Hop-by-Hop Options header which is restricted to - appear immediately after an IPv6 header only. Nonetheless, it is - strongly advised that sources of IPv6 packets adhere to the above - recommended order until and unless subsequent specifications revise - that recommendation. 
- - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 8] - -RFC 2460 IPv6 Specification December 1998 - - -4.2 Options - - Two of the currently-defined extension headers -- the Hop-by-Hop - Options header and the Destination Options header -- carry a variable - number of type-length-value (TLV) encoded "options", of the following - format: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - - - | Option Type | Opt Data Len | Option Data - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - - - - Option Type 8-bit identifier of the type of option. - - Opt Data Len 8-bit unsigned integer. Length of the Option - Data field of this option, in octets. - - Option Data Variable-length field. Option-Type-specific - data. - - The sequence of options within a header must be processed strictly in - the order they appear in the header; a receiver must not, for - example, scan through the header looking for a particular kind of - option and process that option prior to processing all preceding - ones. - - The Option Type identifiers are internally encoded such that their - highest-order two bits specify the action that must be taken if the - processing IPv6 node does not recognize the Option Type: - - 00 - skip over this option and continue processing the header. - - 01 - discard the packet. - - 10 - discard the packet and, regardless of whether or not the - packet's Destination Address was a multicast address, send an - ICMP Parameter Problem, Code 2, message to the packet's - Source Address, pointing to the unrecognized Option Type. - - 11 - discard the packet and, only if the packet's Destination - Address was not a multicast address, send an ICMP Parameter - Problem, Code 2, message to the packet's Source Address, - pointing to the unrecognized Option Type. - - The third-highest-order bit of the Option Type specifies whether or - not the Option Data of that option can change en-route to the - packet's final destination. 
When an Authentication header is present - - - - - -Deering & Hinden Standards Track [Page 9] - -RFC 2460 IPv6 Specification December 1998 - - - in the packet, for any option whose data may change en-route, its - entire Option Data field must be treated as zero-valued octets when - computing or verifying the packet's authenticating value. - - 0 - Option Data does not change en-route - - 1 - Option Data may change en-route - - The three high-order bits described above are to be treated as part - of the Option Type, not independent of the Option Type. That is, a - particular option is identified by a full 8-bit Option Type, not just - the low-order 5 bits of an Option Type. - - The same Option Type numbering space is used for both the Hop-by-Hop - Options header and the Destination Options header. However, the - specification of a particular option may restrict its use to only one - of those two headers. - - Individual options may have specific alignment requirements, to - ensure that multi-octet values within Option Data fields fall on - natural boundaries. The alignment requirement of an option is - specified using the notation xn+y, meaning the Option Type must - appear at an integer multiple of x octets from the start of the - header, plus y octets. For example: - - 2n means any 2-octet offset from the start of the header. - 8n+2 means any 8-octet offset from the start of the header, - plus 2 octets. - - There are two padding options which are used when necessary to align - subsequent options and to pad out the containing header to a multiple - of 8 octets in length. These padding options must be recognized by - all IPv6 implementations: - - Pad1 option (alignment requirement: none) - - +-+-+-+-+-+-+-+-+ - | 0 | - +-+-+-+-+-+-+-+-+ - - NOTE! the format of the Pad1 option is a special case -- it does - not have length and value fields. - - The Pad1 option is used to insert one octet of padding into the - Options area of a header. 
If more than one octet of padding is - required, the PadN option, described next, should be used, rather - than multiple Pad1 options. - - - - -Deering & Hinden Standards Track [Page 10] - -RFC 2460 IPv6 Specification December 1998 - - - PadN option (alignment requirement: none) - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - - - | 1 | Opt Data Len | Option Data - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- - - - - - - - - - - The PadN option is used to insert two or more octets of padding - into the Options area of a header. For N octets of padding, the - Opt Data Len field contains the value N-2, and the Option Data - consists of N-2 zero-valued octets. - - Appendix B contains formatting guidelines for designing new options. - -4.3 Hop-by-Hop Options Header - - The Hop-by-Hop Options header is used to carry optional information - that must be examined by every node along a packet's delivery path. - The Hop-by-Hop Options header is identified by a Next Header value of - 0 in the IPv6 header, and has the following format: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + - | | - . . - . Options . - . . - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Next Header 8-bit selector. Identifies the type of header - immediately following the Hop-by-Hop Options - header. Uses the same values as the IPv4 - Protocol field [RFC-1700 et seq.]. - - Hdr Ext Len 8-bit unsigned integer. Length of the Hop-by- - Hop Options header in 8-octet units, not - including the first 8 octets. - - Options Variable-length field, of length such that the - complete Hop-by-Hop Options header is an integer - multiple of 8 octets long. Contains one or more - TLV-encoded options, as described in section - 4.2. - - The only hop-by-hop options defined in this document are the Pad1 and - PadN options specified in section 4.2. 
- - - - -Deering & Hinden Standards Track [Page 11] - -RFC 2460 IPv6 Specification December 1998 - - -4.4 Routing Header - - The Routing header is used by an IPv6 source to list one or more - intermediate nodes to be "visited" on the way to a packet's - destination. This function is very similar to IPv4's Loose Source - and Record Route option. The Routing header is identified by a Next - Header value of 43 in the immediately preceding header, and has the - following format: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len | Routing Type | Segments Left | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - . . - . type-specific data . - . . - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Next Header 8-bit selector. Identifies the type of header - immediately following the Routing header. Uses - the same values as the IPv4 Protocol field - [RFC-1700 et seq.]. - - Hdr Ext Len 8-bit unsigned integer. Length of the Routing - header in 8-octet units, not including the first - 8 octets. - - Routing Type 8-bit identifier of a particular Routing header - variant. - - Segments Left 8-bit unsigned integer. Number of route - segments remaining, i.e., number of explicitly - listed intermediate nodes still to be visited - before reaching the final destination. - - type-specific data Variable-length field, of format determined by - the Routing Type, and of length such that the - complete Routing header is an integer multiple - of 8 octets long. 
- - If, while processing a received packet, a node encounters a Routing - header with an unrecognized Routing Type value, the required behavior - of the node depends on the value of the Segments Left field, as - follows: - - - - - - -Deering & Hinden Standards Track [Page 12] - -RFC 2460 IPv6 Specification December 1998 - - - If Segments Left is zero, the node must ignore the Routing header - and proceed to process the next header in the packet, whose type - is identified by the Next Header field in the Routing header. - - If Segments Left is non-zero, the node must discard the packet and - send an ICMP Parameter Problem, Code 0, message to the packet's - Source Address, pointing to the unrecognized Routing Type. - - If, after processing a Routing header of a received packet, an - intermediate node determines that the packet is to be forwarded onto - a link whose link MTU is less than the size of the packet, the node - must discard the packet and send an ICMP Packet Too Big message to - the packet's Source Address. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 13] - -RFC 2460 IPv6 Specification December 1998 - - - The Type 0 Routing header has the following format: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len | Routing Type=0| Segments Left | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Reserved | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + + - | | - + Address[1] + - | | - + + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + + - | | - + Address[2] + - | | - + + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - . . . - . . . - . . . 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + + - | | - + Address[n] + - | | - + + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Next Header 8-bit selector. Identifies the type of header - immediately following the Routing header. Uses - the same values as the IPv4 Protocol field - [RFC-1700 et seq.]. - - Hdr Ext Len 8-bit unsigned integer. Length of the Routing - header in 8-octet units, not including the first - 8 octets. For the Type 0 Routing header, Hdr - Ext Len is equal to two times the number of - addresses in the header. - - Routing Type 0. - - - -Deering & Hinden Standards Track [Page 14] - -RFC 2460 IPv6 Specification December 1998 - - - Segments Left 8-bit unsigned integer. Number of route - segments remaining, i.e., number of explicitly - listed intermediate nodes still to be visited - before reaching the final destination. - - Reserved 32-bit reserved field. Initialized to zero for - transmission; ignored on reception. - - Address[1..n] Vector of 128-bit addresses, numbered 1 to n. - - Multicast addresses must not appear in a Routing header of Type 0, or - in the IPv6 Destination Address field of a packet carrying a Routing - header of Type 0. - - A Routing header is not examined or processed until it reaches the - node identified in the Destination Address field of the IPv6 header. 
- In that node, dispatching on the Next Header field of the immediately - preceding header causes the Routing header module to be invoked, - which, in the case of Routing Type 0, performs the following - algorithm: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 15] - -RFC 2460 IPv6 Specification December 1998 - - - if Segments Left = 0 { - proceed to process the next header in the packet, whose type is - identified by the Next Header field in the Routing header - } - else if Hdr Ext Len is odd { - send an ICMP Parameter Problem, Code 0, message to the Source - Address, pointing to the Hdr Ext Len field, and discard the - packet - } - else { - compute n, the number of addresses in the Routing header, by - dividing Hdr Ext Len by 2 - - if Segments Left is greater than n { - send an ICMP Parameter Problem, Code 0, message to the Source - Address, pointing to the Segments Left field, and discard the - packet - } - else { - decrement Segments Left by 1; - compute i, the index of the next address to be visited in - the address vector, by subtracting Segments Left from n - - if Address [i] or the IPv6 Destination Address is multicast { - discard the packet - } - else { - swap the IPv6 Destination Address and Address[i] - - if the IPv6 Hop Limit is less than or equal to 1 { - send an ICMP Time Exceeded -- Hop Limit Exceeded in - Transit message to the Source Address and discard the - packet - } - else { - decrement the Hop Limit by 1 - - resubmit the packet to the IPv6 module for transmission - to the new destination - } - } - } - } - - - - - - - - -Deering & Hinden Standards Track [Page 16] - -RFC 2460 IPv6 Specification December 1998 - - - As an example of the effects of the above algorithm, consider the - case of a source node S sending a packet to destination node D, using - a Routing header to cause the packet to be routed via intermediate - nodes I1, I2, and I3. 
The values of the relevant IPv6 header and - Routing header fields on each segment of the delivery path would be - as follows: - - As the packet travels from S to I1: - - Source Address = S Hdr Ext Len = 6 - Destination Address = I1 Segments Left = 3 - Address[1] = I2 - Address[2] = I3 - Address[3] = D - - As the packet travels from I1 to I2: - - Source Address = S Hdr Ext Len = 6 - Destination Address = I2 Segments Left = 2 - Address[1] = I1 - Address[2] = I3 - Address[3] = D - - As the packet travels from I2 to I3: - - Source Address = S Hdr Ext Len = 6 - Destination Address = I3 Segments Left = 1 - Address[1] = I1 - Address[2] = I2 - Address[3] = D - - As the packet travels from I3 to D: - - Source Address = S Hdr Ext Len = 6 - Destination Address = D Segments Left = 0 - Address[1] = I1 - Address[2] = I2 - Address[3] = I3 - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 17] - -RFC 2460 IPv6 Specification December 1998 - - -4.5 Fragment Header - - The Fragment header is used by an IPv6 source to send a packet larger - than would fit in the path MTU to its destination. (Note: unlike - IPv4, fragmentation in IPv6 is performed only by source nodes, not by - routers along a packet's delivery path -- see section 5.) The - Fragment header is identified by a Next Header value of 44 in the - immediately preceding header, and has the following format: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Reserved | Fragment Offset |Res|M| - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Identification | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Next Header 8-bit selector. Identifies the initial header - type of the Fragmentable Part of the original - packet (defined below). Uses the same values as - the IPv4 Protocol field [RFC-1700 et seq.]. - - Reserved 8-bit reserved field. Initialized to zero for - transmission; ignored on reception. 
- - Fragment Offset 13-bit unsigned integer. The offset, in 8-octet - units, of the data following this header, - relative to the start of the Fragmentable Part - of the original packet. - - Res 2-bit reserved field. Initialized to zero for - transmission; ignored on reception. - - M flag 1 = more fragments; 0 = last fragment. - - Identification 32 bits. See description below. - - In order to send a packet that is too large to fit in the MTU of the - path to its destination, a source node may divide the packet into - fragments and send each fragment as a separate packet, to be - reassembled at the receiver. - - For every packet that is to be fragmented, the source node generates - an Identification value. The Identification must be different than - that of any other fragmented packet sent recently* with the same - Source Address and Destination Address. If a Routing header is - present, the Destination Address of concern is that of the final - destination. - - - - - -Deering & Hinden Standards Track [Page 18] - -RFC 2460 IPv6 Specification December 1998 - - - * "recently" means within the maximum likely lifetime of a packet, - including transit time from source to destination and time spent - awaiting reassembly with other fragments of the same packet. - However, it is not required that a source node know the maximum - packet lifetime. Rather, it is assumed that the requirement can - be met by maintaining the Identification value as a simple, 32- - bit, "wrap-around" counter, incremented each time a packet must - be fragmented. It is an implementation choice whether to - maintain a single counter for the node or multiple counters, - e.g., one for each of the node's possible source addresses, or - one for each active (source address, destination address) - combination. 
- - The initial, large, unfragmented packet is referred to as the - "original packet", and it is considered to consist of two parts, as - illustrated: - - original packet: - - +------------------+----------------------//-----------------------+ - | Unfragmentable | Fragmentable | - | Part | Part | - +------------------+----------------------//-----------------------+ - - The Unfragmentable Part consists of the IPv6 header plus any - extension headers that must be processed by nodes en route to the - destination, that is, all headers up to and including the Routing - header if present, else the Hop-by-Hop Options header if present, - else no extension headers. - - The Fragmentable Part consists of the rest of the packet, that is, - any extension headers that need be processed only by the final - destination node(s), plus the upper-layer header and data. - - The Fragmentable Part of the original packet is divided into - fragments, each, except possibly the last ("rightmost") one, being an - integer multiple of 8 octets long. The fragments are transmitted in - separate "fragment packets" as illustrated: - - original packet: - - +------------------+--------------+--------------+--//--+----------+ - | Unfragmentable | first | second | | last | - | Part | fragment | fragment | .... 
| fragment | - +------------------+--------------+--------------+--//--+----------+ - - - - - - -Deering & Hinden Standards Track [Page 19] - -RFC 2460 IPv6 Specification December 1998 - - - fragment packets: - - +------------------+--------+--------------+ - | Unfragmentable |Fragment| first | - | Part | Header | fragment | - +------------------+--------+--------------+ - - +------------------+--------+--------------+ - | Unfragmentable |Fragment| second | - | Part | Header | fragment | - +------------------+--------+--------------+ - o - o - o - +------------------+--------+----------+ - | Unfragmentable |Fragment| last | - | Part | Header | fragment | - +------------------+--------+----------+ - - Each fragment packet is composed of: - - (1) The Unfragmentable Part of the original packet, with the - Payload Length of the original IPv6 header changed to contain - the length of this fragment packet only (excluding the length - of the IPv6 header itself), and the Next Header field of the - last header of the Unfragmentable Part changed to 44. - - (2) A Fragment header containing: - - The Next Header value that identifies the first header of - the Fragmentable Part of the original packet. - - A Fragment Offset containing the offset of the fragment, - in 8-octet units, relative to the start of the - Fragmentable Part of the original packet. The Fragment - Offset of the first ("leftmost") fragment is 0. - - An M flag value of 0 if the fragment is the last - ("rightmost") one, else an M flag value of 1. - - The Identification value generated for the original - packet. - - (3) The fragment itself. - - The lengths of the fragments must be chosen such that the resulting - fragment packets fit within the MTU of the path to the packets' - destination(s). 
- - - -Deering & Hinden Standards Track [Page 20] - -RFC 2460 IPv6 Specification December 1998 - - - At the destination, fragment packets are reassembled into their - original, unfragmented form, as illustrated: - - reassembled original packet: - - +------------------+----------------------//------------------------+ - | Unfragmentable | Fragmentable | - | Part | Part | - +------------------+----------------------//------------------------+ - - The following rules govern reassembly: - - An original packet is reassembled only from fragment packets that - have the same Source Address, Destination Address, and Fragment - Identification. - - The Unfragmentable Part of the reassembled packet consists of all - headers up to, but not including, the Fragment header of the first - fragment packet (that is, the packet whose Fragment Offset is - zero), with the following two changes: - - The Next Header field of the last header of the Unfragmentable - Part is obtained from the Next Header field of the first - fragment's Fragment header. - - The Payload Length of the reassembled packet is computed from - the length of the Unfragmentable Part and the length and offset - of the last fragment. For example, a formula for computing the - Payload Length of the reassembled original packet is: - - PL.orig = PL.first - FL.first - 8 + (8 * FO.last) + FL.last - - where - PL.orig = Payload Length field of reassembled packet. - PL.first = Payload Length field of first fragment packet. - FL.first = length of fragment following Fragment header of - first fragment packet. - FO.last = Fragment Offset field of Fragment header of - last fragment packet. - FL.last = length of fragment following Fragment header of - last fragment packet. - - The Fragmentable Part of the reassembled packet is constructed - from the fragments following the Fragment headers in each of the - fragment packets. 
The length of each fragment is computed by - subtracting from the packet's Payload Length the length of the - - - - - -Deering & Hinden Standards Track [Page 21] - -RFC 2460 IPv6 Specification December 1998 - - - headers between the IPv6 header and fragment itself; its relative - position in Fragmentable Part is computed from its Fragment Offset - value. - - The Fragment header is not present in the final, reassembled - packet. - - The following error conditions may arise when reassembling fragmented - packets: - - If insufficient fragments are received to complete reassembly of a - packet within 60 seconds of the reception of the first-arriving - fragment of that packet, reassembly of that packet must be - abandoned and all the fragments that have been received for that - packet must be discarded. If the first fragment (i.e., the one - with a Fragment Offset of zero) has been received, an ICMP Time - Exceeded -- Fragment Reassembly Time Exceeded message should be - sent to the source of that fragment. - - If the length of a fragment, as derived from the fragment packet's - Payload Length field, is not a multiple of 8 octets and the M flag - of that fragment is 1, then that fragment must be discarded and an - ICMP Parameter Problem, Code 0, message should be sent to the - source of the fragment, pointing to the Payload Length field of - the fragment packet. - - If the length and offset of a fragment are such that the Payload - Length of the packet reassembled from that fragment would exceed - 65,535 octets, then that fragment must be discarded and an ICMP - Parameter Problem, Code 0, message should be sent to the source of - the fragment, pointing to the Fragment Offset field of the - fragment packet. - - The following conditions are not expected to occur, but are not - considered errors if they do: - - The number and content of the headers preceding the Fragment - header of different fragments of the same original packet may - differ. 
Whatever headers are present, preceding the Fragment - header in each fragment packet, are processed when the packets - arrive, prior to queueing the fragments for reassembly. Only - those headers in the Offset zero fragment packet are retained in - the reassembled packet. - - The Next Header values in the Fragment headers of different - fragments of the same original packet may differ. Only the value - from the Offset zero fragment packet is used for reassembly. - - - - -Deering & Hinden Standards Track [Page 22] - -RFC 2460 IPv6 Specification December 1998 - - -4.6 Destination Options Header - - The Destination Options header is used to carry optional information - that need be examined only by a packet's destination node(s). The - Destination Options header is identified by a Next Header value of 60 - in the immediately preceding header, and has the following format: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + - | | - . . - . Options . - . . - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Next Header 8-bit selector. Identifies the type of header - immediately following the Destination Options - header. Uses the same values as the IPv4 - Protocol field [RFC-1700 et seq.]. - - Hdr Ext Len 8-bit unsigned integer. Length of the - Destination Options header in 8-octet units, not - including the first 8 octets. - - Options Variable-length field, of length such that the - complete Destination Options header is an - integer multiple of 8 octets long. Contains one - or more TLV-encoded options, as described in - section 4.2. - - The only destination options defined in this document are the Pad1 - and PadN options specified in section 4.2. - - Note that there are two possible ways to encode optional destination - information in an IPv6 packet: either as an option in the Destination - Options header, or as a separate extension header. 
The Fragment - header and the Authentication header are examples of the latter - approach. Which approach can be used depends on what action is - desired of a destination node that does not understand the optional - information: - - o If the desired action is for the destination node to discard - the packet and, only if the packet's Destination Address is not - a multicast address, send an ICMP Unrecognized Type message to - the packet's Source Address, then the information may be - encoded either as a separate header or as an option in the - - - -Deering & Hinden Standards Track [Page 23] - -RFC 2460 IPv6 Specification December 1998 - - - Destination Options header whose Option Type has the value 11 - in its highest-order two bits. The choice may depend on such - factors as which takes fewer octets, or which yields better - alignment or more efficient parsing. - - o If any other action is desired, the information must be encoded - as an option in the Destination Options header whose Option - Type has the value 00, 01, or 10 in its highest-order two bits, - specifying the desired action (see section 4.2). - -4.7 No Next Header - - The value 59 in the Next Header field of an IPv6 header or any - extension header indicates that there is nothing following that - header. If the Payload Length field of the IPv6 header indicates the - presence of octets past the end of a header whose Next Header field - contains 59, those octets must be ignored, and passed on unchanged if - the packet is forwarded. - -5. Packet Size Issues - - IPv6 requires that every link in the internet have an MTU of 1280 - octets or greater. On any link that cannot convey a 1280-octet - packet in one piece, link-specific fragmentation and reassembly must - be provided at a layer below IPv6. 
- - Links that have a configurable MTU (for example, PPP links [RFC- - 1661]) must be configured to have an MTU of at least 1280 octets; it - is recommended that they be configured with an MTU of 1500 octets or - greater, to accommodate possible encapsulations (i.e., tunneling) - without incurring IPv6-layer fragmentation. - - From each link to which a node is directly attached, the node must be - able to accept packets as large as that link's MTU. - - It is strongly recommended that IPv6 nodes implement Path MTU - Discovery [RFC-1981], in order to discover and take advantage of path - MTUs greater than 1280 octets. However, a minimal IPv6 - implementation (e.g., in a boot ROM) may simply restrict itself to - sending packets no larger than 1280 octets, and omit implementation - of Path MTU Discovery. - - In order to send a packet larger than a path's MTU, a node may use - the IPv6 Fragment header to fragment the packet at the source and - have it reassembled at the destination(s). However, the use of such - fragmentation is discouraged in any application that is able to - adjust its packets to fit the measured path MTU (i.e., down to 1280 - octets). - - - -Deering & Hinden Standards Track [Page 24] - -RFC 2460 IPv6 Specification December 1998 - - - A node must be able to accept a fragmented packet that, after - reassembly, is as large as 1500 octets. A node is permitted to - accept fragmented packets that reassemble to more than 1500 octets. - An upper-layer protocol or application that depends on IPv6 - fragmentation to send packets larger than the MTU of a path should - not send packets larger than 1500 octets unless it has assurance that - the destination is capable of reassembling packets of that larger - size. - - In response to an IPv6 packet that is sent to an IPv4 destination - (i.e., a packet that undergoes translation from IPv6 to IPv4), the - originating IPv6 node may receive an ICMP Packet Too Big message - reporting a Next-Hop MTU less than 1280. 
In that case, the IPv6 node - is not required to reduce the size of subsequent packets to less than - 1280, but must include a Fragment header in those packets so that the - IPv6-to-IPv4 translating router can obtain a suitable Identification - value to use in resulting IPv4 fragments. Note that this means the - payload may have to be reduced to 1232 octets (1280 minus 40 for the - IPv6 header and 8 for the Fragment header), and smaller still if - additional extension headers are used. - -6. Flow Labels - - The 20-bit Flow Label field in the IPv6 header may be used by a - source to label sequences of packets for which it requests special - handling by the IPv6 routers, such as non-default quality of service - or "real-time" service. This aspect of IPv6 is, at the time of - writing, still experimental and subject to change as the requirements - for flow support in the Internet become clearer. Hosts or routers - that do not support the functions of the Flow Label field are - required to set the field to zero when originating a packet, pass the - field on unchanged when forwarding a packet, and ignore the field - when receiving a packet. - - Appendix A describes the current intended semantics and usage of the - Flow Label field. - -7. Traffic Classes - - The 8-bit Traffic Class field in the IPv6 header is available for use - by originating nodes and/or forwarding routers to identify and - distinguish between different classes or priorities of IPv6 packets. - At the point in time at which this specification is being written, - there are a number of experiments underway in the use of the IPv4 - Type of Service and/or Precedence bits to provide various forms of - "differentiated service" for IP packets, other than through the use - of explicit flow set-up. The Traffic Class field in the IPv6 header - is intended to allow similar functionality to be supported in IPv6. 
- - - -Deering & Hinden Standards Track [Page 25] - -RFC 2460 IPv6 Specification December 1998 - - - It is hoped that those experiments will eventually lead to agreement - on what sorts of traffic classifications are most useful for IP - packets. Detailed definitions of the syntax and semantics of all or - some of the IPv6 Traffic Class bits, whether experimental or intended - for eventual standardization, are to be provided in separate - documents. - - The following general requirements apply to the Traffic Class field: - - o The service interface to the IPv6 service within a node must - provide a means for an upper-layer protocol to supply the value - of the Traffic Class bits in packets originated by that upper- - layer protocol. The default value must be zero for all 8 bits. - - o Nodes that support a specific (experimental or eventual - standard) use of some or all of the Traffic Class bits are - permitted to change the value of those bits in packets that - they originate, forward, or receive, as required for that - specific use. Nodes should ignore and leave unchanged any bits - of the Traffic Class field for which they do not support a - specific use. - - o An upper-layer protocol must not assume that the value of the - Traffic Class bits in a received packet are the same as the - value sent by the packet's source. - - - - - - - - - - - - - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 26] - -RFC 2460 IPv6 Specification December 1998 - - -8. Upper-Layer Protocol Issues - -8.1 Upper-Layer Checksums - - Any transport or other upper-layer protocol that includes the - addresses from the IP header in its checksum computation must be - modified for use over IPv6, to include the 128-bit IPv6 addresses - instead of 32-bit IPv4 addresses. 
In particular, the following - illustration shows the TCP and UDP "pseudo-header" for IPv6: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + + - | | - + Source Address + - | | - + + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + + - | | - + Destination Address + - | | - + + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Upper-Layer Packet Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | zero | Next Header | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - o If the IPv6 packet contains a Routing header, the Destination - Address used in the pseudo-header is that of the final - destination. At the originating node, that address will be in - the last element of the Routing header; at the recipient(s), - that address will be in the Destination Address field of the - IPv6 header. - - o The Next Header value in the pseudo-header identifies the - upper-layer protocol (e.g., 6 for TCP, or 17 for UDP). It will - differ from the Next Header value in the IPv6 header if there - are extension headers between the IPv6 header and the upper- - layer header. - - o The Upper-Layer Packet Length in the pseudo-header is the - length of the upper-layer header and data (e.g., TCP header - plus TCP data). Some upper-layer protocols carry their own - - - -Deering & Hinden Standards Track [Page 27] - -RFC 2460 IPv6 Specification December 1998 - - - length information (e.g., the Length field in the UDP header); - for such protocols, that is the length used in the pseudo- - header. Other protocols (such as TCP) do not carry their own - length information, in which case the length used in the - pseudo-header is the Payload Length from the IPv6 header, minus - the length of any extension headers present between the IPv6 - header and the upper-layer header. 
- - o Unlike IPv4, when UDP packets are originated by an IPv6 node, - the UDP checksum is not optional. That is, whenever - originating a UDP packet, an IPv6 node must compute a UDP - checksum over the packet and the pseudo-header, and, if that - computation yields a result of zero, it must be changed to hex - FFFF for placement in the UDP header. IPv6 receivers must - discard UDP packets containing a zero checksum, and should log - the error. - - The IPv6 version of ICMP [ICMPv6] includes the above pseudo-header in - its checksum computation; this is a change from the IPv4 version of - ICMP, which does not include a pseudo-header in its checksum. The - reason for the change is to protect ICMP from misdelivery or - corruption of those fields of the IPv6 header on which it depends, - which, unlike IPv4, are not covered by an internet-layer checksum. - The Next Header field in the pseudo-header for ICMP contains the - value 58, which identifies the IPv6 version of ICMP. - -8.2 Maximum Packet Lifetime - - Unlike IPv4, IPv6 nodes are not required to enforce maximum packet - lifetime. That is the reason the IPv4 "Time to Live" field was - renamed "Hop Limit" in IPv6. In practice, very few, if any, IPv4 - implementations conform to the requirement that they limit packet - lifetime, so this is not a change in practice. Any upper-layer - protocol that relies on the internet layer (whether IPv4 or IPv6) to - limit packet lifetime ought to be upgraded to provide its own - mechanisms for detecting and discarding obsolete packets. - -8.3 Maximum Upper-Layer Payload Size - - When computing the maximum payload size available for upper-layer - data, an upper-layer protocol must take into account the larger size - of the IPv6 header relative to the IPv4 header. 
For example, in - IPv4, TCP's MSS option is computed as the maximum packet size (a - default value or a value learned through Path MTU Discovery) minus 40 - octets (20 octets for the minimum-length IPv4 header and 20 octets - for the minimum-length TCP header). When using TCP over IPv6, the - MSS must be computed as the maximum packet size minus 60 octets, - - - - -Deering & Hinden Standards Track [Page 28] - -RFC 2460 IPv6 Specification December 1998 - - - because the minimum-length IPv6 header (i.e., an IPv6 header with no - extension headers) is 20 octets longer than a minimum-length IPv4 - header. - -8.4 Responding to Packets Carrying Routing Headers - - When an upper-layer protocol sends one or more packets in response to - a received packet that included a Routing header, the response - packet(s) must not include a Routing header that was automatically - derived by "reversing" the received Routing header UNLESS the - integrity and authenticity of the received Source Address and Routing - header have been verified (e.g., via the use of an Authentication - header in the received packet). In other words, only the following - kinds of packets are permitted in response to a received packet - bearing a Routing header: - - o Response packets that do not carry Routing headers. - - o Response packets that carry Routing headers that were NOT - derived by reversing the Routing header of the received packet - (for example, a Routing header supplied by local - configuration). - - o Response packets that carry Routing headers that were derived - by reversing the Routing header of the received packet IF AND - ONLY IF the integrity and authenticity of the Source Address - and Routing header from the received packet have been verified - by the responder. - - - - - - - - - - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 29] - -RFC 2460 IPv6 Specification December 1998 - - -Appendix A. 
Semantics and Usage of the Flow Label Field - - A flow is a sequence of packets sent from a particular source to a - particular (unicast or multicast) destination for which the source - desires special handling by the intervening routers. The nature of - that special handling might be conveyed to the routers by a control - protocol, such as a resource reservation protocol, or by information - within the flow's packets themselves, e.g., in a hop-by-hop option. - The details of such control protocols or options are beyond the scope - of this document. - - There may be multiple active flows from a source to a destination, as - well as traffic that is not associated with any flow. A flow is - uniquely identified by the combination of a source address and a - non-zero flow label. Packets that do not belong to a flow carry a - flow label of zero. - - A flow label is assigned to a flow by the flow's source node. New - flow labels must be chosen (pseudo-)randomly and uniformly from the - range 1 to FFFFF hex. The purpose of the random allocation is to - make any set of bits within the Flow Label field suitable for use as - a hash key by routers, for looking up the state associated with the - flow. - - All packets belonging to the same flow must be sent with the same - source address, destination address, and flow label. If any of those - packets includes a Hop-by-Hop Options header, then they all must be - originated with the same Hop-by-Hop Options header contents - (excluding the Next Header field of the Hop-by-Hop Options header). - If any of those packets includes a Routing header, then they all must - be originated with the same contents in all extension headers up to - and including the Routing header (excluding the Next Header field in - the Routing header). The routers or destinations are permitted, but - not required, to verify that these conditions are satisfied. 
If a - violation is detected, it should be reported to the source by an ICMP - Parameter Problem message, Code 0, pointing to the high-order octet - of the Flow Label field (i.e., offset 1 within the IPv6 packet). - - The maximum lifetime of any flow-handling state established along a - flow's path must be specified as part of the description of the - state-establishment mechanism, e.g., the resource reservation - protocol or the flow-setup hop-by-hop option. A source must not re- - use a flow label for a new flow within the maximum lifetime of any - flow-handling state that might have been established for the prior - use of that flow label. - - - - - - -Deering & Hinden Standards Track [Page 30] - -RFC 2460 IPv6 Specification December 1998 - - - When a node stops and restarts (e.g., as a result of a "crash"), it - must be careful not to use a flow label that it might have used for - an earlier flow whose lifetime may not have expired yet. This may be - accomplished by recording flow label usage on stable storage so that - it can be remembered across crashes, or by refraining from using any - flow labels until the maximum lifetime of any possible previously - established flows has expired. If the minimum time for rebooting the - node is known, that time can be deducted from the necessary waiting - period before starting to allocate flow labels. - - There is no requirement that all, or even most, packets belong to - flows, i.e., carry non-zero flow labels. This observation is placed - here to remind protocol designers and implementors not to assume - otherwise. For example, it would be unwise to design a router whose - performance would be adequate only if most packets belonged to flows, - or to design a header compression scheme that only worked on packets - that belonged to flows. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 31] - -RFC 2460 IPv6 Specification December 1998 - - -Appendix B. 
Formatting Guidelines for Options - - This appendix gives some advice on how to lay out the fields when - designing new options to be used in the Hop-by-Hop Options header or - the Destination Options header, as described in section 4.2. These - guidelines are based on the following assumptions: - - o One desirable feature is that any multi-octet fields within the - Option Data area of an option be aligned on their natural - boundaries, i.e., fields of width n octets should be placed at - an integer multiple of n octets from the start of the Hop-by- - Hop or Destination Options header, for n = 1, 2, 4, or 8. - - o Another desirable feature is that the Hop-by-Hop or Destination - Options header take up as little space as possible, subject to - the requirement that the header be an integer multiple of 8 - octets long. - - o It may be assumed that, when either of the option-bearing - headers are present, they carry a very small number of options, - usually only one. - - These assumptions suggest the following approach to laying out the - fields of an option: order the fields from smallest to largest, with - no interior padding, then derive the alignment requirement for the - entire option based on the alignment requirement of the largest field - (up to a maximum alignment of 8 octets). 
This approach is - illustrated in the following examples: - - Example 1 - - If an option X required two data fields, one of length 8 octets and - one of length 4 octets, it would be laid out as follows: - - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Option Type=X |Opt Data Len=12| - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + 8-octet field + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - - - - - -Deering & Hinden Standards Track [Page 32] - -RFC 2460 IPv6 Specification December 1998 - - - Its alignment requirement is 8n+2, to ensure that the 8-octet field - starts at a multiple-of-8 offset from the start of the enclosing - header. A complete Hop-by-Hop or Destination Options header - containing this one option would look as follows: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len=1 | Option Type=X |Opt Data Len=12| - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + 8-octet field + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Example 2 - - If an option Y required three data fields, one of length 4 octets, - one of length 2 octets, and one of length 1 octet, it would be laid - out as follows: - - +-+-+-+-+-+-+-+-+ - | Option Type=Y | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |Opt Data Len=7 | 1-octet field | 2-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Its alignment requirement is 4n+3, to ensure that the 4-octet field - starts at a multiple-of-4 offset from the start of the enclosing - header. 
A complete Hop-by-Hop or Destination Options header - containing this one option would look as follows: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len=1 | Pad1 Option=0 | Option Type=Y | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |Opt Data Len=7 | 1-octet field | 2-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PadN Option=1 |Opt Data Len=2 | 0 | 0 | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - - - - - - -Deering & Hinden Standards Track [Page 33] - -RFC 2460 IPv6 Specification December 1998 - - - Example 3 - - A Hop-by-Hop or Destination Options header containing both options X - and Y from Examples 1 and 2 would have one of the two following - formats, depending on which option appeared first: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len=3 | Option Type=X |Opt Data Len=12| - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + 8-octet field + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PadN Option=1 |Opt Data Len=1 | 0 | Option Type=Y | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |Opt Data Len=7 | 1-octet field | 2-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PadN Option=1 |Opt Data Len=2 | 0 | 0 | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len=3 | Pad1 Option=0 | Option Type=Y | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |Opt Data 
Len=7 | 1-octet field | 2-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | PadN Option=1 |Opt Data Len=4 | 0 | 0 | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 0 | 0 | Option Type=X |Opt Data Len=12| - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | 4-octet field | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - + 8-octet field + - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - - - - - - - - -Deering & Hinden Standards Track [Page 34] - -RFC 2460 IPv6 Specification December 1998 - - -Security Considerations - - The security features of IPv6 are described in the Security - Architecture for the Internet Protocol [RFC-2401]. - -Acknowledgments - - The authors gratefully acknowledge the many helpful suggestions of - the members of the IPng working group, the End-to-End Protocols - research group, and the Internet Community At Large. - -Authors' Addresses - - Stephen E. Deering - Cisco Systems, Inc. - 170 West Tasman Drive - San Jose, CA 95134-1706 - USA - - Phone: +1 408 527 8213 - Fax: +1 408 527 8254 - EMail: deering@cisco.com - - - Robert M. Hinden - Nokia - 232 Java Drive - Sunnyvale, CA 94089 - USA - - Phone: +1 408 990-2004 - Fax: +1 408 743-5677 - EMail: hinden@iprg.nokia.com - -References - - [RFC-2401] Kent, S. and R. Atkinson, "Security Architecture for the - Internet Protocol", RFC 2401, November 1998. - - [RFC-2402] Kent, S. and R. Atkinson, "IP Authentication Header", - RFC 2402, November 1998. - - [RFC-2406] Kent, S. and R. Atkinson, "IP Encapsulating Security - Protocol (ESP)", RFC 2406, November 1998. - - [ICMPv6] Conta, A. and S. Deering, "ICMP for the Internet - Protocol Version 6 (IPv6)", RFC 2463, December 1998. 
- - - - -Deering & Hinden Standards Track [Page 35] - -RFC 2460 IPv6 Specification December 1998 - - - [ADDRARCH] Hinden, R. and S. Deering, "IP Version 6 Addressing - Architecture", RFC 2373, July 1998. - - [RFC-1981] McCann, J., Mogul, J. and S. Deering, "Path MTU - Discovery for IP version 6", RFC 1981, August 1996. - - [RFC-791] Postel, J., "Internet Protocol", STD 5, RFC 791, - September 1981. - - [RFC-1700] Reynolds, J. and J. Postel, "Assigned Numbers", STD 2, - RFC 1700, October 1994. See also: - http://www.iana.org/numbers.html - - [RFC-1661] Simpson, W., "The Point-to-Point Protocol (PPP)", STD - 51, RFC 1661, July 1994. - -CHANGES SINCE RFC-1883 - - This memo has the following changes from RFC-1883. Numbers identify - the Internet-Draft version in which the change was made. - - 02) Removed all references to jumbograms and the Jumbo Payload - option (moved to a separate document). - - 02) Moved most of Flow Label description from section 6 to (new) - Appendix A. - - 02) In Flow Label description, now in Appendix A, corrected maximum - Flow Label value from FFFFFF to FFFFF (i.e., one less "F") due - to reduction of size of Flow Label field from 24 bits to 20 - bits. - - 02) Renumbered (relettered?) the previous Appendix A to be Appendix - B. - - 02) Changed the wording of the Security Considerations section to - avoid dependency loop between this spec and the IPsec specs. - - 02) Updated R. Hinden's email address and company affiliation. - - - -------------------------------------------------------- - - 01) In section 3, changed field name "Class" to "Traffic Class" and - increased its size from 4 to 8 bits. Decreased size of Flow - Label field from 24 to 20 bits to compensate for increase in - Traffic Class field. 
- - - - -Deering & Hinden Standards Track [Page 36] - -RFC 2460 IPv6 Specification December 1998 - - - 01) In section 4.1, restored the order of the Authentication Header - and the ESP header, which were mistakenly swapped in the 00 - version of this memo. - - 01) In section 4.4, deleted the Strict/Loose Bit Map field and the - strict routing functionality from the Type 0 Routing header, and - removed the restriction on number of addresses that may be - carried in the Type 0 Routing header (was limited to 23 - addresses, because of the size of the strict/loose bit map). - - 01) In section 5, changed the minimum IPv6 MTU from 576 to 1280 - octets, and added a recommendation that links with configurable - MTU (e.g., PPP links) be configured to have an MTU of at least - 1500 octets. - - 01) In section 5, deleted the requirement that a node must not send - fragmented packets that reassemble to more than 1500 octets - without knowledge of the destination reassembly buffer size, and - replaced it with a recommendation that upper-layer protocols or - applications should not do that. - - 01) Replaced reference to the IPv4 Path MTU Discovery spec (RFC- - 1191) with reference to the IPv6 Path MTU Discovery spec (RFC- - 1981), and deleted the Notes at the end of section 5 regarding - Path MTU Discovery, since those details are now covered by RFC- - 1981. - - 01) In section 6, deleted specification of "opportunistic" flow - set-up, and removed all references to the 6-second maximum - lifetime for opportunistically established flow state. - - 01) In section 7, deleted the provisional description of the - internal structure and semantics of the Traffic Class field, and - specified that such descriptions be provided in separate - documents. - - -------------------------------------------------------- - - 00) In section 4, corrected the Code value to indicate "unrecognized - Next Header type encountered" in an ICMP Parameter Problem - message (changed from 2 to 1). 
- - 00) In the description of the Payload Length field in section 3, and - of the Jumbo Payload Length field in section 4.3, made it - clearer that extension headers are included in the payload - length count. - - - - - -Deering & Hinden Standards Track [Page 37] - -RFC 2460 IPv6 Specification December 1998 - - - 00) In section 4.1, swapped the order of the Authentication header - and the ESP header. (NOTE: this was a mistake, and the change - was undone in version 01.) - - 00) In section 4.2, made it clearer that options are identified by - the full 8-bit Option Type, not by the low-order 5 bits of an - Option Type. Also specified that the same Option Type numbering - space is used for both Hop-by-Hop Options and Destination - Options headers. - - 00) In section 4.4, added a sentence requiring that nodes processing - a Routing header must send an ICMP Packet Too Big message in - response to a packet that is too big to fit in the next hop link - (rather than, say, performing fragmentation). - - 00) Changed the name of the IPv6 Priority field to "Class", and - replaced the previous description of Priority in section 7 with - a description of the Class field. Also, excluded this field - from the set of fields that must remain the same for all packets - in the same flow, as specified in section 6. - - 00) In the pseudo-header in section 8.1, changed the name of the - "Payload Length" field to "Upper-Layer Packet Length". Also - clarified that, in the case of protocols that carry their own - length info (like non-jumbogram UDP), it is the upper-layer- - derived length, not the IP-layer-derived length, that is used in - the pseudo-header. - - 00) Added section 8.4, specifying that upper-layer protocols, when - responding to a received packet that carried a Routing header, - must not include the reverse of the Routing header in the - response packet(s) unless the received Routing header was - authenticated. - - 00) Fixed some typos and grammatical errors. 
- - 00) Authors' contact info updated. - - -------------------------------------------------------- - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 38] - -RFC 2460 IPv6 Specification December 1998 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- - - - - - - - - - - - - - - - - - - - - - - - -Deering & Hinden Standards Track [Page 39] - diff --git a/kernel/picotcp/RFC/rfc2474.txt b/kernel/picotcp/RFC/rfc2474.txt deleted file mode 100644 index 06c8bab..0000000 --- a/kernel/picotcp/RFC/rfc2474.txt +++ /dev/null @@ -1,1123 +0,0 @@ - - - - - - -Network Working Group K. Nichols -Request for Comments: 2474 Cisco Systems -Obsoletes: 1455, 1349 S. Blake -Category: Standards Track Torrent Networking Technologies - F. Baker - Cisco Systems - D. Black - EMC Corporation - December 1998 - - - Definition of the Differentiated Services Field (DS Field) - in the IPv4 and IPv6 Headers - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1998). All Rights Reserved. - -Abstract - - Differentiated services enhancements to the Internet protocol are - intended to enable scalable service discrimination in the Internet - without the need for per-flow state and signaling at every hop. A - variety of services may be built from a small, well-defined set of - building blocks which are deployed in network nodes. The services - may be either end-to-end or intra-domain; they include both those - that can satisfy quantitative performance requirements (e.g., peak - bandwidth) and those based on relative performance (e.g., "class" - differentiation). 
Services can be constructed by a combination of: - - - setting bits in an IP header field at network boundaries - (autonomous system boundaries, internal administrative boundaries, - or hosts), - - using those bits to determine how packets are forwarded by the - nodes inside the network, and - - conditioning the marked packets at network boundaries in accordance - with the requirements or rules of each service. - - - - - - -Nichols, et. al. Standards Track [Page 1] - -RFC 2474 Differentiated Services Field December 1998 - - - The requirements or rules of each service must be set through - administrative policy mechanisms which are outside the scope of this - document. A differentiated services-compliant network node includes - a classifier that selects packets based on the value of the DS field, - along with buffer management and packet scheduling mechanisms capable - of delivering the specific packet forwarding treatment indicated by - the DS field value. Setting of the DS field and conditioning of the - temporal behavior of marked packets need only be performed at network - boundaries and may vary in complexity. - - This document defines the IP header field, called the DS (for - differentiated services) field. In IPv4, it defines the layout of - the TOS octet; in IPv6, the Traffic Class octet. In addition, a base - set of packet forwarding treatments, or per-hop behaviors, is - defined. - - For a more complete understanding of differentiated services, see - also the differentiated services architecture [ARCH]. - -Table of Contents - - 1. Introduction ................................................. 3 - 2. Terminology Used in This Document ............................ 5 - 3. Differentiated Services Field Definition ..................... 7 - 4. Historical Codepoint Definitions and PHB Requirements ........ 9 - 4.1 A Default PHB ............................................. 9 - 4.2 Once and Future IP Precedence Field Use ................... 
10 - 4.2.1 IP Precedence History and Evolution in Brief .......... 10 - 4.2.2 Subsuming IP Precedence into Class Selector .......... 11 - Codepoints - 4.2.2.1 The Class Selector Codepoints ..................... 11 - 4.2.2.2 The Class Selector PHB Requirements ............... 11 - 4.2.2.3 Using the Class Selector PHB Requirements ......... 12 - for IP Precedence Compatibility - 4.2.2.4 Example Mechanisms for Implementing Class ......... 12 - Selector Compliant PHB Groups - 4.3 Summary ................................................... 13 - 5. Per-Hop Behavior Standardization Guidelines .................. 13 - 6. IANA Considerations .......................................... 14 - 7. Security Considerations ...................................... 15 - 7.1 Theft and Denial of Service ............................... 15 - 7.2 IPsec and Tunneling Interactions .......................... 16 - 8. Acknowledgments .............................................. 17 - 9. References ................................................... 17 - Authors' Addresses ............................................... 19 - Full Copyright Statement ......................................... 20 - - - - - -Nichols, et. al. Standards Track [Page 2] - -RFC 2474 Differentiated Services Field December 1998 - - -1. Introduction - - Differentiated services are intended to provide a framework and - building blocks to enable deployment of scalable service - discrimination in the Internet. The differentiated services approach - aims to speed deployment by separating the architecture into two - major components, one of which is fairly well-understood and the - other of which is just beginning to be understood. In this, we are - guided by the original design of the Internet where the decision was - made to separate the forwarding and routing components. Packet - forwarding is the relatively simple task that needs to be performed - on a per-packet basis as quickly as possible. 
Forwarding uses the - packet header to find an entry in a routing table that determines the - packet's output interface. Routing sets the entries in that table - and may need to reflect a range of transit and other policies as well - as to keep track of route failures. Routing tables are maintained as - a background process to the forwarding task. Further, routing is the - more complex task and it has continued to evolve over the past 20 - years. - - Analogously, the differentiated services architecture contains two - main components. One is the fairly well-understood behavior in the - forwarding path and the other is the more complex and still emerging - background policy and allocation component that configures parameters - used in the forwarding path. The forwarding path behaviors include - the differential treatment an individual packet receives, as - implemented by queue service disciplines and/or queue management - disciplines. These per-hop behaviors are useful and required in - network nodes to deliver differentiated treatment of packets no - matter how we construct end-to-end or intra-domain services. Our - focus is on the general semantics of the behaviors rather than the - specific mechanisms used to implement them since these behaviors will - evolve less rapidly than the mechanisms. - - Per-hop behaviors and mechanisms to select them on a per-packet basis - can be deployed in network nodes today and it is this aspect of the - differentiated services architecture that is being addressed first. - In addition, the forwarding path may require that some monitoring, - policing, and shaping be done on the network traffic designated for - "special" treatment in order to enforce requirements associated with - the delivery of the special treatment. Mechanisms for this kind of - traffic conditioning are also fairly well-understood. 
The wide - deployment of such traffic conditioners is also important to enable - the construction of services, though their actual use in constructing - services may evolve over time. - - - - - - -Nichols, et. al. Standards Track [Page 3] - -RFC 2474 Differentiated Services Field December 1998 - - - The configuration of network elements with respect to which packets - get special treatment and what kinds of rules are to be applied to - the use of resources is much less well-understood. Nevertheless, it - is possible to deploy useful differentiated services in networks by - using simple policies and static configurations. As described in - [ARCH], there are a number of ways to compose per-hop behaviors and - traffic conditioners to create services. In the process, additional - experience is gained that will guide more complex policies and - allocations. The basic behaviors in the forwarding path can remain - the same while this component of the architecture evolves. - Experiences with the construction of such services will continue for - some time, thus we avoid standardizing this construction as it is - premature. Further, much of the details of service construction are - covered by legal agreements between different business entities and - we avoid this as it is very much outside the scope of the IETF. - - This document concentrates on the forwarding path component. In the - packet forwarding path, differentiated services are realized by - mapping the codepoint contained in a field in the IP packet header to - a particular forwarding treatment, or per-hop behavior (PHB), at each - network node along its path. The codepoints may be chosen from a set - of mandatory values defined later in this document, from a set of - recommended values to be defined in future documents, or may have - purely local meaning. 
PHBs are expected to be implemented by - employing a range of queue service and/or queue management - disciplines on a network node's output interface queue: for example - weighted round-robin (WRR) queue servicing or drop-preference queue - management. - - Marking is performed by traffic conditioners at network boundaries, - including the edges of the network (first-hop router or source host) - and administrative boundaries. Traffic conditioners may include the - primitives of marking, metering, policing and shaping (these - mechanisms are described in [ARCH]). Services are realized by the - use of particular packet classification and traffic conditioning - mechanisms at boundaries coupled with the concatenation of per-hop - behaviors along the transit path of the traffic. A goal of the - differentiated services architecture is to specify these building - blocks for future extensibility, both of the number and type of the - building blocks and of the services built from them. - - Terminology used in this memo is defined in Sec. 2. The - differentiated services field definition (DS field) is given in Sec. - 3. In Sec. 4, we discuss the desire for partial backwards - compatibility with current use of the IPv4 Precedence field. As a - solution, we introduce Class Selector Codepoints and Class Selector - - - - - -Nichols, et. al. Standards Track [Page 4] - -RFC 2474 Differentiated Services Field December 1998 - - - Compliant PHBs. Sec. 5 presents guidelines for per-hop behavior - standardization. Sec. 6 discusses guidelines for allocation of - codepoints. Sec. 7 covers security considerations. - - This document is a concise description of the DS field and its uses. - It is intended to be read along with the differentiated services - architecture [ARCH]. - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in [RFC2119]. - -2. 
Terminology Used in This Document - - Behavior Aggregate: a collection of packets with the same codepoint - crossing a link in a particular direction. The terms "aggregate" and - "behavior aggregate" are used interchangeably in this document. - - Classifier: an entity which selects packets based on the content of - packet headers according to defined rules. - - Class Selector Codepoint: any of the eight codepoints in the range ' - xxx000' (where 'x' may equal '0' or '1'). Class Selector Codepoints - are discussed in Sec. 4.2.2. - - Class Selector Compliant PHB: a per-hop behavior satisfying the Class - Selector PHB Requirements specified in Sec. 4.2.2.2. - - Codepoint: a specific value of the DSCP portion of the DS field. - Recommended codepoints SHOULD map to specific, standardized PHBs. - Multiple codepoints MAY map to the same PHB. - - Differentiated Services Boundary: the edge of a DS domain, where - classifiers and traffic conditioners are likely to be deployed. A - differentiated services boundary can be further sub-divided into - ingress and egress nodes, where the ingress/egress nodes are the - downstream/upstream nodes of a boundary link in a given traffic - direction. A differentiated services boundary typically is found at - the ingress to the first-hop differentiated services-compliant router - (or network node) that a host's packets traverse, or at the egress of - the last-hop differentiated services-compliant router or network node - that packets traverse before arriving at a host. This is sometimes - referred to as the boundary at a leaf router. A differentiated - services boundary may be co-located with a host, subject to local - policy. Also DS boundary. - - Differentiated Services-Compliant: in compliance with the - requirements specified in this document. Also DS-compliant. - - - -Nichols, et. al. 
Standards Track [Page 5] - -RFC 2474 Differentiated Services Field December 1998 - - - Differentiated Services Domain: a contiguous portion of the Internet - over which a consistent set of differentiated services policies are - administered in a coordinated fashion. A differentiated services - domain can represent different administrative domains or autonomous - systems, different trust regions, different network technologies - (e.g., cell/frame), hosts and routers, etc. Also DS domain. - - Differentiated Services Field: the IPv4 header TOS octet or the IPv6 - Traffic Class octet when interpreted in conformance with the - definition given in this document. Also DS field. - - Mechanism: The implementation of one or more per-hop behaviors - according to a particular algorithm. - - Microflow: a single instance of an application-to-application flow of - packets which is identified by source address, destination address, - protocol id, and source port, destination port (where applicable). - - Per-hop Behavior (PHB): a description of the externally observable - forwarding treatment applied at a differentiated services-compliant - node to a behavior aggregate. The description of a PHB SHOULD be - sufficiently detailed to allow the construction of predictable - services, as documented in [ARCH]. - - Per-hop Behavior Group: a set of one or more PHBs that can only be - meaningfully specified and implemented simultaneously, due to a - common constraint applying to all PHBs in the set such as a queue - servicing or queue management policy. Also PHB Group. - - Traffic Conditioning: control functions that can be applied to a - behavior aggregate, application flow, or other operationally useful - subset of traffic, e.g., routing updates. These MAY include - metering, policing, shaping, and packet marking. 
Traffic - conditioning is used to enforce agreements between domains and to - condition traffic to receive a differentiated service within a domain - by marking packets with the appropriate codepoint in the DS field and - by monitoring and altering the temporal characteristics of the - aggregate where necessary. See [ARCH]. - - Traffic Conditioner: an entity that performs traffic conditioning - functions and which MAY contain meters, policers, shapers, and - markers. Traffic conditioners are typically deployed in DS boundary - nodes (i.e., not in interior nodes of a DS domain). - - Service: a description of the overall treatment of (a subset of) a - customer's traffic across a particular domain, across a set of - interconnected DS domains, or end-to-end. Service descriptions are - covered by administrative policy and services are constructed by - - - -Nichols, et. al. Standards Track [Page 6] - -RFC 2474 Differentiated Services Field December 1998 - - - applying traffic conditioning to create behavior aggregates which - experience a known PHB at each node within the DS domain. Multiple - services can be supported by a single per-hop behavior used in - concert with a range of traffic conditioners. - - To summarize, classifiers and traffic conditioners are used to select - which packets are to be added to behavior aggregates. Aggregates - receive differentiated treatment in a DS domain and traffic - conditioners MAY alter the temporal characteristics of the aggregate - to conform to some requirements. A packet's DS field is used to - designate the packet's behavior aggregate and is subsequently used to - determine which forwarding treatment the packet receives. A behavior - aggregate classifier which can select a PHB, for example a - differential output queue servicing discipline, based on the - codepoint in the DS field SHOULD be included in all network nodes in - a DS domain. 
The classifiers and traffic conditioners at DS - boundaries are configured in accordance with some service - specification, a matter of administrative policy outside the scope of - this document. - - Additional differentiated services definitions are given in [ARCH]. - -3. Differentiated Services Field Definition - - A replacement header field, called the DS field, is defined, which is - intended to supersede the existing definitions of the IPv4 TOS octet - [RFC791] and the IPv6 Traffic Class octet [IPv6]. - - Six bits of the DS field are used as a codepoint (DSCP) to select the - PHB a packet experiences at each node. A two-bit currently unused - (CU) field is reserved and its definition and interpretation are - outside the scope of this document. The value of the CU bits are - ignored by differentiated services-compliant nodes when determining - the per-hop behavior to apply to a received packet. - - The DS field structure is presented below: - - - 0 1 2 3 4 5 6 7 - +---+---+---+---+---+---+---+---+ - | DSCP | CU | - +---+---+---+---+---+---+---+---+ - - DSCP: differentiated services codepoint - CU: currently unused - - - - - - -Nichols, et. al. Standards Track [Page 7] - -RFC 2474 Differentiated Services Field December 1998 - - - In a DSCP value notation 'xxxxxx' (where 'x' may equal '0' or '1') - used in this document, the left-most bit signifies bit 0 of the DS - field (as shown above), and the right-most bit signifies bit 5. - - Implementors should note that the DSCP field is six bits wide. DS- - compliant nodes MUST select PHBs by matching against the entire 6-bit - DSCP field, e.g., by treating the value of the field as a table index - which is used to select a particular packet handling mechanism which - has been implemented in that device. The value of the CU field MUST - be ignored by PHB selection. The DSCP field is defined as an - unstructured field to facilitate the definition of future per-hop - behaviors. 
- - With some exceptions noted below, the mapping of codepoints to PHBs - MUST be configurable. A DS-compliant node MUST support the logical - equivalent of a configurable mapping table from codepoints to PHBs. - PHB specifications MUST include a recommended default codepoint, - which MUST be unique for codepoints in the standard space (see Sec. - 6). Implementations should support the recommended codepoint-to-PHB - mappings in their default configuration. Operators may choose to use - different codepoints for a PHB, either in addition to or in place of - the recommended default. Note that if operators do so choose, re- - marking of DS fields may be necessary at administrative boundaries - even if the same PHBs are implemented on both sides of the boundary. - - See [ARCH] for further discussion of re-marking. - - The exceptions to general configurability are for codepoints 'xxx000' - and are noted in Secs. 4.2.2 and 4.3. - - Packets received with an unrecognized codepoint SHOULD be forwarded - as if they were marked for the Default behavior (see Sec. 4), and - their codepoints should not be changed. Such packets MUST NOT cause - the network node to malfunction. - - The structure of the DS field shown above is incompatible with the - existing definition of the IPv4 TOS octet in [RFC791]. The - presumption is that DS domains protect themselves by deploying re- - marking boundary nodes, as should networks using the RFC 791 - Precedence designations. Correct operational procedure SHOULD follow - [RFC791], which states: "If the actual use of these precedence - designations is of concern to a particular network, it is the - responsibility of that network to control the access to, and use of, - those precedence designations." Validating the value of the DS field - at DS boundaries is sensible in any case since an upstream node can - easily set it to any arbitrary value. 
DS domains that are not - isolated by suitably configured boundary nodes may deliver - unpredictable service. - - - -Nichols, et. al. Standards Track [Page 8] - -RFC 2474 Differentiated Services Field December 1998 - - - Nodes MAY rewrite the DS field as needed to provide a desired local - or end-to-end service. Specifications of DS field translations at DS - boundaries are the subject of service level agreements between - providers and users, and are outside the scope of this document. - Standardized PHBs allow providers to build their services from a - well-known set of packet forwarding treatments that can be expected - to be present in the equipment of many vendors. - -4. Historical Codepoint Definitions and PHB Requirements - - The DS field will have a limited backwards compatibility with current - practice, as described in this section. Backwards compatibility is - addressed in two ways. First, there are per-hop behaviors that are - already in widespread use (e.g., those satisfying the IPv4 Precedence - queueing requirements specified in [RFC1812]), and we wish to permit - their continued use in DS-compliant nodes. In addition, there are - some codepoints that correspond to historical use of the IP - Precedence field and we reserve these codepoints to map to PHBs that - meet the general requirements specified in Sec. 4.2.2.2, though the - specific differentiated services PHBs mapped to by those codepoints - MAY have additional specifications. - - No attempt is made to maintain backwards compatibility with the "DTR" - or TOS bits of the IPv4 TOS octet, as defined in [RFC791]. - -4.1 A Default PHB - - A "default" PHB MUST be available in a DS-compliant node. This is - the common, best-effort forwarding behavior available in existing - routers as standardized in [RFC1812]. When no other agreements are - in place, it is assumed that packets belong to this aggregate. 
Such - packets MAY be sent into a network without adhering to any particular - rules and the network will deliver as many of these packets as - possible and as soon as possible, subject to other resource policy - constraints. A reasonable implementation of this PHB would be a - queueing discipline that sends packets of this aggregate whenever the - output link is not required to satisfy another PHB. A reasonable - policy for constructing services would ensure that the aggregate was - not "starved". This could be enforced by a mechanism in each node - that reserves some minimal resources (e.g., buffers, bandwidth) for - Default behavior aggregates. This permits senders that are not - differentiated services-aware to continue to use the network in the - same manner as today. The impact of the introduction of - differentiated services into a domain on the service expectations of - its customers and peers is a complex matter involving policy - decisions by the domain and is outside the scope of this document. - The RECOMMENDED codepoint for the Default PHB is the bit pattern ' - 000000'; the value '000000' MUST map to a PHB that meets these - - - -Nichols, et. al. Standards Track [Page 9] - -RFC 2474 Differentiated Services Field December 1998 - - - specifications. The codepoint chosen for Default behavior is - compatible with existing practice [RFC791]. Where a codepoint is not - mapped to a standardized or local use PHB, it SHOULD be mapped to the - Default PHB. - - A packet initially marked for the Default behavior MAY be re-marked - with another codepoint as it passes a boundary into a DS domain so - that it will be forwarded using a different PHB within that domain, - possibly subject to some negotiated agreement between the peering - domains. - -4.2 Once and Future IP Precedence Field Use - - We wish to maintain some form of backward compatibility with present - uses of the IP Precedence Field: bits 0-2 of the IPv4 TOS octet.
- Routers exist that use the IP Precedence field to select different - per-hop forwarding treatments in a similar way to the use proposed - here for the DSCP field. Thus, a simple prototype differentiated - services architecture can be quickly deployed by appropriately - configuring these routers. Further, IP systems today understand the - location of the IP Precedence field, and thus if these bits are used - in a similar manner as DS-compliant equipment is deployed, - significant failures are not likely during early deployment. In - other words, strict DS-compliance need not be ubiquitous even within - a single service provider's network if bits 0-2 of the DSCP field are - employed in a manner similar to, or subsuming, the deployed uses of - the IP Precedence field. - -4.2.1 IP Precedence History and Evolution in Brief - - The IP Precedence field is something of a forerunner of the DS field. - IP Precedence, and the IP Precedence Field, were first defined in - [RFC791]. The values that the three-bit IP Precedence Field might - take were assigned to various uses, including network control - traffic, routing traffic, and various levels of privilege. The least - level of privilege was deemed "routine traffic". In [RFC791], the - notion of Precedence was defined broadly as "An independent measure - of the importance of this datagram." Not all values of the IP - Precedence field were assumed to have meaning across boundaries, for - instance "The Network Control precedence designation is intended to - be used within a network only. The actual use and control of that - designation is up to each network." [RFC791] - - Although early BBN IMPs implemented the Precedence feature, early - commercial routers and UNIX IP forwarding code generally did not. As - networks became more complex and customer requirements grew, - commercial router vendors developed ways to implement various kinds - of queueing services including priority queueing, which were - - - -Nichols, et. al. 
Standards Track [Page 10] - -RFC 2474 Differentiated Services Field December 1998 - - - generally based on policies encoded in filters in the routers, which - examined IP addresses, IP protocol numbers, TCP or UDP ports, and - other header fields. IP Precedence was and is among the options such - filters can examine. - - In short, IP Precedence is widely deployed and widely used, if not in - exactly the manner intended in [RFC791]. This was recognized in - [RFC1122], which states that while the use of the IP Precedence field - is valid, the specific assignment of the priorities in [RFC791] were - merely historical. - -4.2.2 Subsuming IP Precedence into Class Selector Codepoints - - A specification of the packet forwarding treatments selected by the - IP Precedence field today would have to be quite general; probably - not specific enough to build predictable services from in the - differentiated services framework. To preserve partial backwards - compatibility with known current uses of the IP Precedence field - without sacrificing future flexibility, we have taken the approach of - describing minimum requirements on a set of PHBs that are compatible - with most of the deployed forwarding treatments selected by the IP - Precedence field. In addition, we give a set of codepoints that MUST - map to PHBs meeting these minimum requirements. The PHBs mapped to - by these codepoints MAY have a more detailed list of specifications - in addition to the required ones stated here. Other codepoints MAY - map to these same PHBs. We refer to this set of codepoints as the - Class Selector Codepoints, and the minimum requirements for PHBs that - these codepoints may map to are called the Class Selector PHB - Requirements. - -4.2.2.1 The Class Selector Codepoints - - The DS field values of 'xxx000|xx', or DSCP = 'xxx000' and CU - subfield unspecified, are reserved as a set of Class Selector - Codepoints.
PHBs which are mapped to by these codepoints MUST - satisfy the Class Selector PHB requirements in addition to preserving - the Default PHB requirement on codepoint '000000' (Sec. 4.1). - -4.2.2.2 The Class Selector PHB Requirements - - We refer to a Class Selector Codepoint with a larger numerical value - than another Class Selector Codepoint as having a higher relative - order while a Class Selector Codepoint with a smaller numerical value - than another Class Selector Codepoint is said to have a lower - relative order. The set of PHBs mapped to by the eight Class - Selector Codepoints MUST yield at least two independently forwarded - classes of traffic, and PHBs selected by a Class Selector Codepoint - - - -Nichols, et. al. Standards Track [Page 11] - -RFC 2474 Differentiated Services Field December 1998 - - - SHOULD give packets a probability of timely forwarding that is not - lower than that given to packets marked with a Class Selector - codepoint of lower relative order, under reasonable operating - conditions and traffic loads. A discarded packet is considered to be - an extreme case of untimely forwarding. In addition, PHBs selected - by codepoints '11x000' MUST give packets a preferential forwarding - treatment by comparison to the PHB selected by codepoint '000000' to - preserve the common usage of IP Precedence values '110' and '111' for - routing traffic. - - Further, PHBs selected by distinct Class Selector Codepoints SHOULD - be independently forwarded; that is, packets marked with different - Class Selector Codepoints MAY be re-ordered. A network node MAY - enforce limits on the amount of the node's resources that can be - utilized by each of these PHBs. - - PHB groups whose specification satisfy these requirements are - referred to as Class Selector Compliant PHBs. - - The Class Selector PHB Requirements on codepoint '000000' are - compatible with those listed for the Default PHB in Sec. 4.1. 
- -4.2.2.3 Using the Class Selector PHB Requirements for IP Precedence - Compatibility - - A DS-compliant network node can be deployed with a set of one or more - Class Selector Compliant PHB groups. This document states that the - set of codepoints 'xxx000' MUST map to such a set of PHBs. As it is - also possible to map multiple codepoints to the same PHB, the vendor - or the network administrator MAY configure the network node to map - codepoints to PHBs irrespective of bits 3-5 of the DSCP field to - yield a network that is compatible with historical IP Precedence use. - Thus, for example, codepoint '011010' would map to the same PHB as - codepoint '011000'. - -4.2.2.4 Example Mechanisms for Implementing Class Selector Compliant - PHB Groups - - Class Selector Compliant PHBs can be realized by a variety of - mechanisms, including strict priority queueing, weighted fair - queueing (WFQ), WRR, or variants [RPS, HPFQA, DRR], or Class-Based - Queuing [CBQ]. The distinction between PHBs and mechanisms is - described in more detail in Sec. 5. - - It is important to note that these mechanisms might be available - through other PHBs (standardized or not) that are available in a - particular vendor's equipment. For example, future documents may - standardize a Strict Priority Queueing PHB group for a set of - - - -Nichols, et. al. Standards Track [Page 12] - -RFC 2474 Differentiated Services Field December 1998 - - - recommended codepoints. A network administrator might configure - those routers to select the Strict Priority Queueing PHBs with - codepoints 'xxx000' in conformance with the requirements of this - document. - - As a further example, another vendor might employ a CBQ mechanism in - its routers. 
The CBQ mechanism could be used to implement the Strict - Priority Queueing PHBs as well as a set of Class Selector Compliant - PHBs with a wider range of features than would be available in a set - of PHBs that did no more than meet the minimum Class Selector PHB - requirements. - -4.3 Summary - - This document defines codepoints 'xxx000' as the Class Selector - codepoints, where PHBs selected by these codepoints MUST meet the - Class Selector PHB Requirements described in Sec. 4.2.2.2. This is - done to preserve a useful level of backward compatibility with - current uses of the IP Precedence field in the Internet without - unduly limiting future flexibility. In addition, codepoint '000000' - is used as the Default PHB value for the Internet and, as such, is - not configurable. The remaining seven non-zero Class Selector - codepoints are configurable only to the extent that they map to PHBs - that meet the requirements in Sec. 4.2.2.2. - -5. Per-Hop Behavior Standardization Guidelines - - The behavioral characteristics of a PHB are to be standardized, and - not the particular algorithms or the mechanisms used to implement - them. A node may have a (possibly large) set of parameters that can - be used to control how packets are scheduled onto an output interface - (e.g., N separate queues with settable priorities, queue lengths, - round-robin weights, drop algorithm, drop preference weights and - thresholds, etc). To illustrate the distinction between a PHB and a - mechanism, we point out that Class Selector Compliant PHBs might be - implemented by several mechanisms, including: strict priority - queueing, WFQ, WRR, or variants [HPFQA, RPS, DRR], or CBQ [CBQ], in - isolation or in combination. - - PHBs may be specified individually, or as a group (a single PHB is a - special case of a PHB group). 
A PHB group usually consists of a set - of two or more PHBs that can only be meaningfully specified and - implemented simultaneously, due to a common constraint applying to - each PHB within the group, such as a queue servicing or queue - management policy. A PHB group specification SHOULD describe - conditions under which a packet might be re-marked to select another - PHB within the group. It is RECOMMENDED that PHB implementations do - not introduce any packet re-ordering within a microflow. PHB group - - - -Nichols, et. al. Standards Track [Page 13] - -RFC 2474 Differentiated Services Field December 1998 - - - specifications MUST identify any possible packet re-ordering - implications which may occur for each individual PHB, and which may - occur if different packets within a microflow are marked for - different PHBs within the group. - - Only those per-hop behaviors that are not described by an existing - PHB standard, and have been implemented, deployed, and shown to be - useful, SHOULD be standardized. Since current experience with - differentiated services is quite limited, it is premature to - hypothesize the exact specification of these per-hop behaviors. - - Each standardized PHB MUST have an associated RECOMMENDED codepoint, - allocated out of a space of 32 codepoints (see Sec. 6). This - specification has left room in the codepoint space to allow for - evolution, thus the defined space ('xxx000') is intentionally sparse. - - Network equipment vendors are free to offer whatever parameters and - capabilities are deemed useful or marketable. When a particular, - standardized PHB is implemented in a node, a vendor MAY use any - algorithm that satisfies the definition of the PHB according to the - standard. The node's capabilities and its particular configuration - determine the different ways that packets can be treated. 
- - Service providers are not required to use the same node mechanisms or - configurations to enable service differentiation within their - networks, and are free to configure the node parameters in whatever - way that is appropriate for their service offerings and traffic - engineering objectives. Over time certain common per-hop behaviors - are likely to evolve (i.e., ones that are particularly useful for - implementing end-to-end services) and these MAY be associated with - particular EXP/LU PHB codepoints in the DS field, allowing use across - domain boundaries (see Sec. 6). These PHBs are candidates for future - standardization. - - It is RECOMMENDED that standardized PHBs be specified in accordance - with the guidelines set out in [ARCH]. - -6. IANA Considerations - - The DSCP field within the DS field is capable of conveying 64 - distinct codepoints. The codepoint space is divided into three pools - for the purpose of codepoint assignment and management: a pool of 32 - RECOMMENDED codepoints (Pool 1) to be assigned by Standards Action as - defined in [CONS], a pool of 16 codepoints (Pool 2) to be reserved - for experimental or Local Use (EXP/LU) as defined in [CONS], and a - pool of 16 codepoints (Pool 3) which are initially available for - experimental or local use, but which should be preferentially - - - - -Nichols, et. al. Standards Track [Page 14] - -RFC 2474 Differentiated Services Field December 1998 - - - utilized for standardized assignments if Pool 1 is ever exhausted. - The pools are defined in the following table (where 'x' refers to - either '0' or '1'): - - Pool Codepoint space Assignment Policy - ---- --------------- ----------------- - - 1 xxxxx0 Standards Action - 2 xxxx11 EXP/LU - 3 xxxx01 EXP/LU (*) - - (*) may be utilized for future Standards Action allocations as - necessary - - This document assigns eight RECOMMENDED codepoints ('xxx000') which - are drawn from Pool 1 above. 
These codepoints MUST be mapped, not to - specific PHBs, but to PHBs that meet "at least" the requirements set - forth in Sec. 4.2.2.2 to provide a minimal level of backwards - compatibility with IP Precedence as defined in [RFC791] and as - deployed in some current equipment. - -7. Security Considerations - - This section considers security issues raised by the introduction of - differentiated services, primarily the potential for denial-of- - service attacks, and the related potential for theft of service by - unauthorized traffic (Section 7.1). Section 7.2 addresses the - operation of differentiated services in the presence of IPsec - including its interaction with IPsec tunnel mode and other tunnelling - protocols. See [ARCH] for more extensive treatment of the security - concerns raised by the overall differentiated services architecture. - -7.1 Theft and Denial of Service - - The primary goal of differentiated services is to allow different - levels of service to be provided for traffic streams on a common - network infrastructure. A variety of techniques may be used to - achieve this, but the end result will be that some packets receive - different (e.g., better) service than others. The mapping of network - traffic to the specific behaviors that result in different (e.g., - better or worse) service is indicated primarily by the DS codepoint, - and hence an adversary may be able to obtain better service by - modifying the codepoint to values indicating behaviors used for - enhanced services or by injecting packets with such codepoint values. - Taken to its limits, such theft-of-service becomes a denial-of- - service attack when the modified or injected traffic depletes the - resources available to forward it and other traffic streams. - - - - -Nichols, et. al. 
Standards Track [Page 15] - -RFC 2474 Differentiated Services Field December 1998 - - - The defense against this class of theft- and denial-of-service - attacks consists of the combination of traffic conditioning at DS - domain boundaries with security and integrity of the network - infrastructure within a DS domain. DS domain boundary nodes MUST - ensure that all traffic entering the domain is marked with codepoint - values appropriate to the traffic and the domain, remarking the - traffic with new codepoint values if necessary. These DS boundary - nodes are the primary line of defense against theft- and denial-of- - service attacks based on modified codepoints, as success of any such - attack indicates that the codepoints used by the attacking traffic - were inappropriate. An important instance of a boundary node is that - any traffic-originating node within a DS domain is the initial - boundary node for that traffic. Interior nodes in a DS domain rely - on DS codepoints to associate traffic with the forwarding PHBs, and - are NOT REQUIRED to check codepoint values before using them. As a - result, the interior nodes depend on the correct operation of the DS - domain boundary nodes to prevent the arrival of traffic with - inappropriate codepoints or in excess of provisioned levels that - would disrupt operation of the domain. - -7.2 IPsec and Tunnelling Interactions - - The IPsec protocol, as defined in [ESP, AH], does not include the IP - header's DS field in any of its cryptographic calculations (in the - case of tunnel mode, it is the outer IP header's DS field that is not - included). Hence modification of the DS field by a network node has - no effect on IPsec's end-to-end security, because it cannot cause any - IPsec integrity check to fail. 
As a consequence, IPsec does not - provide any defense against an adversary's modification of the DS - field (i.e., a man-in-the-middle attack), as the adversary's - modification will also have no effect on IPsec's end-to-end security. - - IPsec's tunnel mode provides security for the encapsulated IP - header's DS field. A tunnel mode IPsec packet contains two IP - headers: an outer header supplied by the tunnel ingress node and an - encapsulated inner header supplied by the original source of the - packet. When an IPsec tunnel is hosted (in whole or in part) on a - differentiated services network, the intermediate network nodes - operate on the DS field in the outer header. At the tunnel egress - node, IPsec processing includes removing the outer header and - forwarding the packet (if required) using the inner header. The - IPsec protocol REQUIRES that the inner header's DS field not be - changed by this decapsulation processing to ensure that modifications - to the DS field cannot be used to launch theft- or denial-of-service - attacks across an IPsec tunnel endpoint. This document makes no - change to that requirement. If the inner IP header has not been - processed by a DS boundary node for the tunnel egress node's DS - - - - -Nichols, et. al. Standards Track [Page 16] - -RFC 2474 Differentiated Services Field December 1998 - - - domain, the tunnel egress node is the boundary node for traffic - exiting the tunnel, and hence MUST ensure that the resulting traffic - has appropriate DS codepoints. - - When IPsec tunnel egress decapsulation processing includes a - sufficiently strong cryptographic integrity check of the encapsulated - packet (where sufficiency is determined by local security policy), - the tunnel egress node can safely assume that the DS field in the - inner header has the same value as it had at the tunnel ingress node. 
- An important consequence is that otherwise insecure links within a DS - domain can be secured by a sufficiently strong IPsec tunnel. This - analysis and its implications apply to any tunnelling protocol that - performs integrity checks, but the level of assurance of the inner - header's DS field depends on the strength of the integrity check - performed by the tunnelling protocol. In the absence of sufficient - assurance for a tunnel that may transit nodes outside the current DS - domain (or is otherwise vulnerable), the encapsulated packet MUST be - treated as if it had arrived at a boundary from outside the DS - domain. - -8. Acknowledgements - - The authors would like to acknowledge the Differentiated Services - Working Group for discussions which helped shape this document. - -9. References - - [AH] Kent, S. and R. Atkinson, "IP Authentication Header", - RFC 2402, November 1998. - - [ARCH] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. - and W. Weiss, "An Architecture for Differentiated - Services", RFC 2475, December 1998. - - [CBQ] S. Floyd and V. Jacobson, "Link-sharing and Resource - Management Models for Packet Networks", IEEE/ACM - Transactions on Networking, Vol. 3 no. 4, pp. 365-386, - August 1995. - - [CONS] Narten, T. and H. Alvestrand, "Guidelines for Writing an - IANA Considerations Section in RFCs", RFC 2434, October - 1998. - - [DRR] M. Shreedhar and G. Varghese, Efficient Fair Queueing - using Deficit Round Robin", Proc. ACM SIGCOMM 95, 1995. - - - - - - -Nichols, et. al. Standards Track [Page 17] - -RFC 2474 Differentiated Services Field December 1998 - - - [ESP] Kent, S. and R. Atkinson, "IP Encapsulating Security - Payload (ESP)", RFC 2406, November 1998. - - [HPFQA] J. Bennett and Hui Zhang, "Hierarchical Packet Fair - Queueing Algorithms", Proc. ACM SIGCOMM 96, August 1996. - - [IPv6] Deering, S. and R. Hinden, "Internet Protocol, Version 6 - (IPv6) Specification", RFC 2460, December 1998. 
- - [RFC791] Postel, J., Editor, "Internet Protocol", STD 5, RFC 791, - September 1981. - - [RFC1122] Braden, R., "Requirements for Internet hosts - - communication layers", STD 3, RFC 1122, October 1989. - - [RFC1812] Baker, F., Editor, "Requirements for IP Version 4 - Routers", RFC 1812, June 1995. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RPS] D. Stiliadis and A. Varma, "Rate-Proportional Servers: A - Design Methodology for Fair Queueing Algorithms", IEEE/ - ACM Trans. on Networking, April 1998. - - - - - - - - - - - - - - - - - - - - - - - - - - - -Nichols, et. al. Standards Track [Page 18] - -RFC 2474 Differentiated Services Field December 1998 - - -Authors' Addresses - - Kathleen Nichols - Cisco Systems - 170 West Tasman Drive - San Jose, CA 95134-1706 - - Phone: +1-408-525-4857 - EMail: kmn@cisco.com - - - Steven Blake - Torrent Networking Technologies - 3000 Aerial Center, Suite 140 - Morrisville, NC 27560 - - Phone: +1-919-468-8466 x232 - EMail: slblake@torrentnet.com - - - Fred Baker - Cisco Systems - 519 Lado Drive - Santa Barbara, CA 93111 - - Phone: +1-408-526-4257 - EMail: fred@cisco.com - - - David L. Black - EMC Corporation - 35 Parkwood Drive - Hopkinton, MA 01748 - - Phone: +1-508-435-1000 x76140 - EMail: black_david@emc.com - - - - - - - - - - - - - - - -Nichols, et. al. Standards Track [Page 19] - -RFC 2474 Differentiated Services Field December 1998 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1998). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Nichols, et. al. Standards Track [Page 20] - diff --git a/kernel/picotcp/RFC/rfc2488.txt b/kernel/picotcp/RFC/rfc2488.txt deleted file mode 100644 index 7fa5700..0000000 --- a/kernel/picotcp/RFC/rfc2488.txt +++ /dev/null @@ -1,1067 +0,0 @@ - - - - - - -Network Working Group M. Allman -Request for Comments: 2488 NASA Lewis/Sterling Software -BCP: 28 D. Glover -Category: Best Current Practice NASA Lewis - L. Sanchez - BBN - January 1999 - - Enhancing TCP Over Satellite Channels - using Standard Mechanisms - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1999). All Rights Reserved. - -Abstract - - The Transmission Control Protocol (TCP) provides reliable delivery of - data across any network path, including network paths containing - satellite channels. 
While TCP works over satellite channels there - are several IETF standardized mechanisms that enable TCP to more - effectively utilize the available capacity of the network path. This - document outlines some of these TCP mitigations. At this time, all - mitigations discussed in this document are IETF standards track - mechanisms (or are compliant with IETF standards). - -1. Introduction - - Satellite channel characteristics may have an effect on the way - transport protocols, such as the Transmission Control Protocol (TCP) - [Pos81], behave. When protocols, such as TCP, perform poorly, - channel utilization is low. While the performance of a transport - protocol is important, it is not the only consideration when - constructing a network containing satellite links. For example, data - link protocol, application protocol, router buffer size, queueing - discipline and proxy location are some of the considerations that - must be taken into account. However, this document focuses on - improving TCP in the satellite environment and non-TCP considerations - are left for another document. Finally, there have been many - satellite mitigations proposed and studied by the research community. - While these mitigations may prove useful and safe for shared networks - in the future, this document only considers TCP mechanisms which are - - - -Allman, et. al. Best Current Practice [Page 1] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - currently well understood and on the IETF standards track (or are - compliant with IETF standards). - - This document is divided up as follows: Section 2 provides a brief - outline of the characteristics of satellite networks. Section 3 - outlines two non-TCP mechanisms that enable TCP to more effectively - utilize the available bandwidth. Section 4 outlines the TCP - mechanisms defined by the IETF that may benefit satellite networks. 
- Finally, Section 5 provides a summary of what modern TCP - implementations should include to be considered "satellite friendly". - -2. Satellite Characteristics - - There is an inherent delay in the delivery of a message over a - satellite link due to the finite speed of light and the altitude of - communications satellites. - - Many communications satellites are located at Geostationary Orbit - (GSO) with an altitude of approximately 36,000 km [Sta94]. At this - altitude the orbit period is the same as the Earth's rotation period. - Therefore, each ground station is always able to "see" the orbiting - satellite at the same position in the sky. The propagation time for - a radio signal to travel twice that distance (corresponding to a - ground station directly below the satellite) is 239.6 milliseconds - (ms) [Mar78]. For ground stations at the edge of the view area of - the satellite, the distance traveled is 2 x 41,756 km for a total - propagation delay of 279.0 ms [Mar78]. These delays are for one - ground station-to-satellite-to-ground station route (or "hop"). - Therefore, the propagation delay for a message and the corresponding - reply (one round-trip time or RTT) could be at least 558 ms. The RTT - is not based solely on satellite propagation time. The RTT will be - increased by other factors in the network, such as the transmission - time and propagation time of other links in the network path and - queueing delay in gateways. Furthermore, the satellite propagation - delay will be longer if the link includes multiple hops or if - intersatellite links are used. As satellites become more complex and - include on-board processing of signals, additional delay may be - added. - - Other orbits are possible for use by communications satellites - including Low Earth Orbit (LEO) [Stu95] [Mon98] and Medium Earth - Orbit (MEO) [Mar78]. The lower orbits require the use of - constellations of satellites for constant coverage. 
In other words, - as one satellite leaves the ground station's sight, another satellite - appears on the horizon and the channel is switched to it. The - propagation delay to a LEO orbit ranges from several milliseconds - when communicating with a satellite directly overhead, to as much as - 80 ms when the satellite is on the horizon. These systems are more - - - -Allman, et. al. Best Current Practice [Page 2] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - likely to use intersatellite links and have variable path delay - depending on routing through the network. - - Satellite channels are dominated by two fundamental characteristics, - as described below: - - NOISE - The strength of a radio signal falls in proportion to the - square of the distance traveled. For a satellite link the - distance is large and so the signal becomes weak before reaching - its destination. This results in a low signal-to-noise ratio. - Some frequencies are particularly susceptible to atmospheric - effects such as rain attenuation. For mobile applications, - satellite channels are especially susceptible to multi-path - distortion and shadowing (e.g., blockage by buildings). Typical - bit error rates (BER) for a satellite link today are on the order - of 1 error per 10 million bits (1 x 10^-7) or less frequent. - Advanced error control coding (e.g., Reed Solomon) can be added to - existing satellite services and is currently being used by many - services. Satellite error performance approaching fiber will - become more common as advanced error control coding is used in new - systems. However, many legacy satellite systems will continue to - exhibit higher BER than newer satellite systems and terrestrial - channels. - - BANDWIDTH - The radio spectrum is a limited natural resource, - hence there is a restricted amount of bandwidth available to - satellite systems which is typically controlled by licenses. 
This - scarcity makes it difficult to trade bandwidth to solve other - design problems. Typical carrier frequencies for current, point- - to-point, commercial, satellite services are 6 GHz (uplink) and 4 - GHz (downlink), also known as C band, and 14/12 GHz (Ku band). A - new service at 30/20 GHz (Ka band) will be emerging over the next - few years. Satellite-based radio repeaters are known as - transponders. Traditional C band transponder bandwidth is - typically 36 MHz to accommodate one color television channel (or - 1200 voice channels). Ku band transponders are typically around - 50 MHz. Furthermore, one satellite may carry a few dozen - transponders. - - Not only is bandwidth limited by nature, but the allocations for - commercial communications are limited by international agreements so - that this scarce resource can be used fairly by many different - applications. - - - - - - - - -Allman, et. al. Best Current Practice [Page 3] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - Although satellites have certain disadvantages when compared to fiber - channels (e.g., cannot be easily repaired, rain fades, etc.), they - also have certain advantages over terrestrial links. First, - satellites have a natural broadcast capability. This gives - satellites an advantage for multicast applications. Next, satellites - can reach geographically remote areas or countries that have little - terrestrial infrastructure. A related advantage is the ability of - satellite links to reach mobile users. - - Satellite channels have several characteristics that differ from most - terrestrial channels. These characteristics may degrade the - performance of TCP. 
These characteristics include: - - Long feedback loop - - Due to the propagation delay of some satellite channels (e.g., - approximately 250 ms over a geosynchronous satellite) it may take - a long time for a TCP sender to determine whether or not a packet - has been successfully received at the final destination. This - delay hurts interactive applications such as telnet, as well as - some of the TCP congestion control algorithms (see section 4). - - Large delay*bandwidth product - - The delay*bandwidth product (DBP) defines the amount of data a - protocol should have "in flight" (data that has been transmitted, - but not yet acknowledged) at any one time to fully utilize the - available channel capacity. The delay used in this equation is - the RTT and the bandwidth is the capacity of the bottleneck link - in the network path. Because the delay in some satellite - environments is large, TCP will need to keep a large number of - packets "in flight" (that is, sent but not yet acknowledged) . - - Transmission errors - - Satellite channels exhibit a higher bit-error rate (BER) than - typical terrestrial networks. TCP uses all packet drops as - signals of network congestion and reduces its window size in an - attempt to alleviate the congestion. In the absence of knowledge - about why a packet was dropped (congestion or corruption), TCP - must assume the drop was due to network congestion to avoid - congestion collapse [Jac88] [FF98]. Therefore, packets dropped - due to corruption cause TCP to reduce the size of its sliding - window, even though these packet drops do not signal congestion in - the network. - - - - - - -Allman, et. al. Best Current Practice [Page 4] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - Asymmetric use - - Due to the expense of the equipment used to send data to - satellites, asymmetric satellite networks are often constructed. 
- For example, a host connected to a satellite network will send all - outgoing traffic over a slow terrestrial link (such as a dialup - modem channel) and receive incoming traffic via the satellite - channel. Another common situation arises when both the incoming - and outgoing traffic are sent using a satellite link, but the - uplink has less available capacity than the downlink due to the - expense of the transmitter required to provide a high bandwidth - back channel. This asymmetry may have an impact on TCP - performance. - - Variable Round Trip Times - - In some satellite environments, such as low-Earth orbit (LEO) - constellations, the propagation delay to and from the satellite - varies over time. Whether or not this will have an impact on TCP - performance is currently an open question. - - Intermittent connectivity - - In non-GSO satellite orbit configurations, TCP connections must be - transferred from one satellite to another or from one ground - station to another from time to time. This handoff may cause - packet loss if not properly performed. - - Most satellite channels only exhibit a subset of the above - characteristics. Furthermore, satellite networks are not the only - environments where the above characteristics are found. However, - satellite networks do tend to exhibit more of the above problems or - the above problems are aggravated in the satellite environment. The - mechanisms outlined in this document should benefit most networks, - especially those with one or more of the above characteristics (e.g., - gigabit networks have large delay*bandwidth products). - -3. Lower Level Mitigations - - It is recommended that those utilizing satellite channels in their - networks should use the following two non-TCP mechanisms which can - increase TCP performance. These mechanisms are Path MTU Discovery - and forward error correction (FEC) and are outlined in the following - two sections. 
- - The data link layer protocol employed over a satellite channel can - have a large impact on performance of higher layer protocols. While - beyond the scope of this document, those constructing satellite - - - -Allman, et. al. Best Current Practice [Page 5] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - networks should tune these protocols in an appropriate manner to - ensure that the data link protocol does not limit TCP performance. - In particular, data link layer protocols often implement a flow - control window and retransmission mechanisms. When the link level - window size is too small, performance will suffer just as when the - TCP window size is too small (see section 4.3 for a discussion of - appropriate window sizes). The impact that link level - retransmissions have on TCP transfers is not currently well - understood. The interaction between TCP retransmissions and link - level retransmissions is a subject for further research. - -3.1 Path MTU Discovery - - Path MTU discovery [MD90] is used to determine the maximum packet - size a connection can use on a given network path without being - subjected to IP fragmentation. The sender transmits a packet that is - the appropriate size for the local network to which it is connected - (e.g., 1500 bytes on an Ethernet) and sets the IP "don't fragment" - (DF) bit. If the packet is too large to be forwarded without being - fragmented to a given channel along the network path, the gateway - that would normally fragment the packet and forward the fragments - will instead return an ICMP message to the originator of the packet. - The ICMP message will indicate that the original segment could not be - transmitted without being fragmented and will also contain the size - of the largest packet that can be forwarded by the gateway. - Additional information from the IESG regarding Path MTU discovery is - available in [Kno93]. 
- - Path MTU Discovery allows TCP to use the largest possible packet - size, without incurring the cost of fragmentation and reassembly. - Large packets reduce the packet overhead by sending more data bytes - per overhead byte. As outlined in section 4, increasing TCP's - congestion window is segment based, rather than byte based and - therefore, larger segments enable TCP senders to increase the - congestion window more rapidly, in terms of bytes, than smaller - segments. - - The disadvantage of Path MTU Discovery is that it may cause a delay - before TCP is able to start sending data. For example, assume a - packet is sent with the DF bit set and one of the intervening - gateways (G1) returns an ICMP message indicating that it cannot - forward the segment. At this point, the sending host reduces the - packet size per the ICMP message returned by G1 and sends another - packet with the DF bit set. The packet will be forwarded by G1, - however this does not ensure all subsequent gateways in the network - path will be able to forward the segment. If a second gateway (G2) - cannot forward the segment it will return an ICMP message to the - transmitting host and the process will be repeated. Therefore, path - - - -Allman, et. al. Best Current Practice [Page 6] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - MTU discovery can spend a large amount of time determining the - maximum allowable packet size on the network path between the sender - and receiver. Satellite delays can aggravate this problem (consider - the case when the channel between G1 and G2 is a satellite link). - However, in practice, Path MTU Discovery does not consume a large - amount of time due to wide support of common MTU values. - Additionally, caching MTU values may be able to eliminate discovery - time in many instances, although the exact implementation of this and - the aging of cached values remains an open problem. 
- - The relationship between BER and segment size is likely to vary - depending on the error characteristics of the given channel. This - relationship deserves further study, however with the use of good - forward error correction (see section 3.2) larger segments should - provide better performance, as with any network [MSMO97]. While the - exact method for choosing the best MTU for a satellite link is - outside the scope of this document, the use of Path MTU Discovery is - recommended to allow TCP to use the largest possible MTU over the - satellite channel. - -3.2 Forward Error Correction - - A loss event in TCP is always interpreted as an indication of - congestion and always causes TCP to reduce its congestion window - size. Since the congestion window grows based on returning - acknowledgments (see section 4), TCP spends a long time recovering - from loss when operating in satellite networks. When packet loss is - due to corruption, rather than congestion, TCP does not need to - reduce its congestion window size. However, at the present time - detecting corruption loss is a research issue. - - Therefore, for TCP to operate efficiently, the channel - characteristics should be such that nearly all loss is due to network - congestion. The use of forward error correction coding (FEC) on a - satellite link should be used to improve the bit-error rate (BER) of - the satellite channel. Reducing the BER is not always possible in - satellite environments. However, since TCP takes a long time to - recover from lost packets because the long propagation delay imposed - by a satellite link delays feedback from the receiver [PS97], the - link should be made as clean as possible to prevent TCP connections - from receiving false congestion signals. This document does not make - a specific BER recommendation for TCP other than it should be as low - as possible. - - FEC should not be expected to fix all problems associated with noisy - satellite links. 
There are some situations where FEC cannot be - expected to solve the noise problem (such as military jamming, deep - space missions, noise caused by rain fade, etc.). In addition, link - - - -Allman, et. al. Best Current Practice [Page 7] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - outages can also cause problems in satellite systems that do not - occur as frequently in terrestrial networks. Finally, FEC is not - without cost. FEC requires additional hardware and uses some of the - available bandwidth. It can add delay and timing jitter due to the - processing time of the coder/decoder. - - Further research is needed into mechanisms that allow TCP to - differentiate between congestion induced drops and those caused by - corruption. Such a mechanism would allow TCP to respond to - congestion in an appropriate manner, as well as repairing corruption - induced loss without reducing the transmission rate. However, in the - absence of such a mechanism packet loss must be assumed to indicate - congestion to preserve network stability. Incorrectly interpreting - loss as caused by corruption and not reducing the transmission rate - accordingly can lead to congestive collapse [Jac88] [FF98]. - -4. Standard TCP Mechanisms - - This section outlines TCP mechanisms that may be necessary in - satellite or hybrid satellite/terrestrial networks to better utilize - the available capacity of the link. These mechanisms may also be - needed to fully utilize fast terrestrial channels. Furthermore, - these mechanisms do not fundamentally hurt performance in a shared - terrestrial network. Each of the following sections outlines one - mechanism and why that mechanism may be needed. - -4.1 Congestion Control - - To avoid generating an inappropriate amount of network traffic for - the current network conditions, during a connection TCP employs four - congestion control mechanisms [Jac88] [Jac90] [Ste97]. 
These - algorithms are slow start, congestion avoidance, fast retransmit and - fast recovery. These algorithms are used to adjust the amount of - unacknowledged data that can be injected into the network and to - retransmit segments dropped by the network. - - TCP senders use two state variables to accomplish congestion control. - The first variable is the congestion window (cwnd). This is an upper - bound on the amount of data the sender can inject into the network - before receiving an acknowledgment (ACK). The value of cwnd is - limited to the receiver's advertised window. The congestion window - is increased or decreased during the transfer based on the inferred - amount of congestion present in the network. The second variable is - the slow start threshold (ssthresh). This variable determines which - algorithm is used to increase the value of cwnd. If cwnd is less - than ssthresh the slow start algorithm is used to increase the value - of cwnd. However, if cwnd is greater than or equal to (or just - greater than in some TCP implementations) ssthresh the congestion - - - -Allman, et. al. Best Current Practice [Page 8] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - avoidance algorithm is used. The initial value of ssthresh is the - receiver's advertised window size. Furthermore, the value of - ssthresh is set when congestion is detected. - - The four congestion control algorithms are outlined below, followed - by a brief discussion of the impact of satellite environments on - these algorithms. - -4.1.1 Slow Start and Congestion Avoidance - - When a host begins sending data on a TCP connection the host has no - knowledge of the current state of the network between itself and the - data receiver. In order to avoid transmitting an inappropriately - large burst of traffic, the data sender is required to use the slow - start algorithm at the beginning of a transfer [Jac88] [Bra89] - [Ste97]. 
Slow start begins by initializing cwnd to 1 segment - (although an IETF experimental mechanism would increase the size of - the initial window to roughly 4 Kbytes [AFP98]) and ssthresh to the - receiver's advertised window. This forces TCP to transmit one - segment and wait for the corresponding ACK. For each ACK that is - received during slow start, the value of cwnd is increased by 1 - segment. For example, after the first ACK is received cwnd will be 2 - segments and the sender will be allowed to transmit 2 data packets. - This continues until cwnd meets or exceeds ssthresh (or, in some - implementations when cwnd equals ssthresh), or loss is detected. - - When the value of cwnd is greater than or equal to (or equal to in - certain implementations) ssthresh the congestion avoidance algorithm - is used to increase cwnd [Jac88] [Bra89] [Ste97]. This algorithm - increases the size of cwnd more slowly than does slow start. - Congestion avoidance is used to slowly probe the network for - additional capacity. During congestion avoidance, cwnd is increased - by 1/cwnd for each incoming ACK. Therefore, if one ACK is received - for every data segment, cwnd will increase by roughly 1 segment per - round-trip time (RTT). - - The slow start and congestion control algorithms can force poor - utilization of the available channel bandwidth when using long-delay - satellite networks [All97]. For example, transmission begins with - the transmission of one segment. After the first segment is - transmitted the data sender is forced to wait for the corresponding - ACK. When using a GSO satellite this leads to an idle time of - roughly 500 ms when no useful work is being accomplished. Therefore, - slow start takes more real time over GSO satellites than on typical - terrestrial channels. This holds for congestion avoidance, as well - [All97]. This is precisely why Path MTU Discovery is an important - algorithm. 
While the number of segments we transmit is determined by - the congestion control algorithms, the size of these segments is not. - - - -Allman, et. al. Best Current Practice [Page 9] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - Therefore, using larger packets will enable TCP to send more data per - segment which yields better channel utilization. - -4.1.2 Fast Retransmit and Fast Recovery - - TCP's default mechanism to detect dropped segments is a timeout - [Pos81]. In other words, if the sender does not receive an ACK for a - given packet within the expected amount of time the segment will be - retransmitted. The retransmission timeout (RTO) is based on - observations of the RTT. In addition to retransmitting a segment - when the RTO expires, TCP also uses the lost segment as an indication - of congestion in the network. In response to the congestion, the - value of ssthresh is set to half of the cwnd and the value of cwnd is - then reduced to 1 segment. This triggers the use of the slow start - algorithm to increase cwnd until the value of cwnd reaches half of - its value when congestion was detected. After the slow start phase, - the congestion avoidance algorithm is used to probe the network for - additional capacity. - - TCP ACKs always acknowledge the highest in-order segment that has - arrived. Therefore an ACK for segment X also effectively ACKs all - segments < X. Furthermore, if a segment arrives out-of-order the ACK - triggered will be for the highest in-order segment, rather than the - segment that just arrived. For example, assume segment 11 has been - dropped somewhere in the network and segment 12 arrives at the - receiver. The receiver is going to send a duplicate ACK covering - segment 10 (and all previous segments). - - The fast retransmit algorithm uses these duplicate ACKs to detect - lost segments. 
If 3 duplicate ACKs arrive at the data originator, - TCP assumes that a segment has been lost and retransmits the missing - segment without waiting for the RTO to expire. After a segment is - resent using fast retransmit, the fast recovery algorithm is used to - adjust the congestion window. First, the value of ssthresh is set to - half of the value of cwnd. Next, the value of cwnd is halved. - Finally, the value of cwnd is artificially increased by 1 segment for - each duplicate ACK that has arrived. The artificial inflation can be - done because each duplicate ACK represents 1 segment that has left - the network. When the cwnd permits, TCP is able to transmit new - data. This allows TCP to keep data flowing through the network at - half the rate it was when loss was detected. When an ACK for the - retransmitted packet arrives, the value of cwnd is reduced back to - ssthresh (half the value of cwnd when the congestion was detected). - - - - - - - - -Allman, et. al. Best Current Practice [Page 10] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - Generally, fast retransmit can resend only one segment per window of - data sent. When multiple segments are lost in a given window of - data, one of the segments will be resent using fast retransmit and - the rest of the dropped segments must usually wait for the RTO to - expire, which causes TCP to revert to slow start. - - TCP's response to congestion differs based on the way the congestion - is detected. If the retransmission timer causes a packet to be - resent, TCP drops ssthresh to half the current cwnd and reduces the - value of cwnd to 1 segment (thus triggering slow start). However, if - a segment is resent via fast retransmit both ssthresh and cwnd are - set to half the current value of cwnd and congestion avoidance is - used to send new data. 
The difference is that when retransmitting - due to duplicate ACKs, TCP knows that packets are still flowing - through the network and can therefore infer that the congestion is - not that bad. However, when resending a packet due to the expiration - of the retransmission timer, TCP cannot infer anything about the - state of the network and therefore must proceed conservatively by - sending new data using the slow start algorithm. - - Note that the fast retransmit/fast recovery algorithms, as discussed - above can lead to a phenomenon that allows multiple fast retransmits - per window of data [Flo94]. This can reduce the size of the - congestion window multiple times in response to a single "loss - event". The problem is particularly noticeable in connections that - utilize large congestion windows, since these connections are able to - inject enough new segments into the network during recovery to - trigger the multiple fast retransmits. Reducing cwnd multiple times - for a single loss event may hurt performance [GJKFV98]. - - The best way to improve the fast retransmit/fast recovery algorithms - is to use a selective acknowledgment (SACK) based algorithm for loss - recovery. As discussed below, these algorithms are generally able to - quickly recover from multiple lost segments without needlessly - reducing the value of cwnd. In the absence of SACKs, the fast - retransmit and fast recovery algorithms should be used. Fixing these - algorithms to achieve better performance in the face of multiple fast - retransmissions is beyond the scope of this document. Therefore, TCP - implementers are advised to implement the current version of fast - retransmit/fast recovery outlined in RFC 2001 [Ste97] or subsequent - versions of RFC 2001. 
- -4.1.3 Congestion Control in Satellite Environment - - The above algorithms have a negative impact on the performance of - individual TCP connection's performance because the algorithms slowly - probe the network for additional capacity, which in turn wastes - bandwidth. This is especially true over long-delay satellite - - - -Allman, et. al. Best Current Practice [Page 11] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - channels because of the large amount of time required for the sender - to obtain feedback from the receiver [All97] [AHKO97]. However, the - algorithms are necessary to prevent congestive collapse in a shared - network [Jac88]. Therefore, the negative impact on a given - connection is more than offset by the benefit to the entire network. - -4.2 Large TCP Windows - - The standard maximum TCP window size (65,535 bytes) is not adequate - to allow a single TCP connection to utilize the entire bandwidth - available on some satellite channels. TCP throughput is limited by - the following formula [Pos81]: - - throughput = window size / RTT - - Therefore, using the maximum window size of 65,535 bytes and a - geosynchronous satellite channel RTT of 560 ms [Kru95] the maximum - throughput is limited to: - - throughput = 65,535 bytes / 560 ms = 117,027 bytes/second - - Therefore, a single standard TCP connection cannot fully utilize, for - example, T1 rate (approximately 192,000 bytes/second) GSO satellite - channels. However, TCP has been extended to support larger windows - [JBB92]. The window scaling options outlined in [JBB92] should be - used in satellite environments, as well as the companion algorithms - PAWS (Protection Against Wrapped Sequence space) and RTTM (Round-Trip - Time Measurements). - - It should be noted that for a satellite link shared among many flows, - large windows may not be necessary. 
For instance, two long-lived TCP - connections each using a window of 65,535 bytes, as in the above - example, can fully utilize a T1 GSO satellite channel. - - Using large windows often requires both client and server - applications or TCP stacks to be hand tuned (usually by an expert) to - utilize large windows. Research into operating system mechanisms - that are able to adjust the buffer capacity as dictated by the - current network conditions is currently underway [SMM98]. This will - allow stock TCP implementations and applications to better utilize - the capacity provided by the underlying network. - -4.3 Acknowledgment Strategies - - There are two standard methods that can be used by TCP receivers to - generated acknowledgments. The method outlined in [Pos81] generates - an ACK for each incoming segment. [Bra89] states that hosts SHOULD - use "delayed acknowledgments". Using this algorithm, an ACK is - - - -Allman, et. al. Best Current Practice [Page 12] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - generated for every second full-sized segment, or if a second full- - size segment does not arrive within a given timeout (which must not - exceed 500 ms). The congestion window is increased based on the - number of incoming ACKs and delayed ACKs reduce the number of ACKs - being sent by the receiver. Therefore, cwnd growth occurs much more - slowly when using delayed ACKs compared to the case when the receiver - ACKs each incoming segment [All98]. - - A tempting "fix" to the problem caused by delayed ACKs is to simply - turn the mechanism off and let the receiver ACK each incoming - segment. However, this is not recommended. First, [Bra89] says that - a TCP receiver SHOULD generate delayed ACKs. And, second, increasing - the number of ACKs by a factor of two in a shared network may have - consequences that are not yet understood. 
Therefore, disabling - delayed ACKs is still a research issue and thus, at this time TCP - receivers should continue to generate delayed ACKs, per [Bra89]. - -4.4 Selective Acknowledgments - - Selective acknowledgments (SACKs) [MMFR96] allow TCP receivers to - inform TCP senders exactly which packets have arrived. SACKs allow - TCP to recover more quickly from lost segments, as well as avoiding - needless retransmissions. - - The fast retransmit algorithm can generally only repair one loss per - window of data. When multiple losses occur, the sender generally - must rely on a timeout to determine which segment needs to be - retransmitted next. While waiting for a timeout, the data segments - and their acknowledgments drain from the network. In the absence of - incoming ACKs to clock new segments into the network, the sender must - use the slow start algorithm to restart transmission. As discussed - above, the slow start algorithm can be time consuming over satellite - channels. When SACKs are employed, the sender is generally able to - determine which segments need to be retransmitted in the first RTT - following loss detection. This allows the sender to continue to - transmit segments (retransmissions and new segments, if appropriate) - at an appropriate rate and therefore sustain the ACK clock. This - avoids a costly slow start period following multiple lost segments. - Generally SACK is able to retransmit all dropped segments within the - first RTT following the loss detection. [MM96] and [FF96] discuss - specific congestion control algorithms that rely on SACK information - to determine which segments need to be retransmitted and when it is - appropriate to transmit those segments. Both these algorithms follow - the basic principles of congestion control outlined in [Jac88] and - reduce the window by half when congestion is detected. - - - - - - -Allman, et. al. 
Best Current Practice [Page 13] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - -5. Mitigation Summary - - Table 1 summarizes the mechanisms that have been discussed in this - document. Those mechanisms denoted "Recommended" are IETF standards - track mechanisms that are recommended by the authors for use in - networks containing satellite channels. Those mechanisms marked - "Required' have been defined by the IETF as required for hosts using - the shared Internet [Bra89]. Along with the section of this document - containing the discussion of each mechanism, we note where the - mechanism needs to be implemented. The codes listed in the last - column are defined as follows: "S" for the data sender, "R" for the - data receiver and "L" for the satellite link. - - Mechanism Use Section Where - +------------------------+-------------+------------+--------+ - | Path-MTU Discovery | Recommended | 3.1 | S | - | FEC | Recommended | 3.2 | L | - | TCP Congestion Control | | | | - | Slow Start | Required | 4.1.1 | S | - | Congestion Avoidance | Required | 4.1.1 | S | - | Fast Retransmit | Recommended | 4.1.2 | S | - | Fast Recovery | Recommended | 4.1.2 | S | - | TCP Large Windows | | | | - | Window Scaling | Recommended | 4.2 | S,R | - | PAWS | Recommended | 4.2 | S,R | - | RTTM | Recommended | 4.2 | S,R | - | TCP SACKs | Recommended | 4.4 | S,R | - +------------------------+-------------+------------+--------+ - Table 1 - - Satellite users should check with their TCP vendors (implementors) to - ensure the recommended mechanisms are supported in their stack in - current and/or future versions. Alternatively, the Pittsburgh - Supercomputer Center tracks TCP implementations and which extensions - they support, as well as providing guidance on tuning various TCP - implementations [PSC]. 
- - Research into improving the efficiency of TCP over satellite channels - is ongoing and will be summarized in a planned memo along with other - considerations, such as satellite network architectures. - -6. Security Considerations - - The authors believe that the recommendations contained in this memo - do not alter the security implications of TCP. However, when using a - broadcast medium such as satellites links to transfer user data - and/or network control traffic, one should be aware of the intrinsic - security implications of such technology. - - - -Allman, et. al. Best Current Practice [Page 14] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - Eavesdropping on network links is a form of passive attack that, if - performed successfully, could reveal critical traffic control - information that would jeopardize the proper functioning of the - network. These attacks could reduce the ability of the network to - provide data transmission services efficiently. Eavesdroppers could - also compromise the privacy of user data, especially if end-to-end - security mechanisms are not in use. While passive monitoring can - occur on any network, the wireless broadcast nature of satellite - links allows reception of signals without physical connection to the - network which enables monitoring to be conducted without detection. - However, it should be noted that the resources needed to monitor a - satellite link are non-trivial. - - Data encryption at the physical and/or link layers can provide secure - communication over satellite channels. However, this still leaves - traffic vulnerable to eavesdropping on networks before and after - traversing the satellite link. Therefore, end-to-end security - mechanisms should be considered. This document does not make any - recommendations as to which security mechanisms should be employed. 
- However, those operating and using satellite networks should survey - the currently available network security mechanisms and choose those - that meet their security requirements. - -Acknowledgments - - This document has benefited from comments from the members of the TCP - Over Satellite Working Group. In particular, we would like to thank - Aaron Falk, Matthew Halsey, Hans Kruse, Matt Mathis, Greg Nakanishi, - Vern Paxson, Jeff Semke, Bill Sepmeier and Eric Travis for their - useful comments about this document. - -References - - [AFP98] Allman, M., Floyd, S. and C. Partridge, "Increasing TCP's - Initial Window", RFC 2414, September 1998. - - [AHKO97] Mark Allman, Chris Hayes, Hans Kruse, and Shawn Ostermann. - TCP Performance Over Satellite Links. In Proceedings of - the 5th International Conference on Telecommunication - Systems, March 1997. - - [All97] Mark Allman. Improving TCP Performance Over Satellite - Channels. Master's thesis, Ohio University, June 1997. - - [All98] Mark Allman. On the Generation and Use of TCP - Acknowledgments. ACM Computer Communication Review, 28(5), - October 1998. - - - - -Allman, et. al. Best Current Practice [Page 15] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - [Bra89] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [FF96] Kevin Fall and Sally Floyd. Simulation-based Comparisons - of Tahoe, Reno and SACK TCP. Computer Communication - Review, July 1996. - - [FF98] Sally Floyd, Kevin Fall. Promoting the Use of End-to-End - Congestion Control in the Internet. Submitted to IEEE - Transactions on Networking. - - [Flo94] S. Floyd, TCP and Successive Fast Retransmits. Technical - report, October 1994. - ftp://ftp.ee.lbl.gov/papers/fastretrans.ps. - - [GJKFV98] Rohit Goyal, Raj Jain, Shiv Kalyanaraman, Sonia Fahmy, - Bobby Vandalore, Improving the Performance of TCP over the - ATM-UBR service, 1998. Sumbitted to Computer - Communications. 
- - [Jac90] Van Jacobson. Modified TCP Congestion Avoidance Algorithm. - Technical Report, LBL, April 1990. - - [JBB92] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for - High Performance", RFC 1323, May 1992. - - [Jac88] Van Jacobson. Congestion Avoidance and Control. In ACM - SIGCOMM, 1988. - - [Kno93] Knowles, S., "IESG Advice from Experience with Path MTU - Discovery", RFC 1435, March 1993. - - [Mar78] James Martin. Communications Satellite Systems. Prentice - Hall, 1978. - - [MD90] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - [MM96] Matt Mathis and Jamshid Mahdavi. Forward Acknowledgment: - Refining TCP Congestion Control. In ACM SIGCOMM, 1996. - - [MMFR96] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. - - [Mon98] M. J. Montpetit. TELEDESIC: Enabling The Global Community - Interaccess. In Proc. of the International Wireless - Symposium, May 1998. - - - - -Allman, et. al. Best Current Practice [Page 16] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - - [MSMO97] M. Mathis, J. Semke, J. Mahdavi, T. Ott, "The Macroscopic - Behavior of the TCP Congestion Avoidance Algorithm", - Computer Communication Review, volume 27, number3, July - 1997. available from - http://www.psc.edu/networking/papers/papers.html. - - [Pos81] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [PS97] Craig Partridge and Tim Shepard. TCP Performance Over - Satellite Links. IEEE Network, 11(5), September/October - 1997. - - [PSC] Jamshid Mahdavi. Enabling High Performance Data Transfers - on Hosts. http://www.psc.edu/networking/perf_tune.html. - - [SMM98] Jeff Semke, Jamshid Mahdavi and Matt Mathis. Automatic TCP - Buffer Tuning. In ACM SIGCOMM, August 1998. To appear. - - [Sta94] William Stallings. Data and Computer Communications. - MacMillian, 4th edition, 1994. 
- - [Ste97] Stevens, W., "TCP Slow Start, Congestion Avoidance, Fast - Retransmit, and Fast Recovery Algorithms", RFC 2001,January - 1997. - - [Stu95] M. A. Sturza. Architecture of the TELEDESIC Satellite - System. In Proceedings of the International Mobile - Satellite Conference, 1995. - - - - - - - - - - - - - - - - - - - - - - -Allman, et. al. Best Current Practice [Page 17] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - -Authors' Addresses - - Mark Allman - NASA Lewis Research Center/Sterling Software - 21000 Brookpark Rd. MS 54-2 - Cleveland, OH 44135 - - Phone: +1 216 433 6586 - EMail: mallman@lerc.nasa.gov - http://roland.lerc.nasa.gov/~mallman - - - Daniel R. Glover - NASA Lewis Research Center - 21000 Brookpark Rd. - Cleveland, OH 44135 - - Phone: +1 216 433 2847 - EMail: Daniel.R.Glover@lerc.nasa.gov - - - Luis A. Sanchez - BBN Technologies - GTE Internetworking - 10 Moulton Street - Cambridge, MA 02140 - USA - - Phone: +1 617 873 3351 - EMail: lsanchez@ir.bbn.com - - - - - - - - - - - - - - - - - - - - - -Allman, et. al. Best Current Practice [Page 18] - -RFC 2488 Enhancing TCP Over Satellite Channels January 1999 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1999). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Allman, et. al. Best Current Practice [Page 19] - diff --git a/kernel/picotcp/RFC/rfc2525.txt b/kernel/picotcp/RFC/rfc2525.txt deleted file mode 100644 index d5f30e8..0000000 --- a/kernel/picotcp/RFC/rfc2525.txt +++ /dev/null @@ -1,3419 +0,0 @@ - - - - - - -Network Working Group V. Paxson -Request for Comments: 2525 Editor -Category: Informational ACIRI / ICSI - M. Allman - NASA Glenn Research Center/Sterling Software - S. Dawson - Real-Time Computing Laboratory - W. Fenner - Xerox PARC - J. Griner - NASA Glenn Research Center - I. Heavens - Spider Software Ltd. - K. Lahey - NASA Ames Research Center/MRJ - J. Semke - Pittsburgh Supercomputing Center - B. Volz - Process Software Corporation - March 1999 - - - Known TCP Implementation Problems - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. 
- -Copyright Notice - - Copyright (C) The Internet Society (1999). All Rights Reserved. - -Table of Contents - - 1. INTRODUCTION....................................................2 - 2. KNOWN IMPLEMENTATION PROBLEMS...................................3 - 2.1 No initial slow start........................................3 - 2.2 No slow start after retransmission timeout...................6 - 2.3 Uninitialized CWND...........................................9 - 2.4 Inconsistent retransmission.................................11 - 2.5 Failure to retain above-sequence data.......................13 - 2.6 Extra additive constant in congestion avoidance.............17 - 2.7 Initial RTO too low.........................................23 - 2.8 Failure of window deflation after loss recovery.............26 - 2.9 Excessively short keepalive connection timeout..............28 - 2.10 Failure to back off retransmission timeout..................31 - - - -Paxson, et. al. Informational [Page 1] - -RFC 2525 TCP Implementation Problems March 1999 - - - 2.11 Insufficient interval between keepalives....................34 - 2.12 Window probe deadlock.......................................36 - 2.13 Stretch ACK violation.......................................40 - 2.14 Retransmission sends multiple packets.......................43 - 2.15 Failure to send FIN notification promptly...................45 - 2.16 Failure to send a RST after Half Duplex Close...............47 - 2.17 Failure to RST on close with data pending...................50 - 2.18 Options missing from TCP MSS calculation....................54 - 3. SECURITY CONSIDERATIONS........................................56 - 4. ACKNOWLEDGEMENTS...............................................56 - 5. REFERENCES.....................................................57 - 6. AUTHORS' ADDRESSES.............................................58 - 7. FULL COPYRIGHT STATEMENT.......................................60 - -1. 
Introduction - - This memo catalogs a number of known TCP implementation problems. - The goal in doing so is to improve conditions in the existing - Internet by enhancing the quality of current TCP/IP implementations. - It is hoped that both performance and correctness issues can be - resolved by making implementors aware of the problems and their - solutions. In the long term, it is hoped that this will provide a - reduction in unnecessary traffic on the network, the rate of - connection failures due to protocol errors, and load on network - servers due to time spent processing both unsuccessful connections - and retransmitted data. This will help to ensure the stability of - the global Internet. - - Each problem is defined as follows: - - Name of Problem - The name associated with the problem. In this memo, the name is - given as a subsection heading. - - Classification - One or more problem categories for which the problem is - classified: "congestion control", "performance", "reliability", - "resource management". - - Description - A definition of the problem, succinct but including necessary - background material. - - Significance - A brief summary of the sorts of environments for which the problem - is significant. - - - - - -Paxson, et. al. Informational [Page 2] - -RFC 2525 TCP Implementation Problems March 1999 - - - Implications - Why the problem is viewed as a problem. - - Relevant RFCs - The RFCs defining the TCP specification with which the problem - conflicts. These RFCs often qualify behavior using terms such as - MUST, SHOULD, MAY, and others written capitalized. See RFC 2119 - for the exact interpretation of these terms. - - Trace file demonstrating the problem - One or more ASCII trace files demonstrating the problem, if - applicable. - - Trace file demonstrating correct behavior - One or more examples of how correct behavior appears in a trace, - if applicable. - - References - References that further discuss the problem. 
- - How to detect - How to test an implementation to see if it exhibits the problem. - This discussion may include difficulties and subtleties associated - with causing the problem to manifest itself, and with interpreting - traces to detect the presence of the problem (if applicable). - - How to fix - For known causes of the problem, how to correct the - implementation. - -2. Known implementation problems - -2.1. - - Name of Problem - No initial slow start - - Classification - Congestion control - - Description - When a TCP begins transmitting data, it is required by RFC 1122, - 4.2.2.15, to engage in a "slow start" by initializing its - congestion window, cwnd, to one packet (one segment of the maximum - size). (Note that an experimental change to TCP, documented in - [RFC2414], allows an initial value somewhat larger than one - packet.) It subsequently increases cwnd by one packet for each - ACK it receives for new data. The minimum of cwnd and the - - - -Paxson, et. al. Informational [Page 3] - -RFC 2525 TCP Implementation Problems March 1999 - - - receiver's advertised window bounds the highest sequence number - the TCP can transmit. A TCP that fails to initialize and - increment cwnd in this fashion exhibits "No initial slow start". - - Significance - In congested environments, detrimental to the performance of other - connections, and possibly to the connection itself. - - Implications - A TCP failing to slow start when beginning a connection results in - traffic bursts that can stress the network, leading to excessive - queueing delays and packet loss. - - Implementations exhibiting this problem might do so because they - suffer from the general problem of not including the required - congestion window. These implementations will also suffer from - "No slow start after retransmission timeout". - - There are different shades of "No initial slow start". 
From the - perspective of stressing the network, the worst is a connection - that simply always sends based on the receiver's advertised - window, with no notion of a separate congestion window. Another - form is described in "Uninitialized CWND" below. - - Relevant RFCs - RFC 1122 requires use of slow start. RFC 2001 gives the specifics - of slow start. - - Trace file demonstrating it - Made using tcpdump [Jacobson89] recording at the connection - responder. No losses reported by the packet filter. - - 10:40:42.244503 B > A: S 1168512000:1168512000(0) win 32768 - (DF) [tos 0x8] - 10:40:42.259908 A > B: S 3688169472:3688169472(0) - ack 1168512001 win 32768 - 10:40:42.389992 B > A: . ack 1 win 33580 (DF) [tos 0x8] - 10:40:42.664975 A > B: P 1:513(512) ack 1 win 32768 - 10:40:42.700185 A > B: . 513:1973(1460) ack 1 win 32768 - 10:40:42.718017 A > B: . 1973:3433(1460) ack 1 win 32768 - 10:40:42.762945 A > B: . 3433:4893(1460) ack 1 win 32768 - 10:40:42.811273 A > B: . 4893:6353(1460) ack 1 win 32768 - 10:40:42.829149 A > B: . 6353:7813(1460) ack 1 win 32768 - 10:40:42.853687 B > A: . ack 1973 win 33580 (DF) [tos 0x8] - 10:40:42.864031 B > A: . ack 3433 win 33580 (DF) [tos 0x8] - - - - - - -Paxson, et. al. Informational [Page 4] - -RFC 2525 TCP Implementation Problems March 1999 - - - After the third packet, the connection is established. A, the - connection responder, begins transmitting to B, the connection - initiator. Host A quickly sends 6 packets comprising 7812 bytes, - even though the SYN exchange agreed upon an MSS of 1460 bytes - (implying an initial congestion window of 1 segment corresponds to - 1460 bytes), and so A should have sent at most 1460 bytes. - - The ACKs sent by B to A in the last two lines indicate that this - trace is not a measurement error (slow start really occurring but - the corresponding ACKs having been dropped by the packet filter). - - A second trace confirmed that the problem is repeatable. 
- - Trace file demonstrating correct behavior - Made using tcpdump recording at the connection originator. No - losses reported by the packet filter. - - 12:35:31.914050 C > D: S 1448571845:1448571845(0) - win 4380 - 12:35:32.068819 D > C: S 1755712000:1755712000(0) - ack 1448571846 win 4096 - 12:35:32.069341 C > D: . ack 1 win 4608 - 12:35:32.075213 C > D: P 1:513(512) ack 1 win 4608 - 12:35:32.286073 D > C: . ack 513 win 4096 - 12:35:32.287032 C > D: . 513:1025(512) ack 1 win 4608 - 12:35:32.287506 C > D: . 1025:1537(512) ack 1 win 4608 - 12:35:32.432712 D > C: . ack 1537 win 4096 - 12:35:32.433690 C > D: . 1537:2049(512) ack 1 win 4608 - 12:35:32.434481 C > D: . 2049:2561(512) ack 1 win 4608 - 12:35:32.435032 C > D: . 2561:3073(512) ack 1 win 4608 - 12:35:32.594526 D > C: . ack 3073 win 4096 - 12:35:32.595465 C > D: . 3073:3585(512) ack 1 win 4608 - 12:35:32.595947 C > D: . 3585:4097(512) ack 1 win 4608 - 12:35:32.596414 C > D: . 4097:4609(512) ack 1 win 4608 - 12:35:32.596888 C > D: . 4609:5121(512) ack 1 win 4608 - 12:35:32.733453 D > C: . ack 4097 win 4096 - - References - This problem is documented in [Paxson97]. - - How to detect - For implementations always manifesting this problem, it shows up - immediately in a packet trace or a sequence plot, as illustrated - above. - - - - - - - -Paxson, et. al. Informational [Page 5] - -RFC 2525 TCP Implementation Problems March 1999 - - - How to fix - If the root problem is that the implementation lacks a notion of a - congestion window, then unfortunately this requires significant - work to fix. However, doing so is important, as such - implementations also exhibit "No slow start after retransmission - timeout". - -2.2. 
- - Name of Problem - No slow start after retransmission timeout - - Classification - Congestion control - - Description - When a TCP experiences a retransmission timeout, it is required by - RFC 1122, 4.2.2.15, to engage in "slow start" by initializing its - congestion window, cwnd, to one packet (one segment of the maximum - size). It subsequently increases cwnd by one packet for each ACK - it receives for new data until it reaches the "congestion - avoidance" threshold, ssthresh, at which point the congestion - avoidance algorithm for updating the window takes over. A TCP - that fails to enter slow start upon a timeout exhibits "No slow - start after retransmission timeout". - - Significance - In congested environments, severely detrimental to the performance - of other connections, and also the connection itself. - - Implications - Entering slow start upon timeout forms one of the cornerstones of - Internet congestion stability, as outlined in [Jacobson88]. If - TCPs fail to do so, the network becomes at risk of suffering - "congestion collapse" [RFC896]. - - Relevant RFCs - RFC 1122 requires use of slow start after loss. RFC 2001 gives - the specifics of how to implement slow start. RFC 896 describes - congestion collapse. - - The retransmission timeout discussed here should not be confused - with the separate "fast recovery" retransmission mechanism - discussed in RFC 2001. - - Trace file demonstrating it - Made using tcpdump recording at the sending TCP (A). No losses - reported by the packet filter. - - - -Paxson, et. al. Informational [Page 6] - -RFC 2525 TCP Implementation Problems March 1999 - - - 10:40:59.090612 B > A: . ack 357125 win 33580 (DF) [tos 0x8] - 10:40:59.222025 A > B: . 357125:358585(1460) ack 1 win 32768 - 10:40:59.868871 A > B: . 357125:358585(1460) ack 1 win 32768 - 10:41:00.016641 B > A: . ack 364425 win 33580 (DF) [tos 0x8] - 10:41:00.036709 A > B: . 364425:365885(1460) ack 1 win 32768 - 10:41:00.045231 A > B: . 
365885:367345(1460) ack 1 win 32768 - 10:41:00.053785 A > B: . 367345:368805(1460) ack 1 win 32768 - 10:41:00.062426 A > B: . 368805:370265(1460) ack 1 win 32768 - 10:41:00.071074 A > B: . 370265:371725(1460) ack 1 win 32768 - 10:41:00.079794 A > B: . 371725:373185(1460) ack 1 win 32768 - 10:41:00.089304 A > B: . 373185:374645(1460) ack 1 win 32768 - 10:41:00.097738 A > B: . 374645:376105(1460) ack 1 win 32768 - 10:41:00.106409 A > B: . 376105:377565(1460) ack 1 win 32768 - 10:41:00.115024 A > B: . 377565:379025(1460) ack 1 win 32768 - 10:41:00.123576 A > B: . 379025:380485(1460) ack 1 win 32768 - 10:41:00.132016 A > B: . 380485:381945(1460) ack 1 win 32768 - 10:41:00.141635 A > B: . 381945:383405(1460) ack 1 win 32768 - 10:41:00.150094 A > B: . 383405:384865(1460) ack 1 win 32768 - 10:41:00.158552 A > B: . 384865:386325(1460) ack 1 win 32768 - 10:41:00.167053 A > B: . 386325:387785(1460) ack 1 win 32768 - 10:41:00.175518 A > B: . 387785:389245(1460) ack 1 win 32768 - 10:41:00.210835 A > B: . 389245:390705(1460) ack 1 win 32768 - 10:41:00.226108 A > B: . 390705:392165(1460) ack 1 win 32768 - 10:41:00.241524 B > A: . ack 389245 win 8760 (DF) [tos 0x8] - - The first packet indicates the ack point is 357125. 130 msec - after receiving the ACK, A transmits the packet after the ACK - point, 357125:358585. 640 msec after this transmission, it - retransmits 357125:358585, in an apparent retransmission timeout. - At this point, A's cwnd should be one MSS, or 1460 bytes, as A - enters slow start. The trace is consistent with this possibility. - - B replies with an ACK of 364425, indicating that A has filled a - sequence hole. At this point, A's cwnd should be 1460*2 = 2920 - bytes, since in slow start receiving an ACK advances cwnd by MSS. - However, A then launches 19 consecutive packets, which is - inconsistent with slow start. - - A second trace confirmed that the problem is repeatable. 
- - Trace file demonstrating correct behavior - Made using tcpdump recording at the sending TCP (C). No losses - reported by the packet filter. - - 12:35:48.442538 C > D: P 465409:465921(512) ack 1 win 4608 - 12:35:48.544483 D > C: . ack 461825 win 4096 - 12:35:48.703496 D > C: . ack 461825 win 4096 - 12:35:49.044613 C > D: . 461825:462337(512) ack 1 win 4608 - - - -Paxson, et. al. Informational [Page 7] - -RFC 2525 TCP Implementation Problems March 1999 - - - 12:35:49.192282 D > C: . ack 465921 win 2048 - 12:35:49.192538 D > C: . ack 465921 win 4096 - 12:35:49.193392 C > D: P 465921:466433(512) ack 1 win 4608 - 12:35:49.194726 C > D: P 466433:466945(512) ack 1 win 4608 - 12:35:49.350665 D > C: . ack 466945 win 4096 - 12:35:49.351694 C > D: . 466945:467457(512) ack 1 win 4608 - 12:35:49.352168 C > D: . 467457:467969(512) ack 1 win 4608 - 12:35:49.352643 C > D: . 467969:468481(512) ack 1 win 4608 - 12:35:49.506000 D > C: . ack 467969 win 3584 - - After C transmits the first packet shown to D, it takes no action - in response to D's ACKs for 461825, because the first packet - already reached the advertised window limit of 4096 bytes above - 461825. 600 msec after transmitting the first packet, C - retransmits 461825:462337, presumably due to a timeout. Its - congestion window is now MSS (512 bytes). - - D acks 465921, indicating that C's retransmission filled a - sequence hole. This ACK advances C's cwnd from 512 to 1024. Very - shortly after, D acks 465921 again in order to update the offered - window from 2048 to 4096. This ACK does not advance cwnd since it - is not for new data. Very shortly after, C responds to the newly - enlarged window by transmitting two packets. D acks both, - advancing cwnd from 1024 to 1536. C in turn transmits three - packets. - - References - This problem is documented in [Paxson97]. 
- - How to detect - Packet loss is common enough in the Internet that generally it is - not difficult to find an Internet path that will force - retransmission due to packet loss. - - If the effective window prior to loss is large enough, however, - then the TCP may retransmit using the "fast recovery" mechanism - described in RFC 2001. In a packet trace, the signature of fast - recovery is that the packet retransmission occurs in response to - the receipt of three duplicate ACKs, and subsequent duplicate ACKs - may lead to the transmission of new data, above both the ack point - and the highest sequence transmitted so far. An absence of three - duplicate ACKs prior to retransmission suffices to distinguish - between timeout and fast recovery retransmissions. In the face of - only observing fast recovery retransmissions, generally it is not - difficult to repeat the data transfer until observing a timeout - retransmission. - - - - - -Paxson, et. al. Informational [Page 8] - -RFC 2525 TCP Implementation Problems March 1999 - - - Once armed with a trace exhibiting a timeout retransmission, - determining whether the TCP follows slow start is done by - computing the correct progression of cwnd and comparing it to the - amount of data transmitted by the TCP subsequent to the timeout - retransmission. - - How to fix - If the root problem is that the implementation lacks a notion of a - congestion window, then unfortunately this requires significant - work to fix. However, doing so is critical, for reasons outlined - above. - -2.3. - - Name of Problem - Uninitialized CWND - - Classification - Congestion control - - Description - As described above for "No initial slow start", when a TCP - connection begins cwnd is initialized to one segment (or perhaps a - few segments, if experimenting with [RFC2414]). One particular - form of "No initial slow start", worth separate mention as the bug - is fairly widely deployed, is "Uninitialized CWND". 
That is, - while the TCP implements the proper slow start mechanism, it fails - to initialize cwnd properly, so slow start in fact fails to occur. - - One way the bug can occur is if, during the connection - establishment handshake, the SYN ACK packet arrives without an MSS - option. The faulty implementation uses receipt of the MSS option - to initialize cwnd to one segment; if the option fails to arrive, - then cwnd is instead initialized to a very large value. - - Significance - In congested environments, detrimental to the performance of other - connections, and likely to the connection itself. The burst can - be so large (see below) that it has deleterious effects even in - uncongested environments. - - Implications - A TCP exhibiting this behavior is stressing the network with a - large burst of packets, which can cause loss in the network. - - Relevant RFCs - RFC 1122 requires use of slow start. RFC 2001 gives the specifics - of slow start. - - - -Paxson, et. al. Informational [Page 9] - -RFC 2525 TCP Implementation Problems March 1999 - - - Trace file demonstrating it - This trace was made using tcpdump running on host A. Host A is - the sender and host B is the receiver. The advertised window and - timestamp options have been omitted for clarity, except for the - first segment sent by host A. Note that A sends an MSS option in - its initial SYN but B does not include one in its reply. - - 16:56:02.226937 A > B: S 237585307:237585307(0) win 8192 - - 16:56:02.557135 B > A: S 1617216000:1617216000(0) - ack 237585308 win 16384 - 16:56:02.557788 A > B: . ack 1 win 8192 - 16:56:02.566014 A > B: . 1:537(536) ack 1 - 16:56:02.566557 A > B: . 537:1073(536) ack 1 - 16:56:02.567120 A > B: . 1073:1609(536) ack 1 - 16:56:02.567662 A > B: P 1609:2049(440) ack 1 - 16:56:02.568349 A > B: . 2049:2585(536) ack 1 - 16:56:02.568909 A > B: . 2585:3121(536) ack 1 - - [54 additional burst segments deleted for brevity] - - 16:56:02.936638 A > B: . 
32065:32601(536) ack 1 - 16:56:03.018685 B > A: . ack 1 - - After the three-way handshake, host A bursts 61 segments into the - network, before duplicate ACKs on the first segment cause a - retransmission to occur. Since host A did not wait for the ACK on - the first segment before sending additional segments, it is - exhibiting "Uninitialized CWND". - - Trace file demonstrating correct behavior - - See the example for "No initial slow start". - - References - This problem is documented in [Paxson97]. - - How to detect - This problem can be detected by examining a packet trace recorded - at either the sender or the receiver. However, the bug can be - difficult to induce because it requires finding a remote TCP peer - that does not send an MSS option in its SYN ACK. - - How to fix - This problem can be fixed by ensuring that cwnd is initialized - upon receipt of a SYN ACK, even if the SYN ACK does not contain an - MSS option. - - - - -Paxson, et. al. Informational [Page 10] - -RFC 2525 TCP Implementation Problems March 1999 - - -2.4. - - Name of Problem - Inconsistent retransmission - - Classification - Reliability - - Description - If, for a given sequence number, a sending TCP retransmits - different data than previously sent for that sequence number, then - a strong possibility arises that the receiving TCP will - reconstruct a different byte stream than that sent by the sending - application, depending on which instance of the sequence number it - accepts. - - Such a sending TCP exhibits "Inconsistent retransmission". - - Significance - Critical for all environments. - - Implications - Reliable delivery of data is a fundamental property of TCP. - - Relevant RFCs - RFC 793, section 1.5, discusses the central role of reliability in - TCP operation. - - Trace file demonstrating it - Made using tcpdump recording at the receiving TCP (B). No losses - reported by the packet filter.
- - 12:35:53.145503 A > B: FP 90048435:90048461(26) - ack 393464682 win 4096 - 4500 0042 9644 0000 - 3006 e4c2 86b1 0401 83f3 010a b2a4 0015 - 055e 07b3 1773 cb6a 5019 1000 68a9 0000 - data starts here>504f 5254 2031 3334 2c31 3737*2c34 2c31 - 2c31 3738 2c31 3635 0d0a - 12:35:53.146479 B > A: R 393464682:393464682(0) win 8192 - 12:35:53.851714 A > B: FP 90048429:90048463(34) - ack 393464682 win 4096 - 4500 004a 965b 0000 - 3006 e4a3 86b1 0401 83f3 010a b2a4 0015 - 055e 07ad 1773 cb6a 5019 1000 8bd3 0000 - data starts here>5041 5356 0d0a 504f 5254 2031 3334 2c31 - 3737*2c31 3035 2c31 3431 2c34 2c31 3539 - 0d0a - - - -Paxson, et. al. Informational [Page 11] - -RFC 2525 TCP Implementation Problems March 1999 - - - The sequence numbers shown in this trace are absolute and not - adjusted to reflect the ISN. The 4-digit hex values show a dump - of the packet's IP and TCP headers, as well as payload. A first - sends to B data for 90048435:90048461. The corresponding data - begins with hex words 504f, 5254, etc. - - B responds with a RST. Since the recording location was local to - B, it is unknown whether A received the RST. - - A then sends 90048429:90048463, which includes six sequence - positions below the earlier transmission, all 26 positions of the - earlier transmission, and two additional sequence positions. - - The retransmission disagrees starting just after sequence - 90048447, annotated above with a leading '*'. These two bytes - were originally transmitted as hex 2c34 but retransmitted as hex - 2c31. Subsequent positions disagree as well. - - This behavior has been observed in other traces involving - different hosts. It is unknown how to repeat it. - - In this instance, no corruption would occur, since B has already - indicated it will not accept further packets from A. - - A second example illustrates a slightly different instance of the - problem. The tracing again was made with tcpdump at the receiving - TCP (D). 
- - 22:23:58.645829 C > D: P 185:212(27) ack 565 win 4096 - 4500 0043 90a3 0000 - 3306 0734 cbf1 9eef 83f3 010a 0525 0015 - a3a2 faba 578c 70a4 5018 1000 9a53 0000 - data starts here>504f 5254 2032 3033 2c32 3431 2c31 3538 - 2c32 3339 2c35 2c34 330d 0a - 22:23:58.646805 D > C: . ack 184 win 8192 - 4500 0028 beeb 0000 - 3e06 ce06 83f3 010a cbf1 9eef 0015 0525 - 578c 70a4 a3a2 fab9 5010 2000 342f 0000 - 22:31:36.532244 C > D: FP 186:213(27) ack 565 win 4096 - 4500 0043 9435 0000 - 3306 03a2 cbf1 9eef 83f3 010a 0525 0015 - a3a2 fabb 578c 70a4 5019 1000 9a51 0000 - data starts here>504f 5254 2032 3033 2c32 3431 2c31 3538 - 2c32 3339 2c35 2c34 330d 0a - - - - - - - -Paxson, et. al. Informational [Page 12] - -RFC 2525 TCP Implementation Problems March 1999 - - - In this trace, sequence numbers are relative. C sends 185:212, - but D only sends an ACK for 184 (so sequence number 184 is - missing). C then sends 186:213. The packet payload is identical - to the previous payload, but the base sequence number is one - higher, resulting in an inconsistent retransmission. - - Neither trace exhibits checksum errors. - - Trace file demonstrating correct behavior - (Omitted, as presumably correct behavior is obvious.) - - References - None known. - - How to detect - This problem unfortunately can be very difficult to detect, since - available experience indicates it is quite rare that it is - manifested. No "trigger" has been identified that can be used to - reproduce the problem. - - How to fix - In the absence of a known "trigger", we cannot always assess how - to fix the problem. 
- - In one implementation (not the one illustrated above), the problem - manifested itself when (1) the sender received a zero window and - stalled; (2) eventually an ACK arrived that offered a window - larger than that in effect at the time of the stall; (3) the - sender transmitted out of the buffer of data it held at the time - of the stall, but (4) failed to limit this transfer to the buffer - length, instead using the newly advertised (and larger) offered - window. Consequently, in addition to the valid buffer contents, - it sent whatever garbage values followed the end of the buffer. - If it then retransmitted the corresponding sequence numbers, at - that point it sent the correct data, resulting in an inconsistent - retransmission. Note that this instance of the problem reflects a - more general problem, that of initially transmitting incorrect - data. - -2.5. - - Name of Problem - Failure to retain above-sequence data - - Classification - Congestion control, performance - - - - - -Paxson, et. al. Informational [Page 13] - -RFC 2525 TCP Implementation Problems March 1999 - - - Description - When a TCP receives an "above sequence" segment, meaning one with - a sequence number exceeding RCV.NXT but below RCV.NXT+RCV.WND, it - SHOULD queue the segment for later delivery (RFC 1122, 4.2.2.20). - (See RFC 793 for the definition of RCV.NXT and RCV.WND.) A TCP - that fails to do so is said to exhibit "Failure to retain above- - sequence data". - - It may sometimes be appropriate for a TCP to discard above- - sequence data to reclaim memory. If they do so only rarely, then - we would not consider them to exhibit this problem. Instead, the - particular concern is with TCPs that always discard above-sequence - data. - - Significance - In environments prone to packet loss, detrimental to the - performance of both other connections and the connection itself. 
- - Implications - In times of congestion, a failure to retain above-sequence data - will lead to numerous otherwise-unnecessary retransmissions, - aggravating the congestion and potentially reducing performance by - a large factor. - - Relevant RFCs - RFC 1122 revises RFC 793 by upgrading the latter's MAY to a SHOULD - on this issue. - - Trace file demonstrating it - Made using tcpdump recording at the receiving TCP. No losses - reported by the packet filter. - - B is the TCP sender, A the receiver. A exhibits failure to retain - above sequence-data: - - 10:38:10.164860 B > A: . 221078:221614(536) ack 1 win 33232 [tos 0x8] - 10:38:10.170809 B > A: . 221614:222150(536) ack 1 win 33232 [tos 0x8] - 10:38:10.177183 B > A: . 222150:222686(536) ack 1 win 33232 [tos 0x8] - 10:38:10.225039 A > B: . ack 222686 win 25800 - - Here B has sent up to (relative) sequence 222686 in-sequence, and - A accordingly acknowledges. - - 10:38:10.268131 B > A: . 223222:223758(536) ack 1 win 33232 [tos 0x8] - 10:38:10.337995 B > A: . 223758:224294(536) ack 1 win 33232 [tos 0x8] - 10:38:10.344065 B > A: . 224294:224830(536) ack 1 win 33232 [tos 0x8] - 10:38:10.350169 B > A: . 224830:225366(536) ack 1 win 33232 [tos 0x8] - 10:38:10.356362 B > A: . 225366:225902(536) ack 1 win 33232 [tos 0x8] - - - -Paxson, et. al. Informational [Page 14] - -RFC 2525 TCP Implementation Problems March 1999 - - - 10:38:10.362445 B > A: . 225902:226438(536) ack 1 win 33232 [tos 0x8] - 10:38:10.368579 B > A: . 226438:226974(536) ack 1 win 33232 [tos 0x8] - 10:38:10.374732 B > A: . 226974:227510(536) ack 1 win 33232 [tos 0x8] - 10:38:10.380825 B > A: . 227510:228046(536) ack 1 win 33232 [tos 0x8] - 10:38:10.387027 B > A: . 228046:228582(536) ack 1 win 33232 [tos 0x8] - 10:38:10.393053 B > A: . 228582:229118(536) ack 1 win 33232 [tos 0x8] - 10:38:10.399193 B > A: . 229118:229654(536) ack 1 win 33232 [tos 0x8] - 10:38:10.405356 B > A: . 
229654:230190(536) ack 1 win 33232 [tos 0x8] - - A now receives 13 additional packets from B. These are above- - sequence because 222686:223222 was dropped. The packets do - however fit within the offered window of 25800. A does not - generate any duplicate ACKs for them. - - The trace contributor (V. Paxson) verified that these 13 packets - had valid IP and TCP checksums. - - 10:38:11.917728 B > A: . 222686:223222(536) ack 1 win 33232 [tos 0x8] - 10:38:11.930925 A > B: . ack 223222 win 32232 - - B times out for 222686:223222 and retransmits it. Upon receiving - it, A only acknowledges 223222. Had it retained the valid above- - sequence packets, it would instead have ack'd 230190. - - 10:38:12.048438 B > A: . 223222:223758(536) ack 1 win 33232 [tos 0x8] - 10:38:12.054397 B > A: . 223758:224294(536) ack 1 win 33232 [tos 0x8] - 10:38:12.068029 A > B: . ack 224294 win 31696 - - B retransmits two more packets, and A only acknowledges them. - This pattern continues as B retransmits the entire set of - previously-received packets. - - A second trace confirmed that the problem is repeatable. - - Trace file demonstrating correct behavior - Made using tcpdump recording at the receiving TCP (C). No losses - reported by the packet filter. - - 09:11:25.790417 D > C: . 33793:34305(512) ack 1 win 61440 - 09:11:25.791393 D > C: . 34305:34817(512) ack 1 win 61440 - 09:11:25.792369 D > C: . 34817:35329(512) ack 1 win 61440 - 09:11:25.792369 D > C: . 35329:35841(512) ack 1 win 61440 - 09:11:25.793345 D > C: . 36353:36865(512) ack 1 win 61440 - 09:11:25.794321 C > D: . ack 35841 win 59904 - - A sequence hole occurs because 35841:36353 has been dropped. - - - - - -Paxson, et. al. Informational [Page 15] - -RFC 2525 TCP Implementation Problems March 1999 - - - 09:11:25.794321 D > C: . 36865:37377(512) ack 1 win 61440 - 09:11:25.794321 C > D: . ack 35841 win 59904 - 09:11:25.795297 D > C: . 37377:37889(512) ack 1 win 61440 - 09:11:25.795297 C > D: . 
ack 35841 win 59904 - 09:11:25.796273 C > D: . ack 35841 win 61440 - 09:11:25.798225 D > C: . 37889:38401(512) ack 1 win 61440 - 09:11:25.799201 C > D: . ack 35841 win 61440 - 09:11:25.807009 D > C: . 38401:38913(512) ack 1 win 61440 - 09:11:25.807009 C > D: . ack 35841 win 61440 - (many additional lines omitted) - 09:11:25.884113 D > C: . 52737:53249(512) ack 1 win 61440 - 09:11:25.884113 C > D: . ack 35841 win 61440 - - Each additional, above-sequence packet C receives from D elicits a - duplicate ACK for 35841. - - 09:11:25.887041 D > C: . 35841:36353(512) ack 1 win 61440 - 09:11:25.887041 C > D: . ack 53249 win 44032 - - D retransmits 35841:36353 and C acknowledges receipt of data all - the way up to 53249. - - References - This problem is documented in [Paxson97]. - - How to detect - Packet loss is common enough in the Internet that generally it is - not difficult to find an Internet path that will result in some - above-sequence packets arriving. A TCP that exhibits "Failure to - retain ..." may not generate duplicate ACKs for these packets. - However, some TCPs that do retain above-sequence data also do not - generate duplicate ACKs, so failure to do so does not definitively - identify the problem. Instead, the key observation is whether - upon retransmission of the dropped packet, data that was - previously above-sequence is acknowledged. - - Two considerations in detecting this problem using a packet trace - are that it is easiest to do so with a trace made at the TCP - receiver, in order to unambiguously determine which packets - arrived successfully, and that such packets may still be correctly - discarded if they arrive with checksum errors. 
The latter can be - tested by capturing the entire packet contents and performing the - IP and TCP checksum algorithms to verify their integrity; or by - confirming that the packets arrive with the same checksum and - contents as that with which they were sent, with a presumption - that the sending TCP correctly calculates checksums for the - packets it transmits. - - - - -Paxson, et. al. Informational [Page 16] - -RFC 2525 TCP Implementation Problems March 1999 - - - It is considerably easier to verify that an implementation does - NOT exhibit this problem. This can be done by recording a trace - at the data sender, and observing that sometimes after a - retransmission the receiver acknowledges a higher sequence number - than just that which was retransmitted. - - How to fix - If the root problem is that the implementation lacks buffer, - then unfortunately this requires significant work to fix. - However, doing so is important, for reasons outlined above. - -2.6. - - Name of Problem - Extra additive constant in congestion avoidance - - Classification - Congestion control / performance - - Description - RFC 1122 section 4.2.2.15 states that TCP MUST implement - Jacobson's "congestion avoidance" algorithm [Jacobson88], which - calls for increasing the congestion window, cwnd, by: - - MSS * MSS / cwnd - - for each ACK received for new data [RFC2001]. This has the effect - of increasing cwnd by approximately one segment in each round trip - time. - - Some TCP implementations add an additional fraction of a segment - (typically MSS/8) to cwnd for each ACK received for new data - [Stevens94, Wright95]: - - (MSS * MSS / cwnd) + MSS/8 - - These implementations exhibit "Extra additive constant in - congestion avoidance". - - Significance - May be detrimental to performance even in completely uncongested - environments (see Implications). - - In congested environments, may also be detrimental to the - performance of other connections. - - - - - - -Paxson, et. al.
Informational [Page 17] - -RFC 2525 TCP Implementation Problems March 1999 - - - Implications - The extra additive term allows a TCP to more aggressively open its - congestion window (quadratic rather than linear increase). For - congested networks, this can increase the loss rate experienced by - all connections sharing a bottleneck with the aggressive TCP. - - However, even for completely uncongested networks, the extra - additive term can lead to diminished performance, as follows. In - congestion avoidance, a TCP sender probes the network path to - determine its available capacity, which often equates to the - number of buffers available at a bottleneck link. With linear - congestion avoidance, the TCP only probes for sufficient capacity - (buffer) to hold one extra packet per RTT. - - Thus, when it exceeds the available capacity, generally only one - packet will be lost (since on the previous RTT it already found - that the path could sustain a window with one less packet in - flight). If the congestion window is sufficiently large, then the - TCP will recover from this single loss using fast retransmission - and avoid an expensive (in terms of performance) retransmission - timeout. - - However, when the additional additive term is used, then cwnd can - increase by more than one packet per RTT, in which case the TCP - probes more aggressively. If in the previous RTT it had reached - the available capacity of the path, then the excess due to the - extra increase will again be lost, but now this will result in - multiple losses from the flight instead of a single loss. TCPs - that do not utilize SACK [RFC2018] generally will not recover from - multiple losses without incurring a retransmission timeout - [Fall96,Hoe96], significantly diminishing performance. - - Relevant RFCs - RFC 1122 requires use of the "congestion avoidance" algorithm. - RFC 2001 outlines the fast retransmit/fast recovery algorithms. - RFC 2018 discusses the SACK option. 
- - Trace file demonstrating it - Recorded using tcpdump running on the same FDDI LAN as host A. - Host A is the sender and host B is the receiver. The connection - establishment specified an MSS of 4,312 bytes and a window scale - factor of 4. We omit the establishment and the first 2.5 MB of - data transfer, as the problem is best demonstrated when the window - has grown to a large value. At the beginning of the trace - excerpt, the congestion window is 31 packets. The connection is - never receiver-window limited, so we omit window advertisements - from the trace for clarity. - - - - -Paxson, et. al. Informational [Page 18] - -RFC 2525 TCP Implementation Problems March 1999 - - - 11:42:07.697951 B > A: . ack 2383006 - 11:42:07.699388 A > B: . 2508054:2512366(4312) - 11:42:07.699962 A > B: . 2512366:2516678(4312) - 11:42:07.700012 B > A: . ack 2391630 - 11:42:07.701081 A > B: . 2516678:2520990(4312) - 11:42:07.701656 A > B: . 2520990:2525302(4312) - 11:42:07.701739 B > A: . ack 2400254 - 11:42:07.702685 A > B: . 2525302:2529614(4312) - 11:42:07.703257 A > B: . 2529614:2533926(4312) - 11:42:07.703295 B > A: . ack 2408878 - 11:42:07.704414 A > B: . 2533926:2538238(4312) - 11:42:07.704989 A > B: . 2538238:2542550(4312) - 11:42:07.705040 B > A: . ack 2417502 - 11:42:07.705935 A > B: . 2542550:2546862(4312) - 11:42:07.706506 A > B: . 2546862:2551174(4312) - 11:42:07.706544 B > A: . ack 2426126 - 11:42:07.707480 A > B: . 2551174:2555486(4312) - 11:42:07.708051 A > B: . 2555486:2559798(4312) - 11:42:07.708088 B > A: . ack 2434750 - 11:42:07.709030 A > B: . 2559798:2564110(4312) - 11:42:07.709604 A > B: . 2564110:2568422(4312) - 11:42:07.710175 A > B: . 2568422:2572734(4312) * - - 11:42:07.710215 B > A: . ack 2443374 - 11:42:07.710799 A > B: . 2572734:2577046(4312) - 11:42:07.711368 A > B: . 2577046:2581358(4312) - 11:42:07.711405 B > A: . ack 2451998 - 11:42:07.712323 A > B: . 2581358:2585670(4312) - 11:42:07.712898 A > B: . 
2585670:2589982(4312) - 11:42:07.712938 B > A: . ack 2460622 - 11:42:07.713926 A > B: . 2589982:2594294(4312) - 11:42:07.714501 A > B: . 2594294:2598606(4312) - 11:42:07.714547 B > A: . ack 2469246 - 11:42:07.715747 A > B: . 2598606:2602918(4312) - 11:42:07.716287 A > B: . 2602918:2607230(4312) - 11:42:07.716328 B > A: . ack 2477870 - 11:42:07.717146 A > B: . 2607230:2611542(4312) - 11:42:07.717717 A > B: . 2611542:2615854(4312) - 11:42:07.717762 B > A: . ack 2486494 - 11:42:07.718754 A > B: . 2615854:2620166(4312) - 11:42:07.719331 A > B: . 2620166:2624478(4312) - 11:42:07.719906 A > B: . 2624478:2628790(4312) ** - - 11:42:07.719958 B > A: . ack 2495118 - 11:42:07.720500 A > B: . 2628790:2633102(4312) - 11:42:07.721080 A > B: . 2633102:2637414(4312) - 11:42:07.721739 B > A: . ack 2503742 - 11:42:07.722348 A > B: . 2637414:2641726(4312) - - - -Paxson, et. al. Informational [Page 19] - -RFC 2525 TCP Implementation Problems March 1999 - - - 11:42:07.722918 A > B: . 2641726:2646038(4312) - 11:42:07.769248 B > A: . ack 2512366 - - The receiver's acknowledgment policy is one ACK per two packets - received. Thus, for each ACK arriving at host A, two new packets - are sent, except when cwnd increases due to congestion avoidance, - in which case three new packets are sent. - - With an ack-every-two-packets policy, cwnd should only increase - one MSS per 2 RTT. However, at the point marked "*" the window - increases after 7 ACKs have arrived, and then again at "**" after - 6 more ACKs. - - While we do not have space to show the effect, this trace suffered - from repeated timeout retransmissions due to multiple packet - losses during a single RTT. - - Trace file demonstrating correct behavior - Made using the same host and tracing setup as above, except now - A's TCP has been modified to remove the MSS/8 additive constant. - Tcpdump reported 77 packet drops; the excerpt below is fully - self-consistent so it is unlikely that any of these occurred - during the excerpt. 
- - We again begin when cwnd is 31 packets (this occurs significantly - later in the trace, because the congestion avoidance is now less - aggressive with opening the window). - - 14:22:21.236757 B > A: . ack 5194679 - 14:22:21.238192 A > B: . 5319727:5324039(4312) - 14:22:21.238770 A > B: . 5324039:5328351(4312) - 14:22:21.238821 B > A: . ack 5203303 - 14:22:21.240158 A > B: . 5328351:5332663(4312) - 14:22:21.240738 A > B: . 5332663:5336975(4312) - 14:22:21.270422 B > A: . ack 5211927 - 14:22:21.271883 A > B: . 5336975:5341287(4312) - 14:22:21.272458 A > B: . 5341287:5345599(4312) - 14:22:21.279099 B > A: . ack 5220551 - 14:22:21.280539 A > B: . 5345599:5349911(4312) - 14:22:21.281118 A > B: . 5349911:5354223(4312) - 14:22:21.281183 B > A: . ack 5229175 - 14:22:21.282348 A > B: . 5354223:5358535(4312) - 14:22:21.283029 A > B: . 5358535:5362847(4312) - 14:22:21.283089 B > A: . ack 5237799 - 14:22:21.284213 A > B: . 5362847:5367159(4312) - 14:22:21.284779 A > B: . 5367159:5371471(4312) - 14:22:21.285976 B > A: . ack 5246423 - 14:22:21.287465 A > B: . 5371471:5375783(4312) - - - -Paxson, et. al. Informational [Page 20] - -RFC 2525 TCP Implementation Problems March 1999 - - - 14:22:21.288036 A > B: . 5375783:5380095(4312) - 14:22:21.288073 B > A: . ack 5255047 - 14:22:21.289155 A > B: . 5380095:5384407(4312) - 14:22:21.289725 A > B: . 5384407:5388719(4312) - 14:22:21.289762 B > A: . ack 5263671 - 14:22:21.291090 A > B: . 5388719:5393031(4312) - 14:22:21.291662 A > B: . 5393031:5397343(4312) - 14:22:21.291701 B > A: . ack 5272295 - 14:22:21.292870 A > B: . 5397343:5401655(4312) - 14:22:21.293441 A > B: . 5401655:5405967(4312) - 14:22:21.293481 B > A: . ack 5280919 - 14:22:21.294476 A > B: . 5405967:5410279(4312) - 14:22:21.295053 A > B: . 5410279:5414591(4312) - 14:22:21.295106 B > A: . ack 5289543 - 14:22:21.296306 A > B: . 5414591:5418903(4312) - 14:22:21.296878 A > B: . 5418903:5423215(4312) - 14:22:21.296917 B > A: . ack 5298167 - 14:22:21.297716 A > B: . 
5423215:5427527(4312) - 14:22:21.298285 A > B: . 5427527:5431839(4312) - 14:22:21.298324 B > A: . ack 5306791 - 14:22:21.299413 A > B: . 5431839:5436151(4312) - 14:22:21.299986 A > B: . 5436151:5440463(4312) - 14:22:21.303696 B > A: . ack 5315415 - 14:22:21.305177 A > B: . 5440463:5444775(4312) - 14:22:21.305755 A > B: . 5444775:5449087(4312) - 14:22:21.308032 B > A: . ack 5324039 - 14:22:21.309525 A > B: . 5449087:5453399(4312) - 14:22:21.310101 A > B: . 5453399:5457711(4312) - 14:22:21.310144 B > A: . ack 5332663 *** - - 14:22:21.311615 A > B: . 5457711:5462023(4312) - 14:22:21.312198 A > B: . 5462023:5466335(4312) - 14:22:21.341876 B > A: . ack 5341287 - 14:22:21.343451 A > B: . 5466335:5470647(4312) - 14:22:21.343985 A > B: . 5470647:5474959(4312) - 14:22:21.350304 B > A: . ack 5349911 - 14:22:21.351852 A > B: . 5474959:5479271(4312) - 14:22:21.352430 A > B: . 5479271:5483583(4312) - 14:22:21.352484 B > A: . ack 5358535 - 14:22:21.353574 A > B: . 5483583:5487895(4312) - 14:22:21.354149 A > B: . 5487895:5492207(4312) - 14:22:21.354205 B > A: . ack 5367159 - 14:22:21.355467 A > B: . 5492207:5496519(4312) - 14:22:21.356039 A > B: . 5496519:5500831(4312) - 14:22:21.357361 B > A: . ack 5375783 - 14:22:21.358855 A > B: . 5500831:5505143(4312) - 14:22:21.359424 A > B: . 5505143:5509455(4312) - 14:22:21.359465 B > A: . ack 5384407 - - - -Paxson, et. al. Informational [Page 21] - -RFC 2525 TCP Implementation Problems March 1999 - - - 14:22:21.360605 A > B: . 5509455:5513767(4312) - 14:22:21.361181 A > B: . 5513767:5518079(4312) - 14:22:21.361225 B > A: . ack 5393031 - 14:22:21.362485 A > B: . 5518079:5522391(4312) - 14:22:21.363057 A > B: . 5522391:5526703(4312) - 14:22:21.363096 B > A: . ack 5401655 - 14:22:21.364236 A > B: . 5526703:5531015(4312) - 14:22:21.364810 A > B: . 5531015:5535327(4312) - 14:22:21.364867 B > A: . ack 5410279 - 14:22:21.365819 A > B: . 5535327:5539639(4312) - 14:22:21.366386 A > B: . 5539639:5543951(4312) - 14:22:21.366427 B > A: . 
ack 5418903 - 14:22:21.367586 A > B: . 5543951:5548263(4312) - 14:22:21.368158 A > B: . 5548263:5552575(4312) - 14:22:21.368199 B > A: . ack 5427527 - 14:22:21.369189 A > B: . 5552575:5556887(4312) - 14:22:21.369758 A > B: . 5556887:5561199(4312) - 14:22:21.369803 B > A: . ack 5436151 - 14:22:21.370814 A > B: . 5561199:5565511(4312) - 14:22:21.371398 A > B: . 5565511:5569823(4312) - 14:22:21.375159 B > A: . ack 5444775 - 14:22:21.376658 A > B: . 5569823:5574135(4312) - 14:22:21.377235 A > B: . 5574135:5578447(4312) - 14:22:21.379303 B > A: . ack 5453399 - 14:22:21.380802 A > B: . 5578447:5582759(4312) - 14:22:21.381377 A > B: . 5582759:5587071(4312) - 14:22:21.381947 A > B: . 5587071:5591383(4312) **** - - "***" marks the end of the first round trip. Note that cwnd did - not increase (as evidenced by each ACK eliciting two new data - packets). Only at "****", which comes near the end of the second - round trip, does cwnd increase by one packet. - - This trace did not suffer any timeout retransmissions. It - transferred the same amount of data as the first trace in about - half as much time. This difference is repeatable between hosts A - and B. - - References - [Stevens94] and [Wright95] discuss this problem. The problem of - Reno TCP failing to recover from multiple losses except via a - retransmission timeout is discussed in [Fall96,Hoe96]. - - - - - - - - - -Paxson, et. al. Informational [Page 22] - -RFC 2525 TCP Implementation Problems March 1999 - - - How to detect - If source code is available, that is generally the easiest way to - detect this problem. Search for each modification to the cwnd - variable; (at least) one of these will be for congestion - avoidance, and inspection of the related code should immediately - identify the problem if present. - - The problem can also be detected by closely examining packet - traces taken near the sender. 
During congestion avoidance, cwnd - will increase by an additional segment upon the receipt of - (typically) eight acknowledgements without a loss. This increase - is in addition to the one segment increase per round trip time (or - two round trip times if the receiver is using delayed ACKs). - - Furthermore, graphs of the sequence number vs. time, taken from - packet traces, are normally linear during congestion avoidance. - When viewing packet traces of transfers from senders exhibiting - this problem, the graphs appear quadratic instead of linear. - - Finally, the traces will show that, with sufficiently large - windows, nearly every loss event results in a timeout. - - How to fix - This problem may be corrected by removing the "+ MSS/8" term from - the congestion avoidance code that increases cwnd each time an ACK - of new data is received. - -2.7. - - Name of Problem - Initial RTO too low - - Classification - Performance - - Description - When a TCP first begins transmitting data, it lacks the RTT - measurements necessary to have computed an adaptive retransmission - timeout (RTO). RFC 1122, 4.2.3.1, states that a TCP SHOULD - initialize RTO to 3 seconds. A TCP that uses a lower value - exhibits "Initial RTO too low". - - Significance - In environments with large RTTs (where "large" means any value - larger than the initial RTO), TCPs will experience very poor - performance. - - - - - -Paxson, et. al. Informational [Page 23] - -RFC 2525 TCP Implementation Problems March 1999 - - - Implications - Whenever RTO < RTT, very poor performance can result as packets - are unnecessarily retransmitted (because RTO will expire before an - ACK for the packet can arrive) and the connection enters slow - start and congestion avoidance. Generally, the algorithms for - computing RTO avoid this problem by adding a positive term to the - estimated RTT. 
However, when a connection first begins it must - use some estimate for RTO, and if it picks a value less than RTT, - the above problems will arise. - - Furthermore, when the initial RTO < RTT, it can take a long time - for the TCP to correct the problem by adapting the RTT estimate, - because the use of Karn's algorithm (mandated by RFC 1122, - 4.2.3.1) will discard many of the candidate RTT measurements made - after the first timeout, since they will be measurements of - retransmitted segments. - - Relevant RFCs - RFC 1122 states that TCPs SHOULD initialize RTO to 3 seconds and - MUST implement Karn's algorithm. - - Trace file demonstrating it - The following trace file was taken using tcpdump at host A, the - data sender. The advertised window and SYN options have been - omitted for clarity. - - 07:52:39.870301 A > B: S 2786333696:2786333696(0) - 07:52:40.548170 B > A: S 130240000:130240000(0) ack 2786333697 - 07:52:40.561287 A > B: P 1:513(512) ack 1 - 07:52:40.753466 A > B: . 1:513(512) ack 1 - 07:52:41.133687 A > B: . 1:513(512) ack 1 - 07:52:41.458529 B > A: . ack 513 - 07:52:41.458686 A > B: . 513:1025(512) ack 1 - 07:52:41.458797 A > B: P 1025:1537(512) ack 1 - 07:52:41.541633 B > A: . ack 513 - 07:52:41.703732 A > B: . 513:1025(512) ack 1 - 07:52:42.044875 B > A: . ack 513 - 07:52:42.173728 A > B: . 513:1025(512) ack 1 - 07:52:42.330861 B > A: . ack 1537 - 07:52:42.331129 A > B: . 1537:2049(512) ack 1 - 07:52:42.331262 A > B: P 2049:2561(512) ack 1 - 07:52:42.623673 A > B: . 1537:2049(512) ack 1 - 07:52:42.683203 B > A: . ack 1537 - 07:52:43.044029 B > A: . ack 1537 - 07:52:43.193812 A > B: . 1537:2049(512) ack 1 - - - - - - -Paxson, et. al. Informational [Page 24] - -RFC 2525 TCP Implementation Problems March 1999 - - - Note from the SYN/SYN-ACK exchange, the RTT is over 600 msec. 
- However, from the elapsed time between the third and fourth lines - (the first packet being sent and then retransmitted), it is - apparent the RTO was initialized to under 200 msec. The next line - shows that this value has doubled to 400 msec (correct exponential - backoff of RTO), but that still does not suffice to avoid an - unnecessary retransmission. - - Finally, an ACK from B arrives for the first segment. Later two - more duplicate ACKs for 513 arrive, indicating that both the - original and the two retransmissions arrived at B. (Indeed, a - concurrent trace at B showed that no packets were lost during the - entire connection). This ACK opens the congestion window to two - packets, which are sent back-to-back, but at 07:52:41.703732 RTO - again expires after a little over 200 msec, leading to an - unnecessary retransmission, and the pattern repeats. By the end - of the trace excerpt above, 1536 bytes have been successfully - transmitted from A to B, over an interval of more than 2 seconds, - reflecting terrible performance. - - Trace file demonstrating correct behavior - The following trace file was taken using tcpdump at host C, the - data sender. The advertised window and SYN options have been - omitted for clarity. - - 17:30:32.090299 C > D: S 2031744000:2031744000(0) - 17:30:32.900325 D > C: S 262737964:262737964(0) ack 2031744001 - 17:30:32.900326 C > D: . ack 1 - 17:30:32.910326 C > D: . 1:513(512) ack 1 - 17:30:34.150355 D > C: . ack 513 - 17:30:34.150356 C > D: . 513:1025(512) ack 1 - 17:30:34.150357 C > D: . 1025:1537(512) ack 1 - 17:30:35.170384 D > C: . ack 1025 - 17:30:35.170385 C > D: . 1537:2049(512) ack 1 - 17:30:35.170386 C > D: . 2049:2561(512) ack 1 - 17:30:35.320385 D > C: . ack 1537 - 17:30:35.320386 C > D: . 2561:3073(512) ack 1 - 17:30:35.320387 C > D: . 3073:3585(512) ack 1 - 17:30:35.730384 D > C: . 
ack 2049 - - The initial SYN/SYN-ACK exchange shows that RTT is more than 800 - msec, and for some subsequent packets it rises above 1 second, but - C's retransmit timer does not ever expire. - - References - This problem is documented in [Paxson97]. - - - - - -Paxson, et. al. Informational [Page 25] - -RFC 2525 TCP Implementation Problems March 1999 - - - How to detect - This problem is readily detected by inspecting a packet trace of - the startup of a TCP connection made over a long-delay path. It - can be diagnosed from either a sender-side or receiver-side trace. - Long-delay paths can often be found by locating remote sites on - other continents. - - How to fix - As this problem arises from a faulty initialization, one hopes - fixing it requires a one-line change to the TCP source code. - -2.8. - - Name of Problem - Failure of window deflation after loss recovery - - Classification - Congestion control / performance - - Description - The fast recovery algorithm allows TCP senders to continue to - transmit new segments during loss recovery. First, fast - retransmission is initiated after a TCP sender receives three - duplicate ACKs. At this point, a retransmission is sent and cwnd - is halved. The fast recovery algorithm then allows additional - segments to be sent when sufficient additional duplicate ACKs - arrive. Some implementations of fast recovery compute when to - send additional segments by artificially incrementing cwnd, first - by three segments to account for the three duplicate ACKs that - triggered fast retransmission, and subsequently by 1 MSS for each - new duplicate ACK that arrives. When cwnd allows, the sender - transmits new data segments. - - When an ACK arrives that covers new data, cwnd is to be reduced by - the amount by which it was artificially increased. However, some - TCP implementations fail to "deflate" the window, causing an - inappropriate amount of data to be sent into the network after - recovery. 
One cause of this problem is the "header prediction" - code, which is used to handle incoming segments that require - little work. In some implementations of TCP, the header - prediction code does not check to make sure cwnd has not been - artificially inflated, and therefore does not reduce the - artificially increased cwnd when appropriate. - - Significance - TCP senders that exhibit this problem will transmit a burst of - data immediately after recovery, which can degrade performance, as - well as network stability. Effectively, the sender does not - - - -Paxson, et. al. Informational [Page 26] - -RFC 2525 TCP Implementation Problems March 1999 - - - reduce the size of cwnd as much as it should (to half its value - when loss was detected), if at all. This can harm the performance - of the TCP connection itself, as well as competing TCP flows. - - Implications - A TCP sender exhibiting this problem does not reduce cwnd - appropriately in times of congestion, and therefore may contribute - to congestive collapse. - - Relevant RFCs - RFC 2001 outlines the fast retransmit/fast recovery algorithms. - [Brakmo95] outlines this implementation problem and offers a fix. - - Trace file demonstrating it - The following trace file was taken using tcpdump at host A, the - data sender. The advertised window (which never changed) has been - omitted for clarity, except for the first packet sent by each - host. - - 08:22:56.825635 A.7505 > B.7505: . 29697:30209(512) ack 1 win 4608 - 08:22:57.038794 B.7505 > A.7505: . ack 27649 win 4096 - 08:22:57.039279 A.7505 > B.7505: . 30209:30721(512) ack 1 - 08:22:57.321876 B.7505 > A.7505: . ack 28161 - 08:22:57.322356 A.7505 > B.7505: . 30721:31233(512) ack 1 - 08:22:57.347128 B.7505 > A.7505: . ack 28673 - 08:22:57.347572 A.7505 > B.7505: . 31233:31745(512) ack 1 - 08:22:57.347782 A.7505 > B.7505: . 31745:32257(512) ack 1 - 08:22:57.936393 B.7505 > A.7505: . ack 29185 - 08:22:57.936864 A.7505 > B.7505: . 
32257:32769(512) ack 1 - 08:22:57.950802 B.7505 > A.7505: . ack 29697 win 4096 - 08:22:57.951246 A.7505 > B.7505: . 32769:33281(512) ack 1 - 08:22:58.169422 B.7505 > A.7505: . ack 29697 - 08:22:58.638222 B.7505 > A.7505: . ack 29697 - 08:22:58.643312 B.7505 > A.7505: . ack 29697 - 08:22:58.643669 A.7505 > B.7505: . 29697:30209(512) ack 1 - 08:22:58.936436 B.7505 > A.7505: . ack 29697 - 08:22:59.002614 B.7505 > A.7505: . ack 29697 - 08:22:59.003026 A.7505 > B.7505: . 33281:33793(512) ack 1 - 08:22:59.682902 B.7505 > A.7505: . ack 33281 - 08:22:59.683391 A.7505 > B.7505: P 33793:34305(512) ack 1 - 08:22:59.683748 A.7505 > B.7505: P 34305:34817(512) ack 1 *** - 08:22:59.684043 A.7505 > B.7505: P 34817:35329(512) ack 1 - 08:22:59.684266 A.7505 > B.7505: P 35329:35841(512) ack 1 - 08:22:59.684567 A.7505 > B.7505: P 35841:36353(512) ack 1 - 08:22:59.684810 A.7505 > B.7505: P 36353:36865(512) ack 1 - 08:22:59.685094 A.7505 > B.7505: P 36865:37377(512) ack 1 - - - - - -Paxson, et. al. Informational [Page 27] - -RFC 2525 TCP Implementation Problems March 1999 - - - The first 12 lines of the trace show incoming ACKs clocking out a - window of data segments. At this point in the transfer, cwnd is 7 - segments. The next 4 lines of the trace show 3 duplicate ACKs - arriving from the receiver, followed by a retransmission from the - sender. At this point, cwnd is halved (to 3 segments) and - artificially incremented by the three duplicate ACKs that have - arrived, making cwnd 6 segments. The next two lines show 2 more - duplicate ACKs arriving, each of which increases cwnd by 1 - segment. So, after these two duplicate ACKs arrive the cwnd is 8 - segments and the sender has permission to send 1 new segment - (since there are 7 segments outstanding). The next line in the - trace shows this new segment being transmitted. The next packet - shown in the trace is an ACK from host B that covers the first 7 - outstanding segments (all but the new segment sent during - recovery). 
This should cause cwnd to be reduced to 3 segments and - 2 segments to be transmitted (since there is already 1 outstanding - segment in the network). However, as shown by the last 7 lines of - the trace, cwnd is not reduced, causing a line-rate burst of 7 new - segments. - - Trace file demonstrating correct behavior - The trace would appear identical to the one above, only it would - stop after the line marked "***", because at this point host A - would correctly reduce cwnd after recovery, allowing only 2 - segments to be transmitted, rather than producing a burst of 7 - segments. - - References - This problem is documented and the performance implications - analyzed in [Brakmo95]. - - How to detect - Failure of window deflation after loss recovery can be found by - examining sender-side packet traces recorded during periods of - moderate loss (so cwnd can grow large enough to allow for fast - recovery when loss occurs). - - How to fix - When this bug is caused by incorrect header prediction, the fix is - to add a predicate to the header prediction test that checks to - see whether cwnd is inflated; if so, the header prediction test - fails and the usual ACK processing occurs, which (in this case) - takes care to deflate the window. See [Brakmo95] for details. - -2.9. - - Name of Problem - Excessively short keepalive connection timeout - - - -Paxson, et. al. Informational [Page 28] - -RFC 2525 TCP Implementation Problems March 1999 - - - Classification - Reliability - - Description - Keep-alive is a mechanism for checking whether an idle connection - is still alive. According to RFC 1122, keepalive should only be - invoked in server applications that might otherwise hang - indefinitely and consume resources unnecessarily if a client - crashes or aborts a connection during a network failure. - - RFC 1122 also specifies that if a keep-alive mechanism is - implemented it MUST NOT interpret failure to respond to any - specific probe as a dead connection. 
The RFC does not specify a - particular mechanism for timing out a connection when no response - is received for keepalive probes. However, if the mechanism does - not allow ample time for recovery from network congestion or - delay, connections may be timed out unnecessarily. - - Significance - In congested networks, can lead to unwarranted termination of - connections. - - Implications - It is possible for the network connection between two peer - machines to become congested or to exhibit packet loss at the time - that a keep-alive probe is sent on a connection. If the keep- - alive mechanism does not allow sufficient time before dropping - connections in the face of unacknowledged probes, connections may - be dropped even when both peers of a connection are still alive. - - Relevant RFCs - RFC 1122 specifies that the keep-alive mechanism may be provided. - It does not specify a mechanism for determining dead connections - when keepalive probes are not acknowledged. - - Trace file demonstrating it - Made using the Orchestra tool at the peer of the machine using - keep-alive. After connection establishment, incoming keep-alives - were dropped by Orchestra to simulate a dead connection. - - 22:11:12.040000 A > B: 22666019:0 win 8192 datasz 4 SYN - 22:11:12.060000 B > A: 2496001:22666020 win 4096 datasz 4 SYN ACK - 22:11:12.130000 A > B: 22666020:2496002 win 8760 datasz 0 ACK - (more than two hours elapse) - 00:23:00.680000 A > B: 22666019:2496002 win 8760 datasz 1 ACK - 00:23:01.770000 A > B: 22666019:2496002 win 8760 datasz 1 ACK - 00:23:02.870000 A > B: 22666019:2496002 win 8760 datasz 1 ACK - 00:23.03.970000 A > B: 22666019:2496002 win 8760 datasz 1 ACK - - - -Paxson, et. al. Informational [Page 29] - -RFC 2525 TCP Implementation Problems March 1999 - - - 00:23.05.070000 A > B: 22666019:2496002 win 8760 datasz 1 ACK - - The initial three packets are the SYN exchange for connection - setup. 
About two hours later, the keepalive timer fires because - the connection has been idle. Keepalive probes are transmitted a - total of 5 times, with a 1 second spacing between probes, after - which the connection is dropped. This is problematic because a 5 - second network outage at the time of the first probe results in - the connection being killed. - - Trace file demonstrating correct behavior - Made using the Orchestra tool at the peer of the machine using - keep-alive. After connection establishment, incoming keep-alives - were dropped by Orchestra to simulate a dead connection. - - 16:01:52.130000 A > B: 1804412929:0 win 4096 datasz 4 SYN - 16:01:52.360000 B > A: 16512001:1804412930 win 4096 datasz 4 SYN ACK - 16:01:52.410000 A > B: 1804412930:16512002 win 4096 datasz 0 ACK - (two hours elapse) - 18:01:57.170000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:03:12.220000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:04:27.270000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:05:42.320000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:06:57.370000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:08:12.420000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:09:27.480000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:10:43.290000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:11:57.580000 A > B: 1804412929:16512002 win 4096 datasz 0 ACK - 18:13:12.630000 A > B: 1804412929:16512002 win 4096 datasz 0 RST ACK - - In this trace, when the keep-alive timer expires, 9 keepalive - probes are sent at 75 second intervals. 75 seconds after the last - probe is sent, a final RST segment is sent indicating that the - connection has been closed. This implementation waits about 11 - minutes before timing out the connection, while the first - implementation shown allows only 5 seconds. - - References - This problem is documented in [Dawson97]. 
- - How to detect - For implementations manifesting this problem, it shows up on a - packet trace after the keepalive timer fires if the peer machine - receiving the keepalive does not respond. Usually the keepalive - timer will fire at least two hours after keepalive is turned on, - but it may be sooner if the timer value has been configured lower, - or if the keepalive mechanism violates the specification (see - Insufficient interval between keepalives problem). In this - - - -Paxson, et. al. Informational [Page 30] - -RFC 2525 TCP Implementation Problems March 1999 - - - example, suppressing the response of the peer to keepalive probes - was accomplished using the Orchestra toolkit, which can be - configured to drop packets. It could also have been done by - creating a connection, turning on keepalive, and disconnecting the - network connection at the receiver machine. - - How to fix - This problem can be fixed by using a different method for timing - out keepalives that allows a longer period of time to elapse - before dropping the connection. For example, the algorithm for - timing out on dropped data could be used. Another possibility is - an algorithm such as the one shown in the trace above, which sends - 9 probes at 75 second intervals and then waits an additional 75 - seconds for a response before closing the connection. - -2.10. - - Name of Problem - Failure to back off retransmission timeout - - Classification - Congestion control / reliability - - Description - The retransmission timeout is used to determine when a packet has - been dropped in the network. When this timeout has expired - without the arrival of an ACK, the segment is retransmitted. Each - time a segment is retransmitted, the timeout is adjusted according - to an exponential backoff algorithm, doubling each time. If a TCP - fails to receive an ACK after numerous attempts at retransmitting - the same segment, it terminates the connection. 
A TCP that fails - to double its retransmission timeout upon repeated timeouts is - said to exhibit "Failure to back off retransmission timeout". - - Significance - Backing off the retransmission timer is a cornerstone of network - stability in the presence of congestion. Consequently, this bug - can have severe adverse affects in congested networks. It also - affects TCP reliability in congested networks, as discussed in the - next section. - - Implications - It is possible for the network connection between two TCP peers to - become congested or to exhibit packet loss at the time that a - retransmission is sent on a connection. If the retransmission - mechanism does not allow sufficient time before dropping - - - - - -Paxson, et. al. Informational [Page 31] - -RFC 2525 TCP Implementation Problems March 1999 - - - connections in the face of unacknowledged segments, connections - may be dropped even when, by waiting longer, the connection could - have continued. - - Relevant RFCs - RFC 1122 specifies mandatory exponential backoff of the - retransmission timeout, and the termination of connections after - some period of time (at least 100 seconds). - - Trace file demonstrating it - Made using tcpdump on an intermediate host: - - 16:51:12.671727 A > B: S 510878852:510878852(0) win 16384 - 16:51:12.672479 B > A: S 2392143687:2392143687(0) - ack 510878853 win 16384 - 16:51:12.672581 A > B: . ack 1 win 16384 - 16:51:15.244171 A > B: P 1:3(2) ack 1 win 16384 - 16:51:15.244933 B > A: . 
ack 3 win 17518 (DF) - - - - 16:51:19.381176 A > B: P 3:5(2) ack 1 win 16384 - 16:51:20.162016 A > B: P 3:5(2) ack 1 win 16384 - 16:51:21.161936 A > B: P 3:5(2) ack 1 win 16384 - 16:51:22.161914 A > B: P 3:5(2) ack 1 win 16384 - 16:51:23.161914 A > B: P 3:5(2) ack 1 win 16384 - 16:51:24.161879 A > B: P 3:5(2) ack 1 win 16384 - 16:51:25.161857 A > B: P 3:5(2) ack 1 win 16384 - 16:51:26.161836 A > B: P 3:5(2) ack 1 win 16384 - 16:51:27.161814 A > B: P 3:5(2) ack 1 win 16384 - 16:51:28.161791 A > B: P 3:5(2) ack 1 win 16384 - 16:51:29.161769 A > B: P 3:5(2) ack 1 win 16384 - 16:51:30.161750 A > B: P 3:5(2) ack 1 win 16384 - 16:51:31.161727 A > B: P 3:5(2) ack 1 win 16384 - - 16:51:32.161701 A > B: R 5:5(0) ack 1 win 16384 - - The initial three packets are the SYN exchange for connection - setup, then a single data packet, to verify that data can be - transferred. Then the connection to the destination host was - disconnected, and more data sent. Retransmissions occur every - second for 12 seconds, and then the connection is terminated with - a RST. This is problematic because a 12 second pause in - connectivity could result in the termination of a connection. - - Trace file demonstrating correct behavior - Again, a tcpdump taken from a third host: - - - - -Paxson, et. al. Informational [Page 32] - -RFC 2525 TCP Implementation Problems March 1999 - - - 16:59:05.398301 A > B: S 2503324757:2503324757(0) win 16384 - 16:59:05.399673 B > A: S 2492674648:2492674648(0) - ack 2503324758 win 16384 - 16:59:05.399866 A > B: . ack 1 win 17520 - 16:59:06.538107 A > B: P 1:3(2) ack 1 win 17520 - 16:59:06.540977 B > A: . 
ack 3 win 17518 (DF) - - - - 16:59:13.121542 A > B: P 3:5(2) ack 1 win 17520 - 16:59:14.010928 A > B: P 3:5(2) ack 1 win 17520 - 16:59:16.010979 A > B: P 3:5(2) ack 1 win 17520 - 16:59:20.011229 A > B: P 3:5(2) ack 1 win 17520 - 16:59:28.011896 A > B: P 3:5(2) ack 1 win 17520 - 16:59:44.013200 A > B: P 3:5(2) ack 1 win 17520 - 17:00:16.015766 A > B: P 3:5(2) ack 1 win 17520 - 17:01:20.021308 A > B: P 3:5(2) ack 1 win 17520 - 17:02:24.027752 A > B: P 3:5(2) ack 1 win 17520 - 17:03:28.034569 A > B: P 3:5(2) ack 1 win 17520 - 17:04:32.041567 A > B: P 3:5(2) ack 1 win 17520 - 17:05:36.048264 A > B: P 3:5(2) ack 1 win 17520 - 17:06:40.054900 A > B: P 3:5(2) ack 1 win 17520 - - 17:07:44.061306 A > B: R 5:5(0) ack 1 win 17520 - - In this trace, when the retransmission timer expires, 12 - retransmissions are sent at exponentially-increasing intervals, - until the interval value reaches 64 seconds, at which time the - interval stops growing. 64 seconds after the last retransmission, - a final RST segment is sent indicating that the connection has - been closed. This implementation waits about 9 minutes before - timing out the connection, while the first implementation shown - allows only 12 seconds. - - References - None known. - - How to detect - A simple transfer can be easily interrupted by disconnecting the - receiving host from the network. tcpdump or another appropriate - tool should show the retransmissions being sent. Several trials - in a low-rtt environment may be required to demonstrate the bug. - - How to fix - For one of the implementations studied, this problem seemed to be - the result of an error introduced with the addition of the - Brakmo-Peterson RTO algorithm [Brakmo95], which can return a value - of zero where the older Jacobson algorithm always returns a - - - -Paxson, et. al. Informational [Page 33] - -RFC 2525 TCP Implementation Problems March 1999 - - - positive value. 
Brakmo and Peterson specified an additional step - of min(rtt + 2, RTO) to avoid problems with this. Unfortunately, - in the implementation this step was omitted when calculating the - exponential backoff for the RTO. This results in an RTO of 0 - seconds being multiplied by the backoff, yielding again zero, and - then being subjected to a later MAX operation that increases it to - 1 second, regardless of the backoff factor. - - A similar TCP persist failure has the same cause. - -2.11. - - Name of Problem - Insufficient interval between keepalives - - Classification - Reliability - - Description - Keep-alive is a mechanism for checking whether an idle connection - is still alive. According to RFC 1122, keep-alive may be included - in an implementation. If it is included, the interval between - keep-alive packets MUST be configurable, and MUST default to no - less than two hours. - - Significance - In congested networks, can lead to unwarranted termination of - connections. - - Implications - According to RFC 1122, keep-alive is not required of - implementations because it could: (1) cause perfectly good - connections to break during transient Internet failures; (2) - consume unnecessary bandwidth ("if no one is using the connection, - who cares if it is still good?"); and (3) cost money for an - Internet path that charges for packets. Regarding this last - point, we note that in addition the presence of dial-on-demand - links in the route can greatly magnify the cost penalty of excess - keepalives, potentially forcing a full-time connection on a link - that would otherwise only be connected a few minutes a day. - - If keepalive is provided the RFC states that the required inter- - keepalive distance MUST default to no less than two hours. If it - does not, the probability of connections breaking increases, the - bandwidth used due to keepalives increases, and cost increases - over paths which charge per packet. - - - - - -Paxson, et. al. 
Informational [Page 34] - -RFC 2525 TCP Implementation Problems March 1999 - - - Relevant RFCs - RFC 1122 specifies that the keep-alive mechanism may be provided. - It also specifies the two hour minimum for the default interval - between keepalive probes. - - Trace file demonstrating it - Made using the Orchestra tool at the peer of the machine using - keep-alive. Machine A was configured to use default settings for - the keepalive timer. - - 11:36:32.910000 A > B: 3288354305:0 win 28672 datasz 4 SYN - 11:36:32.930000 B > A: 896001:3288354306 win 4096 datasz 4 SYN ACK - 11:36:32.950000 A > B: 3288354306:896002 win 28672 datasz 0 ACK - - 11:50:01.190000 A > B: 3288354305:896002 win 28672 datasz 0 ACK - 11:50:01.210000 B > A: 896002:3288354306 win 4096 datasz 0 ACK - - 12:03:29.410000 A > B: 3288354305:896002 win 28672 datasz 0 ACK - 12:03:29.430000 B > A: 896002:3288354306 win 4096 datasz 0 ACK - - 12:16:57.630000 A > B: 3288354305:896002 win 28672 datasz 0 ACK - 12:16:57.650000 B > A: 896002:3288354306 win 4096 datasz 0 ACK - - 12:30:25.850000 A > B: 3288354305:896002 win 28672 datasz 0 ACK - 12:30:25.870000 B > A: 896002:3288354306 win 4096 datasz 0 ACK - - 12:43:54.070000 A > B: 3288354305:896002 win 28672 datasz 0 ACK - 12:43:54.090000 B > A: 896002:3288354306 win 4096 datasz 0 ACK - - The initial three packets are the SYN exchange for connection - setup. About 13 minutes later, the keepalive timer fires because - the connection is idle. The keepalive is acknowledged, and the - timer fires again in about 13 more minutes. This behavior - continues indefinitely until the connection is closed, and is a - violation of the specification. - - Trace file demonstrating correct behavior - Made using the Orchestra tool at the peer of the machine using - keep-alive. Machine A was configured to use default settings for - the keepalive timer. 
- - 17:37:20.500000 A > B: 34155521:0 win 4096 datasz 4 SYN - 17:37:20.520000 B > A: 6272001:34155522 win 4096 datasz 4 SYN ACK - 17:37:20.540000 A > B: 34155522:6272002 win 4096 datasz 0 ACK - - 19:37:25.430000 A > B: 34155521:6272002 win 4096 datasz 0 ACK - 19:37:25.450000 B > A: 6272002:34155522 win 4096 datasz 0 ACK - - - - -Paxson, et. al. Informational [Page 35] - -RFC 2525 TCP Implementation Problems March 1999 - - - 21:37:30.560000 A > B: 34155521:6272002 win 4096 datasz 0 ACK - 21:37:30.570000 B > A: 6272002:34155522 win 4096 datasz 0 ACK - - 23:37:35.580000 A > B: 34155521:6272002 win 4096 datasz 0 ACK - 23:37:35.600000 B > A: 6272002:34155522 win 4096 datasz 0 ACK - - 01:37:40.620000 A > B: 34155521:6272002 win 4096 datasz 0 ACK - 01:37:40.640000 B > A: 6272002:34155522 win 4096 datasz 0 ACK - - 03:37:45.590000 A > B: 34155521:6272002 win 4096 datasz 0 ACK - 03:37:45.610000 B > A: 6272002:34155522 win 4096 datasz 0 ACK - - The initial three packets are the SYN exchange for connection - setup. Just over two hours later, the keepalive timer fires - because the connection is idle. The keepalive is acknowledged, - and the timer fires again just over two hours later. This - behavior continues indefinitely until the connection is closed. - - References - This problem is documented in [Dawson97]. - - How to detect - For implementations manifesting this problem, it shows up on a - packet trace. If the connection is left idle, the keepalive - probes will arrive closer together than the two hour minimum. - -2.12. - - Name of Problem - Window probe deadlock - - Classification - Reliability - - Description - When an application reads a single byte from a full window, the - window should not be updated, in order to avoid Silly Window - Syndrome (SWS; see [RFC813]). If the remote peer uses a single - byte of data to probe the window, that byte can be accepted into - the buffer. 
In some implementations, at this point a negative - argument to a signed comparison causes all further new data to be - considered outside the window; consequently, it is discarded - (after sending an ACK to resynchronize). These discards include - the ACKs for the data packets sent by the local TCP, so the TCP - will consider the data unacknowledged. - - - - - - -Paxson, et. al. Informational [Page 36] - -RFC 2525 TCP Implementation Problems March 1999 - - - Consequently, the application may be unable to complete sending - new data to the remote peer, because it has exhausted the transmit - buffer available to its local TCP, and buffer space is never being - freed because incoming ACKs that would do so are being discarded. - If the application does not read any more data, which may happen - due to its failure to complete such sends, then deadlock results. - - Significance - It's relatively rare for applications to use TCP in a manner that - can exercise this problem. Most applications only transmit bulk - data if they know the other end is prepared to receive the data. - However, if a client fails to consume data, putting the server in - persist mode, and then consumes a small amount of data, it can - mistakenly compute a negative window. At this point the client - will discard all further packets from the server, including ACKs - of the client's own data, since they are not inside the - (impossibly-sized) window. If subsequently the client consumes - enough data to then send a window update to the server, the - situation will be rectified. That is, this situation can only - happen if the client consumes 1 < N < MSS bytes, so as not to - cause a window update, and then starts its own transmission - towards the server of more than a window's worth of data. - - Implications - TCP connections will hang and eventually time out. - - Relevant RFCs - RFC 793 describes zero window probing. RFC 813 describes Silly - Window Syndrome. 
- - Trace file demonstrating it - Trace made from a version of tcpdump modified to print out the - sequence number attached to an ACK even if it's dataless. An - unmodified tcpdump would not print seq:seq(0); however, for this - bug, the sequence number in the ACK is important for unambiguously - determining how the TCP is behaving. - - [ Normal connection startup and data transmission from B to A. - Options, including MSS of 16344 in both directions, omitted - for clarity. ] - 16:07:32.327616 A > B: S 65360807:65360807(0) win 8192 - 16:07:32.327304 B > A: S 65488807:65488807(0) ack 65360808 win 57344 - 16:07:32.327425 A > B: . 1:1(0) ack 1 win 57344 - 16:07:32.345732 B > A: P 1:2049(2048) ack 1 win 57344 - 16:07:32.347013 B > A: P 2049:16385(14336) ack 1 win 57344 - 16:07:32.347550 B > A: P 16385:30721(14336) ack 1 win 57344 - 16:07:32.348683 B > A: P 30721:45057(14336) ack 1 win 57344 - 16:07:32.467286 A > B: . 1:1(0) ack 45057 win 12288 - - - -Paxson, et. al. Informational [Page 37] - -RFC 2525 TCP Implementation Problems March 1999 - - - 16:07:32.467854 B > A: P 45057:57345(12288) ack 1 win 57344 - - [ B fills up A's offered window ] - 16:07:32.667276 A > B: . 1:1(0) ack 57345 win 0 - - [ B probes A's window with a single byte ] - 16:07:37.467438 B > A: . 57345:57346(1) ack 1 win 57344 - - [ A resynchronizes without accepting the byte ] - 16:07:37.467678 A > B: . 1:1(0) ack 57345 win 0 - - [ B probes A's window again ] - 16:07:45.467438 B > A: . 57345:57346(1) ack 1 win 57344 - - [ A resynchronizes and accepts the byte (per the ack field) ] - 16:07:45.667250 A > B: . 1:1(0) ack 57346 win 0 - - [ The application on A has started generating data. The first - packet A sends is small due to a memory allocation bug. ] - 16:07:51.358459 A > B: P 1:2049(2048) ack 57346 win 0 - - [ B acks A's first packet ] - 16:07:51.467239 B > A: . 57346:57346(0) ack 2049 win 57344 - - [ This looks as though A accepted B's ACK and is sending - another packet in response to it. 
In fact, A is trying - to resynchronize with B, and happens to have data to send - and can send it because the first small packet didn't use - up cwnd. ] - 16:07:51.467698 A > B: . 2049:14337(12288) ack 57346 win 0 - - [ B acks all of the data that A has sent ] - 16:07:51.667283 B > A: . 57346:57346(0) ack 14337 win 57344 - - [ A tries to resynchronize. Notice that by the packets - seen on the network, A and B *are* in fact synchronized; - A only thinks that they aren't. ] - 16:07:51.667477 A > B: . 14337:14337(0) ack 57346 win 0 - - [ A's retransmit timer fires, and B acks all of the data. - A once again tries to resynchronize. ] - 16:07:52.467682 A > B: . 1:14337(14336) ack 57346 win 0 - 16:07:52.468166 B > A: . 57346:57346(0) ack 14337 win 57344 - 16:07:52.468248 A > B: . 14337:14337(0) ack 57346 win 0 - - [ A's retransmit timer fires again, and B acks all of the data. - A once again tries to resynchronize. ] - 16:07:55.467684 A > B: . 1:14337(14336) ack 57346 win 0 - - - -Paxson, et. al. Informational [Page 38] - -RFC 2525 TCP Implementation Problems March 1999 - - - 16:07:55.468172 B > A: . 57346:57346(0) ack 14337 win 57344 - 16:07:55.468254 A > B: . 14337:14337(0) ack 57346 win 0 - - Trace file demonstrating correct behavior - Made between the same two hosts after applying the bug fix - mentioned below (and using the same modified tcpdump). - - [ Connection starts up with data transmission from B to A. - Note that due to a separate bug (the fact that A and B - are communicating over a loopback driver), B erroneously - skips slow start. ] - 17:38:09.510854 A > B: S 3110066585:3110066585(0) win 16384 - 17:38:09.510926 B > A: S 3110174850:3110174850(0) - ack 3110066586 win 57344 - 17:38:09.510953 A > B: . 
1:1(0) ack 1 win 57344 - 17:38:09.512956 B > A: P 1:2049(2048) ack 1 win 57344 - 17:38:09.513222 B > A: P 2049:16385(14336) ack 1 win 57344 - 17:38:09.513428 B > A: P 16385:30721(14336) ack 1 win 57344 - 17:38:09.513638 B > A: P 30721:45057(14336) ack 1 win 57344 - 17:38:09.519531 A > B: . 1:1(0) ack 45057 win 12288 - 17:38:09.519638 B > A: P 45057:57345(12288) ack 1 win 57344 - - [ B fills up A's offered window ] - 17:38:09.719526 A > B: . 1:1(0) ack 57345 win 0 - - [ B probes A's window with a single byte. A resynchronizes - without accepting the byte ] - 17:38:14.499661 B > A: . 57345:57346(1) ack 1 win 57344 - 17:38:14.499724 A > B: . 1:1(0) ack 57345 win 0 - - [ B probes A's window again. A resynchronizes and accepts - the byte, as indicated by the ack field ] - 17:38:19.499764 B > A: . 57345:57346(1) ack 1 win 57344 - 17:38:19.519731 A > B: . 1:1(0) ack 57346 win 0 - - [ B probes A's window with a single byte. A resynchronizes - without accepting the byte ] - 17:38:24.499865 B > A: . 57346:57347(1) ack 1 win 57344 - 17:38:24.499934 A > B: . 1:1(0) ack 57346 win 0 - - [ The application on A has started generating data. - B acks A's data and A accepts the ACKs and the - data transfer continues ] - 17:38:28.530265 A > B: P 1:2049(2048) ack 57346 win 0 - 17:38:28.719914 B > A: . 57346:57346(0) ack 2049 win 57344 - - 17:38:28.720023 A > B: . 2049:16385(14336) ack 57346 win 0 - 17:38:28.720089 A > B: . 16385:30721(14336) ack 57346 win 0 - - - -Paxson, et. al. Informational [Page 39] - -RFC 2525 TCP Implementation Problems March 1999 - - - 17:38:28.720370 B > A: . 57346:57346(0) ack 30721 win 57344 - - 17:38:28.720462 A > B: . 30721:45057(14336) ack 57346 win 0 - 17:38:28.720526 A > B: P 45057:59393(14336) ack 57346 win 0 - 17:38:28.720824 A > B: P 59393:73729(14336) ack 57346 win 0 - 17:38:28.721124 B > A: . 
57346:57346(0) ack 73729 win 47104 - - 17:38:28.721198 A > B: P 73729:88065(14336) ack 57346 win 0 - 17:38:28.721379 A > B: P 88065:102401(14336) ack 57346 win 0 - - 17:38:28.721557 A > B: P 102401:116737(14336) ack 57346 win 0 - 17:38:28.721863 B > A: . 57346:57346(0) ack 116737 win 36864 - - References - None known. - - How to detect - Initiate a connection from a client to a server. Have the server - continuously send data until its buffers have been full for long - enough to exhaust the window. Next, have the client read 1 byte - and then delay for long enough that the server TCP sends a window - probe. Now have the client start sending data. At this point, if - it ignores the server's ACKs, then the client's TCP suffers from - the problem. - - How to fix - In one implementation known to exhibit the problem (derived from - 4.3-Reno), the problem was introduced when the macro MAX() was - replaced by the function call max() for computing the amount of - space in the receive window: - - tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); - - When data has been received into a window beyond what has been - advertised to the other side, rcv_nxt > rcv_adv, making this - negative. It's clear from the (int) cast that this is intended, - but the unsigned max() function sign-extends so the negative - number is "larger". The fix is to change max() to imax(): - - tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); - - 4.3-Tahoe and before did not have this bug, since it used the - macro MAX() for this calculation. - -2.13. - - Name of Problem - Stretch ACK violation - - - -Paxson, et. al. Informational [Page 40] - -RFC 2525 TCP Implementation Problems March 1999 - - - Classification - Congestion Control/Performance - - Description - To improve efficiency (both computer and network) a data receiver - may refrain from sending an ACK for each incoming segment, - according to [RFC1122]. However, an ACK should not be delayed an - inordinate amount of time. 
Specifically, ACKs SHOULD be sent for - every second full-sized segment that arrives. If a second full- - sized segment does not arrive within a given timeout (of no more - than 0.5 seconds), an ACK should be transmitted, according to - [RFC1122]. A TCP receiver which does not generate an ACK for - every second full-sized segment exhibits a "Stretch ACK - Violation". - - Significance - TCP receivers exhibiting this behavior will cause TCP senders to - generate burstier traffic, which can degrade performance in - congested environments. In addition, generating fewer ACKs - increases the amount of time needed by the slow start algorithm to - open the congestion window to an appropriate point, which - diminishes performance in environments with large bandwidth-delay - products. Finally, generating fewer ACKs may cause needless - retransmission timeouts in lossy environments, as it increases the - possibility that an entire window of ACKs is lost, forcing a - retransmission timeout. - - Implications - When not in loss recovery, every ACK received by a TCP sender - triggers the transmission of new data segments. The burst size is - determined by the number of previously unacknowledged segments - each ACK covers. Therefore, a TCP receiver ack'ing more than 2 - segments at a time causes the sending TCP to generate a larger - burst of traffic upon receipt of the ACK. This large burst of - traffic can overwhelm an intervening gateway, leading to higher - drop rates for both the connection and other connections passing - through the congested gateway. - - In addition, the TCP slow start algorithm increases the congestion - window by 1 segment for each ACK received. Therefore, increasing - the ACK interval (thus decreasing the rate at which ACKs are - transmitted) increases the amount of time it takes slow start to - increase the congestion window to an appropriate operating point, - and the connection consequently suffers from reduced performance. 
- This is especially true for connections using large windows. - - Relevant RFCs - RFC 1122 outlines delayed ACKs as a recommended mechanism. - - - -Paxson, et. al. Informational [Page 41] - -RFC 2525 TCP Implementation Problems March 1999 - - - Trace file demonstrating it - Trace file taken using tcpdump at host B, the data receiver (and - ACK originator). The advertised window (which never changed) and - timestamp options have been omitted for clarity, except for the - first packet sent by A: - - 12:09:24.820187 A.1174 > B.3999: . 2049:3497(1448) ack 1 - win 33580 [tos 0x8] - 12:09:24.824147 A.1174 > B.3999: . 3497:4945(1448) ack 1 - 12:09:24.832034 A.1174 > B.3999: . 4945:6393(1448) ack 1 - 12:09:24.832222 B.3999 > A.1174: . ack 6393 - 12:09:24.934837 A.1174 > B.3999: . 6393:7841(1448) ack 1 - 12:09:24.942721 A.1174 > B.3999: . 7841:9289(1448) ack 1 - 12:09:24.950605 A.1174 > B.3999: . 9289:10737(1448) ack 1 - 12:09:24.950797 B.3999 > A.1174: . ack 10737 - 12:09:24.958488 A.1174 > B.3999: . 10737:12185(1448) ack 1 - 12:09:25.052330 A.1174 > B.3999: . 12185:13633(1448) ack 1 - 12:09:25.060216 A.1174 > B.3999: . 13633:15081(1448) ack 1 - 12:09:25.060405 B.3999 > A.1174: . ack 15081 - - This portion of the trace clearly shows that the receiver (host B) - sends an ACK for every third full sized packet received. Further - investigation of this implementation found that the cause of the - increased ACK interval was the TCP options being used. The - implementation sent an ACK after it was holding 2*MSS worth of - unacknowledged data. In the above case, the MSS is 1460 bytes so - the receiver transmits an ACK after it is holding at least 2920 - bytes of unacknowledged data. However, the length of the TCP - options being used [RFC1323] took 12 bytes away from the data - portion of each packet. This produced packets containing 1448 - bytes of data. 
But the additional bytes used by the options in - the header were not taken into account when determining when to - trigger an ACK. Therefore, it took 3 data segments before the - data receiver was holding enough unacknowledged data (>= 2*MSS, or - 2920 bytes in the above example) to transmit an ACK. - - Trace file demonstrating correct behavior - Trace file taken using tcpdump at host B, the data receiver (and - ACK originator), again with window and timestamp information - omitted except for the first packet: - - 12:06:53.627320 A.1172 > B.3999: . 1449:2897(1448) ack 1 - win 33580 [tos 0x8] - 12:06:53.634773 A.1172 > B.3999: . 2897:4345(1448) ack 1 - 12:06:53.634961 B.3999 > A.1172: . ack 4345 - 12:06:53.737326 A.1172 > B.3999: . 4345:5793(1448) ack 1 - 12:06:53.744401 A.1172 > B.3999: . 5793:7241(1448) ack 1 - 12:06:53.744592 B.3999 > A.1172: . ack 7241 - - - -Paxson, et. al. Informational [Page 42] - -RFC 2525 TCP Implementation Problems March 1999 - - - 12:06:53.752287 A.1172 > B.3999: . 7241:8689(1448) ack 1 - 12:06:53.847332 A.1172 > B.3999: . 8689:10137(1448) ack 1 - 12:06:53.847525 B.3999 > A.1172: . ack 10137 - - This trace shows the TCP receiver (host B) ack'ing every second - full-sized packet, according to [RFC1122]. This is the same - implementation shown above, with slight modifications that allow - the receiver to take the length of the options into account when - deciding when to transmit an ACK. - - References - This problem is documented in [Allman97] and [Paxson97]. - - How to detect - Stretch ACK violations show up immediately in receiver-side packet - traces of bulk transfers, as shown above. However, packet traces - made on the sender side of the TCP connection may lead to - ambiguities when diagnosing this problem due to the possibility of - lost ACKs. - -2.14. 
- - Name of Problem - Retransmission sends multiple packets - - Classification - Congestion control - - Description - When a TCP retransmits a segment due to a timeout expiration or - beginning a fast retransmission sequence, it should only transmit - a single segment. A TCP that transmits more than one segment - exhibits "Retransmission Sends Multiple Packets". - - Instances of this problem have been known to occur due to - miscomputations involving the use of TCP options. TCP options - increase the TCP header beyond its usual size of 20 bytes. The - total size of the header must be taken into account when - retransmitting a packet. If a TCP sender does not account for the - length of the TCP options when determining how much data to - retransmit, it will send too much data to fit into a single - packet. In this case, the correct retransmission will be followed - by a short segment (tinygram) containing data that may not need to - be retransmitted. - - A specific case is a TCP using the RFC 1323 timestamp option, - which adds 12 bytes to the standard 20-byte TCP header. On - retransmission of a packet, the 12 byte option is incorrectly - - - -Paxson, et. al. Informational [Page 43] - -RFC 2525 TCP Implementation Problems March 1999 - - - interpreted as part of the data portion of the segment. A - standard TCP header and a new 12-byte option is added to the data, - which yields a transmission of 12 bytes more data than contained - in the original segment. This overflow causes a smaller packet, - with 12 data bytes, to be transmitted. - - Significance - This problem is somewhat serious for congested environments - because the TCP implementation injects more packets into the - network than is appropriate. However, since a tinygram is only - sent in response to a fast retransmit or a timeout, it does not - affect the sustained sending rate.
- - Implications - A TCP exhibiting this behavior is stressing the network with more - traffic than appropriate, and stressing routers by increasing the - number of packets they must process. The redundant tinygram will - also elicit a duplicate ACK from the receiver, resulting in yet - another unnecessary transmission. - - Relevant RFCs - RFC 1122 requires use of slow start after loss; RFC 2001 - explicates slow start; RFC 1323 describes the timestamp option - that has been observed to lead to some implementations exhibiting - this problem. - - Trace file demonstrating it - Made using tcpdump recording at a machine on the same subnet as - Host A. Host A is the sender and Host B is the receiver. The - advertised window and timestamp options have been omitted for - clarity, except for the first segment sent by host A. In - addition, portions of the trace file not pertaining to the packet - in question have been removed (missing packets are denoted by - "[...]" in the trace). - - 11:55:22.701668 A > B: . 7361:7821(460) ack 1 - win 49324 - 11:55:22.702109 A > B: . 7821:8281(460) ack 1 - [...] - - 11:55:23.112405 B > A: . ack 7821 - 11:55:23.113069 A > B: . 12421:12881(460) ack 1 - 11:55:23.113511 A > B: . 12881:13341(460) ack 1 - 11:55:23.333077 B > A: . ack 7821 - 11:55:23.336860 B > A: . ack 7821 - 11:55:23.340638 B > A: . ack 7821 - 11:55:23.341290 A > B: . 7821:8281(460) ack 1 - 11:55:23.341317 A > B: . 8281:8293(12) ack 1 - - - -Paxson, et. al. Informational [Page 44] - -RFC 2525 TCP Implementation Problems March 1999 - - - 11:55:23.498242 B > A: . ack 7821 - 11:55:23.506850 B > A: . ack 7821 - 11:55:23.510630 B > A: . ack 7821 - - [...] - - 11:55:23.746649 B > A: . ack 10581 - - The second line of the above trace shows the original transmission - of a segment which is later dropped. After 3 duplicate ACKs, line - 9 of the trace shows the dropped packet (7821:8281), with a 460- - byte payload, being retransmitted. 
Immediately following this - retransmission, a packet with a 12-byte payload is unnecessarily - sent. - - Trace file demonstrating correct behavior - The trace file would be identical to the one above, with a single - line: - - 11:55:23.341317 A > B: . 8281:8293(12) ack 1 - - omitted. - - References - [Brakmo95] - - How to detect - This problem can be detected by examining a packet trace of the - TCP connections of a machine using TCP options, during which a - packet is retransmitted. - -2.15. - - Name of Problem - Failure to send FIN notification promptly - - Classification - Performance - - Description - When an application closes a connection, the corresponding TCP - should send the FIN notification promptly to its peer (unless - prevented by the congestion window). If a TCP implementation - delays in sending the FIN notification, for example due to waiting - until unacknowledged data has been acknowledged, then it is said - to exhibit "Failure to send FIN notification promptly". - - - - - -Paxson, et. al. Informational [Page 45] - -RFC 2525 TCP Implementation Problems March 1999 - - - Also, while not strictly required, FIN segments should include the - PSH flag to ensure expedited delivery of any pending data at the - receiver. - - Significance - The greatest impact occurs for short-lived connections, since for - these the additional time required to close the connection - introduces the greatest relative delay. - - The additional time can be significant in the common case of the - sender waiting for an ACK that is delayed by the receiver. - - Implications - Can diminish total throughput as seen at the application layer, - because connection termination takes longer to complete. - - Relevant RFCs - RFC 793 indicates that a receiver should treat an incoming FIN - flag as implying the push function. - - Trace file demonstrating it - Made using tcpdump (no losses reported by the packet filter). 
- - 10:04:38.68 A > B: S 1031850376:1031850376(0) win 4096 - (DF) - 10:04:38.71 B > A: S 596916473:596916473(0) ack 1031850377 - win 8760 (DF) - 10:04:38.73 A > B: . ack 1 win 4096 (DF) - 10:04:41.98 A > B: P 1:4(3) ack 1 win 4096 (DF) - 10:04:42.15 B > A: . ack 4 win 8757 (DF) - 10:04:42.23 A > B: P 4:7(3) ack 1 win 4096 (DF) - 10:04:42.25 B > A: P 1:11(10) ack 7 win 8754 (DF) - 10:04:42.32 A > B: . ack 11 win 4096 (DF) - 10:04:42.33 B > A: P 11:51(40) ack 7 win 8754 (DF) - 10:04:42.51 A > B: . ack 51 win 4096 (DF) - 10:04:42.53 B > A: F 51:51(0) ack 7 win 8754 (DF) - 10:04:42.56 A > B: FP 7:7(0) ack 52 win 4096 (DF) - 10:04:42.58 B > A: . ack 8 win 8754 (DF) - - Machine B in the trace above does not send out a FIN notification - promptly if there is any data outstanding. It instead waits for - all unacknowledged data to be acknowledged before sending the FIN - segment. The connection was closed at 10:04:42.33 after - requesting 40 bytes to be sent. However, the FIN notification - isn't sent until 10:04:42.51, after the (delayed) acknowledgement - of the 40 bytes of data. - - - - - -Paxson, et. al. Informational [Page 46] - -RFC 2525 TCP Implementation Problems March 1999 - - - Trace file demonstrating correct behavior - Made using tcpdump (no losses reported by the packet filter). - - 10:27:53.85 C > D: S 419744533:419744533(0) win 4096 - (DF) - 10:27:53.92 D > C: S 10082297:10082297(0) ack 419744534 - win 8760 (DF) - 10:27:53.95 C > D: . ack 1 win 4096 (DF) - 10:27:54.42 C > D: P 1:4(3) ack 1 win 4096 (DF) - 10:27:54.62 D > C: . ack 4 win 8757 (DF) - 10:27:54.76 C > D: P 4:7(3) ack 1 win 4096 (DF) - 10:27:54.89 D > C: P 1:11(10) ack 7 win 8754 (DF) - 10:27:54.90 D > C: FP 11:51(40) ack 7 win 8754 (DF) - 10:27:54.92 C > D: . ack 52 win 4096 (DF) - 10:27:55.01 C > D: FP 7:7(0) ack 52 win 4096 (DF) - 10:27:55.09 D > C: . ack 8 win 8754 (DF) - - Here, Machine D sends a FIN with 40 bytes of data even before the - original 10 octets have been acknowledged.
This is correct - behavior as it provides for the highest performance. - - References - This problem is documented in [Dawson97]. - - How to detect - For implementations manifesting this problem, it shows up on a - packet trace. - -2.16. - - Name of Problem - Failure to send a RST after Half Duplex Close - - Classification - Resource management - - Description - RFC 1122 4.2.2.13 states that a TCP SHOULD send a RST if data is - received after "half duplex close", i.e. if it cannot be delivered - to the application. A TCP that fails to do so is said to exhibit - "Failure to send a RST after Half Duplex Close". - - Significance - Potentially serious for TCP endpoints that manage large numbers of - connections, due to exhaustion of memory and/or process slots - available for managing connection state. - - - - - -Paxson, et. al. Informational [Page 47] - -RFC 2525 TCP Implementation Problems March 1999 - - - Implications - Failure to send the RST can lead to permanently hung TCP - connections. This problem has been demonstrated when HTTP clients - abort connections, common when users move on to a new page before - the current page has finished downloading. The HTTP client closes - by transmitting a FIN while the server is transmitting images, - text, etc. The server TCP receives the FIN, but its application - does not close the connection until all data has been queued for - transmission. Since the server will not transmit a FIN until all - the preceding data has been transmitted, deadlock results if the - client TCP does not consume the pending data or tear down the - connection: the window decreases to zero, since the client cannot - pass the data to the application, and the server sends probe - segments. The client acknowledges the probe segments with a zero - window. As mandated in RFC1122 4.2.2.17, the probe segments are - transmitted forever. Server connection state remains in - CLOSE_WAIT, and eventually server processes are exhausted. 
- - Note that there are two bugs. First, probe segments should be - ignored if the window can never subsequently increase. Second, a - RST should be sent when data is received after half duplex close. - Fixing the first bug, but not the second, results in the probe - segments eventually timing out the connection, but the server - remains in CLOSE_WAIT for a significant and unnecessary period. - - Relevant RFCs - RFC 1122 sections 4.2.2.13 and 4.2.2.17. - - Trace file demonstrating it - Made using an unknown network analyzer. No drop information - available. - - client.1391 > server.8080: S 0:1(0) ack: 0 win: 2000 - server.8080 > client.1391: SA 8c01:8c02(0) ack: 1 win: 8000 - client.1391 > server.8080: PA - client.1391 > server.8080: PA 1:1c2(1c1) ack: 8c02 win: 2000 - server.8080 > client.1391: [DF] PA 8c02:8cde(dc) ack: 1c2 win: 8000 - server.8080 > client.1391: [DF] A 8cde:9292(5b4) ack: 1c2 win: 8000 - server.8080 > client.1391: [DF] A 9292:9846(5b4) ack: 1c2 win: 8000 - server.8080 > client.1391: [DF] A 9846:9dfa(5b4) ack: 1c2 win: 8000 - client.1391 > server.8080: PA - server.8080 > client.1391: [DF] A 9dfa:a3ae(5b4) ack: 1c2 win: 8000 - server.8080 > client.1391: [DF] A a3ae:a962(5b4) ack: 1c2 win: 8000 - server.8080 > client.1391: [DF] A a962:af16(5b4) ack: 1c2 win: 8000 - server.8080 > client.1391: [DF] A af16:b4ca(5b4) ack: 1c2 win: 8000 - client.1391 > server.8080: PA - server.8080 > client.1391: [DF] A b4ca:ba7e(5b4) ack: 1c2 win: 8000 - server.8080 > client.1391: [DF] A b4ca:ba7e(5b4) ack: 1c2 win: 8000 - - - -Paxson, et. al. 
Informational [Page 48] - -RFC 2525 TCP Implementation Problems March 1999 - - - client.1391 > server.8080: PA - server.8080 > client.1391: [DF] A ba7e:bdfa(37c) ack: 1c2 win: 8000 - client.1391 > server.8080: PA - server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c2 win: 8000 - client.1391 > server.8080: PA - - [ HTTP client aborts and enters FIN_WAIT_1 ] - - client.1391 > server.8080: FPA - - [ server ACKs the FIN and enters CLOSE_WAIT ] - - server.8080 > client.1391: [DF] A - - [ client enters FIN_WAIT_2 ] - - server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000 - - [ server continues to try to send its data ] - - client.1391 > server.8080: PA < window = 0 > - server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000 - client.1391 > server.8080: PA < window = 0 > - server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000 - client.1391 > server.8080: PA < window = 0 > - server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000 - client.1391 > server.8080: PA < window = 0 > - server.8080 > client.1391: [DF] A bdfa:bdfb(1) ack: 1c3 win: 8000 - client.1391 > server.8080: PA < window = 0 > - - [ ... repeat ad exhaustium ... ] - - Trace file demonstrating correct behavior - Made using an unknown network analyzer. No drop information - available. - - client > server D=80 S=59500 Syn Seq=337 Len=0 Win=8760 - server > client D=59500 S=80 Syn Ack=338 Seq=80153 Len=0 Win=8760 - client > server D=80 S=59500 Ack=80154 Seq=338 Len=0 Win=8760 - - [ ... normal data omitted ... ] - - client > server D=80 S=59500 Ack=114559 Seq=596 Len=0 Win=8760 - server > client D=59500 S=80 Ack=596 Seq=114559 Len=1460 Win=8760 - - [ client closes connection ] - - client > server D=80 S=59500 Fin Seq=596 Len=0 Win=8760 - - - -Paxson, et. al.
Informational [Page 49] - -RFC 2525 TCP Implementation Problems March 1999 - - - server > client D=59500 S=80 Ack=597 Seq=116019 Len=1460 Win=8760 - - [ client sends RST (RFC1122 4.2.2.13) ] - - client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0 - server > client D=59500 S=80 Ack=597 Seq=117479 Len=1460 Win=8760 - client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0 - server > client D=59500 S=80 Ack=597 Seq=118939 Len=1460 Win=8760 - client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0 - server > client D=59500 S=80 Ack=597 Seq=120399 Len=892 Win=8760 - client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0 - server > client D=59500 S=80 Ack=597 Seq=121291 Len=1460 Win=8760 - client > server D=80 S=59500 Rst Seq=597 Len=0 Win=0 - - "client" sends a number of RSTs, one in response to each incoming - packet from "server". One might wonder why "server" keeps sending - data packets after it has received a RST from "client"; the - explanation is that "server" had already transmitted all five of - the data packets before receiving the first RST from "client", so - it is too late to avoid transmitting them. - - How to detect - The problem can be detected by inspecting packet traces of a - large, interrupted bulk transfer. - -2.17. - - Name of Problem - Failure to RST on close with data pending - - Classification - Resource management - - Description - When an application closes a connection in such a way that it can - no longer read any received data, the TCP SHOULD, per section - 4.2.2.13 of RFC 1122, send a RST if there is any unread received - data, or if any new data is received. A TCP that fails to do so - exhibits "Failure to RST on close with data pending". - - Note that, for some TCPs, this situation can be caused by an - application "crashing" while a peer is sending data. - - We have observed a number of TCPs that exhibit this problem. 
The - problem is less serious if any subsequent data sent to the now- - closed connection endpoint elicits a RST (see illustration below). - - - - - -Paxson, et. al. Informational [Page 50] - -RFC 2525 TCP Implementation Problems March 1999 - - - Significance - This problem is most significant for endpoints that engage in - large numbers of connections, as their ability to do so will be - curtailed as they leak away resources. - - Implications - Failure to reset the connection can lead to permanently hung - connections, in which the remote endpoint takes no further action - to tear down the connection because it is waiting on the local TCP - to first take some action. This is particularly the case if the - local TCP also allows the advertised window to go to zero, and - fails to tear down the connection when the remote TCP engages in - "persist" probes (see example below). - - Relevant RFCs - RFC 1122 section 4.2.2.13. Also, 4.2.2.17 for the zero-window - probing discussion below. - - Trace file demonstrating it - Made using tcpdump. No drop information available. - - 13:11:46.04 A > B: S 458659166:458659166(0) win 4096 - (DF) - 13:11:46.04 B > A: S 792320000:792320000(0) ack 458659167 - win 4096 - 13:11:46.04 A > B: . ack 1 win 4096 (DF) - 13:11:55.80 A > B: . 1:513(512) ack 1 win 4096 (DF) - 13:11:55.80 A > B: . 513:1025(512) ack 1 win 4096 (DF) - 13:11:55.83 B > A: . ack 1025 win 3072 - 13:11:55.84 A > B: . 1025:1537(512) ack 1 win 4096 (DF) - 13:11:55.84 A > B: . 1537:2049(512) ack 1 win 4096 (DF) - 13:11:55.85 A > B: . 2049:2561(512) ack 1 win 4096 (DF) - 13:11:56.03 B > A: . ack 2561 win 1536 - 13:11:56.05 A > B: . 2561:3073(512) ack 1 win 4096 (DF) - 13:11:56.06 A > B: . 3073:3585(512) ack 1 win 4096 (DF) - 13:11:56.06 A > B: . 3585:4097(512) ack 1 win 4096 (DF) - 13:11:56.23 B > A: . ack 4097 win 0 - 13:11:58.16 A > B: . 4096:4097(1) ack 1 win 4096 (DF) - 13:11:58.16 B > A: . ack 4097 win 0 - 13:12:00.16 A > B: .
4096:4097(1) ack 1 win 4096 (DF) - 13:12:00.16 B > A: . ack 4097 win 0 - 13:12:02.16 A > B: . 4096:4097(1) ack 1 win 4096 (DF) - 13:12:02.16 B > A: . ack 4097 win 0 - 13:12:05.37 A > B: . 4096:4097(1) ack 1 win 4096 (DF) - 13:12:05.37 B > A: . ack 4097 win 0 - 13:12:06.36 B > A: F 1:1(0) ack 4097 win 0 - 13:12:06.37 A > B: . ack 2 win 4096 (DF) - 13:12:11.78 A > B: . 4096:4097(1) ack 2 win 4096 (DF) - - - -Paxson, et. al. Informational [Page 51] - -RFC 2525 TCP Implementation Problems March 1999 - - - 13:12:11.78 B > A: . ack 4097 win 0 - 13:12:24.59 A > B: . 4096:4097(1) ack 2 win 4096 (DF) - 13:12:24.60 B > A: . ack 4097 win 0 - 13:12:50.22 A > B: . 4096:4097(1) ack 2 win 4096 (DF) - 13:12:50.22 B > A: . ack 4097 win 0 - - Machine B in the trace above does not drop received data when the - socket is "closed" by the application (in this case, the - application process was terminated). This occurred at - approximately 13:12:06.36 and resulted in the FIN being sent in - response to the close. However, because there is no longer an - application to deliver the data to, the TCP should have instead - sent a RST. - - Note: Machine A's zero-window probing is also broken. It is - resending old data, rather than new data. Section 3.7 in RFC 793 - and Section 4.2.2.17 in RFC 1122 discuss zero-window probing. - - Trace file demonstrating better behavior - Made using tcpdump. No drop information available. - - Better, but still not fully correct, behavior, per the discussion - below. We show this behavior because it has been observed for a - number of different TCP implementations. - - 13:48:29.24 C > D: S 73445554:73445554(0) win 4096 - (DF) - 13:48:29.24 D > C: S 36050296:36050296(0) ack 73445555 - win 4096 (DF) - 13:48:29.25 C > D: . ack 1 win 4096 (DF) - 13:48:30.78 C > D: . 1:1461(1460) ack 1 win 4096 (DF) - 13:48:30.79 C > D: . 1461:2921(1460) ack 1 win 4096 (DF) - 13:48:30.80 D > C: . ack 2921 win 1176 (DF) - 13:48:32.75 C > D: . 
2921:4097(1176) ack 1 win 4096 (DF) - 13:48:32.82 D > C: . ack 4097 win 0 (DF) - 13:48:34.76 C > D: . 4096:4097(1) ack 1 win 4096 (DF) - 13:48:34.84 D > C: . ack 4097 win 0 (DF) - 13:48:36.34 D > C: FP 1:1(0) ack 4097 win 4096 (DF) - 13:48:36.34 C > D: . 4097:5557(1460) ack 2 win 4096 (DF) - 13:48:36.34 D > C: R 36050298:36050298(0) win 24576 - 13:48:36.34 C > D: . 5557:7017(1460) ack 2 win 4096 (DF) - 13:48:36.34 D > C: R 36050298:36050298(0) win 24576 - - In this trace, the application process is terminated on Machine D - at approximately 13:48:36.34. Its TCP sends the FIN with the - window opened again (since it discarded the previously received - data). Machine C promptly sends more data, causing Machine D to - - - - -Paxson, et. al. Informational [Page 52] - -RFC 2525 TCP Implementation Problems March 1999 - - - reset the connection since it cannot deliver the data to the - application. Ideally, Machine D SHOULD send a RST instead of - dropping the data and re-opening the receive window. - - Note: Machine C's zero-window probing is broken, the same as in - the example above. - - Trace file demonstrating correct behavior - Made using tcpdump. No losses reported by the packet filter. - - 14:12:02.19 E > F: S 1143360000:1143360000(0) win 4096 - 14:12:02.19 F > E: S 1002988443:1002988443(0) ack 1143360001 - win 4096 (DF) - 14:12:02.19 E > F: . ack 1 win 4096 - 14:12:10.43 E > F: . 1:513(512) ack 1 win 4096 - 14:12:10.61 F > E: . ack 513 win 3584 (DF) - 14:12:10.61 E > F: . 513:1025(512) ack 1 win 4096 - 14:12:10.61 E > F: . 1025:1537(512) ack 1 win 4096 - 14:12:10.81 F > E: . ack 1537 win 2560 (DF) - 14:12:10.81 E > F: . 1537:2049(512) ack 1 win 4096 - 14:12:10.81 E > F: . 2049:2561(512) ack 1 win 4096 - 14:12:10.81 E > F: . 2561:3073(512) ack 1 win 4096 - 14:12:11.01 F > E: . ack 3073 win 1024 (DF) - 14:12:11.01 E > F: . 3073:3585(512) ack 1 win 4096 - 14:12:11.01 E > F: . 3585:4097(512) ack 1 win 4096 - 14:12:11.21 F > E: . 
ack 4097 win 0 (DF) - 14:12:15.88 E > F: . 4097:4098(1) ack 1 win 4096 - 14:12:16.06 F > E: . ack 4097 win 0 (DF) - 14:12:20.88 E > F: . 4097:4098(1) ack 1 win 4096 - 14:12:20.91 F > E: . ack 4097 win 0 (DF) - 14:12:21.94 F > E: R 1002988444:1002988444(0) win 4096 - - When the application terminates at 14:12:21.94, F immediately - sends a RST. - - Note: Machine E's zero-window probing is (finally) correct. - - How to detect - The problem can often be detected by inspecting packet traces of a - transfer in which the receiving application terminates abnormally. - When doing so, there can be an ambiguity (if only looking at the - trace) as to whether the receiving TCP did indeed have unread data - that it could now no longer deliver. To provoke this to happen, - it may help to suspend the receiving application so that it fails - to consume any data, eventually exhausting the advertised window. - At this point, since the advertised window is zero, we know that - - - - - -Paxson, et. al. Informational [Page 53] - -RFC 2525 TCP Implementation Problems March 1999 - - - the receiving TCP has undelivered data buffered up. Terminating - the application process then should suffice to test the - correctness of the TCP's behavior. - -2.18. - - Name of Problem - Options missing from TCP MSS calculation - - Classification - Reliability / performance - - Description - When a TCP determines how much data to send per packet, it - calculates a segment size based on the MTU of the path. It must - then subtract from that MTU the size of the IP and TCP headers in - the packet. If IP options and TCP options are not taken into - account correctly in this calculation, the resulting segment size - may be too large. TCPs that do so are said to exhibit "Options - missing from TCP MSS calculation". - - Significance - In some implementations, this causes the transmission of strangely - fragmented packets. 
In some implementations with Path MTU (PMTU) - discovery [RFC1191], this problem can actually result in a total - failure to transmit any data at all, regardless of the environment - (see below). - - Arguably, especially since the wide deployment of firewalls, IP - options appear only rarely in normal operations. - - Implications - In implementations using PMTU discovery, this problem can result - in packets that are too large for the output interface, and that - have the DF (don't fragment) bit set in the IP header. Thus, the - IP layer on the local machine is not allowed to fragment the - packet to send it out the interface. It instead informs the TCP - layer of the correct MTU size of the interface; the TCP layer - again miscomputes the MSS by failing to take into account the size - of IP options; and the problem repeats, with no data flowing. - - Relevant RFCs - RFC 1122 describes the calculation of the effective send MSS. RFC - 1191 describes Path MTU discovery. - - - - - - - -Paxson, et. al. Informational [Page 54] - -RFC 2525 TCP Implementation Problems March 1999 - - - Trace file demonstrating it - Trace file taken using tcpdump on host C. The first trace - demonstrates the fragmentation that occurs without path MTU - discovery: - - 13:55:25.488728 A.65528 > C.discard: - P 567833:569273(1440) ack 1 win 17520 - - (frag 20828:1472@0+) - (ttl 62, optlen=8 LSRR{B#} NOP) - - 13:55:25.488943 A > C: - (frag 20828:8@1472) - (ttl 62, optlen=8 LSRR{B#} NOP) - - 13:55:25.489052 C.discard > A.65528: - . ack 566385 win 60816 - (DF) - (ttl 60, id 41266) - - Host A repeatedly sends 1440-octet data segments, but these are - fragmented into two packets, one with 1432 octets of data, and - another with 8 octets of data.
- - The second trace demonstrates the failure to send any data - segments, sometimes seen with hosts doing path MTU discovery: - - 13:55:44.332219 A.65527 > C.discard: - S 1018235390:1018235390(0) win 16384 - (DF) - (ttl 62, id 20912, optlen=8 LSRR{B#} NOP) - - 13:55:44.333015 C.discard > A.65527: - S 1271629000:1271629000(0) ack 1018235391 win 60816 - (DF) - (ttl 60, id 41427) - - 13:55:44.333206 C.discard > A.65527: - S 1271629000:1271629000(0) ack 1018235391 win 60816 - (DF) - (ttl 60, id 41427) - - This is all of the activity seen on this connection. Eventually - host C will time out attempting to establish the connection. - - How to detect - The "netcat" utility [Hobbit96] is useful for generating source - routed packets: - - - -Paxson, et. al. Informational [Page 55] - -RFC 2525 TCP Implementation Problems March 1999 - - - 1% nc C discard - (interactive typing) - ^C - 2% nc C discard < /dev/zero - ^C - 3% nc -g B C discard - (interactive typing) - ^C - 4% nc -g B C discard < /dev/zero - ^C - - Lines 1 through 3 should generate appropriate packets, which can - be verified using tcpdump. If the problem is present, line 4 - should generate one of the two kinds of packet traces shown. - - How to fix - The implementation should ensure that the effective send MSS - calculation includes a term for the IP and TCP options, as - mandated by RFC 1122. - -3. Security Considerations - - This memo does not discuss any specific security-related TCP - implementation problems, as the working group decided to pursue - documenting those in a separate document. Some of the implementation - problems discussed here, however, can be used for denial-of-service - attacks. Those classified as congestion control present - opportunities to subvert TCPs used for legitimate data transfer into - excessively loading network elements. 
Those classified as - "performance", "reliability" and "resource management" may be - exploitable for launching surreptitious denial-of-service attacks - against the user of the TCP. Both of these types of attacks can be - extremely difficult to detect because in most respects they look - identical to legitimate network traffic. - -4. Acknowledgements - - Thanks to numerous correspondents on the tcp-impl mailing list for - their input: Steve Alexander, Larry Backman, Jerry Chu, Alan Cox, - Kevin Fall, Richard Fox, Jim Gettys, Rick Jones, Allison Mankin, Neal - McBurnett, Perry Metzger, der Mouse, Thomas Narten, Andras Olah, - Steve Parker, Francesco Potorti`, Luigi Rizzo, Allyn Romanow, Al - Smith, Jerry Toporek, Joe Touch, and Curtis Villamizar. - - Thanks also to Josh Cohen for the traces documenting the "Failure to - send a RST after Half Duplex Close" problem; and to John Polstra, who - analyzed the "Window probe deadlock" problem. - - - - -Paxson, et. al. Informational [Page 56] - -RFC 2525 TCP Implementation Problems March 1999 - - -5. References - - [Allman97] M. Allman, "Fixing Two BSD TCP Bugs," Technical Report - CR-204151, NASA Lewis Research Center, Oct. 1997. - http://roland.grc.nasa.gov/~mallman/papers/bug.ps - - [RFC2414] Allman, M., Floyd, S. and C. Partridge, "Increasing - TCP's Initial Window", RFC 2414, September 1998. - - [RFC1122] Braden, R., Editor, "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [Brakmo95] L. Brakmo and L. Peterson, "Performance Problems in - BSD4.4 TCP," ACM Computer Communication Review, - 25(5):69-86, 1995. - - [RFC813] Clark, D., "Window and Acknowledgement Strategy in TCP," - RFC 813, July 1982. - - [Dawson97] S. Dawson, F. Jahanian, and T. 
Mitton, "Experiments on - Six Commercial TCP Implementations Using a Software - Fault Injection Tool," to appear in Software Practice & - Experience, 1997. A technical report version of this - paper can be obtained at - ftp://rtcl.eecs.umich.edu/outgoing/sdawson/CSE-TR-298- - 96.ps.gz. - - [Fall96] K. Fall and S. Floyd, "Simulation-based Comparisons of - Tahoe, Reno, and SACK TCP," ACM Computer Communication - Review, 26(3):5-21, 1996. - - [Hobbit96] Hobbit, Avian Research, netcat, available via anonymous - ftp to ftp.avian.org, 1996. - - [Hoe96] J. Hoe, "Improving the Start-up Behavior of a Congestion - Control Scheme for TCP," Proc. SIGCOMM '96. - - [Jacobson88] V. Jacobson, "Congestion Avoidance and Control," Proc. - SIGCOMM '88. ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z - - [Jacobson89] V. Jacobson, C. Leres, and S. McCanne, tcpdump, - available via anonymous ftp to ftp.ee.lbl.gov, Jun. - 1989. - - - - - -Paxson, et. al. Informational [Page 57] - -RFC 2525 TCP Implementation Problems March 1999 - - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October - 1996. - - [RFC1191] Mogul, J. and S. Deering, "Path MTU discovery", RFC - 1191, November 1990. - - [RFC896] Nagle, J., "Congestion Control in IP/TCP Internetworks", - RFC 896, January 1984. - - [Paxson97] V. Paxson, "Automated Packet Trace Analysis of TCP - Implementations," Proc. SIGCOMM '97, available from - ftp://ftp.ee.lbl.gov/papers/vp-tcpanaly-sigcomm97.ps.Z. - - [RFC793] Postel, J., Editor, "Transmission Control Protocol," STD - 7, RFC 793, September 1981. - - [RFC2001] Stevens, W., "TCP Slow Start, Congestion Avoidance, Fast - Retransmit, and Fast Recovery Algorithms", RFC 2001, - January 1997. - - [Stevens94] W. Stevens, "TCP/IP Illustrated, Volume 1", Addison- - Wesley Publishing Company, Reading, Massachusetts, 1994. - - [Wright95] G. Wright and W. 
Stevens, "TCP/IP Illustrated, Volume - 2", Addison-Wesley Publishing Company, Reading - Massachusetts, 1995. - -6. Authors' Addresses - - Vern Paxson - ACIRI / ICSI - 1947 Center Street - Suite 600 - Berkeley, CA 94704-1198 - - Phone: +1 510/642-4274 x302 - EMail: vern@aciri.org - - - - - - - - - - - - - -Paxson, et. al. Informational [Page 58] - -RFC 2525 TCP Implementation Problems March 1999 - - - Mark Allman - NASA Glenn Research Center/Sterling Software - Lewis Field - 21000 Brookpark Road - MS 54-2 - Cleveland, OH 44135 - USA - - Phone: +1 216/433-6586 - Email: mallman@grc.nasa.gov - - Scott Dawson - Real-Time Computing Laboratory - EECS Building - University of Michigan - Ann Arbor, MI 48109-2122 - USA - - Phone: +1 313/763-5363 - EMail: sdawson@eecs.umich.edu - - - William C. Fenner - Xerox PARC - 3333 Coyote Hill Road - Palo Alto, CA 94304 - USA - - Phone: +1 650/812-4816 - EMail: fenner@parc.xerox.com - - - Jim Griner - NASA Glenn Research Center - Lewis Field - 21000 Brookpark Road - MS 54-2 - Cleveland, OH 44135 - USA - - Phone: +1 216/433-5787 - EMail: jgriner@grc.nasa.gov - - - - - - - - - -Paxson, et. al. Informational [Page 59] - -RFC 2525 TCP Implementation Problems March 1999 - - - Ian Heavens - Spider Software Ltd. - 8 John's Place, Leith - Edinburgh EH6 7EL - UK - - Phone: +44 131/475-7015 - EMail: ian@spider.com - - Kevin Lahey - NASA Ames Research Center/MRJ - MS 258-6 - Moffett Field, CA 94035 - USA - - Phone: +1 650/604-4334 - EMail: kml@nas.nasa.gov - - - Jeff Semke - Pittsburgh Supercomputing Center - 4400 Fifth Ave - Pittsburgh, PA 15213 - USA - - Phone: +1 412/268-4960 - EMail: semke@psc.edu - - - Bernie Volz - Process Software Corporation - 959 Concord Street - Framingham, MA 01701 - USA - - Phone: +1 508/879-6994 - EMail: volz@process.com - - - - - - - - - - - - - - -Paxson, et. al. Informational [Page 60] - -RFC 2525 TCP Implementation Problems March 1999 - - -7. Full Copyright Statement - - Copyright (C) The Internet Society (1999). 
All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Paxson, et. al. Informational [Page 61] - diff --git a/kernel/picotcp/RFC/rfc2581.txt b/kernel/picotcp/RFC/rfc2581.txt deleted file mode 100644 index 07c1475..0000000 --- a/kernel/picotcp/RFC/rfc2581.txt +++ /dev/null @@ -1,787 +0,0 @@ - - - - - - -Network Working Group M. Allman -Request for Comments: 2581 NASA Glenn/Sterling Software -Obsoletes: 2001 V. Paxson -Category: Standards Track ACIRI / ICSI - W. 
Stevens - Consultant - April 1999 - - - TCP Congestion Control - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1999). All Rights Reserved. - -Abstract - - This document defines TCP's four intertwined congestion control - algorithms: slow start, congestion avoidance, fast retransmit, and - fast recovery. In addition, the document specifies how TCP should - begin transmission after a relatively long idle period, as well as - discussing various acknowledgment generation methods. - -1. Introduction - - This document specifies four TCP [Pos81] congestion control - algorithms: slow start, congestion avoidance, fast retransmit and - fast recovery. These algorithms were devised in [Jac88] and [Jac90]. - Their use with TCP is standardized in [Bra89]. - - This document is an update of [Ste97]. In addition to specifying the - congestion control algorithms, this document specifies what TCP - connections should do after a relatively long idle period, as well as - specifying and clarifying some of the issues pertaining to TCP ACK - generation. - - Note that [Ste94] provides examples of these algorithms in action and - [WS95] provides an explanation of the source code for the BSD - implementation of these algorithms. - - - - -Allman, et. al. Standards Track [Page 1] - -RFC 2581 TCP Congestion Control April 1999 - - - This document is organized as follows. Section 2 provides various - definitions which will be used throughout the document. Section 3 - provides a specification of the congestion control algorithms. 
- Section 4 outlines concerns related to the congestion control - algorithms and finally, section 5 outlines security considerations. - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in [Bra97]. - -2. Definitions - - This section provides the definition of several terms that will be - used throughout the remainder of this document. - - SEGMENT: - A segment is ANY TCP/IP data or acknowledgment packet (or both). - - SENDER MAXIMUM SEGMENT SIZE (SMSS): The SMSS is the size of the - largest segment that the sender can transmit. This value can be - based on the maximum transmission unit of the network, the path - MTU discovery [MD90] algorithm, RMSS (see next item), or other - factors. The size does not include the TCP/IP headers and - options. - - RECEIVER MAXIMUM SEGMENT SIZE (RMSS): The RMSS is the size of the - largest segment the receiver is willing to accept. This is the - value specified in the MSS option sent by the receiver during - connection startup. Or, if the MSS option is not used, 536 bytes - [Bra89]. The size does not include the TCP/IP headers and - options. - - FULL-SIZED SEGMENT: A segment that contains the maximum number of - data bytes permitted (i.e., a segment containing SMSS bytes of - data). - - RECEIVER WINDOW (rwnd) The most recently advertised receiver window. - - CONGESTION WINDOW (cwnd): A TCP state variable that limits the - amount of data a TCP can send. At any given time, a TCP MUST NOT - send data with a sequence number higher than the sum of the - highest acknowledged sequence number and the minimum of cwnd and - rwnd. - - INITIAL WINDOW (IW): The initial window is the size of the sender's - congestion window after the three-way handshake is completed. - - - - - -Allman, et. al. 
Standards Track [Page 2] - -RFC 2581 TCP Congestion Control April 1999 - - - LOSS WINDOW (LW): The loss window is the size of the congestion - window after a TCP sender detects loss using its retransmission - timer. - - RESTART WINDOW (RW): The restart window is the size of the - congestion window after a TCP restarts transmission after an idle - period (if the slow start algorithm is used; see section 4.1 for - more discussion). - - FLIGHT SIZE: The amount of data that has been sent but not yet - acknowledged. - -3. Congestion Control Algorithms - - This section defines the four congestion control algorithms: slow - start, congestion avoidance, fast retransmit and fast recovery, - developed in [Jac88] and [Jac90]. In some situations it may be - beneficial for a TCP sender to be more conservative than the - algorithms allow, however a TCP MUST NOT be more aggressive than the - following algorithms allow (that is, MUST NOT send data when the - value of cwnd computed by the following algorithms would not allow - the data to be sent). - -3.1 Slow Start and Congestion Avoidance - - The slow start and congestion avoidance algorithms MUST be used by a - TCP sender to control the amount of outstanding data being injected - into the network. To implement these algorithms, two variables are - added to the TCP per-connection state. The congestion window (cwnd) - is a sender-side limit on the amount of data the sender can transmit - into the network before receiving an acknowledgment (ACK), while the - receiver's advertised window (rwnd) is a receiver-side limit on the - amount of outstanding data. The minimum of cwnd and rwnd governs - data transmission. - - Another state variable, the slow start threshold (ssthresh), is used - to determine whether the slow start or congestion avoidance algorithm - is used to control data transmission, as discussed below. 
- - Beginning transmission into a network with unknown conditions - requires TCP to slowly probe the network to determine the available - capacity, in order to avoid congesting the network with an - inappropriately large burst of data. The slow start algorithm is - used for this purpose at the beginning of a transfer, or after - repairing loss detected by the retransmission timer. - - - - - - -Allman, et. al. Standards Track [Page 3] - -RFC 2581 TCP Congestion Control April 1999 - - - IW, the initial value of cwnd, MUST be less than or equal to 2*SMSS - bytes and MUST NOT be more than 2 segments. - - We note that a non-standard, experimental TCP extension allows that a - TCP MAY use a larger initial window (IW), as defined in equation 1 - [AFP98]: - - IW = min (4*SMSS, max (2*SMSS, 4380 bytes)) (1) - - With this extension, a TCP sender MAY use a 3 or 4 segment initial - window, provided the combined size of the segments does not exceed - 4380 bytes. We do NOT allow this change as part of the standard - defined by this document. However, we include discussion of (1) in - the remainder of this document as a guideline for those experimenting - with the change, rather than conforming to the present standards for - TCP congestion control. - - The initial value of ssthresh MAY be arbitrarily high (for example, - some implementations use the size of the advertised window), but it - may be reduced in response to congestion. The slow start algorithm - is used when cwnd < ssthresh, while the congestion avoidance - algorithm is used when cwnd > ssthresh. When cwnd and ssthresh are - equal the sender may use either slow start or congestion avoidance. - - During slow start, a TCP increments cwnd by at most SMSS bytes for - each ACK received that acknowledges new data. Slow start ends when - cwnd exceeds ssthresh (or, optionally, when it reaches it, as noted - above) or when congestion is observed. 
- - During congestion avoidance, cwnd is incremented by 1 full-sized - segment per round-trip time (RTT). Congestion avoidance continues - until congestion is detected. One formula commonly used to update - cwnd during congestion avoidance is given in equation 2: - - cwnd += SMSS*SMSS/cwnd (2) - - This adjustment is executed on every incoming non-duplicate ACK. - Equation (2) provides an acceptable approximation to the underlying - principle of increasing cwnd by 1 full-sized segment per RTT. (Note - that for a connection in which the receiver acknowledges every data - segment, (2) proves slightly more aggressive than 1 segment per RTT, - and for a receiver acknowledging every-other packet, (2) is less - aggressive.) - - - - - - - - -Allman, et. al. Standards Track [Page 4] - -RFC 2581 TCP Congestion Control April 1999 - - - Implementation Note: Since integer arithmetic is usually used in TCP - implementations, the formula given in equation 2 can fail to increase - cwnd when the congestion window is very large (larger than - SMSS*SMSS). If the above formula yields 0, the result SHOULD be - rounded up to 1 byte. - - Implementation Note: older implementations have an additional - additive constant on the right-hand side of equation (2). This is - incorrect and can actually lead to diminished performance [PAD+98]. - - Another acceptable way to increase cwnd during congestion avoidance - is to count the number of bytes that have been acknowledged by ACKs - for new data. (A drawback of this implementation is that it requires - maintaining an additional state variable.) When the number of bytes - acknowledged reaches cwnd, then cwnd can be incremented by up to SMSS - bytes. Note that during congestion avoidance, cwnd MUST NOT be - increased by more than the larger of either 1 full-sized segment per - RTT, or the value computed using equation 2. - - Implementation Note: some implementations maintain cwnd in units of - bytes, while others in units of full-sized segments. 
The latter will - find equation (2) difficult to use, and may prefer to use the - counting approach discussed in the previous paragraph. - - When a TCP sender detects segment loss using the retransmission - timer, the value of ssthresh MUST be set to no more than the value - given in equation 3: - - ssthresh = max (FlightSize / 2, 2*SMSS) (3) - - As discussed above, FlightSize is the amount of outstanding data in - the network. - - Implementation Note: an easy mistake to make is to simply use cwnd, - rather than FlightSize, which in some implementations may - incidentally increase well beyond rwnd. - - Furthermore, upon a timeout cwnd MUST be set to no more than the loss - window, LW, which equals 1 full-sized segment (regardless of the - value of IW). Therefore, after retransmitting the dropped segment - the TCP sender uses the slow start algorithm to increase the window - from 1 full-sized segment to the new value of ssthresh, at which - point congestion avoidance again takes over. - - - - - - - - -Allman, et. al. Standards Track [Page 5] - -RFC 2581 TCP Congestion Control April 1999 - - -3.2 Fast Retransmit/Fast Recovery - - A TCP receiver SHOULD send an immediate duplicate ACK when an out- - of-order segment arrives. The purpose of this ACK is to inform the - sender that a segment was received out-of-order and which sequence - number is expected. From the sender's perspective, duplicate ACKs - can be caused by a number of network problems. First, they can be - caused by dropped segments. In this case, all segments after the - dropped segment will trigger duplicate ACKs. Second, duplicate ACKs - can be caused by the re-ordering of data segments by the network (not - a rare event along some network paths [Pax97]). Finally, duplicate - ACKs can be caused by replication of ACK or data segments by the - network. In addition, a TCP receiver SHOULD send an immediate ACK - when the incoming segment fills in all or part of a gap in the - sequence space. 
This will generate more timely information for a - sender recovering from a loss through a retransmission timeout, a - fast retransmit, or an experimental loss recovery algorithm, such as - NewReno [FH98]. - - The TCP sender SHOULD use the "fast retransmit" algorithm to detect - and repair loss, based on incoming duplicate ACKs. The fast - retransmit algorithm uses the arrival of 3 duplicate ACKs (4 - identical ACKs without the arrival of any other intervening packets) - as an indication that a segment has been lost. After receiving 3 - duplicate ACKs, TCP performs a retransmission of what appears to be - the missing segment, without waiting for the retransmission timer to - expire. - - After the fast retransmit algorithm sends what appears to be the - missing segment, the "fast recovery" algorithm governs the - transmission of new data until a non-duplicate ACK arrives. The - reason for not performing slow start is that the receipt of the - duplicate ACKs not only indicates that a segment has been lost, but - also that segments are most likely leaving the network (although a - massive segment duplication by the network can invalidate this - conclusion). In other words, since the receiver can only generate a - duplicate ACK when a segment has arrived, that segment has left the - network and is in the receiver's buffer, so we know it is no longer - consuming network resources. Furthermore, since the ACK "clock" - [Jac88] is preserved, the TCP sender can continue to transmit new - segments (although transmission must continue using a reduced cwnd). - - The fast retransmit and fast recovery algorithms are usually - implemented together as follows. - - 1. When the third duplicate ACK is received, set ssthresh to no more - than the value given in equation 3. - - - - -Allman, et. al. Standards Track [Page 6] - -RFC 2581 TCP Congestion Control April 1999 - - - 2. Retransmit the lost segment and set cwnd to ssthresh plus 3*SMSS. 
- This artificially "inflates" the congestion window by the number - of segments (three) that have left the network and which the - receiver has buffered. - - 3. For each additional duplicate ACK received, increment cwnd by - SMSS. This artificially inflates the congestion window in order - to reflect the additional segment that has left the network. - - 4. Transmit a segment, if allowed by the new value of cwnd and the - receiver's advertised window. - - 5. When the next ACK arrives that acknowledges new data, set cwnd to - ssthresh (the value set in step 1). This is termed "deflating" - the window. - - This ACK should be the acknowledgment elicited by the - retransmission from step 1, one RTT after the retransmission - (though it may arrive sooner in the presence of significant out- - of-order delivery of data segments at the receiver). - Additionally, this ACK should acknowledge all the intermediate - segments sent between the lost segment and the receipt of the - third duplicate ACK, if none of these were lost. - - Note: This algorithm is known to generally not recover very - efficiently from multiple losses in a single flight of packets - [FF96]. One proposed set of modifications to address this problem - can be found in [FH98]. - -4. Additional Considerations - -4.1 Re-starting Idle Connections - - A known problem with the TCP congestion control algorithms described - above is that they allow a potentially inappropriate burst of traffic - to be transmitted after TCP has been idle for a relatively long - period of time. After an idle period, TCP cannot use the ACK clock - to strobe new segments into the network, as all the ACKs have drained - from the network. Therefore, as specified above, TCP can potentially - send a cwnd-size line-rate burst into the network after an idle - period. - - [Jac88] recommends that a TCP use slow start to restart transmission - after a relatively long idle period. 
Slow start serves to restart - the ACK clock, just as it does at the beginning of a transfer. This - mechanism has been widely deployed in the following manner. When TCP - has not received a segment for more than one retransmission timeout, - cwnd is reduced to the value of the restart window (RW) before - - - -Allman, et. al. Standards Track [Page 7] - -RFC 2581 TCP Congestion Control April 1999 - - - transmission begins. - - For the purposes of this standard, we define RW = IW. - - We note that the non-standard experimental extension to TCP defined - in [AFP98] defines RW = min(IW, cwnd), with the definition of IW - adjusted per equation (1) above. - - Using the last time a segment was received to determine whether or - not to decrease cwnd fails to deflate cwnd in the common case of - persistent HTTP connections [HTH98]. In this case, a WWW server - receives a request before transmitting data to the WWW browser. The - reception of the request makes the test for an idle connection fail, - and allows the TCP to begin transmission with a possibly - inappropriately large cwnd. - - Therefore, a TCP SHOULD set cwnd to no more than RW before beginning - transmission if the TCP has not sent data in an interval exceeding - the retransmission timeout. - -4.2 Generating Acknowledgments - - The delayed ACK algorithm specified in [Bra89] SHOULD be used by a - TCP receiver. When used, a TCP receiver MUST NOT excessively delay - acknowledgments. Specifically, an ACK SHOULD be generated for at - least every second full-sized segment, and MUST be generated within - 500 ms of the arrival of the first unacknowledged packet. - - The requirement that an ACK "SHOULD" be generated for at least every - second full-sized segment is listed in [Bra89] in one place as a - SHOULD and another as a MUST. Here we unambiguously state it is a - SHOULD. 
We also emphasize that this is a SHOULD, meaning that an - implementor should indeed only deviate from this requirement after - careful consideration of the implications. See the discussion of - "Stretch ACK violation" in [PAD+98] and the references therein for a - discussion of the possible performance problems with generating ACKs - less frequently than every second full-sized segment. - - In some cases, the sender and receiver may not agree on what - constitutes a full-sized segment. An implementation is deemed to - comply with this requirement if it sends at least one acknowledgment - every time it receives 2*RMSS bytes of new data from the sender, - where RMSS is the Maximum Segment Size specified by the receiver to - the sender (or the default value of 536 bytes, per [Bra89], if the - receiver does not specify an MSS option during connection - establishment). The sender may be forced to use a segment size less - than RMSS due to the maximum transmission unit (MTU), the path MTU - discovery algorithm or other factors. For instance, consider the - - - -Allman, et. al. Standards Track [Page 8] - -RFC 2581 TCP Congestion Control April 1999 - - - case when the receiver announces an RMSS of X bytes but the sender - ends up using a segment size of Y bytes (Y < X) due to path MTU - discovery (or the sender's MTU size). The receiver will generate - stretch ACKs if it waits for 2*X bytes to arrive before an ACK is - sent. Clearly this will take more than 2 segments of size Y bytes. - Therefore, while a specific algorithm is not defined, it is desirable - for receivers to attempt to prevent this situation, for example by - acknowledging at least every second segment, regardless of size. - Finally, we repeat that an ACK MUST NOT be delayed for more than 500 - ms waiting on a second full-sized segment to arrive. - - Out-of-order data segments SHOULD be acknowledged immediately, in - order to accelerate loss recovery. 
To trigger the fast retransmit - algorithm, the receiver SHOULD send an immediate duplicate ACK when - it receives a data segment above a gap in the sequence space. To - provide feedback to senders recovering from losses, the receiver - SHOULD send an immediate ACK when it receives a data segment that - fills in all or part of a gap in the sequence space. - - A TCP receiver MUST NOT generate more than one ACK for every incoming - segment, other than to update the offered window as the receiving - application consumes new data [page 42, Pos81][Cla82]. - -4.3 Loss Recovery Mechanisms - - A number of loss recovery algorithms that augment fast retransmit and - fast recovery have been suggested by TCP researchers. While some of - these algorithms are based on the TCP selective acknowledgment (SACK) - option [MMFR96], such as [FF96,MM96a,MM96b], others do not require - SACKs [Hoe96,FF96,FH98]. The non-SACK algorithms use "partial - acknowledgments" (ACKs which cover new data, but not all the data - outstanding when loss was detected) to trigger retransmissions. - While this document does not standardize any of the specific - algorithms that may improve fast retransmit/fast recovery, these - enhanced algorithms are implicitly allowed, as long as they follow - the general principles of the basic four algorithms outlined above. - - Therefore, when the first loss in a window of data is detected, - ssthresh MUST be set to no more than the value given by equation (3). - Second, until all lost segments in the window of data in question are - repaired, the number of segments transmitted in each RTT MUST be no - more than half the number of outstanding segments when the loss was - detected. Finally, after all loss in the given window of segments - has been successfully retransmitted, cwnd MUST be set to no more than - ssthresh and congestion avoidance MUST be used to further increase - cwnd. 
Loss in two successive windows of data, or the loss of a - retransmission, should be taken as two indications of congestion and, - therefore, cwnd (and ssthresh) MUST be lowered twice in this case. - - - -Allman, et. al. Standards Track [Page 9] - -RFC 2581 TCP Congestion Control April 1999 - - - The algorithms outlined in [Hoe96,FF96,MM96a,MM96b] follow the - principles of the basic four congestion control algorithms outlined - in this document. - -5. Security Considerations - - This document requires a TCP to diminish its sending rate in the - presence of retransmission timeouts and the arrival of duplicate - acknowledgments. An attacker can therefore impair the performance of - a TCP connection by either causing data packets or their - acknowledgments to be lost, or by forging excessive duplicate - acknowledgments. Causing two congestion control events back-to-back - will often cut ssthresh to its minimum value of 2*SMSS, causing the - connection to immediately enter the slower-performing congestion - avoidance phase. - - The Internet to a considerable degree relies on the correct - implementation of these algorithms in order to preserve network - stability and avoid congestion collapse. An attacker could cause TCP - endpoints to respond more aggressively in the face of congestion by - forging excessive duplicate acknowledgments or excessive - acknowledgments for new data. Conceivably, such an attack could - drive a portion of the network into congestion collapse. - -6. Changes Relative to RFC 2001 - - This document has been extensively rewritten editorially and it is - not feasible to itemize the list of changes between the two - documents. The intention of this document is not to change any of the - recommendations given in RFC 2001, but to further clarify cases that - were not discussed in detail in RFC 2001.
Specifically, this document - suggests what TCP connections should do after a relatively long idle - period, as well as specifying and clarifying some of the issues - pertaining to TCP ACK generation. Finally, the allowable upper bound - for the initial congestion window has also been raised from one to - two segments. - -Acknowledgments - - The four algorithms that are described were developed by Van - Jacobson. - - Some of the text from this document is taken from "TCP/IP - Illustrated, Volume 1: The Protocols" by W. Richard Stevens - (Addison-Wesley, 1994) and "TCP/IP Illustrated, Volume 2: The - Implementation" by Gary R. Wright and W. Richard Stevens (Addison- - Wesley, 1995). This material is used with the permission of - Addison-Wesley. - - - -Allman, et. al. Standards Track [Page 10] - -RFC 2581 TCP Congestion Control April 1999 - - - Neal Cardwell, Sally Floyd, Craig Partridge and Joe Touch contributed - a number of helpful suggestions. - -References - - [AFP98] Allman, M., Floyd, S. and C. Partridge, "Increasing TCP's - Initial Window Size", RFC 2414, September 1998. - - [Bra89] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [Bra97] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [Cla82] Clark, D., "Window and Acknowledgment Strategy in TCP", RFC - 813, July 1982. - - [FF96] Fall, K. and S. Floyd, "Simulation-based Comparisons of - Tahoe, Reno and SACK TCP", Computer Communication Review, - July 1996. ftp://ftp.ee.lbl.gov/papers/sacks.ps.Z. - - [FH98] Floyd, S. and T. Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [Flo94] Floyd, S., "TCP and Successive Fast Retransmits. Technical - report", October 1994. - ftp://ftp.ee.lbl.gov/papers/fastretrans.ps. - - [Hoe96] Hoe, J., "Improving the Start-up Behavior of a Congestion - Control Scheme for TCP", In ACM SIGCOMM, August 1996.
- - [HTH98] Hughes, A., Touch, J. and J. Heidemann, "Issues in TCP - Slow-Start Restart After Idle", Work in Progress. - - [Jac88] Jacobson, V., "Congestion Avoidance and Control", Computer - Communication Review, vol. 18, no. 4, pp. 314-329, Aug. - 1988. ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z. - - [Jac90] Jacobson, V., "Modified TCP Congestion Avoidance Algorithm", - end2end-interest mailing list, April 30, 1990. - ftp://ftp.isi.edu/end2end/end2end-interest-1990.mail. - - [MD90] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - - - - - - -Allman, et. al. Standards Track [Page 11] - -RFC 2581 TCP Congestion Control April 1999 - - - [MM96a] Mathis, M. and J. Mahdavi, "Forward Acknowledgment: Refining - TCP Congestion Control", Proceedings of SIGCOMM'96, August, - 1996, Stanford, CA. Available - from http://www.psc.edu/networking/papers/papers.html - - [MM96b] Mathis, M. and J. Mahdavi, "TCP Rate-Halving with Bounding - Parameters", Technical report. Available from - http://www.psc.edu/networking/papers/FACKnotes/current. - - [MMFR96] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October 1996. - - [PAD+98] Paxson, V., Allman, M., Dawson, S., Fenner, W., Griner, J., - Heavens, I., Lahey, K., Semke, J. and B. Volz, "Known TCP - Implementation Problems", RFC 2525, March 1999. - - [Pax97] Paxson, V., "End-to-End Internet Packet Dynamics", - Proceedings of SIGCOMM '97, Cannes, France, Sep. 1997. - - [Pos81] Postel, J., "Transmission Control Protocol", STD 7, RFC 793, - September 1981. - - [Ste94] Stevens, W., "TCP/IP Illustrated, Volume 1: The Protocols", - Addison-Wesley, 1994. - - [Ste97] Stevens, W., "TCP Slow Start, Congestion Avoidance, Fast - Retransmit, and Fast Recovery Algorithms", RFC 2001, January - 1997. - - [WS95] Wright, G. and W. Stevens, "TCP/IP Illustrated, Volume 2: - The Implementation", Addison-Wesley, 1995. - - - - - - - - - - - - - - - - - - - - -Allman, et. al.
Standards Track [Page 12] - -RFC 2581 TCP Congestion Control April 1999 - - -Authors' Addresses - - Mark Allman - NASA Glenn Research Center/Sterling Software - Lewis Field - 21000 Brookpark Rd. MS 54-2 - Cleveland, OH 44135 - 216-433-6586 - - EMail: mallman@grc.nasa.gov - http://roland.grc.nasa.gov/~mallman - - - Vern Paxson - ACIRI / ICSI - 1947 Center Street - Suite 600 - Berkeley, CA 94704-1198 - - Phone: +1 510/642-4274 x302 - EMail: vern@aciri.org - - - W. Richard Stevens - 1202 E. Paseo del Zorro - Tucson, AZ 85718 - 520-297-9416 - - EMail: rstevens@kohala.com - http://www.kohala.com/~rstevens - - - - - - - - - - - - - - - - - - - - - -Allman, et. al. Standards Track [Page 13] - -RFC 2581 TCP Congestion Control April 1999 - - -Full Copyright Statement - - Copyright (C) The Internet Society (1999). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - - - - - - - - - - - - - - - - - - - - - - - -Allman, et. al. Standards Track [Page 14] - diff --git a/kernel/picotcp/RFC/rfc2675.txt b/kernel/picotcp/RFC/rfc2675.txt deleted file mode 100644 index ded628c..0000000 --- a/kernel/picotcp/RFC/rfc2675.txt +++ /dev/null @@ -1,507 +0,0 @@ - - - - - - -Network Working Group D. Borman -Request for Comments: 2675 Berkeley Software Design -Obsoletes: 2147 S. Deering -Category: Standards Track Cisco - R. Hinden - Nokia - August 1999 - IPv6 Jumbograms - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (1999). All Rights Reserved. - -Abstract - - A "jumbogram" is an IPv6 packet containing a payload longer than - 65,535 octets. This document describes the IPv6 Jumbo Payload - option, which provides the means of specifying such large payload - lengths. It also describes the changes needed to TCP and UDP to make - use of jumbograms. - - Jumbograms are relevant only to IPv6 nodes that may be attached to - links with a link MTU greater than 65,575 octets, and need not be - implemented or understood by IPv6 nodes that do not support - attachment to links with such large MTUs. - -1. Introduction - - jumbo (jum'bO), - - n., pl. -bos, adj. - -n. - 1. 
a person, animal, or thing very large of its kind. - -adj. - 2. very large: the jumbo box of cereal. - - [1800-10; orig. uncert.; popularized as the name of a large - elephant purchased and exhibited by P.T. Barnum in 1882] - - -- www.infoplease.com - - - -Borman, et al. Standards Track [Page 1] - -RFC 2675 IPv6 Jumbograms August 1999 - - - The IPv6 header [IPv6] has a 16-bit Payload Length field and, - therefore, supports payloads up to 65,535 octets long. This document - specifies an IPv6 hop-by-hop option, called the Jumbo Payload option, - that carries a 32-bit length field in order to allow transmission of - IPv6 packets with payloads between 65,536 and 4,294,967,295 octets in - length. Packets with such long payloads are referred to as - "jumbograms". - - The Jumbo Payload option is relevant only for IPv6 nodes that may be - attached to links with a link MTU greater than 65,575 octets (that - is, 65,535 + 40, where 40 octets is the size of the IPv6 header). - The Jumbo Payload option need not be implemented or understood by - IPv6 nodes that do not support attachment to links with MTU greater - than 65,575. - - On links with configurable MTUs, the MTU must not be configured to a - value greater than 65,575 octets if there are nodes attached to that - link that do not support the Jumbo Payload option and it can not be - guaranteed that the Jumbo Payload option will not be sent to those - nodes. - - The UDP header [UDP] has a 16-bit Length field which prevents it from - making use of jumbograms, and though the TCP header [TCP] does not - have a Length field, both the TCP MSS option and the TCP Urgent field - are constrained to 16 bits. This document specifies some simple - enhancements to TCP and UDP to enable them to make use of jumbograms. - An implementation of TCP or UDP on an IPv6 node that supports the - Jumbo Payload option must include the enhancements specified here. 
- - Note: The 16 bit checksum used by UDP and TCP becomes less accurate - as the length of the data being checksummed is increased. - Application designers may want to take this into consideration. - -1.1 Document History - - This document merges and updates material that was previously - published in two separate documents: - - - The specification of the Jumbo Payload option previously appeared - as part of the IPv6 specification in RFC 1883. RFC 1883 has been - superseded by RFC 2460, which no longer includes specification of - the Jumbo Payload option. - - - The specification of TCP and UDP enhancements to support - jumbograms previously appeared as RFC 2147. RFC 2147 is obsoleted - by this document. - - - - - -Borman, et al. Standards Track [Page 2] - -RFC 2675 IPv6 Jumbograms August 1999 - - -2. Format of the Jumbo Payload Option - - The Jumbo Payload option is carried in an IPv6 Hop-by-Hop Options - header, immediately following the IPv6 header. This option has an - alignment requirement of 4n + 2. (See [IPv6, Section 4.2] for - discussion of option alignment.) The option has the following - format: - - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Option Type | Opt Data Len | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Jumbo Payload Length | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Option Type 8-bit value C2 (hexadecimal). - - Opt Data Len 8-bit value 4. - - Jumbo Payload Length 32-bit unsigned integer. Length of the IPv6 - packet in octets, excluding the IPv6 header - but including the Hop-by-Hop Options header - and any other extension headers present. - Must be greater than 65,535. - -3. Usage of the Jumbo Payload Option - - The Payload Length field in the IPv6 header must be set to zero in - every packet that carries the Jumbo Payload option. 
- - If a node that understands the Jumbo Payload option receives a packet - whose IPv6 header carries a Payload Length of zero and a Next Header - value of zero (meaning that a Hop-by-Hop Options header follows), and - whose link-layer framing indicates the presence of octets beyond the - IPv6 header, the node must proceed to process the Hop-by-Hop Options - header in order to determine the actual length of the payload from - the Jumbo Payload option. - - The Jumbo Payload option must not be used in a packet that carries a - Fragment header. - - Higher-layer protocols that use the IPv6 Payload Length field to - compute the value of the Upper-Layer Packet Length field in the - checksum pseudo-header described in [IPv6, Section 8.1] must instead - use the Jumbo Payload Length field for that computation, for packets - that carry the Jumbo Payload option. - - - - - - -Borman, et al. Standards Track [Page 3] - -RFC 2675 IPv6 Jumbograms August 1999 - - - Nodes that understand the Jumbo Payload option are required to detect - a number of possible format errors, and if the erroneous packet was - not destined to a multicast address, report the error by sending an - ICMP Parameter Problem message [ICMPv6] to the packet's source. The - following list of errors specifies the values to be used in the Code - and Pointer fields of the Parameter Problem message: - - error: IPv6 Payload Length = 0 and - IPv6 Next Header = Hop-by-Hop Options and - Jumbo Payload option not present - - Code: 0 - Pointer: high-order octet of the IPv6 Payload Length - - error: IPv6 Payload Length != 0 and - Jumbo Payload option present - - Code: 0 - Pointer: Option Type field of the Jumbo Payload option - - error: Jumbo Payload option present and - Jumbo Payload Length < 65,536 - - Code: 0 - Pointer: high-order octet of the Jumbo Payload Length - - error: Jumbo Payload option present and - Fragment header present - - Code: 0 - Pointer: high-order octet of the Fragment header. 
- - A node that does not understand the Jumbo Payload option is expected - to respond to erroneously-received jumbograms as follows, according - to the IPv6 specification: - - error: IPv6 Payload Length = 0 and - IPv6 Next Header = Hop-by-Hop Options - - Code: 0 - Pointer: high-order octet of the IPv6 Payload Length - - error: IPv6 Payload Length != 0 and - Jumbo Payload option present - - Code: 2 - Pointer: Option Type field of the Jumbo Payload option - - - - -Borman, et al. Standards Track [Page 4] - -RFC 2675 IPv6 Jumbograms August 1999 - - -4. UDP Jumbograms - - The 16-bit Length field of the UDP header limits the total length of - a UDP packet (that is, a UDP header plus data) to no greater than - 65,535 octets. This document specifies the following modification of - UDP to relax that limit: UDP packets longer than 65,535 octets may be - sent by setting the UDP Length field to zero, and letting the - receiver derive the actual UDP packet length from the IPv6 payload - length. (Note that, prior to this modification, zero was not a legal - value for the UDP Length field, because the UDP packet length - includes the UDP header and therefore has a minimum value of 8.) - - The specific requirements for sending a UDP jumbogram are as follows: - - When sending a UDP packet, if and only if the length of the UDP - header plus UDP data is greater than 65,535, set the Length field - in the UDP header to zero. - - The IPv6 packet carrying such a large UDP packet will necessarily - include a Jumbo Payload option in a Hop-by-Hop Options header; set - the Jumbo Payload Length field of that option to be the actual - length of the UDP header plus data, plus the length of all IPv6 - extension headers present between the IPv6 header and the UDP - header. - - For generating the UDP checksum, use the actual length of the UDP - header plus data, NOT zero, in the checksum pseudo-header [IPv6, - Section 8.1]. 
- - The specific requirements for receiving a UDP jumbogram are as - follows: - - When receiving a UDP packet, if and only if the Length field in - the UDP header is zero, calculate the actual length of the UDP - header plus data from the IPv6 Jumbo Payload Length field minus - the length of all extension headers present between the IPv6 - header and the UDP header. - - In the unexpected case that the UDP Length field is zero but no - Jumbo Payload option is present (i.e., the IPv6 packet is not a - jumbogram), use the Payload Length field in the IPv6 header, in - place of the Jumbo Payload Length field, in the above calculation. - - For verifying the received UDP checksum, use the calculated length - of the UDP header plus data, NOT zero, in the checksum pseudo- - header. - - - - - -Borman, et al. Standards Track [Page 5] - -RFC 2675 IPv6 Jumbograms August 1999 - - -5. TCP Jumbograms - - Because there is no length field in the TCP header, there is nothing - limiting the length of an individual TCP packet. However, the MSS - value that is negotiated at the beginning of the connection limits - the largest TCP packet that can be sent, and the Urgent Pointer - cannot reference data beyond 65,535 bytes. - -5.1 TCP MSS - - When determining what MSS value to send, if the MTU of the directly - attached interface minus 60 [IPv6, Section 8.3] is greater than or - equal to 65,535, then set the MSS value to 65,535. - - When an MSS value of 65,535 is received, it is to be treated as - infinity. The actual MSS is determined by subtracting 60 from the - value learned by performing Path MTU Discovery [MTU-DISC] over the - path to the TCP peer. - -5.2 TCP Urgent Pointer - - The Urgent Pointer problem could be fixed by adding a TCP Urgent - Pointer Option. However, since it is unlikely that applications - using jumbograms will also use Urgent Pointers, a less intrusive - change similar to the MSS change will suffice. 
- - When a TCP packet is to be sent with an Urgent Pointer (i.e., the URG - bit set), first calculate the offset from the Sequence Number to the - Urgent Pointer. If the offset is less than 65,535, fill in the - Urgent field and continue with the normal TCP processing. If the - offset is greater than 65,535, and the offset is greater than or - equal to the length of the TCP data, fill in the Urgent Pointer with - 65,535 and continue with the normal TCP processing. Otherwise, the - TCP packet must be split into two pieces. The first piece contains - data up to, but not including the data pointed to by the Urgent - Pointer, and the Urgent field is set to 65,535 to indicate that the - Urgent Pointer is beyond the end of this packet. The second piece - can then be sent with the Urgent field set normally. - - Note: The first piece does not have to include all of the data up to - the Urgent Pointer. It can be shorter, just as long as it ends - within 65,534 bytes of the Urgent Pointer, so that the offset to the - Urgent Pointer in the second piece will be less than 65,535 bytes. - - For TCP input processing, when a TCP packet is received with the URG - bit set and an Urgent field of 65,535, the Urgent Pointer is - calculated using an offset equal to the length of the TCP data, - rather than the offset in the Urgent field. - - - -Borman, et al. Standards Track [Page 6] - -RFC 2675 IPv6 Jumbograms August 1999 - - - It should also be noted that though the TCP window is only 16-bits, - larger windows can be used through use of the TCP Window Scale option - [TCP-EXT]. - -6. Security Considerations - - The Jumbo Payload option and TCP/UDP jumbograms do not introduce any - known new security concerns. - -7. Authors' Addresses - - David A. Borman - Berkeley Software Design, Inc. - 4719 Weston Hills Drive - Eagan, MN 55123 - USA - - Phone: +1 612 405 8194 - EMail: dab@bsdi.com - - - Stephen E. Deering - Cisco Systems, Inc. 
- 170 West Tasman Drive - San Jose, CA 95134-1706 - USA - - Phone: +1 408 527 8213 - EMail: deering@cisco.com - - - Robert M. Hinden - Nokia - 313 Fairchild Drive - Mountain View, CA 94043 - USA - - Phone: +1 650 625 2004 - EMail: hinden@iprg.nokia.com - - - - - - - - - - - - -Borman, et al. Standards Track [Page 7] - -RFC 2675 IPv6 Jumbograms August 1999 - - -8. References - - [ICMPv6] Conta, A. and S. Deering, "ICMP for the Internet Protocol - Version 6 (IPv6)", RFC 2463, December 1998. - - [IPv6] Deering, S. and R. Hinden, "Internet Protocol Version 6 - (IPv6) Specification", RFC 2460, December 1998. - - [MTU-DISC] McCann, J., Deering, S. and J. Mogul, "Path MTU Discovery - for IP Version 6", RFC 1981, August 1986. - - [TCP] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [TCP-EXT] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [UDP] Postel, J., "User Datagram Protocol", STD 6, RFC 768, - August 1980. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Borman, et al. Standards Track [Page 8] - -RFC 2675 IPv6 Jumbograms August 1999 - - -9. Full Copyright Statement - - Copyright (C) The Internet Society (1999). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Borman, et al. Standards Track [Page 9] - diff --git a/kernel/picotcp/RFC/rfc2757.txt b/kernel/picotcp/RFC/rfc2757.txt deleted file mode 100644 index e49f141..0000000 --- a/kernel/picotcp/RFC/rfc2757.txt +++ /dev/null @@ -1,2579 +0,0 @@ - - - - - - -Network Working Group G. Montenegro -Request for Comments: 2757 Sun Microsystems, Inc. -Category: Informational S. Dawkins - Nortel Networks - M. Kojo - University of Helsinki - V. Magret - Alcatel - N. Vaidya - Texas A&M University - January 2000 - - - Long Thin Networks - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. 
- -Abstract - - In view of the unpredictable and problematic nature of long thin - networks (for example, wireless WANs), arriving at an optimized - transport is a daunting task. We have reviewed the existing - proposals along with future research items. Based on this overview, - we also recommend mechanisms for implementation in long thin - networks. - - Our goal is to identify a TCP that works for all users, including - users of long thin networks. We started from the working - recommendations of the IETF TCP Over Satellite Links (tcpsat) working - group with this end in mind. - - We recognize that not every tcpsat recommendation will be required - for long thin networks as well, and work toward a set of TCP - recommendations that are 'benign' in environments that do not require - them. - - - - - - - - -Montenegro, et al. Informational [Page 1] - -RFC 2757 Long Thin Networks January 2000 - - -Table of Contents - - 1 Introduction ................................................. 3 - 1.1 Network Architecture .................................... 5 - 1.2 Assumptions about the Radio Link ........................ 6 - 2 Should it be IP or Not? ..................................... 7 - 2.1 Underlying Network Error Characteristics ................ 7 - 2.2 Non-IP Alternatives ..................................... 8 - 2.2.1 WAP ................................................ 8 - 2.2.2 Deploying Non-IP Alternatives ...................... 9 - 2.3 IP-based Considerations ................................. 9 - 2.3.1 Choosing the MTU [Stevens94, RFC1144] .............. 9 - 2.3.2 Path MTU Discovery [RFC1191] ....................... 10 - 2.3.3 Non-TCP Proposals .................................. 10 - 3 The Case for TCP ............................................. 11 - 4 Candidate Optimizations ...................................... 12 - 4.1 TCP: Current Mechanisms ................................. 12 - 4.1.1 Slow Start and Congestion Avoidance ................ 
12 - 4.1.2 Fast Retransmit and Fast Recovery .................. 12 - 4.2 Connection Setup with T/TCP [RFC1397, RFC1644] .......... 14 - 4.3 Slow Start Proposals .................................... 14 - 4.3.1 Larger Initial Window .............................. 14 - 4.3.2 Growing the Window during Slow Start ............... 15 - 4.3.2.1 ACK Counting .................................. 15 - 4.3.2.2 ACK-every-segment ............................. 16 - 4.3.3 Terminating Slow Start ............................. 17 - 4.3.4 Generating ACKs during Slow Start .................. 17 - 4.4 ACK Spacing ............................................. 17 - 4.5 Delayed Duplicate Acknowlegements ....................... 18 - 4.6 Selective Acknowledgements [RFC2018] .................... 18 - 4.7 Detecting Corruption Loss ............................... 19 - 4.7.1 Without Explicit Notification ...................... 19 - 4.7.2 With Explicit Notifications ........................ 20 - 4.8 Active Queue Management ................................. 21 - 4.9 Scheduling Algorithms ................................... 21 - 4.10 Split TCP and Performance-Enhancing Proxies (PEPs) ..... 22 - 4.10.1 Split TCP Approaches .............................. 23 - 4.10.2 Application Level Proxies ......................... 26 - 4.10.3 Snoop and its Derivatives ......................... 27 - 4.10.4 PEPs to handle Periods of Disconnection ........... 29 - 4.11 Header Compression Alternatives ........................ 30 - 4.12 Payload Compression .................................... 31 - 4.13 TCP Control Block Interdependence [Touch97] ............ 32 - 5 Summary of Recommended Optimizations ......................... 33 - 6 Conclusion ................................................... 35 - 7 Acknowledgements ............................................. 35 - 8 Security Considerations ...................................... 35 - - - - -Montenegro, et al. 
Informational [Page 2] - -RFC 2757 Long Thin Networks January 2000 - - - 9 References ................................................... 36 - Authors' Addresses ............................................. 44 - Full Copyright Statement ....................................... 46 - -1 Introduction - - Optimized wireless networking is one of the major hurdles that Mobile - Computing must solve if it is to enable ubiquitous access to - networking resources. However, current data networking protocols have - been optimized primarily for wired networks. Wireless environments - have very different characteristics in terms of latency, jitter, and - error rate as compared to wired networks. Accordingly, traditional - protocols are ill-suited to this medium. - - Mobile Wireless networks can be grouped in W-LANs (for example, - 802.11 compliant networks) and W-WANs (for example, CDPD [CDPD], - Ricochet, CDMA [CDMA], PHS, DoCoMo, GSM [GSM] to name a few). W-WANs - present the most serious challenge, given that the length of the - wireless link (expressed as the delay*bandwidth product) is typically - 4 to 5 times as long as that of its W-LAN counterparts. For example, - for an 802.11 network, assuming the delay (round-trip time) is about - 3 ms. and the bandwidth is 1.5 Mbps, the delay*bandwidth product is - 4500 bits. For a W-WAN such as Ricochet, a typical round-trip time - may be around 500 ms. (the best is about 230 ms.), and the sustained - bandwidth is about 24 Kbps. This yields a delay*bandwidth product - roughly equal to 1.5 KB. In the near future, 3rd Generation wireless - services will offer 384Kbps and more. Assuming a 200 ms round-trip, - the delay*bandwidth product in this case is 76.8 Kbits (9.6 KB). This - value is larger than the default 8KB buffer space used by many TCP - implementations. 
This means that, whereas for W-LANs the default - buffer space is enough, future W-WANs will operate inefficiently - (that is, they will not be able to fill the pipe) unless they - override the default value. A 3rd Generation wireless service - offering 2 Mbps with 200-millisecond latency requires a 50 KB buffer. - - Most importantly, latency across a link adversely affects - throughput. For example, [MSMO97] derives an upper bound on TCP - throughput. Indeed, the resultant expression is inversely related to - the round-trip time. - - The long latencies also push the limits (and commonly transgress - them) for what is acceptable to users of interactive applications. - - As a quick glance to our list of references will reveal, there is a - wealth of proposals that attempt to solve the wireless networking - problem. In this document, we survey the different solutions - available or under investigation, and issue the corresponding - recommendations. - - - -Montenegro, et al. Informational [Page 3] - -RFC 2757 Long Thin Networks January 2000 - - - There is a large body of work on the subject of improving TCP - performance over satellite links. The documents under development by - the tcpsat working group of the IETF [AGS98, ADGGHOSSTT98] are very - relevant. In both cases, it is essential to start by improving the - characteristics of the medium by using forward error correction (FEC) - at the link layer to reduce the BER (bit error rate) from values as - high as 10-3 to 10-6 or better. This makes the BER manageable. Once - in this realm, retransmission schemes like ARQ (automatic repeat - request) may be used to bring it down even further. Notice that - sometimes it may be desirable to forego ARQ because of the additional - delay it implies. In particular, time sensitive traffic (video, - audio) must be delivered within a certain time limit beyond which the - data is obsolete. 
Exhaustive retransmissions in this case merely - succeed in wasting time in order to deliver data that will be - discarded once it arrives at its destination. This indicates the - desirability of augmenting the protocol stack implementation on - devices such that the upper protocol layers can inform the link and - MAC layer when to avoid such costly retransmission schemes. - - Networks that include satellite links are examples of "long fat - networks" (LFNs or "elephants"). They are "long" networks because - their round-trip time is quite high (for example, 0.5 sec and higher - for geosynchronous satellites). Not all satellite links fall within - the LFN regime. In particular, round-trip times in a low-earth - orbiting (LEO) satellite network may be as little as a few - milliseconds (and never extend beyond 160 to 200 ms). W-WANs share - the "L" with LFNs. However, satellite networks are also "fat" in the - sense that they may have high bandwidth. Satellite networks may often - have a delay*bandwidth product above 64 KBytes, in which case they - pose additional problems to TCP [TCPHP]. W-WANs do not generally - exhibit this behavior. Accordingly, this document only deals with - links that are "long thin pipes", and the networks that contain them: - "long thin networks". We call these "LTNs". - - This document does not give an overview of the API used to access the - underlying transport. We believe this is an orthogonal issue, even - though some of the proposals below have been put forth assuming a - given interface. It is possible, for example, to support the - traditional socket semantics without fully relying on TCP/IP - transport [MOWGLI]. - - Our focus is on the on-the-wire protocols. We try to include the most - relevant ones and briefly (given that we provide the references - needed for further study) mention their most salient points. - - - - - - - -Montenegro, et al. 
Informational [Page 4] - -RFC 2757 Long Thin Networks January 2000 - - -1.1 Network Architecture - - One significant difference between LFNs and LTNs is that we assume - the W-WAN link is the last hop to the end user. This allows us to - assume that a single intermediate node sees all packets transferred - between the wireless mobile device and the rest of the Internet. - This is only one of the topologies considered by the TCP Satellite - community. - - Given our focus on mobile wireless applications, we only consider a - very specific architecture that includes: - - - a wireless mobile device, connected via - - - a wireless link (which may, in fact comprise several hops at - the link layer), to - - - an intermediate node (sometimes referred to as a base station) - connected via - - - a wireline link, which in turn interfaces with - - - the landline Internet and millions of legacy servers and web - sites. - - Specifically, we are not as concerned with paths that include two - wireless segments separated by a wired one. This may occur, for - example, if one mobile device connects across its immediate wireless - segment via an intermediate node to the Internet, and then via a - second wireless segment to another mobile device. Quite often, - mobile devices connect to a legacy server on the wired Internet. - - Typically, the endpoints of the wireless segment are the intermediate - node and the mobile device. However, the latter may be a wireless - router to a mobile network. This is also important and has - applications in, for example, disaster recovery. - - Our target architecture has implications which concern the - deployability of candidate solutions. In particular, an important - requirement is that we cannot alter the networking stack on the - legacy servers. It would be preferable to only change the networking - stack at the intermediate node, although changing it at the mobile - devices is certainly an option and perhaps a necessity. 
- - We envision mobile devices that can use the wireless medium very - efficiently, but overcome some of its traditional constraints. That - is, full mobility implies that the devices have the flexibility and - agility to use whichever happens to be the best network connection - - - -Montenegro, et al. Informational [Page 5] - -RFC 2757 Long Thin Networks January 2000 - - - available at any given point in time or space. Accordingly, devices - could switch from a wired office LAN and hand over their ongoing - connections to continue on, say, a wireless WAN. This type of agility - also requires Mobile IP [RFC2002]. - -1.2 Assumptions about the Radio Link - - The system architecture described above assumes at most one wireless - link (perhaps comprising more than one wireless hop). However, this - is not enough to characterize a wireless link. Additional - considerations are: - - - What are the error characteristics of the wireless medium? The - link may present a higher BER than a wireline network due to - burst errors and disconnections. The techniques below usually - do not address all the types of errors. Accordingly, a complete - solution should combine the best of all the proposals. - Nevertheless, in this document we are more concerned with (and - give preference to solving) the most typical case: (1) higher - BER due to random errors (which implies longer and more - variable delays due to link-layer error corrections and - retransmissions) rather than (2) an interruption in service due - to a handoff or a disconnection. The latter are also important - and we do include relevant proposals in this survey. - - - Is the wireless service datagram oriented, or is it a virtual - circuit? Currently, switched virtual circuits are more common, - but packet networks are starting to appear, for example, - Metricom's Starmode [CB96], CDPD [CDPD] and General Packet - Radio Service (GPRS) [GPRS],[BW97] in GSM. - - - What kind of reliability does the link provide? 
Wireless - services typically retransmit a packet (frame) until it has - been acknowledged by the target. They may allow the user to - turn off this behavior. For example, GSM allows RLP [RLP] - (Radio Link Protocol) to be turned off. Metricom has a - similar "lightweight" mode. In GSM RLP, a frame is - retransmitted until the maximum number of retransmissions - (protocol parameter) is reached. What happens when this limit - is reached is determined by the telecom operator: the physical - link connection is either disconnected or a link reset is - enforced where the sequence numbers are resynchronized and the - transmit and receive buffers are flushed resulting in lost - data. Some wireless services, like CDMA IS95-RLP [CDMA, - Karn93], limit the latency on the wireless link by - retransmitting a frame only a couple of times. This decreases - the residual frame error rate significantly, but does not - provide fully reliable link service. - - - -Montenegro, et al. Informational [Page 6] - -RFC 2757 Long Thin Networks January 2000 - - - - Does the mobile device transmit and receive at the same time? - Doing so increases the cost of the electronics on the mobile - device. Typically, this is not the case. We assume in this - document that mobile devices do not transmit and receive - simultaneously. - - - Does the mobile device directly address more than one peer on - the wireless link? Packets to each different peer may traverse - spatially distinct wireless paths. Accordingly, the path to - each peer may exhibit very different characteristics. Quite - commonly, the mobile device addresses only one peer (the - intermediate node) at any given point in time. When this is - not the case, techniques such as Channel-State Dependent Packet - Scheduling come into play (see the section "Packet Scheduling" - below). - -2 Should it be IP or Not? - - The first decision is whether to use IP as the underlying network - protocol or not. 
In particular, some data protocols evolved from - wireless telephony are not always -- though at times they may be -- - layered on top of IP [MOWGLI, WAP]. These proposals are based on the - concept of proxies that provide adaptation services between the - wireless and wireline segments. - - This is a reasonable model for mobile devices that always communicate - through the proxy. However, we expect many wireless mobile devices to - utilize wireline networks whenever they are available. This model - closely follows current laptop usage patterns: devices typically - utilize LANs, and only resort to dial-up access when "out of the - office." - - For these devices, an architecture that assumes IP is the best - approach, because it will be required for communications that do not - traverse the intermediate node (for example, upon reconnection to a - W-LAN or a 10BaseT network at the office). - -2.1 Underlying Network Error Characteristics - - Using IP as the underlying network protocol requires a certain (low) - level of link robustness that is expected of wireless links. - - IP, and the protocols that are carried in IP packets, are protected - end-to-end by checksums that are relatively weak [Stevens94, - Paxson97] (and, in some cases, optional). For much of the Internet, - these checksums are sufficient; in wireless environments, the error - characteristics of the raw wireless link are much less robust than - the rest of the end-to-end path. Hence for paths that include - - - -Montenegro, et al. Informational [Page 7] - -RFC 2757 Long Thin Networks January 2000 - - - wireless links, exclusively relying on end-to-end mechanisms to - detect and correct transmission errors is undesirable. These should - be complemented by local link-level mechanisms. Otherwise, damaged IP - packets are propagated through the network only to be discarded at - the destination host. 
For example, intermediate routers are required - to check the IP header checksum, but not the UDP or TCP checksums. - Accordingly, when the payload of an IP packet is corrupted, this is - not detected until the packet arrives at its ultimate destination. - - A better approach is to use link-layer mechanisms such as FEC, - retransmissions, and so on in order to improve the characteristics of - the wireless link and present a much more reliable service to IP. - This approach has been taken by CDPD, Ricochet and CDMA. - - This approach is roughly analogous to the successful deployment of - Point-to-Point Protocol (PPP), with robust framing and 16-bit - checksumming, on wireline networks as a replacement for the Serial - Line Interface Protocol (SLIP), with only a single framing byte and - no checksumming. - - [AGS98] recommends the use of FEC in satellite environments. - - Notice that the link-layer could adapt its frame size to the - prevalent BER. It would perform its own fragmentation and reassembly - so that IP could still enjoy a large enough MTU size [LS98]. - - A common concern for using IP as a transport is the header overhead - it implies. Typically, the underlying link-layer appears as PPP - [RFC1661] to the IP layer above. This allows for header compression - schemes [IPHC, IPHC-RTP, IPHC-PPP] which greatly alleviate the - problem. - -2.2 Non-IP Alternatives - - A number of non-IP alternatives aimed at wireless environments have - been proposed. One representative proposal is discussed here. - -2.2.1 WAP - - The Wireless Application Protocol (WAP) specifies an application - framework and network protocols for wireless devices such as mobile - telephones, pagers, and PDAs [WAP]. The architecture requires a proxy - between the mobile device and the server. The WAP protocol stack is - layered over a datagram transport service. 
Such a service is - provided by most wireless networks; for example, IS-136, GSM - SMS/USSD, and UDP in IP networks like CDPD and GSM GPRS. The core of - - - - - -Montenegro, et al. Informational [Page 8] - -RFC 2757 Long Thin Networks January 2000 - - - the WAP protocols is a binary HTTP/1.1 protocol with additional - features such as header caching between requests and a shared state - between client and server. - -2.2.2 Deploying Non-IP Alternatives - - IP is such a fundamental element of the Internet that non-IP - alternatives face substantial obstacles to deployment, because they - do not exploit the IP infrastructure. Any non-IP alternative that is - used to provide gatewayed access to the Internet must map between IP - addresses and non-IP addresses, must terminate IP-level security at a - gateway, and cannot use IP-oriented discovery protocols (Dynamic Host - Configuration Protocol, Domain Name Services, Lightweight Directory - Access Protocol, Service Location Protocol, etc.) without translation - at a gateway. - - A further complexity occurs when a device supports both wireless and - wireline operation. If the device uses IP for wireless operation, - uninterrupted operation when the device is connected to a wireline - network is possible (using Mobile IP). If a non-IP alternative is - used, this switchover is more difficult to accomplish. - - Non-IP alternatives face the burden of proof that IP is so ill-suited - to a wireless environment that it is not a viable technology. - -2.3 IP-based Considerations - - Given its worldwide deployment, IP is an obvious choice for the - underlying network technology. Optimizations implemented at this - level benefit traditional Internet application protocols as well as - new ones layered on top of IP or UDP. - -2.3.1 Choosing the MTU [Stevens94, RFC1144] - - In slow networks, the time required to transmit the largest possible - packet may be considerable. 
Interactive response time should not - exceed the well-known human factors limit of 100 to 200 ms. This - should be considered the maximum time budget to (1) send a packet and - (2) obtain a response. In most networking stack implementations, (1) - is highly dependent on the maximum transmission unit (MTU). In the - worst case, a small packet from an interactive application may have - to wait for a large packet from a bulk transfer application before - being sent. Hence, a good rule of thumb is to choose an MTU such that - its transmission time is less than (or not much larger than) 200 ms. - - - - - - - -Montenegro, et al. Informational [Page 9] - -RFC 2757 Long Thin Networks January 2000 - - - Of course, compression and type-of-service queuing (whereby - interactive data packets are given a higher priority) may alleviate - this problem. In particular, the latter may reduce the average wait - time to about half the MTU's transmission time. - -2.3.2 Path MTU Discovery [RFC1191] - - Path MTU discovery benefits any protocol built on top of IP. It - allows a sender to determine what the maximum end-to-end transmission - unit is to a given destination. Without Path MTU discovery, the - default IPv4 MTU size is 576. The benefits of using a larger MTU are: - - - Smaller ratio of header overhead to data - - - Allows TCP to grow its congestion window faster, since it - increases in units of segments. - - Of course, for a given BER, a larger MTU has a correspondingly larger - probability of error within any given segment. The BER may be reduced - using lower level techniques like FEC and link-layer retransmissions. - The issue is that now delays may become a problem due to the - additional retransmissions, and the fact that packet transmission - time increases with a larger MTU. - - Recommendation: Path MTU discovery is recommended. [AGS98] already - recommends its use in satellite environments. 
- -2.3.3 Non-TCP Proposals - - Other proposals assume an underlying IP datagram service, and - implement an optimized transport either directly on top of IP - [NETBLT] or on top of UDP [MNCP]. Not relying on TCP is a bold move, - given the wealth of experience and research related to it. It could - be argued that the Internet has not collapsed because its main - protocol, TCP, is very careful in how it uses the network, and - generally treats it as a black box assuming all packet losses are due - to congestion and prudently backing off. This avoids further - congestion. - - However, in the wireless medium, packet losses may also be due to - corruption due to high BER, fading, and so on. Here, the right - approach is to try harder, instead of backing off. Alternative - transport protocols are: - - - NETBLT [NETBLT, RFC1986, RFC1030] - - - MNCP [MNCP] - - - - -Montenegro, et al. Informational [Page 10] - -RFC 2757 Long Thin Networks January 2000 - - - - ESRO [RFC2188] - - - RDP [RFC908, RFC1151] - - - VMTP [VMTP] - -3 The Case for TCP - - This is one of the most hotly debated issues in the wireless arena. - Here are some arguments against it: - - - It is generally recognized that TCP does not perform well in - the presence of significant levels of non-congestion loss. TCP - detractors argue that the wireless medium is one such case, and - that it is hard enough to fix TCP. They argue that it is easier - to start from scratch. - - - TCP has too much header overhead. - - - By the time the mechanisms are in place to fix it, TCP is very - heavy, and ill-suited for use by lightweight, portable devices. - - and here are some in support of TCP: - - - It is preferable to continue using the same protocol that the - rest of the Internet uses for compatibility reasons. Any - extensions specific to the wireless link may be negotiated. - - - Legacy mechanisms may be reused (for example three-way - handshake). 
- - - Link-layer FEC and ARQ can reduce the BER such that any losses - TCP does see are, in fact, caused by congestion (or a sustained - interruption of link connectivity). Modern W-WAN technologies - do this (CDPD, US-TDMA, CDMA, GSM), thus improving TCP - throughput. - - - Handoffs among different technologies are made possible by - Mobile IP [RFC2002], but only if the same protocols, namely - TCP/IP, are used throughout. - - - Given TCP's wealth of research and experience, alternative - protocols are relatively immature, and the full implications of - their widespread deployment not clearly understood. - - Overall, we feel that the performance of TCP over long-thin networks - can be improved significantly. Mechanisms to do so are discussed in - the next sections. - - - -Montenegro, et al. Informational [Page 11] - -RFC 2757 Long Thin Networks January 2000 - - -4 Candidate Optimizations - - There is a large volume of work on the subject of optimizing TCP for - operation over wireless media. Even though satellite networks - generally fall in the LFN regime, our current LTN focus has much to - benefit from it. For example, the work of the TCP-over-Satellite - working group of the IETF has been extremely helpful in preparing - this section [AGS98, ADGGHOSSTT98]. - -4.1 TCP: Current Mechanisms - - A TCP sender adapts its use of bandwidth based on feedback from the - receiver. The high latency characteristic of LTNs implies that TCP's - adaptation is correspondingly slower than on networks with shorter - delays. Similarly, delayed ACKs exacerbate the perceived latency on - the link. Given that TCP grows its congestion window in units of - segments, small MTUs may slow adaptation even further. - -4.1.1 Slow Start and Congestion Avoidance - - Slow Start and Congestion Avoidance [RFC2581] are essential to the - Internet's stability.
However, there are three reasons why the wireless - medium adversely affects them: - - - Whenever TCP's retransmission timer expires, the sender assumes - that the network is congested and invokes slow start. This is - why it is important to minimize the losses caused by - corruption, leaving only those caused by congestion (as - expected by TCP). - - - The sender increases its window based on the number of ACKs - received. Their rate of arrival, of course, is dependent on the - RTT (round-trip-time) between sender and receiver, which - implies long ramp-up times in high latency links like LTNs. The - dependency lasts until the pipe is filled. - - - During slow start, the sender increases its window in units of - segments. This is why it is important to use an appropriately - large MTU which, in turn, requires link layers with - low loss. - -4.1.2 Fast Retransmit and Fast Recovery - - When a TCP sender receives several duplicate ACKs, fast retransmit - [RFC2581] allows it to infer that a segment was lost. The sender - retransmits what it considers to be this lost segment without waiting - for the full timeout, thus saving time. - - - - -Montenegro, et al. Informational [Page 12] - -RFC 2757 Long Thin Networks January 2000 - - - After a fast retransmit, a sender invokes the fast recovery [RFC2581] - algorithm. Fast recovery allows the sender to transmit at half its - previous rate (regulating the growth of its window based on - congestion avoidance), rather than having to begin a slow start. This - also saves time. - - In general, TCP can increase its window beyond the delay-bandwidth - product. However, in LTN links the congestion window may remain - rather small, less than four segments, for long periods of time due - to any of the following reasons: - - 1. Typical "file size" to be transferred over a connection is - relatively small (Web requests, Web document objects, email - messages, files, etc.)
In particular, users of LTNs are not - very willing to carry out large transfers as the response time - is so long. - - 2. If the link has high BER, the congestion window tends to stay - small - - 3. When an LTN is combined with a highly congested wireline - Internet path, congestion losses on the Internet have the same - effect as 2. - - 4. Commonly, ISPs/operators configure only a small number of - buffers (even as few as for 3 packets) per user in their dial- - up routers - - 5. Often small socket buffers are recommended with LTNs in order - to prevent the RTO from inflating and to diminish the amount of - packets with competing traffic. - - A small window effectively prevents the sender from taking advantage - of Fast Retransmits. Moreover, efficient recovery from multiple - losses within a single window requires adoption of new proposals - (NewReno [RFC2582]). In addition, on slow paths with no packet - reordering waiting for three duplicate ACKs to arrive postpones - retransmission unnecessarily. - - Recommendation: Implement Fast Retransmit and Fast Recovery at this - time. This is a widely-implemented optimization and is currently at - Proposed Standard level. [AGS98] recommends implementation of Fast - Retransmit/Fast Recovery in satellite environments. NewReno - [RFC2582] apparently does help a sender better handle partial ACKs - and multiple losses in a single window, but at this point is not - recommended due to its experimental nature. Instead, SACK [RFC2018] - is the preferred mechanism. - - - - -Montenegro, et al. Informational [Page 13] - -RFC 2757 Long Thin Networks January 2000 - - -4.2 Connection Setup with T/TCP [RFC1397, RFC1644] - - TCP engages in a "three-way handshake" whenever a new connection is - set up. Data transfer is only possible after this phase has - completed successfully. T/TCP allows data to be exchanged in - parallel with the connection set up, saving valuable time for short - transactions on long-latency networks. 
- - Recommendation: T/TCP is not recommended, for these reasons: - - - It is an Experimental RFC. - - - It is not widely deployed, and it has to be deployed at both ends - of a connection. - - - Security concerns have been raised that T/TCP is more vulnerable - to address-spoofing attacks than TCP itself. - - - At least some of the benefits of T/TCP (eliminating three-way - handshake on subsequent query-response transactions, for instance) - are also available with persistent connections on HTTP/1.1, which - is more widely deployed. - - [ADGGHOSSTT98] does not have a recommendation on T/TCP in satellite - environments. - -4.3 Slow Start Proposals - - Because slow start dominates the network response seen by interactive - users at the beginning of a TCP connection, a number of proposals - have been made to modify or eliminate slow start in long latency - environments. - - Stability of the Internet is paramount, so these proposals must - demonstrate that they will not adversely affect Internet congestion - levels in significant ways. - -4.3.1 Larger Initial Window - - Traditional slow start, with an initial window of one segment, is a - time-consuming bandwidth adaptation procedure over LTNs. Studies on - an initial window larger than one segment [RFC2414, AHO98] resulted - in the TCP standard supporting a maximum value of 2 [RFC2581]. Higher - values are still experimental in nature. - - - - - - - -Montenegro, et al. Informational [Page 14] - -RFC 2757 Long Thin Networks January 2000 - - - In simulations with an increased initial window of three packets - [RFC2415], this proposal does not contribute significantly to packet - drop rates, and it has the added benefit of improving initial - response times when the peer device delays acknowledgements during - slow start (see next proposal). 
- - [RFC2416] addresses situations where the initial window exceeds the - number of buffers available to TCP and indicates that this situation - is no different from the case where the congestion window grows - beyond the number of buffers available. - - [RFC2581] now allows an initial congestion window of two segments. A - larger initial window, perhaps as many as four segments, might be - allowed in the future in environments where this significantly - improves performance (LFNs and LTNs). - - Recommendation: Implement this on devices now. The research on this - optimization indicates that 3 segments is a safe initial setting, and - is centering on choosing between 2, 3, and 4. For now, use 2 - (following RFC2581), which at least allows clients running query- - response applications to get an initial ACK from unmodified servers - without waiting for a typical delayed ACK timeout of 200 - milliseconds, and saves two round-trips. An initial window of 3 - [RFC2415] looks promising and may be adopted in the future pending - further research and experience. - -4.3.2 Growing the Window during Slow Start - - The sender increases its window based on the flow of ACKs coming back - from the receiver. Particularly during slow start, this flow is very - important. A couple of the proposals that have been studied are (1) - ACK counting and (2) ACK-every-segment. - -4.3.2.1 ACK Counting - - The main idea behind ACK counting is: - - - Make each ACK count to its fullest by growing the window based - on the data being acknowledged (byte counting) instead of the - number of ACKs (ACK counting). This has been shown to cause - bursts which lead to congestion. [Allman98] shows that Limited - Byte Counting (LBC), in which the window growth is limited to 2 - segments, does not lead to as much burstiness, and offers some - performance gains. - - Recommendation: Unlimited byte counting is not recommended. 
Van - Jacobson cautions against byte counting [TCPSATMIN] because it leads - to burstiness, and recommends ACK spacing [ACKSPACING] instead. - - - -Montenegro, et al. Informational [Page 15] - -RFC 2757 Long Thin Networks January 2000 - - - ACK spacing requires ACKs to consistently pass through a single ACK- - spacing router. This requirement works well for W-WAN environments - if the ACK-spacing router is also the intermediate node. - - Limited byte counting warrants further investigation before we can - recommend this proposal, but it shows promise. - -4.3.2.2 ACK-every-segment - - The main idea behind ACK-every-segment is: - - - Keep a constant stream of ACKs coming back by turning off - delayed ACKs [RFC1122] during slow start. ACK-every-segment - must be limited to slow start, in order to avoid penalizing - asymmetric-bandwidth configurations. For instance, a low - bandwidth link carrying acknowledgements back to the sender, - hinders the growth of the congestion window, even if the link - toward the client has a greater bandwidth [BPK99]. - - Even though simulations confirm its promise (it allows receivers to - receive the second segment from unmodified senders without waiting - for a typical delayed ACK timeout of 200 milliseconds), for this - technique to be practical the receiver must acknowledge every segment - only when the sender is in slow start. Continuing to do so when the - sender is in congestion avoidance may have adverse effects on the - mobile device's battery consumption and on traffic in the network. - - This violates a SHOULD in [RFC2581]: delayed acknowledgements SHOULD - be used by a TCP receiver. - - "Disabling Delayed ACKs During Slow Start" is technically - unimplementable, as the receiver has no way of knowing when the - sender crosses ssthresh (the "slow start threshold") and begins using - the congestion avoidance algorithm. 
If receivers follow - recommendations for increased initial windows, disabling delayed ACKs - during an increased initial window would open the TCP window more - rapidly without doubling ACK traffic in general. However, this - scheme might double ACK traffic if most connections remain in slow- - start. - - Recommendation: ACK only the first segment on a new connection with - no delay. - - - - - - - - - -Montenegro, et al. Informational [Page 16] - -RFC 2757 Long Thin Networks January 2000 - - -4.3.3 Terminating Slow Start - - New mechanisms [ADGGHOSSTT98] are being proposed to improve TCP's - adaptive properties such that the available bandwidth is better - utilized while reducing the possibility of congesting the network. - This results in the closing of the congestion window to 1 segment - (which precludes fast retransmit), and the subsequent slow start - phase. - - Theoretically, an optimum value for slow-start threshold (ssthresh) - allows connection bandwidth utilization to ramp up as aggressively as - possible without "overshoot" (using so much bandwidth that packets - are lost and congestion avoidance procedures are invoked). - - Recommendation: Estimating the slow start threshold is not - recommended. Although this would be helpful if we knew how to do it, - rough consensus on the tcp-impl and tcp-sat mailing lists is that in - non-trivial operational networks there is no reliable method to probe - during TCP startup and estimate the bandwidth available. - -4.3.4 Generating ACKs during Slow Start - - Mitigations that inject additional ACKs (whether "ACK-first-segment" - or "ACK-every-segment-during-slow-start") beyond what today's - conformant TCPs inject are only applicable during the slow-start - phases of a connection. 
After an initial exchange, the connection - usually completes slow-start, so TCPs only inject additional ACKs - when (1) the connection is closed, and a new connection is opened, or - (2) the TCPs handle idle connection restart correctly by performing - slow start. - - Item (1) is typical when using HTTP/1.0, in which each request- - response transaction requires a new connection. Persistent - connections in HTTP/1.1 help in maintaining a connection in - congestion avoidance instead of constantly reverting to slow-start. - Because of this, these optimizations which are only enabled during - slow-start do not get as much of a chance to act. Item (2), of - course, is independent of HTTP version. - -4.4 ACK Spacing - - During slow start, the sender responds to the incoming ACK stream by - transmitting N+1 segments for each ACK, where N is the number of new - segments acknowledged by the incoming ACK. This results in data - being sent at twice the speed at which it can be processed by the - network. Accordingly, queues will form, and due to insufficient - buffering at the bottleneck router, packets may get dropped before - the link's capacity is full. - - - -Montenegro, et al. Informational [Page 17] - -RFC 2757 Long Thin Networks January 2000 - - - Spacing out the ACKs effectively controls the rate at which the - sender will transmit into the network, and may result in little or no - queueing at the bottleneck router [ACKSPACING]. Furthermore, ack - spacing reduces the size of the bursts. - - Recommendation: No recommendation at this time. Continue monitoring - research in this area. - -4.5 Delayed Duplicate Acknowledgements - - As was mentioned above, link-layer retransmissions may decrease the - BER enough that congestion accounts for most of packet losses; still, - nothing can be done about interruptions due to handoffs, moving - beyond wireless coverage, etc.
In this scenario, it is imperative to - prevent interaction between link-layer retransmission and TCP - retransmission as these layers duplicate each other's efforts. In - such an environment it may make sense to delay TCP's efforts so as to - give the link-layer a chance to recover. With this in mind, the - Delayed Dupacks [MV97, Vaidya99] scheme selectively delays duplicate - acknowledgements at the receiver. It is preferable to allow a local - mechanism to resolve a local problem, instead of invoking TCP's end- - to-end mechanism and incurring the associated costs, both in terms of - wasted bandwidth and in terms of its effect on TCP's window behavior. - - The Delayed Dupacks scheme can be used despite IP encryption since - the intermediate node does not need to examine the TCP headers. - - Currently, it is not well understood how long the receiver should - delay the duplicate acknowledgments. In particular, the impact of - wireless medium access control (MAC) protocol on the choice of delay - parameter needs to be studied. The MAC protocol may affect the - ability to choose the appropriate delay (either statically or - dynamically). In general, significant variabilities in link-level - retransmission times can have an adverse impact on the performance of - the Delayed Dupacks scheme. Furthermore, as discussed later in - section 4.10.3, Delayed Dupacks and some other schemes (such as Snoop - [SNOOP]) are only beneficial in certain types of network links. - - Recommendation: Delaying duplicate acknowledgements may be useful in - specific network topologies, but a general recommendation requires - further research and experience. - -4.6 Selective Acknowledgements [RFC2018] - - SACK may not be useful in many LTNs, according to Section 1.1 of - [TCPHP]. In particular, SACK is more useful in the LFN regime, - especially if large windows are being used, because there is a - - - - -Montenegro, et al. 
Informational [Page 18] - -RFC 2757 Long Thin Networks January 2000 - - - considerable probability of multiple segment losses per window. In - the LTN regime, TCP windows are much smaller, and burst errors must - be much longer in duration in order to damage multiple segments. - - Accordingly, the complexity of SACK may not be justifiable, unless - there is a high probability of burst errors and congestion on the - wireless link. A desire for compatibility with TCP recommendations - for non-LTN environments may dictate LTN support for SACK anyway. - - [AGS98] recommends use of SACK with Large TCP Windows in satellite - environments, and notes that this implies support for PAWS - (Protection Against Wrapped Sequence space) and RTTM (Round Trip Time - Measurement) as well. - - Berkeley's SNOOP protocol research [SNOOP] indicates that SACK does - improve throughput for SNOOP when multiple segments are lost per - window [BPSK96]. SACK allows SNOOP to recover from multi-segment - losses in one round-trip. In this case, the mobile device needs to - implement some form of selective acknowledgements. If SACK is not - used, TCP may enter congestion avoidance as the time needed to - retransmit the lost segments may be greater than the retransmission - timer. - - Recommendation: Implement SACK now for compatibility with other TCPs - and improved performance with SNOOP. - -4.7 Detecting Corruption Loss - -4.7.1 Without Explicit Notification - - In the absence of explicit notification from the network, some - researchers have suggested statistical methods for congestion - avoidance [Jain89, WC91, VEGAS]. A natural extension of these - heuristics would enable a sender to distinguish between losses caused - by congestion and other causes. The research results on the - reliability of sender-based heuristics is unfavorable [BV97, BV98]. 
- [BV98a] reports better results in constrained environments using - packet inter-arrival times measured at the receiver, but highly- - variable delay - of the type encountered in wireless environments - during intercell handoff - confounds these heuristics. - - Recommendation: No recommendation at this time - continue to monitor - research results. - - - - - - - - -Montenegro, et al. Informational [Page 19] - -RFC 2757 Long Thin Networks January 2000 - - -4.7.2 With Explicit Notifications - - With explicit notification from the network it is possible to - determine when a loss is due to congestion. Several proposals along - these lines include: - - - Explicit Loss Notification (ELN) [BPSK96] - - - Explicit Bad State Notification (EBSN) [BBKVP96] - - - Explicit Loss Notification to the Receiver (ELNR), and Explicit - Delayed Dupack Activation Notification (EDDAN) (notifications - to mobile receiver) [MV97] - - - Explicit Congestion Notification (ECN) [ECN] - - Of these proposals, Explicit Congestion Notification (ECN) seems - closest to deployment on the Internet, and will provide some benefit - for TCP connections on long thin networks (as well as for all other - TCP connections). - - Recommendation: No recommendation at this time. Schemes like ELNR and - EDDAN [MV97], in which the only systems that need to be modified are - the intermediate node and the mobile device, are slated for adoption - pending further research. However, this solution has some - limitations. Since the intermediate node must have access to the TCP - headers, the IP payload must not be encrypted. - - ECN uses the TOS byte in the IP header to carry congestion - information (ECN-capable and Congestion-encountered). This byte is - not encrypted in IPSEC, so ECN can be used on TCP connections that - are encrypted using IPSEC. - - Recommendation: Implement ECN. In spite of this, mechanisms for - explicit corruption notification are still relevant and should be - tracked. 
- - Note: ECN provides useful information to avoid deteriorating further - a bad situation, but has some limitations for wireless applications. - Absence of packets marked with ECN should not be interpreted by ECN- - capable TCP connections as a green light for aggressive - retransmissions. On the contrary, during periods of extreme network - congestion routers may drop packets marked with explicit notification - because their buffers are exhausted - exactly the wrong time for a - host to begin retransmitting aggressively. - - - - - - -Montenegro, et al. Informational [Page 20] - -RFC 2757 Long Thin Networks January 2000 - - -4.8 Active Queue Management - - As has been pointed out above, TCP responds to congestion by closing - down the window and invoking slow start. Long-delay networks take a - particularly long time to recover from this condition. Accordingly, - it is imperative to avoid congestion in LTNs. To remedy this, active - queue management techniques have been proposed as enhancements to - routers throughout the Internet [RED]. The primary motivation for - deployment of these mechanisms is to prevent "congestion collapse" (a - severe degradation in service) by controlling the average queue size - at the routers. As the average queue length grows, Random Early - Detection [RED] increases the possibility of dropping packets. - - The benefits are: - - - Reduce packet drops in routers. By dropping a few packets - before severe congestion sets in, RED avoids dropping bursts of - packets. In other words, the objective is to drop m packets - early to prevent n drops later on, where m is less than n. - - - Provide lower delays. This follows from the smaller queue - sizes, and is particularly important for interactive - applications, for which the inherent delays of wireless links - already push the user experience to the limits of the non- - acceptable. - - - Avoid lock-outs. 
Lack of resources in a router (and the - resultant packet drops) may, in effect, obliterate throughput - on certain connections. Because of active queue management, it - is more probable for an incoming packet to find available - buffer space at the router. - - Active Queue Management has two components: (1) routers detect - congestion before exhausting their resources, and (2) they provide - some form of congestion indication. Dropping packets via RED is only - one example of the latter. Another way to indicate congestion is to - use ECN [ECN] as discussed above under "Detecting Corruption Loss: - With Explicit Notifications." - - Recommendation: RED is currently being deployed in the Internet, and - LTNs should follow suit. ECN deployment should complement RED's. - -4.9 Scheduling Algorithms - - Active queue management helps control the length of the queues. - Additionally, a general solution requires replacing FIFO with other - scheduling algorithms that improve: - - - - -Montenegro, et al. Informational [Page 21] - -RFC 2757 Long Thin Networks January 2000 - - - 1. Fairness (by policing how different packet streams utilize the - available bandwidth), and - - 2. Throughput (by improving the transmitter's radio channel - utilization). - - For example, fairness is necessary for interactive applications (like - telnet or web browsing) to coexist with bulk transfer sessions. - Proposals here include: - - - Fair Queueing (FQ) [Demers90] - - - Class-based Queueing (CBQ) [Floyd95] - - Even if they are only implemented over the wireless link portion of - the communication path, these proposals are attractive in wireless - LTN environments, because new connections for interactive - applications can have difficulty starting when a bulk TCP transfer - has already stabilized using all available bandwidth. - - In our base architecture described above, the mobile device typically - communicates directly with only one wireless peer at a given time: - the intermediate node. 
In some W-WANs, it is possible to directly - address other mobiles within the same cell. Direct communication - with each such wireless peer may traverse a spatially distinct path, - each of which may exhibit statistically independent radio link - characteristics. Channel State Dependent Packet Scheduling (CSDP) - [BBKT96] tracks the state of the various radio links (as defined by - the target devices), and gives preferential treatment to packets - destined for radio links in a "good" state. This avoids attempting to - transmit to (and expect acknowledgements from) a peer on a "bad" - radio link, thus improving throughput. - - A further refinement of this idea suggests that both fairness and - throughput can be improved by combining a wireless-enhanced CBQ with - CSDP [FSS98]. - - Recommendation: No recommendation at this time, pending further - study. - -4.10 Split TCP and Performance-Enhancing Proxies (PEPs) - - Given the dramatic differences between the wired and the wireless - links, a very common approach is to provide some impedance matching - where the two different technologies meet: at the intermediate node. - - - - - - -Montenegro, et al. Informational [Page 22] - -RFC 2757 Long Thin Networks January 2000 - - - The idea is to replace an end-to-end TCP connection with two clearly - distinct connections: one across the wireless link, the other across - its wireline counterpart. Each of the two resulting TCP sessions - operates under very different networking characteristics, and may - adopt the policies best suited to its particular medium. For - example, in a specific LTN topology it may be desirable to modify TCP - Fast Retransmit to resend after the first duplicate ack and Fast - Recovery to not shrink the congestion window if the LTN link has an - extremely long RTT, is known to not reorder packets, and is not - subject to congestion. 
Moreover, on a long-delay link or on a link - with a relatively high bandwidth-delay product it may be desirable to - "slow-start" with a relatively large initial window, even larger than - four segments. While these kinds of TCP modifications can be - negotiated to be employed over the LTN link, they would not be - deployed end-to-end over the global Internet. In LTN topologies where - the underlying link characteristics are known, various similar - types of performance enhancements can be employed without endangering - operations over the global Internet. - - In some proposals, in addition to a PEP mechanism at the intermediate - node, custom protocols are used on the wireless link (for example, - [WAP], [YB94] or [MOWGLI]). - - Even if the gains from using non-TCP protocols are moderate or - better, the wealth of research on optimizing TCP for wireless, and - compatibility with the Internet are compelling reasons to adopt TCP - on the wireless link (enhanced as suggested in section 5 below). - -4.10.1 Split TCP Approaches - - Split-TCP proposals include schemes like I-TCP [ITCP] and MTCP [YB94] - which achieve performance improvements by abandoning end-to-end - semantics. - - The Mowgli architecture [MOWGLI] proposes a split approach with - support for various enhancements at all the protocol layers, not only - at the transport layer. Mowgli provides an option to replace the - TCP/IP core protocols on the LTN link with a custom protocol that is - tuned for LTN links [KRLKA97]. In addition, the protocol provides - various features that are useful with LTNs. For example, it provides - priority-based multiplexing of concurrent connections together with - shared flow control, thus offering link capacity to interactive - applications in a timely manner even if there are bandwidth-intensive - background transfers. Also with this option, Mowgli preserves the - socket semantics on the mobile device so that legacy applications can - be run unmodified.
- - - - - -Montenegro, et al. Informational [Page 23] - -RFC 2757 Long Thin Networks January 2000 - - - Employing split TCP approaches has several benefits as well as - drawbacks. Benefits related to split TCP approaches include the - following: - - - Splitting the end-to-end TCP connection into two parts is a - straightforward way to shield the problems of the wireless link - from the wireline Internet path, and vice versa. Thus, a split TCP - approach enables applying local solutions to the local problems on - the wireless link. For example, it automatically solves the - problem of distinguishing congestion related packet losses on the - wireline Internet and packet losses due to transmission error on - the wireless link as these occur on separate TCP connections. - Even if both segments experience congestion, it may be of a - different nature and may be treated as such. Moreover, temporary - disconnections of the wireless link can be effectively shielded - from the wireline Internet. - - - When one of the TCP connections crosses only a single hop wireless - link or a very limited number of hops, some or all link - characteristics for the wireless TCP path are known. For example, - with a particular link we may know that the link provides reliable - delivery of packets, packets are not delivered out of order, or - the link is not subject to congestion. Having this information for - the TCP path one could expect that defining the TCP mitigations to - be employed becomes a significantly easier task. In addition, - several mitigations that cannot be employed safely over the global - Internet, can be successfully employed over the wireless link.
- - - Splitting one TCP connection into two separate ones allows much - earlier deployment of various recent proposals to improve TCP - performance over wireless links; only the TCP implementations of - the mobile device and intermediate node need to be modified, thus - allowing the vast number of Internet hosts to continue running the - legacy TCP implementations unmodified. Any mitigations that would - require modification of TCP in these wireline hosts may take far - too long to become widely deployed. - - - Allows exploitation of various application level enhancements - which may give significant performance gains (see section 4.10.2). - - Drawbacks related to split TCP approaches include the following: - - - One of the main criticisms against the split TCP approaches is - that it breaks TCP end-to-end semantics. This has various - drawbacks some of which are more severe than others. The most - detrimental drawback is probably that splitting the TCP connection - disables end-to-end usage of IP layer security mechanisms, - precluding the application of IPSec to achieve end-to-end - - - -Montenegro, et al. Informational [Page 24] - -RFC 2757 Long Thin Networks January 2000 - - - security. Still, IPSec could be employed separately in each of the - two parts, thus requiring the intermediate node to become a party - to the security association between the mobile device and the - remote host. This, however, is an undesirable or unacceptable - alternative in most cases. Other security mechanisms above the - transport layer, like TLS [RFC2246] or SOCKS [RFC1928], should be - employed for end-to-end security. - - - Another drawback of breaking end-to-end semantics is that crashes - of the intermediate node become unrecoverable resulting in - termination of the TCP connections. Whether this should be - considered a severe problem depends on the expected frequency of - such crashes. 
- - - On many occasions claims have been stated that if TCP end-to-end - semantics is broken, applications relying on TCP to provide - reliable data delivery become more vulnerable. This, however, is - an overstatement as a well-designed application should never fully - rely on TCP in achieving end-to-end reliability at the application - level. First, current APIs to TCP, such as the Berkeley socket - interface, do not allow applications to know when a TCP - acknowledgement for previously sent user data arrives at the TCP - sender. Second, even if the application is informed of the TCP - acknowledgements, the sending application cannot know whether the - receiving application has received the data: it only knows that - the data reached the TCP receive buffer at the receiving end. - Finally, in order to achieve end-to-end reliability at the - application level an application level acknowledgement is required - to confirm that the receiver has taken the appropriate actions on - the data it received. - - - When a mobile device moves, it is subject to handovers by the - serving base station. If the base station acts as the intermediate - node for the split TCP connection, the state of both TCP endpoints - on the previous intermediate node must be transferred to the new - intermediate node to ensure continued operation over the split TCP - connection. This requires extra work and causes overhead. However, - in most of the W-WAN wireless networks, unlike in W-LANs, the W- - WAN base station does not provide the mobile device with the - connection point to the wireline Internet (such base stations may - not even have an IP stack). Instead, the W-WAN network takes care - of the mobility and retains the connection point to the wireline - Internet unchanged while the mobile device moves. Thus, TCP state - handover is not required in most W-WANs.
- - - The packets traversing through all the protocol layers up to - transport layer and again down to the link layer result in extra - overhead at the intermediate node. In case of LTNs with low - - - -Montenegro, et al. Informational [Page 25] - -RFC 2757 Long Thin Networks January 2000 - - - bandwidth, this extra overhead does not cause serious additional - performance problems unlike with W-LANs that typically have much - higher bandwidth. - - - Split TCP proposals are not applicable to networks with asymmetric - routing. Deploying a split TCP approach requires that traffic to - and from the mobile device be routed through the intermediate - node. With some networks, this cannot be accomplished, or it - requires that the intermediate node is located several hops away - from the wireless network edge which in turn is impractical in - many cases and may result in non-optimal routing. - - - Split TCP, as the name implies, does not address problems related - to UDP. - - It should be noted that using split TCP does not necessarily exclude - simultaneous usage of IP for end-to-end connectivity. Correct usage - of split TCP should be managed per application or per connection and - should be under the end-user control so that the user can decide - whether a particular TCP connection or application makes use of split - TCP or whether it operates end-to-end directly over IP. - - Recommendation: Split TCP proposals that alter TCP semantics are not - recommended. Deploying custom protocols on the wireless link, such as - MOWGLI proposes is not recommended, because this note gives - preference to (1) improving TCP instead of designing a custom - protocol and (2) allowing end-to-end sessions at all times. - -4.10.2 Application Level Proxies - - Nowadays, application level proxies are widely used in the Internet. - Such proxies include Web proxy caches, relay MTAs (Mail Transfer - Agents), and secure transport proxies (e.g., SOCKS).
In effect, - employing an application level proxy results in a "split TCP - connection" with the proxy as the intermediary. Hence, some of the - problems present with wireless links, such as combining of a - congested wide-area Internet path with a wireless LTN link, are - automatically alleviated to some extent. - - The application protocols often employ plenty of (unnecessary) round - trips, lots of headers and inefficient encoding. Even unnecessary - data may get delivered over the wireless link in regular application - protocol operation. In many cases a significant amount of this - overhead can be reduced by simply running an application level proxy - on the intermediate node. With LTN links, significant additional - improvement can be achieved by introducing application level proxies - with application-specific enhancements. Such a proxy may employ an - enhanced version of the application protocol over the wireless link. - - - -Montenegro, et al. Informational [Page 26] - -RFC 2757 Long Thin Networks January 2000 - - - In an LTN environment enhancements at the application layer may - provide much more notable performance improvements than any transport - level enhancements. - - The Mowgli system provides full support for adding application level - agent-proxy pairs between the client and the server, the agent on the - mobile device and the proxy on the intermediate node. Such a pair may - be either explicit or fully transparent to the applications, but it - is, at all times, under the end-user control. Good examples of - enhancements achieved with application-specific proxies include - Mowgli WWW [LAKLR95], [LHKR96] and WebExpress [HL96], [CTCSM97]. - - Recommendation: Usage of application level proxies is conditionally - recommended: an application must be proxy enabled and the decision of - employing a proxy for an application must be under the user control - at all times. 
- -4.10.3 Snoop and its Derivatives - - Berkeley's SNOOP protocol [SNOOP] is a hybrid scheme mixing link- - layer reliability mechanisms with the split connection approach. It - is an improvement over split TCP approaches in that end-to-end - semantics are retained. SNOOP does two things: - - 1. Locally (on the wireless link) retransmit lost packets, instead - of allowing TCP to do so end-to-end. - - 2. Suppress the duplicate acks on their way from the receiver back - to the sender, thus avoiding fast retransmit and congestion - avoidance at the latter. - - Thus, the Snoop protocol is designed to avoid unnecessary fast - retransmits by the TCP sender, when the wireless link layer - retransmits a packet locally. Consider a system that does not use the - Snoop agent. Consider a TCP sender S that sends packets to receiver R - via an intermediate node IN. Assume that the sender sends packet A, - B, C, D, E (in that order) which are forwarded by IN to the wireless - receiver R. Assume that the intermediate node then retransmits B - subsequently, because the first transmission of packet B is lost due - to errors on the wireless link. In this case, receiver R receives - packets A, C, D, E and B (in that order). Receipt of packets C, D and - E triggers duplicate acknowledgements. When the TCP sender receives - three duplicate acknowledgements, it triggers fast retransmit (which - results in a retransmission, as well as reduction of congestion - window). The fast retransmit occurs despite the link level - retransmit on the wireless link, degrading throughput. - - - - - -Montenegro, et al. Informational [Page 27] - -RFC 2757 Long Thin Networks January 2000 - - - SNOOP [SNOOP] deals with this problem by dropping TCP dupacks - appropriately (at the intermediate node). The Delayed Dupacks (see - section 4.5) attempts to approximate Snoop without requiring - modifications at the intermediate node. 
Such schemes are needed only - if the possibility of a fast retransmit due to wireless errors is - non-negligible. In particular, if the wireless link uses a stop-and- - go protocol (or otherwise delivers packets in-order), then these - schemes are not very beneficial. Also, if the bandwidth-delay - product of the wireless link is smaller than four segments, the - probability that the intermediate node will have an opportunity to - send three new packets before a lost packet is retransmitted is - small. Since at least three dupacks are needed to trigger a fast - retransmit, with a wireless bandwidth-delay product less than four - packets, schemes such as Snoop and Delayed Dupacks would not be - necessary (unless the link layer is not designed properly). - Conversely, when the wireless bandwidth-delay product is large - enough, Snoop can provide significant performance improvement - (compared with standard TCP). For further discussion on these topics, - please refer to [Vaidya99]. - - The Delayed Dupacks scheme tends to provide performance benefit in - environments where Snoop performs well. In general, performance - improvement achieved by the Delayed Dupacks scheme is a function of - packet loss rates due to congestion and transmission errors. When - congestion-related losses occur, the Delayed Dupacks scheme - unnecessarily delays retransmission. Thus, in the presence of - congestion losses, the Delayed Dupacks scheme cannot achieve the same - performance improvement as Snoop. However, simulation results - [Vaidya99] indicate that the Delayed Dupacks can achieve a - significant improvement in performance despite moderate congestion - losses. - - WTCP [WTCP] is similar to SNOOP in that it preserves end-to-end - semantics. 
In WTCP, the intermediate node uses a complex scheme to - hide the time it spends recovering from local errors across the - wireless link (this typically includes retransmissions due to error - recovery, but may also include time spent dealing with congestion). - The idea is for the sender to derive a smooth estimate of round-trip - time. In order to work effectively, it assumes that the TCP - endpoints implement the Timestamps option in RFC 1323 [TCPHP]. - Unfortunately, support for RFC 1323 in TCP implementations is not yet - widespread. Beyond this, WTCP requires changes only at the - intermediate node. - - SNOOP and WTCP require the intermediate node to examine and operate - on the traffic between the portable wireless device and the TCP - server on the wired Internet. SNOOP and WTCP do not work if the IP - traffic is encrypted, unless, of course, the intermediate node shares - - - -Montenegro, et al. Informational [Page 28] - -RFC 2757 Long Thin Networks January 2000 - - - the security association between the mobile device and its end-to-end - peer. They also require that both the data and the corresponding - ACKs traverse the same intermediate node. Furthermore, if the - intermediate node retransmits packets at the transport layer across - the wireless link, this may duplicate efforts by the link-layer. - SNOOP has been described by its designers as a TCP-aware link-layer. - This is the right approach: the link and network layers can be much - more aware of each other than traditional OSI layering suggests. - - Encryption of IP packets via IPSEC's ESP header (in either transport - or tunnel mode) renders the TCP header and payload unintelligible to - the intermediate node. This precludes SNOOP (and WTCP) from working, - because it needs to examine the TCP headers in both directions. 
- Possible solutions involve: - - - making the SNOOP (or WTCP) intermediate node a party to the - security association between the client and the server - - - IPSEC tunneling mode, terminated at the SNOOPing intermediate node - - However, these techniques require that users trust intermediate - nodes. Users valuing both privacy and performance should use SSL or - SOCKS for end-to-end security. These, however, are implemented above - the transport layer, and are not as resistant to some security - attacks (for example, those based on guessing TCP sequence numbers) - as IPSEC. - - Recommendation: Implement SNOOP on intermediate nodes now. Research - results are encouraging, and it is an "invisible" optimization in - that neither the client nor the server needs to change, only the - intermediate node (for basic SNOOP without SACK). However, as - discussed above there is little or no benefit from implementing SNOOP - if: - - 1. The wireless link provides reliable, in-order packet delivery, - or, - - 2. The bandwidth-delay product of the wireless link is smaller - than four segments. - -4.10.4 PEPs to handle Periods of Disconnection - - Periods of disconnection are very common in wireless networks, either - during handoff, due to lack of resources (dropped connections) or - natural obstacles. During these periods, a TCP sender does not - receive the expected acknowledgements. Upon expiration of the - retransmit timer, this causes TCP to close its congestion window - with all the related drawbacks. Re-transmitting packets is useless - - - -Montenegro, et al. Informational [Page 29] - -RFC 2757 Long Thin Networks January 2000 - - - since the connection is broken. [M-TCP] aims at enabling TCP to - better handle handoffs and periods of disconnection, while preserving - end-to-end semantics. M-TCP adds an element: supervisor host (SH- - TCP) at the edge of the wireless network. - - This intermediate node monitors the traffic coming from the sender to - the mobile device. 
It does not break end-to-end semantics because the - ACKs sent from the intermediate node to the sender are effectively - the ones sent by the mobile node. The principle is to generally leave - the last byte unacknowledged. Hence, SH-TCP could shut down the - sender's window by sending the ACK for the last byte with a window - set to zero. Thus the sender will go to persist mode. - - The second optimization is done on both the intermediate node and the - mobile host. On the latter, TCP is aware of the current state of the - connection. In the event of a disconnection, it is capable of - freezing all timers. Upon reconnection, the mobile sends a specially - marked ACK with the number of the highest byte received. The - intermediate node assumes that the mobile is disconnected because it - monitors the flow on the wireless link, so in the absence of - acknowledgments from the mobile, it will inform SH-TCP, which will - send the ACK closing the sender window as described in the previous - paragraph. The intermediate node learns that the mobile is again - connected when it receives a duplicate acknowledgment marked as - reconnected. At this point it sends a duplicate ACK to the sender - and grows the window. The sender exits persist mode and resumes - transmitting at the same rate as before. It begins by retransmitting - any data previously unacknowledged by the mobile node. Non - overlapping or non soft handoffs are lightweight because the previous - intermediate system can shrink the window, and the new one modifies - it as soon as it has received an indication from the mobile. - - Recommendation: M-TCP is not slated for adoption at this moment, - because of the highly experimental nature of the proposal, and the - uncertainty that TCP/IP implementations handle zero window updates - correctly. Continue tracking developments in this space. 
- -4.11 Header Compression Alternatives - - Because Long Thin Networks are bandwidth-constrained, compressing - every byte out of over-the-air segments is worth while. - - Mechanisms for TCP and IP header compression defined in [RFC1144, - IPHC, IPHC-RTP, IPHC-PPP] provide the following benefits: - - - Improve interactive response time - - - Allow using small packets for bulk data with good line efficiency - - - -Montenegro, et al. Informational [Page 30] - -RFC 2757 Long Thin Networks January 2000 - - - - Allow using small packets for delay sensitive low data-rate - traffic - - - Decrease header overhead (for a common TCP segment size of 512 - the header overhead of IPv4/TCP within a Mobile IP tunnel can - decrease from 11.7 to less than 1 per cent. - - - Reduce packet loss rate over lossy links (because of the - smaller cross-section of compressed packets). - - Van Jacobson (VJ) header compression [RFC1144] describes a Proposed - Standard for TCP Header compression that is widely deployed. It uses - TCP timeouts to detect a loss of synchronization between the - compressor and decompressor. [IPHC] includes an explicit request for - transmission of uncompressed headers to allow resynchronization - without waiting for a TCP timeout (and executing congestion avoidance - procedures). - - Recommendation: Implement [IPHC], in particular as it relates to IP- - in-IP [RFC2003] and Minimal Encapsulation [RFC2004] for Mobile IP, as - well as TCP header compression for lossy links and links that - reorder packets. PPP capable devices should implement [IPHC-PPP]. VJ - header compression may optionally be implemented as it is a widely - deployed Proposed Standard. However, it should only be enabled when - operating over reliable LTNs, because even a single bit error most - probably would result in a full TCP window being dropped, followed by - a costly recovery via slow-start. - -4.12 Payload Compression - - Compression of IP payloads is also desirable. 
"IP Payload Compression - Protocol (IPComp)" [IPPCP] defines a framework where common - compression algorithms can be applied to arbitrary IP segment - payloads. IP payload compression is something of a niche - optimization. It is necessary because IP-level security converts IP - payloads to random bitstreams, defeating commonly-deployed link-layer - compression mechanisms which are faced with payloads that have no - redundant "information" that can be more compactly represented. - - However, many IP payloads are already compressed (images, audio, - video, "zipped" files being FTPed), or are already encrypted above - the IP layer (SSL/TLS, etc.). These payloads will not "compress" - further, limiting the benefit of this optimization. - - HTTP/1.1 already supports compression of the message body. For - example, to use zlib compression the relevant directives are: - "Content-Encoding: deflate" and "Accept-Encoding: deflate" [HTTP- - PERF]. - - - -Montenegro, et al. Informational [Page 31] - -RFC 2757 Long Thin Networks January 2000 - - - HTTP-NG is considering supporting compression of resources at the - HTTP level, which would provide equivalent benefits for common - compressible MIME types like text/html. This will reduce the need for - IPComp. If IPComp is deployed more rapidly than HTTP-NG, IPComp - compression of HTML and MIME headers would be beneficial. - - In general, application-level compression can often outperform - IPComp, because of the opportunity to use compression dictionaries - based on knowledge of the specific data being compressed. - - Recommendation: IPComp may optionally be implemented. Track HTTP-NG - standardization and deployment for now. Implementing HTTP/1.1 - compression using zlib SHOULD is recommended. - -4.13 TCP Control Block Interdependence [Touch97] - - TCP maintains per-connection information such as connection state, - current round-trip time, congestion control or maximum segment size. 
- Sharing information between two consecutive connections or when - creating a new connection while the first is still active to the same - host may improve performance of the latter connection. The principle - could easily be extended to sharing information amongst systems in a - LAN not just within a given system. [Touch97] describes cache update - for both cases. - - Users of W-WAN devices frequently request connections to the same - servers or set of servers. For example, in order to read their email - or to initiate connections to other servers, the devices may be - configured to always use the same email server or WWW proxy. The - main advantage of this proposal is that it relieves the application - of the burden of optimizing the transport layer. In order to improve - the performance of TCP connections, this mechanism only requires - changes at the wireless device. - - In general, this scheme should improve the dynamism of connection - setup without increasing the cost of the implementation. - - Recommendation: This mechanism is recommended, although HTTP/1.1 with - its persistent connections may partially achieve the same effect - without it. Other applications (even HTTP/1.0) may find it useful. - Continue monitoring research on this. In particular, work on a - "Congestion Manager" [CM] may generalize this concept of sharing - information among protocols and applications with a view to making - them more adaptable to network conditions. - - - - - - - -Montenegro, et al. Informational [Page 32] - -RFC 2757 Long Thin Networks January 2000 - - -5 Summary of Recommended Optimizations - - The table below summarizes our recommendations with regards to the - main proposals mentioned above. - - The first column, "Stability of the Proposal," refers to the maturity - of the mechanism in question. Some proposals are being pursued - within the IETF in a somewhat open fashion. An IETF proposal is - either an Internet Drafts (I-D) or a Request for Comments (RFC). 
The - former is a preliminary version. There are several types of RFCs. A - Draft Standards (DS) is standards track, and carries more weight than - a Proposed Standard (PS), which may still undergo revisions. - Informational or Experimental RFCs do not specify a standard. Other - proposals are isolated efforts with little or no public review, and - unknown chances of garnering industry backing. - - "Implemented at" indicates which participant in a TCP session must be - modified to implement the proposal. Legacy servers typically cannot - be modified, so this column indicates whether implementation happens - at either or both of the two nodes under some control: mobile device - and intermediate node. The symbols used are: WS (wireless sender, - that is, the mobile device's TCP send operation must be modified), WR - (wireless receiver, that is, the mobile device's TCP receive - operation must be modified), WD (wireless device, that is, - modifications at the mobile device are not specific to either TCP - send or receive), IN (intermediate node) and NI (network - infrastructure). These entities are to be understood within the - context of Section 1.1 ("Network Architecture"). NA simply means "not - applicable." - - The "Recommendation" column captures our suggestions. Some - mechanisms are endorsed for immediate adoption, others need more - evidence and research, and others are not recommended. - -Name Stability of Implemented Recommendation - the Proposal at -==================== ============= =========== ================= - -Increased Initial RFC 2581 (PS) WS Yes -Window (initial_window=2) - -Disable delayed ACKs NA WR When stable -during slow start - -Byte counting NA WS No -instead of ACK -counting - - - - -Montenegro, et al. 
Informational [Page 33] - -RFC 2757 Long Thin Networks January 2000 - - -TCP Header RFC 1144 (PS) WD Yes -compression for PPP IN (see 4.11) - -IP Payload RFC 2393 (PS) WD Yes -Compression (simultaneously -(IPComp) needed on Server) - -Header RFC 2507 (PS), WD Yes -Compression RFC 2509 (PS) IN (For IPv4, TCP and - Mobile IP, PPP) - -SNOOP plus SACK In limited use IN Yes - WD (for SACK) - -Fast retransmit/fast RFC 2581 (PS) WD Yes (should be -recovery there already) - -Transaction/TCP RFC 1644 WD No - (Experimental) (simultaneously - needed on Server) - -Estimating Slow NA WS No -Start Threshold -(ssthresh) - -Delayed Duplicate Not stable WR When stable -Acknowledgements IN (for - notifications) - -Class-based Queuing NA WD When stable -on End Systems - -Explicit Congestion RFC 2481 (EXP) WD Yes - -Notification NI - -TCP Control Block RFC 2140 WD Yes -Interdependence (Informational) (Track research) - - - Of all the optimizations in the table above, only SNOOP plus SACK and - Delayed duplicate acknowledgements are currently being proposed only - for wireless networks. The others are being considered even for non- - wireless applications. Their more general applicability attracts more - attention and analysis from the research community. - - Of the above mechanisms, only Header Compression (for IP and TCP) and - "SNOOP plus SACK" cease to work in the presence of IPSec. - - - -Montenegro, et al. Informational [Page 34] - -RFC 2757 Long Thin Networks January 2000 - - -6 Conclusion - - In view of the unpredictable and problematic nature of long thin - networks, arriving at an optimized transport is a daunting task. We - have reviewed the existing proposals along with future research - items. Based on this overview, we also recommend mechanisms for - implementation in long thin networks (LTNs). - -7 Acknowledgements - - The authors are deeply indebted to the IETF tcpsat and tcpimpl - working groups. 
The following individuals have also provided valuable - feedback: Mark Allman (NASA), Vern Paxson (ACIRI), Raphi Rom - (Technion/Sun), Charlie Perkins (Nokia), Peter Stark (Phone.com). - -8 Security Considerations - - The mechanisms discussed and recommended in this document have been - proposed in previous publications. The security considerations - outlined in the original discussions apply here as well. Several - security issues are also discussed throughout this document. - Additionally, we present below a non-exhaustive list of the most - salient issues concerning our recommended mechanisms: - - - Larger Initial TCP Window Size - - No known security issues [RFC2414, RFC2581]. - - - Header Compression - - May be open to some denial of service attacks. But any attacker in - a position to launch these attacks would have much stronger - attacks at his disposal [IPHC, IPHC-RTP]. - - - Congestion Control, Fast Retransmit/Fast Recovery - - An attacker may force TCP connections to grind to a halt, or, more - dangerously, behave more aggressively. The latter possibility may - lead to congestion collapse, at least in some regions of the - network [RFC2581]. - - - Explicit Congestion Notification - - It does not appear to increase the vulnerabilities in the network. - On the contrary, it may reduce them by aiding in the - identification of flows unresponsive to or non-compliant with TCP - congestion control [ECN]. - - - - -Montenegro, et al. Informational [Page 35] - -RFC 2757 Long Thin Networks January 2000 - - - - Sharing of Network Performance Information (TCP Control Block - Sharing and Congestion Manager module) - - Some information should not be shared. For example, TCP sequence - numbers are used to protect against spoofing attacks. Even - limiting the sharing to performance values leaves open the - possibility of denial-of-service attacks [Touch97]. 
- - - Performance Enhancing Proxies - - These systems are men-in-the-middle from the point of view of - their security vulnerabilities. Accordingly, they must be used - with extreme care so as to prevent their being hijacked and - misused. - - This last point is not to be underestimated: there is a general - security concern whenever an intermediate node performs operations - different from those carried out in an end-to-end basis. This is not - specific to performance-enhancing proxies. In particular, there may - be a tendency to forego IPSEC-based privacy in order to allow, for - example, a SNOOP module, header compression (TCP, UDP, RTP, etc), or - HTTP proxies to work. - - Adding end-to-end security at higher layers (for example via RTP - encryption, or via TLS encryption of the TCP payload) alleviates the - problem. However, this still leaves protocol headers in the clear, - and these may be exploited for traffic analysis and denial-of-service - attacks. - -9 References - - [ACKSPACING] Partridge, C., "ACK Spacing for High Delay-Bandwidth - Paths with Insufficient Buffering", Work in Progress. - - [ADGGHOSSTT98] Allman, M., Dawkins, S., Glover, D., Griner, J., - Henderson, T., Heidemann, J., Kruse, H., Osterman, S., - Scott, K., Semke, J., Touch, J. and D. Tran, "Ongoing - TCP Research Related to Satellites", Work in Progress. - - [AGS98] Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP - Over Satellite Channels using Standard Mechanisms", - BCP 28, RFC 2488, January 1999. - - - - - - - - - -Montenegro, et al. Informational [Page 36] - -RFC 2757 Long Thin Networks January 2000 - - - [Allman98] Mark Allman. On the Generation and Use of TCP - Acknowledgments. ACM Computer Communication Review, - 28(5), October 1998. - - [AHO98] Allman, M., Hayes, C., Ostermann, S., "An Evaluation - of TCP with Larger Initial Windows," Computer - Communication Review, 28(3), July 1998. 
- - [BBKT96] Bhagwat, P., Bhattacharya, P., Krishna, A., Tripathi, - S., "Enhancing Throughput over Wireless LANs Using - Channel State Dependent Packet Scheduling," in Proc. - IEEE INFOCOM'96, pp. 1133-40, March 1996. - - [BBKVP96] Bakshi, B., P., Krishna, N., Vaidya, N., Pradhan, - D.K., "Improving Performance of TCP over Wireless - Networks," Technical Report 96-014, Texas A&M - University, 1996. - - [BPSK96] Balakrishnan, H., Padmanabhan, V., Seshan, S., Katz, - R., "A Comparison of Mechanisms for Improving TCP - Performance over Wireless Links," in ACM SIGCOMM, - Stanford, California, August 1996. - - [BPK99] Balakrishnan, H., Padmanabhan, V., Katz, R., "The - effects of asymmetry on TCP performance," ACM Mobile - Networks and Applications (MONET), Vol. 4, No. 3, - 1999, pp. 219-241. - - [BV97] S. Biaz and N. H. Vaidya, "Distinguishing Congestion - Losses from Wireless Transmission Losses: A Negative - Result," Seventh International Conference on Computer - Communications and Networks (IC3N), New Orleans, - October 1998. - - [BV98] Biaz, S., Vaidya, N., "Sender-Based heuristics for - Distinguishing Congestion Losses from Wireless - Transmission Losses," Texas A&M University, Technical - Report 98-013, June 1998. - - [BV98a] Biaz, S., Vaidya, N., "Discriminating Congestion - Losses from Wireless Losses using Inter-Arrival Times - at the Receiver," Texas A&M University, Technical - Report 98-014, June 1998. - - [BW97] Brasche, G., Walke, B., "Concepts, Services, and - Protocols of the New GSM Phase 2+ general Packet Radio - Service," IEEE Communications Magazine, Vol. 35, No. - 8, August 1997. - - - -Montenegro, et al. Informational [Page 37] - -RFC 2757 Long Thin Networks January 2000 - - - [CB96] Cheshire, S., Baker, M., "Experiences with a Wireless - Network in MosquitoNet," IEEE Micro, February 1996. - Available online as: - http://rescomp.stanford.edu/~cheshire/papers - /wireless.ps. 
- - [CDMA] Electronic Industry Alliance(EIA)/Telecommunications - Industry Association (TIA), IS-95: Mobile Station-Base - Station Compatibility Standard for Dual-Mode Wideband - Spread Spectrum Cellular System, 1993. - - [CDPD] Wireless Data Forum, CDPD System Specification, - Release 1.1, 1995. - - [CM] Hari Balakrishnan and Srinivasan Seshan, "The - Congestion Manager," Work in Progress. - - [CTCSM97] Chang, H., Tait, C., Cohen, N., Shapiro, M., - Mastrianni, S., Floyd, R., Housel, B., Lindquist, D., - "Web Browsing in a Wireless Environment: Disconnected - and Asynchronous Operation in ARTour Web Express," in - Proc. MobiCom'97, Budapest, Hungary, September 1997. - - [Demers90] Demers, A., Keshav, S., and Shenker, S., Analysis and - Simulation of a Fair Queueing Algorithm, - Internetworking: Research and Experience, Vol. 1, - 1990, pp. 3-26. - - [ECN] Ramakrishnan, K. and S. Floyd, "A Proposal to add - Explicit Congestion Notification (ECN) to IP", RFC - 2481, January 1999. - - [Floyd95] Floyd, S., and Jacobson, V., Link-sharing and Resource - Management Models for Packet Networks. IEEE/ACM - Transactions on Networking, Vol. 3 No. 4, pp. 365-386, - August 1995. - - [FSS98] Fragouli, C., Sivaraman, V., Srivastava, M., - "Controlled Multimedia Wireless Link Sharing via - Enhanced Class-Based Queueing with Channel-State- - Dependent Packet Scheduling," Proc. IEEE INFOCOM'98, - April 1998. - - [GPRS] ETSI, "General Packet Radio Service (GPRS): Service - Description, Stage 2," GSM03.60, v.6.1.1 August 1998. - - - - - - -Montenegro, et al. Informational [Page 38] - -RFC 2757 Long Thin Networks January 2000 - - - [GSM] Rahnema, M., "Overview of the GSM system and protocol - architecture," IEEE Communications Magazine, vol. 31, - pp 92-100, April 1993. - - [HL96] Hausel, B., Lindquist, D., "WebExpress: A System for - Optimizing Web Browsing in a Wireless Environment," in - Proc. MobiCom'96, Rye, New York, USA, November 1996. 
- - [HTTP-PERF] Henrik Frystyk Nielsen (W3C, MIT), Jim Gettys (W3C, - Digital), Anselm Baird-Smith (W3C, INRIA), Eric - Prud'hommeaux (W3C, MIT), Hon Lie (W3C, INRIA), Chris - Lilley (W3C, INRIA), "Network Performance Effects of - HTTP/1.1, CSS1, and PNG," ACM SIGCOMM '97, Cannes, - France, September 1997. Available at: - http://www.w3.org/Protocols/HTTP/Performance - /Pipeline.html - - [IPPCP] Shacham, A., Monsour, R., Pereira, R. and M. Thomas, - "IP Payload Compression Protocol (IPComp)", RFC 2393, - December 1998. - - [IPHC] Degermark, M., Nordgren, B. and S. Pink, "IP Header - Compression", RFC 2507, February 1999. - - [IPHC-RTP] Casner, S. and V. Jacobson, "Compressing IP/UDP/RTP - Headers for Low-Speed Serial Links", RFC 2508, - February 1999. - - [IPHC-PPP] Engan, M., Casner, S. and C. Bormann, "IP Header - Compression over PPP", RFC 2509, February 1999. - - [ITCP] Bakre, A., Badrinath, B.R., "Handoff and Systems - Support for Indirect TCP/IP. In Proceedings of the - Second USENIX Symposium on Mobile and Location- - Independent Computing, Ann Arbor, Michigan, April 10- - 11, 1995. - - [Jain89] Jain, R., "A Delay-Based Approach for Congestion - Avoidance in Interconnected Heterogeneous Computer - Networks," Digital Equipment Corporation, Technical - Report DEC-TR-566, April 1989. - - [Karn93] Karn, P., "The Qualcomm CDMA Digital Cellular System" - Proc. USENIX Mobile and Location-Independent Computing - Symposium, USENIX Association, August 1993. - - - - - - -Montenegro, et al. Informational [Page 39] - -RFC 2757 Long Thin Networks January 2000 - - - [KRLKA97] Kojo, M., Raatikainen, K., Liljeberg, M., Kiiskinen, - J., Alanko, T., "An Efficient Transport Service for - Slow Wireless Telephone Links," in IEEE Journal on - Selected Areas of Communication, volume 15, number 7, - September 1997. 
- - [LAKLR95] Liljeberg, M., Alanko, T., Kojo, M., Laamanen, H., - Raatikainen, K., "Optimizing World-Wide Web for - Weakly-Connected Mobile Workstations: An Indirect - Approach," in Proc. 2nd Int. Workshop on Services in - Distributed and Networked Environments, Whistler, - Canada, pp. 132-139, June 1995. - - [LHKR96] Liljeberg, M., Helin, H., Kojo, M., Raatikainen, K., - "Mowgli WWW Software: Improved Usability of WWW in - Mobile WAN Environments," in Proc. IEEE Global - Internet 1996 Conference, London, UK, November 1996. - - [LS98] Lettieri, P., Srivastava, M., "Adaptive Frame Length - Control for Improving Wireless Link Throughput, Range, - and Energy Efficiency," Proc. IEEE INFOCOM'98, April - 1998. - - [MNCP] Piscitello, D., Phifer, L., Wang, Y., Hovey, R., - "Mobile Network Computing Protocol (MNCP)", Work in - Progress. - - [MOWGLI] Kojo, M., Raatikainen, K., Alanko, T., "Connecting - Mobile Workstations to the Internet over a Digital - Cellular Telephone Network," in Proc. Workshop on - Mobile and Wireless Information Systems (MOBIDATA), - Rutgers University, NJ, November 1994. Available at: - http://www.cs.Helsinki.FI/research/mowgli/. Revised - version published in Mobile Computing, pp. 253-270, - Kluwer, 1996. - - [MSMO97] Mathis, M., Semke, J., Mahdavi, J., Ott, T., "The - Macroscopic Behavior of the TCP Congestion Avoidance - Algorithm," in Computer Communications Review, a - publication of ACM SIGCOMM, volume 27, number 3, July - 1997. - - [MTCP] Brown, K. Singh, S., "A Network Architecture for - Mobile Computing," Proc. IEEE INFOCOM'96, pp. 1388- - 1396, March 1996. Available at - ftp://ftp.ece.orst.edu/pub/singh/papers - /transport.ps.gz - - - - -Montenegro, et al. Informational [Page 40] - -RFC 2757 Long Thin Networks January 2000 - - - [M-TCP] Brown, K. Singh, S., "M-TCP: TCP for Mobile Cellular - Networks," ACM Computer Communications Review Vol. - 27(5), 1997. 
Available at - ftp://ftp.ece.orst.edu/pub/singh/papers/mtcp.ps.gz - - [MV97] Mehta, M., Vaidya, N., "Delayed Duplicate- - Acknowledgements: A Proposal to Improve Performance - of TCP on Wireless Links," Texas A&M University, - December 24, 1997. Available at - http://www.cs.tamu.edu/faculty/vaidya/mobile.html - - [NETBLT] White, J., "NETBLT (Network Block Transfer Protocol)", - Work in Progress. - - [Paxson97] V. Paxson, "End-to-End Internet Packet Dynamics," - Proc. SIGCOMM '97. Available at - ftp://ftp.ee.lbl.gov/papers/vp-pkt-dyn-sigcomm97.ps.Z - - [RED] Braden, B., Clark, D., Crowcroft, J., Davie, B., - Deering, S., Estrin, D., Floyd, S., Jacobson, V., - Minshall, G., Partridge, C., Peterson, L., - Ramakrishnan, K., Shenker, S., Wroclawski, J. and L. - Zhang, "Recommendations on Queue Management and - Congestion Avoidance in the Internet", RFC 2309, April - 1998. - - [RLP] ETSI, "Radio Link Protocol for Data and Telematic - Services on the Mobile Station - Base Station System - (MS-BSS) interface and the Base Station System - - Mobile Switching Center (BSS-MSC) interface," GSM - Specification 04.22, Version 3.7.0, February 1992. - - [RFC908] Velten, D., Hinden, R. and J. Sax, "Reliable Data - Protocol", RFC 908, July 1984. - - [RFC1030] Lambert, M., "On Testing the NETBLT Protocol over - Divers Networks", RFC 1030, November 1987. - - [RFC1122] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC1144] Jacobson, V., "Compressing TCP/IP Headers for Low- - Speed Serial Links", RFC 1144, February 1990. - - [RFC1151] Partridge, C., Hinden, R., "Version 2 of the Reliable - Data Protocol (RDP)", RFC 1151, April 1990. - - - - - -Montenegro, et al. Informational [Page 41] - -RFC 2757 Long Thin Networks January 2000 - - - [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC - 1191, November 1990. - - [RFC1397] Braden, R., "Extending TCP for Transactions -- - Concepts", RFC 1397, November 1992. 
- - [RFC1644] Braden, R., "T/TCP -- TCP Extensions for Transactions - Functional Specification", RFC 1644, July 1994. - - [RFC1661] Simpson, W., "The Point-To-Point Protocol (PPP)", STD - 51, RFC 1661, July 1994. - - [RFC1928] Leech, M., Ganis, M., Lee, Y., Kuris, R., Koblas, D. - and L. Jones, "SOCKS Protocol Version 5", RFC 1928, - March 1996. - - [RFC1986] Polites, W., Wollman, W., Woo, D. and R. Langan, - "Experiments with a Simple File Transfer Protocol for - Radio Links using Enhanced Trivial File Transfer - Protocol (ETFTP)", RFC 1986, August 1996. - - [RFC2002] Perkins, C., "IP Mobility Support", RFC 2002, October - 1996. - - [RFC2003] Perkins, C., "IP Encapsulation within IP", RFC 2003, - October 1996. - - [RFC2004] Perkins, C., "Minimal Encapsulation within IP", RFC - 2004, October 1996. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, - "TCP Selective Acknowledgment Options", RFC 2018, - October 1996. - - [RFC2188] Banan, M., Taylor, M. and J. Cheng, "AT&T/Neda's - Efficient Short Remote Operations (ESRO) Protocol - Specification Version 1.2", RFC 2188, September 1997. - - [RFC2246] Dierk, T. and E. Allen, "TLS Protocol Version 1", RFC - 2246, January 1999. - - [RFC2414] Allman, M., Floyd, S. and C. Partridge. "Increasing - TCP's Initial Window", RFC 2414, September 1998. - - [RFC2415] Poduri, K.and K. Nichols, "Simulation Studies of - Increased Initial TCP Window Size", RFC 2415, - September 1998. - - - - -Montenegro, et al. Informational [Page 42] - -RFC 2757 Long Thin Networks January 2000 - - - [RFC2416] Shepard, T. and C. Partridge, "When TCP Starts Up With - Four Packets Into Only Three Buffers", RFC 2416, - September 1998. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2582] Floyd, S. and T. Henderson, "The NewReno Modification - to TCP's Fast Recovery Algorithm", RFC 2582, April - 1999. 
- - [SNOOP] Balakrishnan, H., Seshan, S., Amir, E., Katz, R., - "Improving TCP/IP Performance over Wireless Networks," - Proc. 1st ACM Conf. on Mobile Computing and Networking - (Mobicom), Berkeley, CA, November 1995. - - [Stevens94] R. Stevens, "TCP/IP Illustrated, Volume 1," Addison- - Wesley, 1994 (section 2.10 for MTU size considerations - and section 11.3 for weak checksums). - - [TCPHP] Jacobson, V., Braden, R. and D. Borman, "TCP - Extensions for High Performance", RFC 1323, May 1992. - - [TCPSATMIN] TCPSAT Minutes, August, 1997. Available at: - http://tcpsat.lerc.nasa.gov/tcpsat/meetings/munich- - minutes.txt. - - [Touch97] Touch, T., "TCP Control Block Interdependence", RFC - 2140, April 1997. - - [Vaidya99] N. H. Vaidya, M. Mehta, C. Perkins, G. Montenegro, - "Delayed Duplicate Acknowledgements: A TCP-Unaware - Approach to Improve Performance of TCP over Wireless," - Technical Report 99-003, Computer Science Dept., Texas - A&M University, February 1999. - - [VEGAS] Brakmo, L., O'Malley, S., "TCP Vegas, New Techniques - for Congestion Detection and Avoidance," SIGCOMM'94, - London, pp 24-35, October 1994. - - [VMTP] Cheriton, D., "VMTP: Versatile Message Transaction - Protocol", RFC 1045, February 1988. - - [WAP] Wireless Application Protocol Forum. - http://www.wapforum.org/ - - - - - - -Montenegro, et al. Informational [Page 43] - -RFC 2757 Long Thin Networks January 2000 - - - [WC91] Wang, Z., Crowcroft, J., "A New Congestion Control - Scheme: Slow Start and Search," ACM Computer - Communication Review, vol 21, pp 32-43, January 1991. - - [WTCP] Ratnam, K., Matta, I., "WTCP: An Efficient - Transmission Control Protocol for Networks with - Wireless Links," Technical Report NU-CCS-97-11, - Northeastern University, July 1997. Available at: - http://www.ece.neu.edu/personal/karu/papers/WTCP- - NU.ps.gz - - [YB94] Yavatkar, R., Bhagawat, N., "Improving End-to-End - Performance of TCP over Mobile Internetworks," Proc. 
- Workshop on Mobile Computing Systems and Applications, - IEEE Computer Society Press, Los Alamitos, California, - 1994. - -Authors' Addresses - - Questions about this document may be directed at: - - Gabriel E. Montenegro - Sun Labs Networking and Security Group - Sun Microsystems, Inc. - 901 San Antonio Road - Mailstop UMPK 15-214 - Mountain View, California 94303 - - Phone: +1-650-786-6288 - Fax: +1-650-786-6445 - EMail: gab@sun.com - - - Spencer Dawkins - Nortel Networks - P.O. Box 833805 - Richardson, Texas 75083-3805 - - Phone: +1-972-684-4827 - Fax: +1-972-685-3292 - EMail: sdawkins@nortel.com - - - - - - - - - - -Montenegro, et al. Informational [Page 44] - -RFC 2757 Long Thin Networks January 2000 - - - Markku Kojo - Department of Computer Science - University of Helsinki - P.O. Box 26 (Teollisuuskatu 23) - FIN-00014 HELSINKI - Finland - - Phone: +358-9-1914-4179 - Fax: +358-9-1914-4441 - EMail: kojo@cs.helsinki.fi - - - Vincent Magret - Corporate Research Center - Alcatel Network Systems, Inc - 1201 Campbell - Mail stop 446-310 - Richardson Texas 75081 USA - M/S 446-310 - - Phone: +1-972-996-2625 - Fax: +1-972-996-5902 - EMail: vincent.magret@aud.alcatel.com - - - Nitin Vaidya - Dept. of Computer Science - Texas A&M University - College Station, TX 77843-3112 - - Phone: 979-845-0512 - Fax: 979-847-8578 - EMail: vaidya@cs.tamu.edu - - - - - - - - - - - - - - - - - - -Montenegro, et al. Informational [Page 45] - -RFC 2757 Long Thin Networks January 2000 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Montenegro, et al. Informational [Page 46] - diff --git a/kernel/picotcp/RFC/rfc2760.txt b/kernel/picotcp/RFC/rfc2760.txt deleted file mode 100644 index 5779d8d..0000000 --- a/kernel/picotcp/RFC/rfc2760.txt +++ /dev/null @@ -1,2579 +0,0 @@ - - - - - - -Network Working Group M. Allman, Editor -Request for Comments: 2760 NASA Glenn Research Center/BBN Technologies -Category: Informational S. Dawkins - Nortel - D. Glover - J. Griner - D. Tran - NASA Glenn Research Center - T. Henderson - University of California at Berkeley - J. Heidemann - J. Touch - University of Southern California/ISI - H. Kruse - S. Ostermann - Ohio University - K. Scott - The MITRE Corporation - J. Semke - Pittsburgh Supercomputing Center - February 2000 - - - Ongoing TCP Research Related to Satellites - - -Status of this Memo - - This memo provides information for the Internet community. 
It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. - -Abstract - - This document outlines possible TCP enhancements that may allow TCP - to better utilize the available bandwidth provided by networks - containing satellite links. The algorithms and mechanisms outlined - have not been judged to be mature enough to be recommended by the - IETF. The goal of this document is to educate researchers as to the - current work and progress being done in TCP research related to - satellite networks. - - - - - - -Allman, et al. Informational [Page 1] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -Table of Contents - - 1 Introduction. . . . . . . . . . . . . . . . . . . . 2 - 2 Satellite Architectures . . . . . . . . . . . . . . 3 - 2.1 Asymmetric Satellite Networks . . . . . . . . . . . 3 - 2.2 Satellite Link as Last Hop. . . . . . . . . . . . . 3 - 2.3 Hybrid Satellite Networks . . . . . . . . . . . 4 - 2.4 Point-to-Point Satellite Networks . . . . . . . . . 4 - 2.5 Multiple Satellite Hops . . . . . . . . . . . . . . 4 - 3 Mitigations . . . . . . . . . . . . . . . . . . . . 4 - 3.1 TCP For Transactions. . . . . . . . . . . . . . . . 4 - 3.2 Slow Start. . . . . . . . . . . . . . . . . . . . . 5 - 3.2.1 Larger Initial Window . . . . . . . . . . . . . . . 6 - 3.2.2 Byte Counting . . . . . . . . . . . . . . . . . . . 7 - 3.2.3 Delayed ACKs After Slow Start . . . . . . . . . . . 9 - 3.2.4 Terminating Slow Start. . . . . . . . . . . . . . . 11 - 3.3 Loss Recovery . . . . . . . . . . . . . . . . . . . 12 - 3.3.1 Non-SACK Based Mechanisms . . . . . . . . . . . . . 12 - 3.3.2 SACK Based Mechanisms . . . . . . . . . . . . . . . 13 - 3.3.3 Explicit Congestion Notification. . . . . . . . . . 16 - 3.3.4 Detecting Corruption Loss . . . . . . . . . . . . . 18 - 3.4 Congestion Avoidance. . . . . . . . . . . . . . . . 
21 - 3.5 Multiple Data Connections . . . . . . . . . . . . . 22 - 3.6 Pacing TCP Segments . . . . . . . . . . . . . . . . 24 - 3.7 TCP Header Compression. . . . . . . . . . . . . . . 26 - 3.8 Sharing TCP State Among Similar Connections . . . . 29 - 3.9 ACK Congestion Control. . . . . . . . . . . . . . . 32 - 3.10 ACK Filtering . . . . . . . . . . . . . . . . . . . 34 - 4 Conclusions . . . . . . . . . . . . . . . . . . . . 36 - 5 Security Considerations . . . . . . . . . . . . . . 36 - 6 Acknowledgments . . . . . . . . . . . . . . . . . . 37 - 7 References. . . . . . . . . . . . . . . . . . . . . 37 - 8 Authors' Addresses. . . . . . . . . . . . . . . . . 43 - 9 Full Copyright Statement. . . . . . . . . . . . . . 46 - -1 Introduction - - This document outlines mechanisms that may help the Transmission - Control Protocol (TCP) [Pos81] better utilize the bandwidth provided - by long-delay satellite environments. These mechanisms may also help - in other environments or for other protocols. The proposals outlined - in this document are currently being studied throughout the research - community. Therefore, these mechanisms are not mature enough to be - recommended for wide-spread use by the IETF. However, some of these - mechanisms may be safely used today. It is hoped that this document - will stimulate further study into the described mechanisms. If, at - - - - - -Allman, et al. Informational [Page 2] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - some point, the mechanisms discussed in this memo prove to be safe - and appropriate to be recommended for general use, the appropriate - IETF documents will be written. - - It should be noted that non-TCP mechanisms that help performance over - satellite links do exist (e.g., application-level changes, queueing - disciplines, etc.). However, outlining these non-TCP mitigations is - beyond the scope of this document and therefore is left as future - work. 
Additionally, there are a number of mitigations to TCP's - performance problems that involve very active intervention by - gateways along the end-to-end path from the sender to the receiver. - Documenting the pros and cons of such solutions is also left as - future work. - -2 Satellite Architectures - - Specific characteristics of satellite links and the impact these - characteristics have on TCP are presented in RFC 2488 [AGS99]. This - section discusses several possible topologies where satellite links - may be integrated into the global Internet. The mitigation outlined - in section 3 will include a discussion of which environment the - mechanism is expected to benefit. - -2.1 Asymmetric Satellite Networks - - Some satellite networks exhibit a bandwidth asymmetry, a larger data - rate in one direction than the reverse direction, because of limits - on the transmission power and the antenna size at one end of the - link. Meanwhile, some other satellite systems are unidirectional and - use a non-satellite return path (such as a dialup modem link). The - nature of most TCP traffic is asymmetric with data flowing in one - direction and acknowledgments in opposite direction. However, the - term asymmetric in this document refers to different physical - capacities in the forward and return links. Asymmetry has been shown - to be a problem for TCP [BPK97,BPK98]. - -2.2 Satellite Link as Last Hop - - Satellite links that provide service directly to end users, as - opposed to satellite links located in the middle of a network, may - allow for specialized design of protocols used over the last hop. - Some satellite providers use the satellite link as a shared high - speed downlink to users with a lower speed, non-shared terrestrial - link that is used as a return link for requests and acknowledgments. - Many times this creates an asymmetric network, as discussed above. - - - - - - -Allman, et al. 
Informational [Page 3] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -2.3 Hybrid Satellite Networks - - In the more general case, satellite links may be located at any point - in the network topology. In this case, the satellite link acts as - just another link between two gateways. In this environment, a given - connection may be sent over terrestrial links (including terrestrial - wireless), as well as satellite links. On the other hand, a - connection could also travel over only the terrestrial network or - only over the satellite portion of the network. - -2.4 Point-to-Point Satellite Networks - - In point-to-point satellite networks, the only hop in the network is - over the satellite link. This pure satellite environment exhibits - only the problems associated with the satellite links, as outlined in - [AGS99]. Since this is a private network, some mitigations that are - not appropriate for shared networks can be considered. - -2.5 Multiple Satellite Hops - - In some situations, network traffic may traverse multiple satellite - hops between the source and the destination. Such an environment - aggravates the satellite characteristics described in [AGS99]. - -3 Mitigations - - The following sections will discuss various techniques for mitigating - the problems TCP faces in the satellite environment. Each of the - following sections will be organized as follows: First, each - mitigation will be briefly outlined. Next, research work involving - the mechanism in question will be briefly discussed. Next the - implementation issues of the mechanism will be presented (including - whether or not the particular mechanism presents any dangers to - shared networks). Then a discussion of the mechanism's potential - with regard to the topologies outlined above is given. Finally, the - relationships and possible interactions with other TCP mechanisms are - outlined. The reader is expected to be familiar with the TCP - terminology used in [AGS99]. 
- -3.1 TCP For Transactions - -3.1.1 Mitigation Description - - TCP uses a three-way handshake to setup a connection between two - hosts [Pos81]. This connection setup requires 1-1.5 round-trip times - (RTTs), depending upon whether the data sender started the connection - actively or passively. This startup time can be eliminated by using - TCP extensions for transactions (T/TCP) [Bra94]. After the first - - - -Allman, et al. Informational [Page 4] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - connection between a pair of hosts is established, T/TCP is able to - bypass the three-way handshake, allowing the data sender to begin - transmitting data in the first segment sent (along with the SYN). - This is especially helpful for short request/response traffic, as it - saves a potentially long setup phase when no useful data is being - transmitted. - -3.1.2 Research - - T/TCP is outlined and analyzed in [Bra92,Bra94]. - -3.1.3 Implementation Issues - - T/TCP requires changes in the TCP stacks of both the data sender and - the data receiver. While T/TCP is safe to implement in shared - networks from a congestion control perspective, several security - implications of sending data in the first data segment have been - identified [ddKI99]. - -3.1.4 Topology Considerations - - It is expected that T/TCP will be equally beneficial in all - environments outlined in section 2. - -3.1.5 Possible Interaction and Relationships with Other Research - - T/TCP allows data transfer to start more rapidly, much like using a - larger initial congestion window (see section 3.2.1), delayed ACKs - after slow start (section 3.2.3) or byte counting (section 3.2.2). - -3.2 Slow Start - - The slow start algorithm is used to gradually increase the size of - TCP's congestion window (cwnd) [Jac88,Ste97,APS99]. The algorithm is - an important safe-guard against transmitting an inappropriate amount - of data into the network when the connection starts up. 
However, - slow start can also waste available network capacity, especially in - long-delay networks [All97a,Hay97]. Slow start is particularly - inefficient for transfers that are short compared to the - delay*bandwidth product of the network (e.g., WWW transfers). - - Delayed ACKs are another source of wasted capacity during the slow - start phase. RFC 1122 [Bra89] suggests data receivers refrain from - ACKing every incoming data segment. However, every second full-sized - segment should be ACKed. If a second full-sized segment does not - arrive within a given timeout, an ACK must be generated (this timeout - cannot exceed 500 ms). Since the data sender increases the size of - cwnd based on the number of arriving ACKs, reducing the number of - - - -Allman, et al. Informational [Page 5] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - ACKs slows the cwnd growth rate. In addition, when TCP starts - sending, it sends 1 segment. When using delayed ACKs a second - segment must arrive before an ACK is sent. Therefore, the receiver - is always forced to wait for the delayed ACK timer to expire before - ACKing the first segment, which also increases the transfer time. - - Several proposals have suggested ways to make slow start less time - consuming. These proposals are briefly outlined below and references - to the research work given. - -3.2.1 Larger Initial Window - -3.2.1.1 Mitigation Description - - One method that will reduce the amount of time required by slow start - (and therefore, the amount of wasted capacity) is to increase the - initial value of cwnd. An experimental TCP extension outlined in - [AFP98] allows the initial size of cwnd to be increased from 1 - segment to that given in equation (1). 
- - min (4*MSS, max (2*MSS, 4380 bytes)) (1) - - By increasing the initial value of cwnd, more packets are sent during - the first RTT of data transmission, which will trigger more ACKs, - allowing the congestion window to open more rapidly. In addition, by - sending at least 2 segments initially, the first segment does not - need to wait for the delayed ACK timer to expire as is the case when - the initial size of cwnd is 1 segment (as discussed above). - Therefore, the value of cwnd given in equation 1 saves up to 3 RTTs - and a delayed ACK timeout when compared to an initial cwnd of 1 - segment. - - Also, we note that RFC 2581 [APS99], a standards-track document, - allows a TCP to use an initial cwnd of up to 2 segments. This change - is highly recommended for satellite networks. - -3.2.1.2 Research - - Several researchers have studied the use of a larger initial window - in various environments. [Nic97] and [KAGT98] show a reduction in - WWW page transfer time over hybrid fiber coax (HFC) and satellite - links respectively. Furthermore, it has been shown that using an - initial cwnd of 4 segments does not negatively impact overall - performance over dialup modem links with a small number of buffers - [SP98]. [AHO98] shows an improvement in transfer time for 16 KB - files across the Internet and dialup modem links when using a larger - initial value for cwnd. However, a slight increase in dropped - - - - -Allman, et al. Informational [Page 6] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - segments was also shown. Finally, [PN98] shows improved transfer - time for WWW traffic in simulations with competing traffic, in - addition to a small increase in the drop rate. - -3.2.1.3 Implementation Issues - - The use of a larger initial cwnd value requires changes to the - sender's TCP stack. Using an initial congestion window of 2 segments - is allowed by RFC 2581 [APS99]. 
Using an initial congestion window - of 3 or 4 segments is not expected to present any danger of - congestion collapse [AFP98], however may degrade performance in some - networks. - -3.2.1.4 Topology Considerations - - It is expected that the use of a large initial window would be - equally beneficial to all network architectures outlined in section - 2. - -3.2.1.5 Possible Interaction and Relationships with Other Research - - Using a fixed larger initial congestion window decreases the impact - of a long RTT on transfer time (especially for short transfers) at - the cost of bursting data into a network with unknown conditions. A - mechanism that mitigates bursts may make the use of a larger initial - congestion window more appropriate (e.g., limiting the size of line- - rate bursts [FF96] or pacing the segments in a burst [VH97a]). - - Also, using delayed ACKs only after slow start (as outlined in - section 3.2.3) offers an alternative way to immediately ACK the first - segment of a transfer and open the congestion window more rapidly. - Finally, using some form of TCP state sharing among a number of - connections (as discussed in 3.8) may provide an alternative to using - a fixed larger initial window. - -3.2.2 Byte Counting - -3.2.2.1 Mitigation Description - - As discussed above, the wide-spread use of delayed ACKs increases the - time needed by a TCP sender to increase the size of the congestion - window during slow start. This is especially harmful to flows - traversing long-delay GEO satellite links. One mechanism that has - been suggested to mitigate the problems caused by delayed ACKs is the - use of "byte counting", rather than standard ACK counting - [All97a,All98]. Using standard ACK counting, the congestion window - is increased by 1 segment for each ACK received during slow start. - However, using byte counting the congestion window increase is based - - - -Allman, et al. 
Informational [Page 7] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - on the number of previously unacknowledged bytes covered by each - incoming ACK, rather than on the number of ACKs received. This makes - the increase relative to the amount of data transmitted, rather than - being dependent on the ACK interval used by the receiver. - - Two forms of byte counting are studied in [All98]. The first is - unlimited byte counting (UBC). This mechanism simply uses the number - of previously unacknowledged bytes to increase the congestion window - each time an ACK arrives. The second form is limited byte counting - (LBC). LBC limits the amount of cwnd increase to 2 segments. This - limit throttles the size of the burst of data sent in response to a - "stretch ACK" [Pax97]. Stretch ACKs are acknowledgments that cover - more than 2 segments of previously unacknowledged data. Stretch ACKs - can occur by design [Joh95] (although this is not standard), due to - implementation bugs [All97b,PADHV99] or due to ACK loss. [All98] - shows that LBC prevents large line-rate bursts when compared to UBC, - and therefore offers fewer dropped segments and better performance. - In addition, UBC causes large bursts during slow start based loss - recovery due to the large cumulative ACKs that can arrive during loss - recovery. The behavior of UBC during loss recovery can cause large - decreases in performance and [All98] strongly recommends UBC not be - deployed without further study into mitigating the large bursts. - - Note: The standards track RFC 2581 [APS99] allows a TCP to use byte - counting to increase cwnd during congestion avoidance, however not - during slow start. - -3.2.2.2 Research - - Using byte counting, as opposed to standard ACK counting, has been - shown to reduce the amount of time needed to increase the value of - cwnd to an appropriate size in satellite networks [All97a]. 
In - addition, [All98] presents a simulation comparison of byte counting - and the standard cwnd increase algorithm in uncongested networks and - networks with competing traffic. This study found that the limited - form of byte counting outlined above can improve performance, while - also increasing the drop rate slightly. - - [BPK97,BPK98] also investigated unlimited byte counting in - conjunction with various ACK filtering algorithms (discussed in - section 3.10) in asymmetric networks. - - - - - - - - - - -Allman, et al. Informational [Page 8] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.2.2.3 Implementation Issues - - Changing from ACK counting to byte counting requires changes to the - data sender's TCP stack. Byte counting violates the algorithm for - increasing the congestion window outlined in RFC 2581 [APS99] (by - making congestion window growth more aggressive during slow start) - and therefore should not be used in shared networks. - -3.2.2.4 Topology Considerations - - It has been suggested by some (and roundly criticized by others) that - byte counting will allow TCP to provide uniform cwnd increase, - regardless of the ACKing behavior of the receiver. In addition, byte - counting also mitigates the retarded window growth provided by - receivers that generate stretch ACKs because of the capacity of the - return link, as discussed in [BPK97,BPK98]. Therefore, this change - is expected to be especially beneficial to asymmetric networks. - -3.2.2.5 Possible Interaction and Relationships with Other Research - - Unlimited byte counting should not be used without a method to - mitigate the potentially large line-rate bursts the algorithm can - cause. Also, LBC may send bursts that are too large for the given - network conditions. In this case, LBC may also benefit from some - algorithm that would lessen the impact of line-rate bursts of - segments. 
Also note that using delayed ACKs only after slow start - (as outlined in section 3.2.3) negates the limited byte counting - algorithm because each ACK covers only one segment during slow start. - Therefore, both ACK counting and byte counting yield the same - increase in the congestion window at this point (in the first RTT). - -3.2.3 Delayed ACKs After Slow Start - -3.2.3.1 Mitigation Description - - As discussed above, TCP senders use the number of incoming ACKs to - increase the congestion window during slow start. And, since delayed - ACKs reduce the number of ACKs returned by the receiver by roughly - half, the rate of growth of the congestion window is reduced. One - proposed solution to this problem is to use delayed ACKs only after - the slow start (DAASS) phase. This provides more ACKs while TCP is - aggressively increasing the congestion window and less ACKs while TCP - is in steady state, which conserves network resources. - - - - - - - - -Allman, et al. Informational [Page 9] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.2.3.2 Research - - [All98] shows that in simulation, using delayed ACKs after slow start - (DAASS) improves transfer time when compared to a receiver that - always generates delayed ACKs. However, DAASS also slightly - increases the loss rate due to the increased rate of cwnd growth. - -3.2.3.3 Implementation Issues - - The major problem with DAASS is in the implementation. The receiver - has to somehow know when the sender is using the slow start - algorithm. The receiver could implement a heuristic that attempts to - watch the change in the amount of data being received and change the - ACKing behavior accordingly. Or, the sender could send a message (a - flipped bit in the TCP header, perhaps) indicating that it was using - slow start. The implementation of DAASS is, therefore, an open - issue. - - Using DAASS does not violate the TCP congestion control specification - [APS99]. 
However, the standards (RFC 2581 [APS99]) currently - recommend using delayed acknowledgments and DAASS goes (partially) - against this recommendation. - -3.2.3.4 Topology Considerations - - DAASS should work equally well in all scenarios presented in section - 2. However, in asymmetric networks it may aggravate ACK congestion - in the return link, due to the increased number of ACKs (see sections - 3.9 and 3.10 for a more detailed discussion of ACK congestion). - -3.2.3.5 Possible Interaction and Relationships with Other Research - - DAASS has several possible interactions with other proposals made in - the research community. DAASS can aggravate congestion on the path - between the data receiver and the data sender due to the increased - number of returning acknowledgments. This can have an especially - adverse effect on asymmetric networks that are prone to experiencing - ACK congestion. As outlined in sections 3.9 and 3.10, several - mitigations have been proposed to reduce the number of ACKs that are - passed over a low-bandwidth return link. Using DAASS will increase - the number of ACKs sent by the receiver. The interaction between - DAASS and the methods for reducing the number of ACKs is an open - research question. Also, as noted in section 3.2.1.5 above, DAASS - provides some of the same benefits as using a larger initial - congestion window and therefore it may not be desirable to use both - mechanisms together. However, this remains an open question. - Finally, DAASS and limited byte counting are both used to increase - - - - -Allman, et al. Informational [Page 10] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - the rate at which the congestion window is opened. The DAASS - algorithm substantially reduces the impact limited byte counting has - on the rate of congestion window increase. 
- -3.2.4 Terminating Slow Start - -3.2.4.1 Mitigation Description - - The initial slow start phase is used by TCP to determine an - appropriate congestion window size for the given network conditions - [Jac88]. Slow start is terminated when TCP detects congestion, or - when the size of cwnd reaches the size of the receiver's advertised - window. Slow start is also terminated if cwnd grows beyond a certain - size. The threshold at which TCP ends slow start and begins using - the congestion avoidance algorithm is called "ssthresh" [Jac88]. In - most implementations, the initial value for ssthresh is the - receiver's advertised window. During slow start, TCP roughly doubles - the size of cwnd every RTT and therefore can overwhelm the network - with at most twice as many segments as the network can handle. By - setting ssthresh to a value less than the receiver's advertised - window initially, the sender may avoid overwhelming the network with - twice the appropriate number of segments. Hoe [Hoe96] proposes using - the packet-pair algorithm [Kes91] and the measured RTT to determine a - more appropriate value for ssthresh. The algorithm observes the - spacing between the first few returning ACKs to determine the - bandwidth of the bottleneck link. Together with the measured RTT, - the delay*bandwidth product is determined and ssthresh is set to this - value. When TCP's cwnd reaches this reduced ssthresh, slow start is - terminated and transmission continues using congestion avoidance, - which is a more conservative algorithm for increasing the size of the - congestion window. - -3.2.4.2 Research - - It has been shown that estimating ssthresh can improve performance - and decrease packet loss in simulations [Hoe96]. However, obtaining - an accurate estimate of the available bandwidth in a dynamic network - is very challenging, especially attempting to do so on the sending - side of the TCP connection [AP99]. 
Therefore, before this mechanism - is widely deployed, bandwidth estimation must be studied in a more - detail. - -3.2.4.3 Implementation Issues - - As outlined in [Hoe96], estimating ssthresh requires changes to the - data sender's TCP stack. As suggested in [AP99], bandwidth estimates - may be more accurate when taken by the TCP receiver, and therefore - both sender and receiver changes would be required. Estimating - - - -Allman, et al. Informational [Page 11] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - ssthresh is safe to implement in production networks from a - congestion control perspective, as it can only make TCP more - conservative than outlined in RFC 2581 [APS99] (assuming the TCP - implementation is using an initial ssthresh of infinity as allowed by - [APS99]). - -3.2.4.4 Topology Considerations - - It is expected that this mechanism will work equally well in all - symmetric topologies outlined in section 2. However, asymmetric - links pose a special problem, as the rate of the returning ACKs may - not be the bottleneck bandwidth in the forward direction. This can - lead to the sender setting ssthresh too low. Premature termination - of slow start can hurt performance, as congestion avoidance opens - cwnd more conservatively. Receiver-based bandwidth estimators do not - suffer from this problem. - -3.2.4.5 Possible Interaction and Relationships with Other Research - - Terminating slow start at the right time is useful to avoid multiple - dropped segments. However, using a selective acknowledgment-based - loss recovery scheme (as outlined in section 3.3.2) can drastically - improve TCP's ability to quickly recover from multiple lost segments - Therefore, it may not be as important to terminate slow start before - a large loss event occurs. [AP99] shows that using delayed - acknowledgments [Bra89] reduces the effectiveness of sender-side - bandwidth estimation. 
Therefore, using delayed ACKs only during slow - start (as outlined in section 3.2.3) may make bandwidth estimation - more feasible. - -3.3 Loss Recovery - -3.3.1 Non-SACK Based Mechanisms - -3.3.1.1 Mitigation Description - - Several similar algorithms have been developed and studied that - improve TCP's ability to recover from multiple lost segments in a - window of data without relying on the (often long) retransmission - timeout. These sender-side algorithms, known as NewReno TCP, do not - depend on the availability of selective acknowledgments (SACKs) - [MMFR96]. - - These algorithms generally work by updating the fast recovery - algorithm to use information provided by "partial ACKs" to trigger - retransmissions. A partial ACK covers some new data, but not all - data outstanding when a particular loss event starts. For instance, - consider the case when segment N is retransmitted using the fast - - - -Allman, et al. Informational [Page 12] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - retransmit algorithm and segment M is the last segment sent when - segment N is resent. If segment N is the only segment lost, the ACK - elicited by the retransmission of segment N would be for segment M. - If, however, segment N+1 was also lost, the ACK elicited by the - retransmission of segment N will be N+1. This can be taken as an - indication that segment N+1 was lost and used to trigger a - retransmission. - -3.3.1.2 Research - - Hoe [Hoe95,Hoe96] introduced the idea of using partial ACKs to - trigger retransmissions and showed that doing so could improve - performance. [FF96] shows that in some cases using partial ACKs to - trigger retransmissions reduces the time required to recover from - multiple lost segments. However, [FF96] also shows that in some - cases (many lost segments) relying on the RTO timer can improve - performance over simply using partial ACKs to trigger all - retransmissions. 
[HK99] shows that using partial ACKs to trigger - retransmissions, in conjunction with SACK, improves performance when - compared to TCP using fast retransmit/fast recovery in a satellite - environment. Finally, [FH99] describes several slightly different - variants of NewReno. - -3.3.1.3 Implementation Issues - - Implementing these fast recovery enhancements requires changes to the - sender-side TCP stack. These changes can safely be implemented in - production networks and are allowed by RFC 2581 [APS99]. - -3.3.1.4 Topology Considerations - - It is expected that these changes will work well in all environments - outlined in section 2. - -3.3.1.5 Possible Interaction and Relationships with Other Research - - See section 3.3.2.2.5. - -3.3.2 SACK Based Mechanisms - -3.3.2.1 Fast Recovery with SACK - -3.3.2.1.1 Mitigation Description - - Fall and Floyd [FF96] describe a conservative extension to the fast - recovery algorithm that takes into account information provided by - selective acknowledgments (SACKs) [MMFR96] sent by the receiver. The - algorithm starts after fast retransmit triggers the resending of a - - - -Allman, et al. Informational [Page 13] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - segment. As with fast retransmit, the algorithm cuts cwnd in half - when a loss is detected. The algorithm keeps a variable called - "pipe", which is an estimate of the number of outstanding segments in - the network. The pipe variable is decremented by 1 segment for each - duplicate ACK that arrives with new SACK information. The pipe - variable is incremented by 1 for each new or retransmitted segment - sent. A segment may be sent when the value of pipe is less than cwnd - (this segment is either a retransmission per the SACK information or - a new segment if the SACK information indicates that no more - retransmits are needed). 
- - This algorithm generally allows TCP to recover from multiple segment - losses in a window of data within one RTT of loss detection. Like - the forward acknowledgment (FACK) algorithm described below, the SACK - information allows the pipe algorithm to decouple the choice of when - to send a segment from the choice of what segment to send. - - [APS99] allows the use of this algorithm, as it is consistent with - the spirit of the fast recovery algorithm. - -3.3.2.1.2 Research - - [FF96] shows that the above described SACK algorithm performs better - than several non-SACK based recovery algorithms when 1--4 segments - are lost from a window of data. [AHKO97] shows that the algorithm - improves performance over satellite links. Hayes [Hay97] shows the - in certain circumstances, the SACK algorithm can hurt performance by - generating a large line-rate burst of data at the end of loss - recovery, which causes further loss. - -3.3.2.1.3 Implementation Issues - - This algorithm is implemented in the sender's TCP stack. However, it - relies on SACK information generated by the receiver. This algorithm - is safe for shared networks and is allowed by RFC 2581 [APS99]. - -3.3.2.1.4 Topology Considerations - - It is expected that the pipe algorithm will work equally well in all - scenarios presented in section 2. - -3.3.2.1.5 Possible Interaction and Relationships with Other Research - - See section 3.3.2.2.5. - - - - - - - -Allman, et al. Informational [Page 14] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.3.2.2 Forward Acknowledgments - -3.3.2.2.1 Mitigation Description - - The Forward Acknowledgment (FACK) algorithm [MM96a,MM96b] was - developed to improve TCP congestion control during loss recovery. - FACK uses TCP SACK options to glean additional information about the - congestion state, adding more precise control to the injection of - data into the network during recovery. 
FACK decouples the congestion - control algorithms from the data recovery algorithms to provide a - simple and direct way to use SACK to improve congestion control. Due - to the separation of these two algorithms, new data may be sent - during recovery to sustain TCP's self-clock when there is no further - data to retransmit. - - The most recent version of FACK is Rate-Halving [MM96b], in which one - packet is sent for every two ACKs received during recovery. - Transmitting a segment for every-other ACK has the result of reducing - the congestion window in one round trip to half of the number of - packets that were successfully handled by the network (so when cwnd - is too large by more than a factor of two it still gets reduced to - half of what the network can sustain). Another important aspect of - FACK with Rate-Halving is that it sustains the ACK self-clock during - recovery because transmitting a packet for every-other ACK does not - require half a cwnd of data to drain from the network before - transmitting, as required by the fast recovery algorithm - [Ste97,APS99]. - - In addition, the FACK with Rate-Halving implementation provides - Thresholded Retransmission to each lost segment. "Tcprexmtthresh" is - the number of duplicate ACKs required by TCP to trigger a fast - retransmit and enter recovery. FACK applies thresholded - retransmission to all segments by waiting until tcprexmtthresh SACK - blocks indicate that a given segment is missing before resending the - segment. This allows reasonable behavior on links that reorder - segments. As described above, FACK sends a segment for every second - ACK received during recovery. New segments are transmitted except - when tcprexmtthresh SACK blocks have been observed for a dropped - segment, at which point the dropped segment is retransmitted. - - [APS99] allows the use of this algorithm, as it is consistent with - the spirit of the fast recovery algorithm. 
- -3.3.2.2.2 Research - - The original FACK algorithm is outlined in [MM96a]. The algorithm - was later enhanced to include Rate-Halving [MM96b]. The real-world - performance of FACK with Rate-Halving was shown to be much closer to - - - -Allman, et al. Informational [Page 15] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - the theoretical maximum for TCP than either TCP Reno or the SACK- - based extensions to fast recovery outlined in section 3.3.2.1 - [MSMO97]. - -3.3.2.2.3 Implementation Issues - - In order to use FACK, the sender's TCP stack must be modified. In - addition, the receiver must be able to generate SACK options to - obtain the full benefit of using FACK. The FACK algorithm is safe - for shared networks and is allowed by RFC 2581 [APS99]. - -3.3.2.2.4 Topology Considerations - - FACK is expected to improve performance in all environments outlined - in section 2. Since it is better able to sustain its self-clock than - TCP Reno, it may be considerably more attractive over long delay - paths. - -3.3.2.2.5 Possible Interaction and Relationships with Other Research - - Both SACK based loss recovery algorithms described above (the fast - recovery enhancement and the FACK algorithm) are similar in that they - attempt to effectively repair multiple lost segments from a window of - data. Which of the SACK-based loss recovery algorithms to use is - still an open research question. In addition, these algorithms are - similar to the non-SACK NewReno algorithm described in section 3.3.1, - in that they attempt to recover from multiple lost segments without - reverting to using the retransmission timer. As has been shown, the - above SACK based algorithms are more robust than the NewReno - algorithm. However, the SACK algorithm requires a cooperating TCP - receiver, which the NewReno algorithm does not. 
A reasonable TCP - implementation might include both a SACK-based and a NewReno-based - loss recovery algorithm such that the sender can use the most - appropriate loss recovery algorithm based on whether or not the - receiver supports SACKs. Finally, both SACK-based and non-SACK-based - versions of fast recovery have been shown to transmit a large burst - of data upon leaving loss recovery, in some cases [Hay97]. - Therefore, the algorithms may benefit from some burst suppression - algorithm. - -3.3.3 Explicit Congestion Notification - -3.3.3.1 Mitigation Description - - Explicit congestion notification (ECN) allows routers to inform TCP - senders about imminent congestion without dropping segments. Two - major forms of ECN have been studied. A router employing backward - ECN (BECN), transmits messages directly to the data originator - - - -Allman, et al. Informational [Page 16] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - informing it of congestion. IP routers can accomplish this with an - ICMP Source Quench message. The arrival of a BECN signal may or may - not mean that a TCP data segment has been dropped, but it is a clear - indication that the TCP sender should reduce its sending rate (i.e., - the value of cwnd). The second major form of congestion notification - is forward ECN (FECN). FECN routers mark data segments with a - special tag when congestion is imminent, but forward the data - segment. The data receiver then echoes the congestion information - back to the sender in the ACK packet. A description of a FECN - mechanism for TCP/IP is given in [RF99]. - - As described in [RF99], senders transmit segments with an "ECN- - Capable Transport" bit set in the IP header of each packet. If a - router employing an active queueing strategy, such as Random Early - Detection (RED) [FJ93,BCC+98], would otherwise drop this segment, a - "Congestion Experienced" bit in the IP header is set instead.
Upon - reception, the information is echoed back to TCP senders using a bit - in the TCP header. The TCP sender adjusts the congestion window just - as it would if a segment was dropped. - - The implementation of ECN as specified in [RF99] requires the - deployment of active queue management mechanisms in the affected - routers. This allows the routers to signal congestion by sending TCP - a small number of "congestion signals" (segment drops or ECN - messages), rather than discarding a large number of segments, as can - happen when TCP overwhelms a drop-tail router queue. - - Since satellite networks generally have higher bit-error rates than - terrestrial networks, determining whether a segment was lost due to - congestion or corruption may allow TCP to achieve better performance - in high BER environments than currently possible (due to TCP's - assumption that all loss is due to congestion). While not a solution - to this problem, adding an ECN mechanism to TCP may be a part of a - mechanism that will help achieve this goal. See section 3.3.4 for a - more detailed discussion of differentiating between corruption and - congestion based losses. - -3.3.3.2 Research - - [Flo94] shows that ECN is effective in reducing the segment loss rate - which yields better performance especially for short and interactive - TCP connections. Furthermore, [Flo94] also shows that ECN avoids - some unnecessary, and costly TCP retransmission timeouts. Finally, - [Flo94] also considers some of the advantages and disadvantages of - various forms of explicit congestion notification. - - - - - - -Allman, et al. Informational [Page 17] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.3.3.3 Implementation Issues - - Deployment of ECN requires changes to the TCP implementation on both - sender and receiver. Additionally, deployment of ECN requires - deployment of some active queue management infrastructure in routers. 
- RED is assumed in most ECN discussions, because RED is already - identifying segments to drop, even before its buffer space is - exhausted. ECN simply allows the delivery of "marked" segments while - still notifying the end nodes that congestion is occurring along the - path. ECN is safe (from a congestion control perspective) for shared - networks, as it maintains the same TCP congestion control principles - as are used when congestion is detected via segment drops. - -3.3.3.4 Topology Considerations - - It is expected that none of the environments outlined in section 2 - will present a bias towards or against ECN traffic. - -3.3.3.5 Possible Interaction and Relationships with Other Research - - Note that some form of active queueing is necessary to use ECN (e.g., - RED queueing). - -3.3.4 Detecting Corruption Loss - - Differentiating between congestion (loss of segments due to router - buffer overflow or imminent buffer overflow) and corruption (loss of - segments due to damaged bits) is a difficult problem for TCP. This - differentiation is particularly important because the action that TCP - should take in the two cases is entirely different. In the case of - corruption, TCP should merely retransmit the damaged segment as soon - as its loss is detected; there is no need for TCP to adjust its - congestion window. On the other hand, as has been widely discussed - above, when the TCP sender detects congestion, it should immediately - reduce its congestion window to avoid making the congestion worse. - - TCP's defined behavior, as motivated by [Jac88,Jac90] and defined in - [Bra89,Ste97,APS99], is to assume that all loss is due to congestion - and to trigger the congestion control algorithms, as defined in - [Ste97,APS99]. The loss may be detected using the fast retransmit - algorithm, or in the worst case is detected by the expiration of - TCP's retransmission timer. 
- - TCP's assumption that loss is due to congestion rather than - corruption is a conservative mechanism that prevents congestion - collapse [Jac88,FF98]. Over satellite networks, however, as in many - wireless environments, loss due to corruption is more common than on - terrestrial networks. One common partial solution to this problem is - - - -Allman, et al. Informational [Page 18] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - to add Forward Error Correction (FEC) to the data that's sent over - the satellite/wireless link. A more complete discussion of the - benefits of FEC can be found in [AGS99]. However, given that FEC - does not always work or cannot be universally applied, other - mechanisms have been studied to attempt to make TCP able to - differentiate between congestion-based and corruption-based loss. - - TCP segments that have been corrupted are most often dropped by - intervening routers when link-level checksum mechanisms detect that - an incoming frame has errors. Occasionally, a TCP segment containing - an error may survive without detection until it arrives at the TCP - receiving host, at which point it will almost always either fail the - IP header checksum or the TCP checksum and be discarded as in the - link-level error case. Unfortunately, in either of these cases, it's - not generally safe for the node detecting the corruption to return - information about the corrupt packet to the TCP sender because the - sending address itself might have been corrupted. - -3.3.4.1 Mitigation Description - - Because the probability of link errors on a satellite link is - relatively greater than on a hardwired link, it is particularly - important that the TCP sender retransmit these lost segments without - reducing its congestion window. Because corrupt segments do not - indicate congestion, there is no need for the TCP sender to enter a - congestion avoidance phase, which may waste available bandwidth. 
- Simulations performed in [SF98] show a performance improvement when - TCP can properly differentiate between corruption and - congestion of wireless links. - - Perhaps the greatest research challenge in detecting corruption is - getting TCP (a transport-layer protocol) to receive appropriate - information from either the network layer (IP) or the link layer. - Much of the work done to date has involved link-layer mechanisms that - retransmit damaged segments. The challenge seems to be to get these - mechanisms to make repairs in such a way that TCP understands what - happened and can respond appropriately. - -3.3.4.2 Research - - Research into corruption detection to date has focused primarily on - making the link level detect errors and then perform link-level - retransmissions. This work is summarized in [BKVP97,BPSK96]. One of - the problems with this promising technique is that it causes an - effective reordering of the segments from the TCP receiver's point of - view. As a simple example, if segments A B C D are sent across a - noisy link and segment B is corrupted, segments C and D may have - already crossed the link before B can be retransmitted at the link - - - -Allman, et al. Informational [Page 19] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - level, causing them to arrive at the TCP receiver in the order A C D - B. This segment reordering would cause the TCP receiver to generate - duplicate ACKs upon the arrival of segments C and D. If the - reordering was bad enough, the sender would trigger the fast - retransmit algorithm in the TCP sender, in response to the duplicate - ACKs. Research presented in [MV98] proposes the idea of suppressing - or delaying the duplicate ACKs in the reverse direction to counteract - this behavior. Alternatively, proposals that make TCP more robust in - the face of re-ordered segment arrivals [Flo99] may reduce the side - effects of the re-ordering caused by link-layer retransmissions.
- - A more high-level approach, outlined in [DMT96], uses a new - "corruption experienced" ICMP error message generated by routers that - detect corruption. These messages are sent in the forward direction, - toward the packet's destination, rather than in the reverse direction - as is done with ICMP Source Quench messages. Sending the error - messages in the forward direction allows this feedback to work over - asymmetric paths. As noted above, generating an error message in - response to a damaged packet is problematic because the source and - destination addresses may not be valid. The mechanism outlined in - [DMT96] gets around this problem by having the routers maintain a - small cache of recent packet destinations; when the router - experiences an error rate above some threshold, it sends an ICMP - corruption-experienced message to all of the destinations in its - cache. Each TCP receiver then must return this information to its - respective TCP sender (through a TCP option). Upon receiving an ACK - with this "corruption-experienced" option, the TCP sender assumes - that packet loss is due to corruption rather than congestion for two - round trip times (RTT) or until it receives additional link state - information (such as "link down", source quench, or additional - "corruption experienced" messages). Note that in shared networks, - ignoring segment loss for 2 RTTs may aggravate congestion by making - TCP unresponsive. - -3.3.4.3 Implementation Issues - - All of the techniques discussed above require changes to at least the - TCP sending and receiving stacks, as well as intermediate routers. - Due to the concerns over possibly ignoring congestion signals (i.e., - segment drops), the above algorithm is not recommended for use in - shared networks. - -3.3.4.4 Topology Considerations - - It is expected that corruption detection, in general would be - beneficial in all environments outlined in section 2.
It would be - particularly beneficial in the satellite/wireless environment over - which these errors may be more prevalent. - - - -Allman, et al. Informational [Page 20] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.3.4.5 Possible Interaction and Relationships with Other Research - - SACK-based loss recovery algorithms (as described in 3.3.2) may - reduce the impact of corrupted segments on mostly clean links because - recovery will be able to happen more rapidly (and without relying on - the retransmission timer). Note that while SACK-based loss recovery - helps, throughput will still suffer in the face of non-congestion - related packet loss. - -3.4 Congestion Avoidance - -3.4.1 Mitigation Description - - During congestion avoidance, in the absence of loss, the TCP sender - adds approximately one segment to its congestion window during each - RTT [Jac88,Ste97,APS99]. Several researchers have observed that this - policy leads to unfair sharing of bandwidth when multiple connections - with different RTTs traverse the same bottleneck link, with the long - RTT connections obtaining only a small fraction of their fair share - of the bandwidth. - - One effective solution to this problem is to deploy fair queueing and - TCP-friendly buffer management in network routers [Sut98]. However, - in the absence of help from the network, other researchers have - investigated changes to the congestion avoidance policy at the TCP - sender, as described in [Flo91,HK98]. - -3.4.2 Research - - The "Constant-Rate" increase policy has been studied in [Flo91,HK98]. - It attempts to equalize the rate at which TCP senders increase their - sending rate during congestion avoidance. Both [Flo91] and [HK98] - illustrate cases in which the "Constant-Rate" policy largely corrects - the bias against long RTT connections, although [HK98] presents some - evidence that such a policy may be difficult to incrementally deploy - in an operational network. 
The proper selection of a constant (for - the constant rate of increase) is an open issue. - - The "Increase-by-K" policy can be selectively used by long RTT - connections in a heterogeneous environment. This policy simply - changes the slope of the linear increase, with connections over a - given RTT threshold adding "K" segments to the congestion window - every RTT, instead of one. [HK98] presents evidence that this - policy, when used with small values of "K", may be successful in - reducing the unfairness while keeping the link utilization high, when - a small number of connections share a bottleneck link. The selection - of the constant "K," the RTT threshold to invoke this policy, and - performance under a large number of flows are all open issues. - - - -Allman, et al. Informational [Page 21] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.4.3 Implementation Issues - - Implementation of either the "Constant-Rate" or "Increase-by-K" - policies requires a change to the congestion avoidance mechanism at - the TCP sender. In the case of "Constant-Rate," such a change must - be implemented globally. Additionally, the TCP sender must have a - reasonably accurate estimate of the RTT of the connection. The - algorithms outlined above violate the congestion avoidance algorithm - as outlined in RFC 2581 [APS99] and therefore should not be - implemented in shared networks at this time. - -3.4.4 Topology Considerations - - These solutions are applicable to all satellite networks that are - integrated with a terrestrial network, in which satellite connections - may be competing with terrestrial connections for the same bottleneck - link. - -3.4.5 Possible Interaction and Relationships with Other Research - - As shown in [PADHV99], increasing the congestion window by multiple - segments per RTT can cause TCP to drop multiple segments and force a - retransmission timeout in some versions of TCP. 
Therefore, the above - changes to the congestion avoidance algorithm may need to be - accompanied by a SACK-based loss recovery algorithm that can quickly - repair multiple dropped segments. - -3.5 Multiple Data Connections - -3.5.1 Mitigation Description - - One method that has been used to overcome TCP's inefficiencies in the - satellite environment is to use multiple TCP flows to transfer a - given file. The use of N TCP connections makes the sender N times - more aggressive and therefore can improve throughput in some - situations. Using N multiple TCP connections can impact the transfer - and the network in a number of ways, which are listed below. - - 1. The transfer is able to start transmission using an effective - congestion window of N segments, rather than a single segment as - one TCP flow uses. This allows the transfer to more quickly - increase the effective cwnd size to an appropriate size for the - given network. However, in some circumstances an initial window - of N segments is inappropriate for the network conditions. In - this case, a transfer utilizing more than one connection may - aggravate congestion. - - - - - -Allman, et al. Informational [Page 22] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - 2. During the congestion avoidance phase, the transfer increases the - effective cwnd by N segments per RTT, rather than the one segment - per RTT increase that a single TCP connection provides. Again, - this can aid the transfer by more rapidly increasing the effective - cwnd to an appropriate point. However, this rate of increase can - also be too aggressive for the network conditions. In this case, - the use of multiple data connections can aggravate congestion in - the network. - - 3. Using multiple connections can provide a very large overall - congestion window. This can be an advantage for TCP - implementations that do not support the TCP window scaling - extension [JBB92]. 
However, the aggregate cwnd size across all N - connections is equivalent to using a TCP implementation that - supports large windows. - - 4. The overall cwnd decrease in the face of dropped segments is - reduced when using N parallel connections. A single TCP - connection reduces the effective size of cwnd to half when a - single segment loss is detected. When utilizing N connections - each using a window of W bytes, a single drop reduces the window - to: - - (N * W) - (W / 2) - - Clearly this is a less dramatic reduction in the effective cwnd size - than when using a single TCP connection. And, the amount by which - the cwnd is decreased is further reduced by increasing N. - - The use of multiple data connections can increase the ability of - non-SACK TCP implementations to quickly recover from multiple dropped - segments without resorting to a timeout, assuming the dropped - segments cross connections. - - The use of multiple parallel connections makes TCP overly aggressive - for many environments and can contribute to congestive collapse in - shared networks [FF99]. The advantages provided by using multiple - TCP connections are now largely provided by TCP extensions (larger - windows, SACKs, etc.). Therefore, the use of a single TCP connection - is more "network friendly" than using multiple parallel connections. - However, using multiple parallel TCP connections may provide - performance improvement in private networks. - - - - - - - - - -Allman, et al. Informational [Page 23] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.5.2 Research - - Research on the use of multiple parallel TCP connections shows - improved performance [IL92,Hah94,AOK95,AKO96]. In addition, research - has shown that multiple TCP connections can outperform a single - modern TCP connection (with large windows and SACK) [AHKO97]. - However, these studies did not consider the impact of using multiple - TCP connections on competing traffic. 
[FF99] argues that using - multiple simultaneous connections to transfer a given file may lead - to congestive collapse in shared networks. - -3.5.3 Implementation Issues - - To utilize multiple parallel TCP connections a client application and - the corresponding server must be customized. As outlined in [FF99] - using multiple parallel TCP connections is not safe (from a - congestion control perspective) in shared networks and should not be - used. - -3.5.4 Topological Considerations - - As stated above, [FF99] outlines that the use of multiple parallel - connections in a shared network, such as the Internet, may lead to - congestive collapse. However, the use of multiple connections may be - safe and beneficial in private networks. The specific topology being - used will dictate the number of parallel connections required. Some - work has been done to determine the appropriate number of connections - on the fly [AKO96], but such a mechanism is far from complete. - -3.5.5 Possible Interaction and Relationships with Other Research - - Using multiple concurrent TCP connections enables use of a large - congestion window, much like the TCP window scaling option [JBB92]. - In addition, a larger initial congestion window is achieved, similar - to using [AFP98] or TCB sharing (see section 3.8). - -3.6 Pacing TCP Segments - -3.6.1 Mitigation Description - - Slow-start takes several round trips to fully open the TCP congestion - window over routes with high bandwidth-delay products. For short TCP - connections (such as WWW traffic with HTTP/1.0), the slow-start - overhead can preclude effective use of the high-bandwidth satellite - links. When senders implement slow-start restart after a TCP - connection goes idle (suggested by Jacobson and Karels [JK92]), - - - - - -Allman, et al. 
Informational [Page 24] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - performance is reduced in long-lived (but bursty) connections (such - as HTTP/1.1, which uses persistent TCP connections to transfer - multiple WWW page elements) [Hei97a]. - - Rate-based pacing (RBP) is a technique, used in the absence of - incoming ACKs, where the data sender temporarily paces TCP segments - at a given rate to restart the ACK clock. Upon receipt of the first - ACK, pacing is discontinued and normal TCP ACK clocking resumes. The - pacing rate may either be known from recent traffic estimates (when - restarting an idle connection or from recent prior connections), or - may be known through external means (perhaps in a point-to-point or - point-to-multipoint satellite network where available bandwidth can - be assumed to be large). - - In addition, pacing data during the first RTT of a transfer may allow - TCP to make effective use of high bandwidth-delay links even for - short transfers. However, in order to pace segments during the first - RTT a TCP will have to be using a non-standard initial congestion - window and a new mechanism to pace outgoing segments rather than send - them back-to-back. Determining an appropriate size for the initial - cwnd is an open research question. Pacing can also be used to reduce - bursts in general (due to buggy TCPs or byte counting, see section - 3.2.2 for a discussion on byte counting). - -3.6.2 Research - - Simulation studies of rate-based pacing for WWW-like traffic have - shown reductions in router congestion and drop rates [VH97a]. In - this environment, RBP substantially improves performance compared to - slow-start-after-idle for intermittent senders, and it slightly - improves performance over burst-full-cwnd-after-idle (because of - drops) [VH98]. More recently, pacing has been suggested to eliminate - burstiness in networks with ACK filtering [BPK97].
- -3.6.3 Implementation Issues - - RBP requires only sender-side changes to TCP. Prototype - implementations of RBP are available [VH97b]. RBP requires an - additional sender timer for pacing. The overhead of timer-driven - data transfer is often considered too high for practical use. - Preliminary experiments suggest that in RBP this overhead is minimal - because RBP only requires this timer for one RTT of transmission - [VH98]. RBP is expected to make TCP more conservative in sending - bursts of data after an idle period in hosts that do not revert to - slow start after an idle period. On the other hand, RBP makes TCP - more aggressive if the sender uses the slow start algorithm to start - the ACK clock after a long idle period. - - - - -Allman, et al. Informational [Page 25] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.6.4 Topology Considerations - - RBP could be used to restart idle TCP connections for all topologies - in Section 2. Use at the beginning of new connections would be - restricted to topologies where available bandwidth can be estimated - out-of-band. - -3.6.5 Possible Interaction and Relationships with Other Research - - Pacing segments may benefit from sharing state amongst various flows - between two hosts, due to the time required to determine the needed - information. Additionally, pacing segments, rather than sending - back-to-back segments, may make estimating the available bandwidth - (as outlined in section 3.2.4) more difficult. - -3.7 TCP Header Compression - - The TCP and IP header information needed to reliably deliver packets - to a remote site across the Internet can add significant overhead, - especially for interactive applications. Telnet packets, for - example, typically carry only a few bytes of data per packet, and - standard IPv4/TCP headers add at least 40 bytes to this; IPv6/TCP - headers add at least 60 bytes. 
Much of this information remains - relatively constant over the course of a session and so can be - replaced by a short session identifier. - -3.7.1 Mitigation Description - - Many fields in the TCP and IP headers either remain constant during - the course of a session, change very infrequently, or can be inferred - from other sources. For example, the source and destination - addresses, as well as the IP version, protocol, and port fields - generally do not change during a session. Packet length can be - deduced from the length field of the underlying link layer protocol - provided that the link layer packet is not padded. Packet sequence - numbers in a forward data stream generally change with every packet, - but increase in a predictable manner. - - The TCP/IP header compression methods described in - [DNP99,DENP97,Jac90] reduce the overhead of TCP sessions by replacing - the data in the TCP and IP headers that remains constant, changes - slowly, or changes in a predictable manner with a short "connection - number". Using this method, the sender first sends a full TCP/IP - header, including in it a connection number that the sender will use - to reference the connection. The receiver stores the full header and - uses it as a template, filling in some fields from the limited - - - - - -Allman, et al. Informational [Page 26] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - information contained in later, compressed headers. This compression - can reduce the size of an IPv4/TCP header from 40 to as few as 3 to - 5 bytes (3 bytes for some common cases, 5 bytes in general). - - Compression and decompression generally happen below the IP layer, at - the end-points of a given physical link (such as at two routers - connected by a serial line). The hosts on either side of the - physical link must maintain some state about the TCP connections that - are using the link.
- - The decompresser must pass complete, uncompressed packets to the IP - layer. Thus header compression is transparent to routing, for - example, since an incoming packet with compressed headers is expanded - before being passed to the IP layer. - - A variety of methods can be used by the compressor/decompressor to - negotiate the use of header compression. For example, the PPP serial - line protocol allows for an option exchange, during which time the - compressor/decompressor agree on whether or not to use header - compression. For older SLIP implementations, [Jac90] describes a - mechanism that uses the first bit in the IP packet as a flag. - - The reduction in overhead is especially useful when the link is - bandwidth-limited such as terrestrial wireless and mobile satellite - links, where the overhead associated with transmitting the header - bits is nontrivial. Header compression has the added advantage that - for the case of uniformly distributed bit errors, compressing TCP/IP - headers can provide a better quality of service by decreasing the - packet error probability. The shorter, compressed packets are less - likely to be corrupted, and the reduction in errors increases the - connection's throughput. - - Extra space is saved by encoding changes in fields that change - relatively slowly by sending only their difference from their values - in the previous packet instead of their absolute values. In order to - decode headers compressed this way, the receiver keeps a copy of each - full, reconstructed TCP header after it is decoded, and applies the - delta values from the next decoded compressed header to the - reconstructed full header template. - - A disadvantage to using this delta encoding scheme where values are - encoded as deltas from their values in the previous packet is that if - a single compressed packet is lost, subsequent packets with - compressed headers can become garbled if they contain fields which - depend on the lost packet. 
Consider a forward data stream of packets - with compressed headers and increasing sequence numbers. If packet N - is lost, the full header of packet N+1 will be reconstructed at the - receiver using packet N-1's full header as a template. Thus the - - - -Allman, et al. Informational [Page 27] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - sequence number, which should have been calculated from packet N's - header, will be wrong, the checksum will fail, and the packet will be - discarded. When the sending TCP times out and retransmits, a packet - with a full header is forwarded to re-synchronize the decompresser. - - It is important to note that the compressor does not maintain any - timers, nor does the decompresser know when an error occurred (only - the receiving TCP knows this, when the TCP checksum fails). A single - bit error will cause the decompresser to lose sync, and subsequent - packets with compressed headers will be dropped by the receiving TCP, - since they will all fail the TCP checksum. When this happens, no - duplicate acknowledgments will be generated, and the decompresser can - only re-synchronize when it receives a packet with an uncompressed - header. This means that when header compression is being used, both - fast retransmit and selective acknowledgments will not be able to - correct packets lost on a compressed link. The "twice" algorithm, - described below, may be a partial solution to this problem. - - [DNP99] and [DENP97] describe TCP/IPv4 and TCP/IPv6 compression - algorithms including compressing the various IPv6 extension headers - as well as methods for compressing non-TCP streams. [DENP97] also - augments TCP header compression by introducing the "twice" algorithm.
- If a particular packet fails to decompress properly, the twice - algorithm modifies its assumptions about the inferred fields in the - compressed header, assuming that a packet identical to the current - one was dropped between the last correctly decoded packet and the - current one. Twice then tries to decompress the received packet - under the new assumptions and, if the checksum passes, the packet is - passed to IP and the decompresser state has been re-synchronized. - This procedure can be extended to three or more decoding attempts. - Additional robustness can be achieved by caching full copies of - packets which don't decompress properly in the hopes that later - arrivals will fix the problem. Finally, the performance improvement - if the decompresser can explicitly request a full header is - discussed. Simulation results show that twice, in conjunction with - the full header request mechanism, can improve throughput over - uncompressed streams. - -3.7.2 Research - - [Jac90] outlines a simple header compression scheme for TCP/IP. - - In [DENP97] the authors present the results of simulations showing - that header compression is advantageous for both low and medium - bandwidth links. Simulations show that the twice algorithm, combined - with an explicit header request mechanism, improved throughput by - 10-15% over uncompressed sessions across a wide range of bit error - rates. - - - -Allman, et al. Informational [Page 28] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - Much of this improvement may have been due to the twice algorithm - quickly re-synchronizing the decompresser when a packet is lost. - This is because the twice algorithm, applied one or two times when - the decompresser becomes unsynchronized, will re-sync the - decompresser in between 83% and 99% of the cases examined. This - means that packets received correctly after twice has resynchronized - the decompresser will cause duplicate acknowledgments. 
This re- - enables the use of both fast retransmit and SACK in conjunction with - header compression. - -3.7.3 Implementation Issues - - Implementing TCP/IP header compression requires changes at both the - sending (compressor) and receiving (decompresser) ends of each link - that uses compression. The twice algorithm requires very little - extra machinery over and above header compression, while the explicit - header request mechanism of [DENP97] requires more extensive - modifications to the sending and receiving ends of each link that - employs header compression. Header compression does not violate - TCP's congestion control mechanisms and therefore can be safely - implemented in shared networks. - -3.7.4 Topology Considerations - - TCP/IP header compression is applicable to all of the environments - discussed in section 2, but will provide relatively more improvement - in situations where packet sizes are small (i.e., overhead is large) - and there is medium to low bandwidth and/or higher BER. When TCP's - congestion window size is large, implementing the explicit header - request mechanism, the twice algorithm, and caching packets which - fail to decompress properly becomes more critical. - -3.7.5 Possible Interaction and Relationships with Other Research - - As discussed above, losing synchronization between a sender and - receiver can cause many packet drops. The frequency of losing - synchronization and the effectiveness of the twice algorithm may - point to using a SACK-based loss recovery algorithm to reduce the - impact of multiple lost segments. However, even very robust SACK- - based algorithms may not work well if too many segments are lost. - -3.8 Sharing TCP State Among Similar Connections - - - - - - - - - -Allman, et al. 
Informational [Page 29] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.8.1 Mitigation Description - - Persistent TCP state information can be used to overcome limitations - in the configuration of the initial state, and to automatically tune - TCP to environments using satellite links and to coordinate multiple - TCP connections sharing a satellite link. - - TCP includes a variety of parameters, many of which are set to - initial values which can severely affect the performance of TCP - connections traversing satellite links, even though most TCP - parameters are adjusted later after the connection is established. - These parameters include initial size of cwnd and initial MSS size. - Various suggestions have been made to change these initial - conditions, to more effectively support satellite links. However, it - is difficult to select any single set of parameters which is - effective for all environments. - - An alternative to attempting to select these parameters a-priori is - sharing state across TCP connections and using this state when - initializing a new connection. For example, if all connections to a - subnet result in extended congestion windows of 1 megabyte, it is - probably more efficient to start new connections with this value, - than to rediscover it by requiring the cwnd to increase using slow - start over a period of dozens of round-trip times. - -3.8.2 Research - - Sharing state among connections brings up a number of questions such - as what information to share, with whom to share, how to share it, - and how to age shared information. First, what information is to be - shared must be determined. Some information may be appropriate to - share among TCP connections, while some information sharing may be - inappropriate or not useful. Next, we need to determine with whom to - share information. Sharing may be appropriate for TCP connections - sharing a common path to a given host. 
Information may be shared - among connections within a host, or even among connections between - different hosts, such as hosts on the same LAN. However, sharing - information between connections not traversing the same network may - not be appropriate. Given the state to share and the parties that - share it, a mechanism for the sharing is required. Simple state, - like MSS and RTT, is easy to share, but congestion window information - can be shared a variety of ways. The sharing mechanism determines - priorities among the sharing connections, and a variety of fairness - criteria need to be considered. Also, the mechanisms by which - information is aged require further study. See RFC 2140 for a - discussion of the security issues in both sharing state within a - single host and sharing state among hosts on a subnet. Finally, the - security concerns associated with sharing a piece of information need - - - -Allman, et al. Informational [Page 30] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - to be carefully considered before introducing such a mechanism. Many - of these open research questions must be answered before state - sharing can be widely deployed. - - The opportunity for such sharing, both among a sequence of - connections, as well as among concurrent connections, is described in - more detail in [Tou97]. The state management itself is largely an - implementation issue, however what information should be shared and - the specific ways in which the information should be shared is an - open question. - - Sharing parts of the TCB state was originally documented in T/TCP - [Bra92], and is used there to aggregate RTT values across connection - instances, to provide meaningful average RTTs, even though most - connections are expected to persist for only one RTT. T/TCP also - shares a connection identifier, a sequence number separate from the - window number and address/port pairs by which TCP connections are - typically distinguished. 
As a result of this shared state, T/TCP - allows a receiver to pass data in the SYN segment to the receiving - application, prior to the completion of the three-way handshake, - without compromising the integrity of the connection. In effect, this - shared state caches a partial handshake from the previous connection, - which is a variant of the more general issue of TCB sharing. - - Sharing state among connections (including transfers using non-TCP - protocols) is further investigated in [BRS99]. - -3.8.3 Implementation Issues - - Sharing TCP state across connections requires changes to the sender's - TCP stack, and possibly the receiver's TCP stack (as in the case of - T/TCP, for example). Sharing TCP state may make a particular TCP - connection more aggressive. However, the aggregate traffic should be - more conservative than a group of independent TCP connections. - Therefore, sharing TCP state should be safe for use in shared - networks. Note that state sharing does not present any new security - problems within multiuser hosts. In such a situation, users can - steal network resources from one another with or without state - sharing. - -3.8.4 Topology Considerations - - It is expected that sharing state across TCP connections may be - useful in all network environments presented in section 2. - - - - - - - -Allman, et al. Informational [Page 31] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.8.5 Possible Interaction and Relationships with Other Research - - The state sharing outlined above is very similar to the Congestion - Manager proposal [BRS99] that attempts to share congestion control - information among both TCP and UDP flows between a pair of hosts. - -3.9 ACK Congestion Control - - In highly asymmetric networks, a low-speed return link can restrict - the performance of the data flow on a high-speed forward link by - limiting the flow of acknowledgments returned to the data sender. 
- For example, if the data sender uses 1500 byte segments, and the - receiver generates 40 byte acknowledgments (IPv4, TCP without - options), the reverse link will congest with ACKs for asymmetries of - more than 75:1 if delayed ACKs are used, and 37:1 if every segment is - acknowledged. For a 1.5 Mb/second data link, ACK congestion will - occur for reverse link speeds below 20 kilobits/sec. These levels of - asymmetry will readily occur if the reverse link is shared among - multiple satellite receivers, as is common in many VSAT satellite - networks. If a terrestrial modem link is used as a reverse link, ACK - congestion is also likely, especially as the speed of the forward - link is increased. Current congestion control mechanisms are aimed - at controlling the flow of data segments, but do not affect the flow - of ACKs. - - In [KVR98] the authors point out that the flow of acknowledgments can - be restricted on the low-speed link not only by the bandwidth of the - link, but also by the queue length of the router. The router may - limit its queue length by counting packets, not bytes, and therefore - begin discarding ACKs even if there is enough bandwidth to forward - them. - -3.9.1 Mitigation Description - - ACK Congestion Control extends the concept of flow control for data - segments to acknowledgment segments. In the method described in - [BPK97], any intermediate router can mark an acknowledgment with an - Explicit Congestion Notification (ECN) bit once the queue occupancy - in the router exceeds a given threshold. The data sender (which - receives the acknowledgment) must "echo" the ECN bit back to the data - receiver (see section 3.3.3 for a more detailed discussion of ECN). - The proposed algorithm for marking ACK segments with an ECN bit is - Random Early Detection (RED) [FJ93]. In response to the receipt of - ECN marked data segments, the receiver will dynamically reduce the - rate of acknowledgments using a multiplicative backoff. 
Once - segments without ECN are received, the data receiver speeds up - acknowledgments using a linear increase, up to a rate of either 1 (no - - - - -Allman, et al. Informational [Page 32] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - delayed ACKs) or 2 (normal delayed ACKs) data segments per ACK. The - authors suggest that an ACK be generated at least once per window, - and ideally a few times per window. - - As in the RED congestion control mechanism for data flow, the - bottleneck gateway can randomly discard acknowledgments, rather than - marking them with an ECN bit, once the queue fills beyond a given - threshold. - -3.9.2 Research - - [BPK97] analyze the effect of ACK Congestion Control (ACC) on the - performance of an asymmetric network. They note that the use of ACC, - and indeed the use of any scheme which reduces the frequency of - acknowledgments, has potential unwanted side effects. Since each ACK - will acknowledge more than the usual one or two data segments, the - likelihood of segment bursts from the data sender is increased. In - addition, congestion window growth may be impeded if the receiver - grows the window by counting received ACKs, as mandated by - [Ste97,APS99]. The authors therefore combine ACC with a series of - modifications to the data sender, referred to as TCP Sender - Adaptation (SA). SA combines a limit on the number of segments sent - in a burst, regardless of window size. In addition, byte counting - (as opposed to ACK counting) is employed for window growth. Note - that byte counting has been studied elsewhere and can introduce - side-effects, as well [All98]. - - The results presented in [BPK97] indicate that using ACC and SA will - reduce the bursts produced by ACK losses in unmodified (Reno) TCP. - In cases where these bursts would lead to data loss at an - intermediate router, the ACC and SA modification significantly - improve the throughput for a single data transfer. 
The results - further suggest that the use of ACC and SA significantly improve - fairness between two simultaneous transfers. - - ACC is further reported to prevent the increase in round trip time - (RTT) that occurs when an unmodified TCP fills the reverse router - queue with acknowledgments. - - In networks where the forward direction is expected to suffer losses - in one of the gateways, due to queue limitations, the authors report - at best a very slight improvement in performance for ACC and SA, - compared to unmodified Reno TCP. - - - - - - - - -Allman, et al. Informational [Page 33] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.9.3 Implementation Issues - - Both ACC and SA require modification of the sending and receiving - hosts, as well as the bottleneck gateway. The current research - suggests that implementing ACC without the SA modifications results - in a data sender which generates potentially disruptive segment - bursts. It should be noted that ACC does require host modifications - if it is implemented in the way proposed in [BPK97]. The authors - note that ACC can be implemented by discarding ACKs (which requires - only a gateway modification, but no changes in the hosts), as opposed - to marking them with ECN. Such an implementation may, however, - produce bursty data senders if it is not combined with a burst - mitigation technique. ACC requires changes to the standard ACKing - behavior of a receiving TCP and therefore is not recommended for use - in shared networks. - -3.9.4 Topology Considerations - - Neither ACC nor SA require the storage of state in the gateway. - These schemes should therefore be applicable for all topologies, - provided that the hosts using the satellite or hybrid network can be - modified. However, these changes are expected to be especially - beneficial to networks containing asymmetric satellite links. 
- -3.9.5 Possible Interaction and Relationships with Other Research - - Note that ECN is a pre-condition for using ACK congestion control. - Additionally, the ACK Filtering algorithm discussed in the next - section attempts to solve the same problem as ACC. Choosing between - the two algorithms (or another mechanism) is currently an open - research question. - -3.10 ACK Filtering - - ACK Filtering (AF) is designed to address the same ACK congestion - effects described in 3.9. Contrary to ACC, however, AF is designed - to operate without host modifications. - -3.10.1 Mitigation Description - - AF takes advantage of the cumulative acknowledgment structure of TCP. - The bottleneck router in the reverse direction (the low speed link) - must be modified to implement AF. Upon receipt of a segment which - represents a TCP acknowledgment, the router scans the queue for - redundant ACKs for the same connection, i.e. ACKs which acknowledge - portions of the window which are included in the most recent ACK. - All of these "earlier" ACKs are removed from the queue and discarded. - - - - -Allman, et al. Informational [Page 34] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - The router does not store state information, but does need to - implement the additional processing required to find and remove - segments from the queue upon receipt of an ACK. - -3.10.2 Research - - [BPK97] analyzes the effects of AF. As is the case in ACC, the use - of ACK filtering alone would produce significant sender bursts, since - the ACKs will be acknowledging more previously-unacknowledged data. - The SA modifications described in 3.9.2 could be used to prevent - those bursts, at the cost of requiring host modifications. To - prevent the need for modifications in the TCP stack, AF is more - likely to be paired with the ACK Reconstruction (AR) technique, which - can be implemented at the router where segments exit the slow reverse - link. 
- - AR inspects ACKs exiting the link, and if it detects large "gaps" in - the ACK sequence, it generates additional ACKs to reconstruct an - acknowledgment flow which more closely resembles what the data sender - would have seen had ACK Filtering not been introduced. AR requires - two parameters; one parameter is the desired ACK frequency, while the - second controls the spacing, in time, between the release of - consecutive reconstructed ACKs. - - In [BPK97], the authors show the combination of AF and AR to increase - throughput, in the networks studied, over both unmodified TCP and the - ACC/SA modifications. Their results also strongly suggest that the - use of AF alone, in networks where congestion losses are expected, - decreases performance (even below the level of unmodified TCP Reno) - due to sender bursting. - - AF delays acknowledgments from arriving at the receiver by dropping - earlier ACKs in favor of later ACKs. This process can cause a slight - hiccup in the transmission of new data by the TCP sender. - -3.10.3 Implementation Issues - - Both ACK Filtering and ACK Reconstruction require only router - modification. However, the implementation of AR requires some - storage of state information in the exit router. While AF does not - require storage of state information, its use without AR (or SA) - could produce undesired side effects. Furthermore, more research is - required regarding appropriate ranges for the parameters needed in - AR. - - - - - - - -Allman, et al. Informational [Page 35] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -3.10.4 Topology Considerations - - AF and AR appear applicable to all topologies, assuming that the - storage of state information in AR does not prove to be prohibitive - for routers which handle large numbers of flows. The fact that TCP - stack modifications are not required for AF/AR makes this approach - attractive for hybrid networks and networks with diverse types of - hosts. 
These modifications, however, are expected to be most - beneficial in asymmetric network paths. - - On the other hand, the implementation of AF/AR requires the routers - to examine the TCP header, which prohibits their use in secure - networks where IPSEC is deployed. In such networks, AF/AR can be - effective only inside the security perimeter of a private, or virtual - private network, or in private networks where the satellite link is - protected only by link-layer encryption (as opposed to IPSEC). ACK - Filtering is safe to use in shared networks (from a congestion - control point-of-view), as the number of ACKs can only be reduced, - which makes TCP less aggressive. However, note that while TCP is - less aggressive, the delays that AF induces (outlined above) can lead - to larger bursts than would otherwise occur. - -3.10.5 Possible Interaction and Relationships with Other Research - - ACK Filtering attempts to solve the same problem as ACK Congestion - Control (as outlined in section 3.9). Which of the two algorithms is - more appropriate is currently an open research question. - -4 Conclusions - - This document outlines TCP items that may be able to mitigate the - performance problems associated with using TCP in networks containing - satellite links. These mitigations are not IETF standards track - mechanisms and require more study before being recommended by the - IETF. The research community is encouraged to examine the above - mitigations in an effort to determine which are safe for use in - shared networks such as the Internet. - -5 Security Considerations - - Several of the above sections noted specific security concerns which - a given mitigation aggravates. - - Additionally, any form of wireless communication link is more - susceptible to eavesdropping security attacks than standard wire- - based links due to the relative ease with which an attacker can watch - the network and the difficulty in finding attackers monitoring the - network.
- - - -Allman, et al. Informational [Page 36] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -6 Acknowledgments - - Our thanks to Aaron Falk and Sally Floyd, who provided very helpful - comments on drafts of this document. - -7 References - - [AFP98] Allman, M., Floyd, S. and C. Partridge, "Increasing TCP's - Initial Window", RFC 2414, September 1998. - - [AGS99] Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP Over - Satellite Channels using Standard Mechanisms", BCP 28, RFC - 2488, January 1999. - - [AHKO97] Mark Allman, Chris Hayes, Hans Kruse, Shawn Ostermann. TCP - Performance Over Satellite Links. In Proceedings of the - 5th International Conference on Telecommunication Systems, - March 1997. - - [AHO98] Mark Allman, Chris Hayes, Shawn Ostermann. An Evaluation - of TCP with Larger Initial Windows. Computer Communication - Review, 28(3), July 1998. - - [AKO96] Mark Allman, Hans Kruse, Shawn Ostermann. An Application- - Level Solution to TCP's Satellite Inefficiencies. In - Proceedings of the First International Workshop on - Satellite-based Information Services (WOSBIS), November - 1996. - - [All97a] Mark Allman. Improving TCP Performance Over Satellite - Channels. Master's thesis, Ohio University, June 1997. - - [All97b] Mark Allman. Fixing Two BSD TCP Bugs. Technical Report - CR-204151, NASA Lewis Research Center, October 1997. - - [All98] Mark Allman. On the Generation and Use of TCP - Acknowledgments. ACM Computer Communication Review, 28(5), - October 1998. - - [AOK95] Mark Allman, Shawn Ostermann, Hans Kruse. Data Transfer - Efficiency Over Satellite Circuits Using a Multi-Socket - Extension to the File Transfer Protocol (FTP). In - Proceedings of the ACTS Results Conference, NASA Lewis - Research Center, September 1995. - - [AP99] Mark Allman, Vern Paxson. On Estimating End-to-End Network - Path Properties. ACM SIGCOMM, September 1999. - - - - -Allman, et al. 
Informational [Page 37] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - [APS99] Allman, M., Paxson, V. and W. Richard Stevens, "TCP - Congestion Control", RFC 2581, April 1999. - - [BCC+98] Braden, B., Clark, D., Crowcroft, J., Davie, B., Deering, - S., Estrin, D., Floyd, S., Jacobson, V., Minshall, G., - Partridge, C., Peterson, L., Ramakrishnan, K., Shenker, S., - Wroclawski, J. and L. Zhang, "Recommendations on Queue - Management and Congestion Avoidance in the Internet", RFC - 2309, April 1998. - - [BKVP97] B. Bakshi and P. Krishna and N. Vaidya and D. Pradham, - "Improving Performance of TCP over Wireless Networks", 17th - International Conference on Distributed Computing Systems - (ICDCS), May 1997. - - [BPK97] Hari Balakrishnan, Venkata N. Padmanabhan, and Randy H. - Katz. The Effects of Asymmetry on TCP Performance. In - Proceedings of the ACM/IEEE Mobicom, Budapest, Hungary, - ACM. September, 1997. - - [BPK98] Hari Balakrishnan, Venkata Padmanabhan, Randy H. Katz. The - Effects of Asymmetry on TCP Performance. ACM Mobile - Networks and Applications (MONET), 1998 (to appear). - - [BPSK96] H. Balakrishnan and V. Padmanabhan and S. Sechan and R. - Katz, "A Comparison of Mechanisms for Improving TCP - Performance over Wireless Links", ACM SIGCOMM, August 1996. - - [Bra89] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [Bra92] Braden, R., "Transaction TCP -- Concepts", RFC 1379, - September 1992. - - [Bra94] Braden, R., "T/TCP -- TCP Extensions for Transactions: - Functional Specification", RFC 1644, July 1994. - - [BRS99] Hari Balakrishnan, Hariharan Rahul, and Srinivasan Seshan. - An Integrated Congestion Management Architecture for - Internet Hosts. ACM SIGCOMM, September 1999. - - [ddKI99] M. deVivo, G.O. deVivo, R. Koeneke, G. Isern. Internet - Vulnerabilities Related to TCP/IP and T/TCP. Computer - Communication Review, 29(1), January 1999. 
- - - - - - - -Allman, et al. Informational [Page 38] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - [DENP97] Mikael Degermark, Mathias Engan, Bjorn Nordgren, Stephen - Pink. Low-Loss TCP/IP Header Compression for Wireless - Networks. ACM/Baltzer Journal on Wireless Networks, vol.3, - no.5, p. 375-87. - - [DMT96] R. C. Durst and G. J. Miller and E. J. Travis, "TCP - Extensions for Space Communications", Mobicom 96, ACM, USA, - 1996. - - [DNP99] Degermark, M., Nordgren, B. and S. Pink, "IP Header - Compression", RFC 2507, February 1999. - - [FF96] Kevin Fall, Sally Floyd. Simulation-based Comparisons of - Tahoe, Reno, and SACK TCP. Computer Communication Review, - V. 26 N. 3, July 1996, pp. 5-21. - - [FF99] Sally Floyd, Kevin Fall. Promoting the Use of End-to-End - Congestion Control in the Internet, IEEE/ACM Transactions - on Networking, August 1999. - - [FH99] Floyd, S. and T. Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [FJ93] Sally Floyd and Van Jacobson. Random Early Detection - Gateways for Congestion Avoidance, IEEE/ACM Transactions on - Networking, V. 1 N. 4, August 1993. - - [Flo91] Sally Floyd. Connections with Multiple Congested Gateways - in Packet-Switched Networks, Part 1: One-way Traffic. ACM - Computer Communications Review, V. 21, N. 5, October 1991. - - [Flo94] Sally Floyd. TCP and Explicit Congestion Notification, ACM - Computer Communication Review, V. 24 N. 5, October 1994. - - [Flo99] Sally Floyd. "Re: TCP and out-of-order delivery", email to - end2end-interest mailing list, February, 1999. - - [Hah94] Jonathan Hahn. MFTP: Recent Enhancements and Performance - Measurements. Technical Report RND-94-006, NASA Ames - Research Center, June 1994. - - [Hay97] Chris Hayes. Analyzing the Performance of New TCP - Extensions Over Satellite Links. Master's Thesis, Ohio - University, August 1997. - - [HK98] Tom Henderson, Randy Katz. 
On Improving the Fairness of - TCP Congestion Avoidance. Proceedings of IEEE Globecom `98 - Conference, 1998. - - - -Allman, et al. Informational [Page 39] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - [HK99] Tim Henderson, Randy Katz. Transport Protocols for - Internet-Compatible Satellite Networks, IEEE Journal on - Selected Areas of Communications, February, 1999. - - [Hoe95] J. Hoe, Startup Dynamics of TCP's Congestion Control and - Avoidance Schemes. Master's Thesis, MIT, 1995. - - [Hoe96] Janey Hoe. Improving the Startup Behavior of a Congestion - Control Scheme for TCP. In ACM SIGCOMM, August 1996. - - [IL92] David Iannucci and John Lakashman. MFTP: Virtual TCP - Window Scaling Using Multiple Connections. Technical - Report RND-92-002, NASA Ames Research Center, January 1992. - - [Jac88] Van Jacobson. Congestion Avoidance and Control. In - Proceedings of the SIGCOMM '88, ACM. August, 1988. - - [Jac90] Jacobson, V., "Compressing TCP/IP Headers", RFC 1144, - February 1990. - - [JBB92] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for - High Performance", RFC 1323, May 1992. - - [JK92] Van Jacobson and Mike Karels. Congestion Avoidance and - Control. Originally appearing in the proceedings of - SIGCOMM '88 by Jacobson only, this revised version includes - an additional appendix. The revised version is available - at ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z. 1992. - - [Joh95] Stacy Johnson. Increasing TCP Throughput by Using an - Extended Acknowledgment Interval. Master's Thesis, Ohio - University, June 1995. - - [KAGT98] Hans Kruse, Mark Allman, Jim Griner, Diepchi Tran. HTTP - Page Transfer Rates Over Geo-Stationary Satellite Links. - March 1998. Proceedings of the Sixth International - Conference on Telecommunication Systems. - - [Kes91] Srinivasan Keshav. A Control Theoretic Approach to Flow - Control. In ACM SIGCOMM, September 1991. - - [KM97] S. Keshav, S. Morgan. 
SMART Retransmission: Performance - with Overload and Random Losses. Proceeding of Infocom. - 1997. - - - - - - - -Allman, et al. Informational [Page 40] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - [KVR98] Lampros Kalampoukas, Anujan Varma, and K. K.Ramakrishnan. - Improving TCP Throughput Over Two-Way Asymmetric Links: - Analysis and Solutions. Measurement and Modeling of - Computer Systems, 1998, Pages 78-89. - - [MM96a] M. Mathis, J. Mahdavi, "Forward Acknowledgment: Refining - TCP Congestion Control," Proceedings of SIGCOMM'96, August, - 1996, Stanford, CA. Available from - http://www.psc.edu/networking/papers/papers.html - - [MM96b] M. Mathis, J. Mahdavi, "TCP Rate-Halving with Bounding - Parameters" Available from - http://www.psc.edu/networking/papers/FACKnotes/current. - - [MMFR96] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. - - [MSMO97] M. Mathis, J. Semke, J. Mahdavi, T. Ott, "The Macroscopic - Behavior of the TCP Congestion Avoidance - Algorithm",Computer Communication Review, volume 27, - number3, July 1997. Available from - http://www.psc.edu/networking/papers/papers.html - - [MV98] Miten N. Mehta and Nitin H. Vaidya. Delayed Duplicate- - Acknowledgments: A Proposal to Improve Performance of TCP - on Wireless Links. Technical Report 98-006, Department of - Computer Science, Texas A&M University, February 1998. - - [Nic97] Kathleen Nichols. Improving Network Simulation with - Feedback. Com21, Inc. Technical Report. Available from - http://www.com21.com/pages/papers/068.pdf. - - [PADHV99] Paxson, V., Allman, M., Dawson, S., Heavens, I. and B. - Volz, "Known TCP Implementation Problems", RFC 2525, March - 1999. - - [Pax97] Vern Paxson. Automated Packet Trace Analysis of TCP - Implementations. In Proceedings of ACM SIGCOMM, September - 1997. - - [PN98] Poduri, K. and K. 
Nichols, "Simulation Studies of Increased - Initial TCP Window Size", RFC 2415, September 1998. - - [Pos81] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - - - - - -Allman, et al. Informational [Page 41] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - [RF99] Ramakrishnan, K. and S. Floyd, "A Proposal to add Explicit - Congestion Notification (ECN) to IP", RFC 2481, January - 1999. - - [SF98] Nihal K. G. Samaraweera and Godred Fairhurst, - "Reinforcement of TCP error Recovery for Wireless - Communication", Computer Communication Review, volume 28, - number 2, April 1998. - - [SP98] Shepard, T. and C. Partridge, "When TCP Starts Up With Four - Packets Into Only Three Buffers", RFC 2416, September 1998. - - [Ste97] Stevens, W., "TCP Slow Start, Congestion Avoidance, Fast - Retransmit, and Fast Recovery Algorithms", RFC 2001, - January 1997. - - [Sut98] B. Suter, T. Lakshman, D. Stiliadis, and A. Choudhury. - Design Considerations for Supporting TCP with Per-flow - Queueing. Proceedings of IEEE Infocom `98 Conference, - 1998. - - [Tou97] Touch, J., "TCP Control Block Interdependence", RFC 2140, - April 1997. - - [VH97a] Vikram Visweswaraiah and John Heidemann. Improving Restart - of Idle TCP Connections. Technical Report 97-661, - University of Southern California, 1997. - - [VH97b] Vikram Visweswaraiah and John Heidemann. Rate-based pacing - Source Code Distribution, Web page: - http://www.isi.edu/lsam/publications/rate_based_pacing/README.html - November, 1997. - - [VH98] Vikram Visweswaraiah and John Heidemann. Improving Restart - of Idle TCP Connections (revised). Submitted for - publication. - - - - - - - - - - - - - - - -Allman, et al. Informational [Page 42] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -8 Authors' Addresses - - Mark Allman - NASA Glenn Research Center/BBN Technologies - Lewis Field - 21000 Brookpark Rd. 
MS 54-2 - Cleveland, OH 44135 - - EMail: mallman@grc.nasa.gov - http://roland.grc.nasa.gov/~mallman - - - Spencer Dawkins - Nortel - P.O.Box 833805 - Richardson, TX 75083-3805 - - EMail: Spencer.Dawkins.sdawkins@nt.com - - - Dan Glover - NASA Glenn Research Center - Lewis Field - 21000 Brookpark Rd. MS 3-6 - Cleveland, OH 44135 - - EMail: Daniel.R.Glover@grc.nasa.gov - http://roland.grc.nasa.gov/~dglover - - - Jim Griner - NASA Glenn Research Center - Lewis Field - 21000 Brookpark Rd. MS 54-2 - Cleveland, OH 44135 - - EMail: jgriner@grc.nasa.gov - http://roland.grc.nasa.gov/~jgriner - - - Diepchi Tran - NASA Glenn Research Center - Lewis Field - 21000 Brookpark Rd. MS 54-2 - Cleveland, OH 44135 - - EMail: dtran@grc.nasa.gov - - - - -Allman, et al. Informational [Page 43] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - Tom Henderson - University of California at Berkeley - Phone: +1 (510) 642-8919 - - EMail: tomh@cs.berkeley.edu - URL: http://www.cs.berkeley.edu/~tomh/ - - - John Heidemann - University of Southern California/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292-6695 - - EMail: johnh@isi.edu - - - Joe Touch - University of Southern California/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey, CA 90292-6601 - USA - - Phone: +1 310-448-9151 - Fax: +1 310-823-6714 - URL: http://www.isi.edu/touch - EMail: touch@isi.edu - - - Hans Kruse - J. Warren McClure School of Communication Systems Management - Ohio University - 9 S. College Street - Athens, OH 45701 - - Phone: 740-593-4891 - Fax: 740-593-4889 - EMail: hkruse1@ohiou.edu - http://www.csm.ohiou.edu/kruse - - - Shawn Ostermann - School of Electrical Engineering and Computer Science - Ohio University - 416 Morton Hall - Athens, OH 45701 - - Phone: (740) 593-1234 - EMail: ostermann@cs.ohiou.edu - - - -Allman, et al. 
Informational [Page 44] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - - Keith Scott - The MITRE Corporation - M/S W650 - 1820 Dolley Madison Blvd. - McLean VA 22102-3481 - - EMail: kscott@mitre.org - - - Jeffrey Semke - Pittsburgh Supercomputing Center - 4400 Fifth Ave. - Pittsburgh, PA 15213 - - EMail: semke@psc.edu - http://www.psc.edu/~semke - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Allman, et al. Informational [Page 45] - -RFC 2760 Ongoing TCP Research Related to Satellites February 2000 - - -9 Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Allman, et al. Informational [Page 46] - diff --git a/kernel/picotcp/RFC/rfc2861.txt b/kernel/picotcp/RFC/rfc2861.txt deleted file mode 100644 index e5a4998..0000000 --- a/kernel/picotcp/RFC/rfc2861.txt +++ /dev/null @@ -1,619 +0,0 @@ - - - - - - -Network Working Group M. Handley -Request for Comments: 2861 J. Padhye -Category: Experimental S. Floyd - ACIRI - June 2000 - - - TCP Congestion Window Validation - -Status of this Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. - -Abstract - - TCP's congestion window controls the number of packets a TCP flow may - have in the network at any time. However, long periods when the - sender is idle or application-limited can lead to the invalidation of - the congestion window, in that the congestion window no longer - reflects current information about the state of the network. This - document describes a simple modification to TCP's congestion control - algorithms to decay the congestion window cwnd after the transition - from a sufficiently-long application-limited period, while using the - slow-start threshold ssthresh to save information about the previous - value of the congestion window. 
- - An invalid congestion window also results when the congestion window - is increased (i.e., in TCP's slow-start or congestion avoidance - phases) during application-limited periods, when the previous value - of the congestion window might never have been fully utilized. We - propose that the TCP sender should not increase the congestion window - when the TCP sender has been application-limited (and therefore has - not fully used the current congestion window). We have explored - these algorithms both with simulations and with experiments from an - implementation in FreeBSD. - -1. Conventions and Acronyms - - The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, - SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this - document, are to be interpreted as described in [B97]. - - - -Handley, et al. Experimental [Page 1] - -RFC 2861 TCP Congestion Window Validation June 2000 - - -2. Introduction - - TCP's congestion window controls the number of packets a TCP flow may - have in the network at any time. The congestion window is set using - an Additive-Increase, Multiplicative-Decrease (AIMD) mechanism that - probes for available bandwidth, dynamically adapting to changing - network conditions. This AIMD mechanism works well when the sender - continually has data to send, as is typically the case for TCP used - for bulk-data transfer. In contrast, for TCP used with telnet - applications, the data sender often has little or no data to send, - and the sending rate is often determined by the rate at which data is - generated by the user. With the advent of the web, including - developments such as TCP senders with dynamically-created data and - HTTP 1.1 with persistent-connection TCP, the interaction between - application-limited periods (when the sender sends less than is - allowed by the congestion or receiver windows) and network-limited - periods (when the sender is limited by the TCP window) becomes - increasingly important. 
More precisely, we define a network-limited - period as any period when the sender is sending a full window of - data. - - Long periods when the sender is application-limited can lead to the - invalidation of the congestion window. During periods when the TCP - sender is network-limited, the value of the congestion window is - repeatedly "revalidated" by the successful transmission of a window - of data without loss. When the TCP sender is network-limited, there - is an incoming stream of acknowledgements that "clocks out" new data, - giving concrete evidence of recent available bandwidth in the - network. In contrast, during periods when the TCP sender is - application-limited, the estimate of available capacity represented - by the congestion window may become steadily less accurate over time. - In particular, capacity that had once been used by the network- - limited connection might now be used by other traffic. - - Current TCP implementations have a range of behaviors for starting up - after an idle period. Some current TCP implementations slow-start - after an idle period longer than the RTO estimate, as suggested in - [RFC2581] and in the appendix of [VJ88], while other implementations - don't reduce their congestion window after an idle period. RFC 2581 - [RFC2581] recommends the following: "a TCP SHOULD set cwnd to no more - than RW [the initial window] before beginning transmission if the TCP - has not sent data in an interval exceeding the retransmission - timeout." A proposal for TCP's slow-start after idle has also been - discussed in [HTH98]. The issue of validation of congestion - information during idle periods has also been addressed in contexts - other than TCP and IP, for example in "Use-it or Lose-it" mechanisms - for ATM networks [J96,J95]. - - - - -Handley, et al. 
Experimental [Page 2] - -RFC 2861 TCP Congestion Window Validation June 2000 - - - To address the revalidation of the congestion window after a - application-limited period, we propose a simple modification to TCP's - congestion control algorithms to decay the congestion window cwnd - after the transition from a sufficiently-long application-limited - period (i.e., at least one roundtrip time) to a network-limited - period. In particular, we propose that after an idle period, the TCP - sender should reduce its congestion window by half for every RTT that - the flow has remained idle. - - When the congestion window is reduced, the slow-start threshold - ssthresh remains as "memory" of the recent congestion window. - Specifically, ssthresh is never decreased when cwnd is reduced after - an application-limited period; before cwnd is reduced, ssthresh is - set to the maximum of its current value, and half-way between the old - and the new values of cwnd. This use of ssthresh allows a TCP sender - increasing its sending rate after an application-limited period to - quickly slow-start to recover most of the previous value of the - congestion window. To be more precise, if ssthresh is less than 3/4 - cwnd when the congestion window is reduced after an application- - limited period, then ssthresh is increased to 3/4 cwnd before the - reduction of the congestion window. - - An invalid congestion window also results when the congestion window - is increased (i.e., in TCP's slow-start or congestion avoidance - phases) during application-limited periods, when the previous value - of the congestion window might never have been fully utilized. As - far as we know, all current TCP implementations increase the - congestion window when an acknowledgement arrives, if allowed by the - receiver's advertised window and the slow-start or congestion - avoidance window increase algorithm, without checking to see if the - previous value of the congestion window has in fact been used. 
This - document proposes that the window increase algorithm not be invoked - during application-limited periods [MSML99]. In particular, the TCP - sender should not increase the congestion window when the TCP sender - has been application-limited (and therefore has not fully used the - current congestion window). This restriction prevents the congestion - window from growing arbitrarily large, in the absence of evidence - that the congestion window can be supported by the network. From - [MSML99, Section 5.2]: "This restriction assures that [cwnd] only - grows as long as TCP actually succeeds in injecting enough data into - the network to test the path." - - A somewhat-orthogonal problem associated with maintaining a large - congestion window after an application-limited period is that the - sender, with a sudden large amount of data to send after a quiescent - period, might immediately send a full congestion window of back-to- - back packets. This problem of sending large bursts of packets back- - to-back can be effectively handled using rate-based pacing (RBP, - - - -Handley, et al. Experimental [Page 3] - -RFC 2861 TCP Congestion Window Validation June 2000 - - - [VH97]), or using a maximum burst size control [FF96]. We would - contend that, even with mechanisms for limiting the sending of back- - to-back packets or pacing packets out over the period of a roundtrip - time, an old congestion window that has not been fully used for some - time can not be trusted as an indication of the bandwidth currently - available for that flow. We would contend that the mechanisms to - pace out packets allowed by the congestion window are largely - orthogonal to the algorithms used to determine the appropriate size - of the congestion window. - -3. Description - - When a TCP sender has sufficient data available to fill the available - network capacity for that flow, cwnd and ssthresh get set to - appropriate values for the network conditions. 
When a TCP sender - stops sending, the flow stops sampling the network conditions, and so - the value of the congestion window may become inaccurate. We believe - the correct conservative behavior under these circumstances is to - decay the congestion window by half for every RTT that the flow - remains inactive. The value of half is a very conservative figure - based on how quickly multiplicative decrease would have decayed the - window in the presence of loss. - - Another possibility is that the sender may not stop sending, but may - become application-limited rather than network-limited, and offer - less data to the network than the congestion window allows to be - sent. In this case the TCP flow is still sampling network - conditions, but is not offering sufficient traffic to be sure that - there is still sufficient capacity in the network for that flow to - send a full congestion window. Under these circumstances we believe - the correct conservative behavior is for the sender to keep track of - the maximum amount of the congestion window used during each RTT, and - to decay the congestion window each RTT to midway between the current - cwnd value and the maximum value used. - - Before the congestion window is reduced, ssthresh is set to the - maximum of its current value and 3/4 cwnd. If the sender then has - more data to send than the decayed cwnd allows, the TCP will slow- - start (perform exponential increase) at least half-way back up to the - old value of cwnd. - - The justification for this value of "3/4 cwnd" is that 3/4 cwnd is a - conservative estimate of the recent average value of the congestion - window, and the TCP should safely be able to slow-start at least up - to this point. For a TCP in steady-state that has been reducing its - congestion window each time the congestion window reached some - maximum value `maxwin', the average congestion window has been 3/4 - maxwin. 
On average, when the connection becomes application-limited, - - - -Handley, et al. Experimental [Page 4] - -RFC 2861 TCP Congestion Window Validation June 2000 - - - cwnd will be 3/4 maxwin, and in this case cwnd itself represents the - average value of the congestion window. However, if the connection - happens to become application-limited when cwnd equals maxwin, then - the average value of the congestion window is given by 3/4 cwnd. - - An alternate possibility would be to set ssthresh to the maximum of - the current value of ssthresh, and the old value of cwnd, allowing - TCP to slow-start all of the way back up to the old value of cwnd. - Further experimentation can be used to evaluate these two options for - setting ssthresh. - - For the separate issue of the increase of the congestion window in - response to an acknowledgement, we believe the correct behavior is - for the sender to increase the congestion window only if the window - was full when the acknowledgment arrived. - - We term this set of modifications to TCP Congestion Window Validation - (CWV) because they are related to ensuring the congestion window is - always a valid reflection of the current network state as probed by - the connection. - -3.1. The basic algorithm for reducing the congestion window - - A key issue in the CWV algorithm is to determine how to apply the - guideline of reducing the congestion window once for every roundtrip - time that the flow is application-limited. We use TCP's - retransmission timer (RTO) as a reasonable upper bound on the - roundtrip time, and reduce the congestion window roughly once per - RTO. - - This basic algorithm could be implemented in TCP as follows: When TCP - sends a new packet it checks to see if more than RTO seconds have - elapsed since the previous packet was sent. 
If RTO has elapsed, - ssthresh is set to the maximum of 3/4 cwnd and the current value of - ssthresh, and then the congestion window is halved for every RTO that - elapsed since the previous packet was sent. In addition, T_prev is - set to the current time, and W_used is reset to zero. T_prev will be - used to determine the elapsed time since the sender last was network- - limited or had reduced cwnd after an idle period. When the sender is - application-limited, W_used holds the maximum congestion window - actually used since the sender was last network-limited. - - The mechanism for determining the number of RTOs in the most recent - idle period could also be implemented by using a timer that expires - every RTO after the last packet was sent instead of a check per - packet - efficiency constraints on different operating systems may - dictate which is more efficient to implement. - - - - -Handley, et al. Experimental [Page 5] - -RFC 2861 TCP Congestion Window Validation June 2000 - - - After TCP sends a packet, it also checks to see if that packet filled - the congestion window. If so, the sender is network-limited, and - sets the variable T_prev to the current TCP clock time, and the - variable W_used to zero. - - When TCP sends a packet that does not fill the congestion window, and - the TCP send queue is empty, then the sender is application-limited. - The sender checks to see if the amount of unacknowledged data is - greater than W_used; if so, W_used is set to the amount of - unacknowledged data. In addition TCP checks to see if the elapsed - time since T_prev is greater than RTO. If so, then the TCP has not - just reduced its congestion window following an idle period. The TCP - has been application-limited rather than network-limited for at least - an entire RTO interval, but for less than two RTO intervals. 
In this - case, TCP sets ssthresh to the maximum of 3/4 cwnd and the current - value of ssthresh, and reduces its congestion window to - (cwnd+W_used)/2. W_used is then set to zero, and T_prev is set to - the current time, so a further reduction will not take place until at - least another RTO period has elapsed. Thus, during an application- - limited period the CWV algorithm reduces the congestion window once - per RTO. - -3.2. Pseudo-code for reducing the congestion window - - Initially: - T_last = tcpnow, T_prev = tcpnow, W_used = 0 - - After sending a data segment: - If tcpnow - T_last >= RTO - (The sender has been idle.) - ssthresh = max(ssthresh, 3*cwnd/4) - For i=1 To (tcpnow - T_last)/RTO - win = min(cwnd, receiver's declared max window) - cwnd = max(win/2, MSS) - T_prev = tcpnow - W_used = 0 - - T_last = tcpnow - - If window is full - T_prev = tcpnow - W_used = 0 - Else - If no more data is available to send - W_used = max(W_used, amount of unacknowledged data) - If tcpnow - T_prev >= RTO - (The sender has been application-limited.) - ssthresh = max(ssthresh, 3*cwnd/4) - - - -Handley, et al. Experimental [Page 6] - -RFC 2861 TCP Congestion Window Validation June 2000 - - - win = min(cwnd, receiver's declared max window) - cwnd = (win + W_used)/2 - T_prev = tcpnow - W_used = 0 - -4. Simulations - - The CWV proposal has been implemented as an option in the network - simulator NS [NS]. The simulations in the validation test suite for - CWV can be run with the command "./test-all-tcp" in the directory - "tcl/test". The simulations show the use of CWV to reduce the - congestion window after a period when the TCP connection was - application-limited, and to limit the increase in the congestion - window when a transfer is application-limited. As the simulations - illustrate, the use of ssthresh to maintain connection history is a - critical part of the Congestion Window Validation algorithm. [HPF99] - discusses these simulations in more detail. - -5. 
Experiments - - We have implemented the CWV mechanism in the TCP implementation in - FreeBSD 3.2. [HPF99] discusses these experiments in more detail. - - The first experiment examines the effects of the Congestion Window - Validation mechanisms for limiting cwnd increases during - application-limited periods. The experiment used a real ssh - connection through a modem link emulated using Dummynet [Dummynet]. - The link speed is 30Kb/s and the link has five packet buffers - available. Today most modem banks have more buffering available than - this, but the more buffer-limited situation sometimes occurs with - older modems. In the first half of the transfer, the user is typing - away over the connection. About half way through the time, the user - lists a moderately large file, which causes a large burst of traffic - to be transmitted. - - For the unmodified TCP, every returning ACK during the first part of - the transfer results in an increase in cwnd. As a result, the large - burst of data arriving from the application to the transport layer is - sent as many back-to-back packets, most of which get lost and - subsequently retransmitted. - - For the modified TCP with Congestion Window Validation, the - congestion window is not increased when the window is not full, and - has been decreased during application-limited periods closer to what - the user actually used. The burst of traffic is now constrained by - the congestion window, resulting in a better-behaved flow with - - - - - -Handley, et al. Experimental [Page 7] - -RFC 2861 TCP Congestion Window Validation June 2000 - - - minimal loss. The end result is that the transfer happens - approximately 30% faster than the transfer without CWV, due to - avoiding retransmission timeouts. - - The second experiment uses a real ssh connection over a real dialup - ppp connection, where the modem bank has much more buffering. 
For - the unmodified TCP, the initial burst from the large file does not - cause loss, but does cause the RTT to increase to approximately 5 - seconds, where the connection becomes bounded by the receiver's - window. - - For the modified TCP with Congestion Window Validation, the flow is - much better behaved, and produces no large burst of traffic. In this - case the linear increase for cwnd results in a slow increase in the - RTT as the buffer slowly fills. - - For the second experiment, both the modified and the unmodified TCP - finish delivering the data at precisely the same time. This is - because the link has been fully utilized in both cases due to the - modem buffer being larger than the receiver window. Clearly a modem - buffer of this size is undesirable due to its effect on the RTT of - competing flows, but it is necessary with current TCP implementations - that produce bursts similar to those shown in the top graph. - -6. Conclusions - - This document has presented several TCP algorithms for Congestion - Window Validation, to be employed after an idle period or a period in - which the sender was application-limited, and before an increase of - the congestion window. The goal of these algorithms is for TCP's - congestion window to reflect recent knowledge of the TCP connection - about the state of the network path, while at the same time keeping - some memory (i.e., in ssthresh) about the earlier state of the path. - We believe that these modifications will be of benefit to both the - network and to the TCP flows themselves, by preventing unnecessary - packet drops due to the TCP sender's failure to update its - information (or lack of information) about current network - conditions. Future work will document and investigate the benefit - provided by these algorithms, using both simulations and experiments. 
- Additional future work will describe a more complex version of the - CWV algorithm for TCP implementations where the sender does not have - an accurate estimate of the TCP roundtrip time. - - - - - - - - - -Handley, et al. Experimental [Page 8] - -RFC 2861 TCP Congestion Window Validation June 2000 - - -7. References - - [FF96] Fall, K., and Floyd, S., Simulation-based Comparisons of - Tahoe, Reno, and SACK TCP, Computer Communication Review, - V. 26 N. 3, July 1996, pp. 5-21. URL - "http://www.aciri.org/floyd/papers.html". - - [HPF99] Mark Handley, Jitendra Padhye, Sally Floyd, TCP Congestion - Window Validation, UMass CMPSCI Technical Report 99-77, - September 1999. URL "ftp://www- - net.cs.umass.edu/pub/Handley99-tcpq-tr-99-77.ps.gz". - - [HTH98] Amy Hughes, Joe Touch, John Heidemann, "Issues in TCP - Slow-Start Restart After Idle", Work in Progress. - - [J88] Jacobson, V., Congestion Avoidance and Control, Originally - from Proceedings of SIGCOMM '88 (Palo Alto, CA, Aug. - 1988), and revised in 1992. URL "http://www- - nrg.ee.lbl.gov/nrg-papers.html". - - [JKBFL96] Raj Jain, Shiv Kalyanaraman, Rohit Goyal, Sonia Fahmy, and - Fang Lu, Comments on "Use-it or Lose-it", ATM Forum - Document Number: ATM Forum/96-0178, URL - "http://www.netlab.ohio- - state.edu/~jain/atmf/af_rl5b2.htm". - - [JKGFL95] R. Jain, S. Kalyanaraman, R. Goyal, S. Fahmy, and F. Lu, A - Fix for Source End System Rule 5, AF-TM 95-1660, December - 1995, URL "http://www.netlab.ohio- - state.edu/~jain/atmf/af_rl52.htm". - - [MSML99] Matt Mathis, Jeff Semke, Jamshid Mahdavi, and Kevin Lahey, - The Rate-Halving Algorithm for TCP Congestion Control, - June 1999. URL - "http://www.psc.edu/networking/ftp/papers/draft- - ratehalving.txt". - - [NS] NS, the UCB/LBNL/VINT Network Simulator. URL - "http://www-mash.cs.berkeley.edu/ns/". - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, TCP Congestion - Control, RFC 2581, April 1999. - - [VH97] Vikram Visweswaraiah and John Heidemann. 
Improving Restart - of Idle TCP Connections, Technical Report 97-661, - University of Southern California, November, 1997. - - - - - -Handley, et al. Experimental [Page 9] - -RFC 2861 TCP Congestion Window Validation June 2000 - - - [Dummynet] Luigi Rizzo, "Dummynet and Forward Error Correction", - Freenix 98, June 1998, New Orleans. URL - "http://info.iet.unipi.it/~luigi/ip_dummynet/". - -8. Security Considerations - - General security considerations concerning TCP congestion control are - discussed in RFC 2581. This document describes a algorithm for one - aspect of those congestion control procedures, and so the - considerations described in RFC 2581 apply to this algorithm also. - There are no known additional security concerns for this specific - algorithm. - -9. Authors' Addresses - - Mark Handley - AT&T Center for Internet Research at ICSI (ACIRI) - - Phone: +1 510 666 2946 - EMail: mjh@aciri.org - URL: http://www.aciri.org/mjh/ - - - Jitendra Padhye - AT&T Center for Internet Research at ICSI (ACIRI) - - Phone: +1 510 666 2887 - EMail: padhye@aciri.org - URL: http://www-net.cs.umass.edu/~jitu/ - - - Sally Floyd - AT&T Center for Internet Research at ICSI (ACIRI) - - Phone: +1 510 666 2989 - EMail: floyd@aciri.org - URL: http://www.aciri.org/floyd/ - - - - - - - - - - - - - - -Handley, et al. Experimental [Page 10] - -RFC 2861 TCP Congestion Window Validation June 2000 - - -10. Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Handley, et al. Experimental [Page 11] - diff --git a/kernel/picotcp/RFC/rfc2873.txt b/kernel/picotcp/RFC/rfc2873.txt deleted file mode 100644 index e81822c..0000000 --- a/kernel/picotcp/RFC/rfc2873.txt +++ /dev/null @@ -1,451 +0,0 @@ - - - - - - -Network Working Group X. Xiao -Request for Comments: 2873 Global Crossing -Category: Standards Track A. Hannan - iVMG - V. Paxson - ACIRI/ICSI - E. Crabbe - Exodus Communications - June 2000 - - - TCP Processing of the IPv4 Precedence Field - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. 
- -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. - -Abstract - - This memo describes a conflict between TCP [RFC793] and DiffServ - [RFC2475] on the use of the three leftmost bits in the TOS octet of - an IPv4 header [RFC791]. In a network that contains DiffServ-capable - nodes, such a conflict can cause failures in establishing TCP - connections or can cause some established TCP connections to be reset - undesirably. This memo proposes a modification to TCP for resolving - the conflict. - - Because the IPv6 [RFC2460] traffic class octet does not have any - defined meaning except what is defined in RFC 2474, and in particular - does not define precedence or security parameter bits, there is no - conflict between TCP and DiffServ on the use of any bits in the IPv6 - traffic class octet. - -1. Introduction - - In TCP, each connection has a set of states associated with it. Such - states are reflected by a set of variables stored in the TCP Control - Block (TCB) of both ends. Such variables may include the local and - remote socket number, precedence of the connection, security level - - - - -Xiao, et al. Standards Track [Page 1] - -RFC 2873 TCP and the IPv4 Precedence Field June 2000 - - - and compartment, etc. Both ends must agree on the setting of the - precedence and security parameters in order to establish a connection - and keep it open. - - There is no field in the TCP header that indicates the precedence of - a segment. Instead, the precedence field in the header of the IP - packet is used as the indication. The security level and compartment - are likewise carried in the IP header, but as IP options rather than - a fixed header field. Because of this difference, the problem with - precedence discussed in this memo does not apply to them. - - TCP requires that the precedence (and security parameters) of a - connection must remain unchanged during the lifetime of the - connection. 
Therefore, for an established TCP connection with - precedence, the receipt of a segment with different precedence - indicates an error. The connection must be reset [RFC793, pp. 36, 37, - 40, 66, 67, 71]. - - With the advent of DiffServ, intermediate nodes may modify the - Differentiated Services Codepoint (DSCP) [RFC2474] of the IP header - to indicate the desired Per-hop Behavior (PHB) [RFC2475, RFC2597, - RFC2598]. The DSCP includes the three bits formerly known as the - precedence field. Because any modification to those three bits will - be considered illegal by endpoints that are precedence-aware, they - may cause failures in establishing connections, or may cause - established connections to be reset. - -2. Terminology - - Segment: the unit of data that TCP sends to IP - - Precedence Field: the three leftmost bits in the TOS octet of an IPv4 - header. Note that in DiffServ, these three bits may or may not be - used to denote the precedence of the IP packet. There is no - precedence field in the traffic class octet in IPv6. - - TOS Field: bits 3-6 in the TOS octet of IPv4 header [RFC 1349]. - - MBZ field: Must Be Zero - - The structure of the TOS octet is depicted below: - - 0 1 2 3 4 5 6 7 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | PRECEDENCE | TOS | MBZ | - +-----+-----+-----+-----+-----+-----+-----+-----+ - - - - - -Xiao, et al. Standards Track [Page 2] - -RFC 2873 TCP and the IPv4 Precedence Field June 2000 - - - DS Field: the TOS octet of an IPv4 header is renamed the - Differentiated Services (DS) Field by DiffServ. - - The structure of the DS field is depicted below: - - 0 1 2 3 4 5 6 7 - +---+---+---+---+---+---+---+---+ - | DSCP | CU | - +---+---+---+---+---+---+---+---+ - - DSCP: Differentiated Service Code Point, the leftmost 6 bits in the - DS field. - - CU: currently unused. 
- - Per-hop Behavior (PHB): a description of the externally observable - forwarding treatment applied at a differentiated services-compliant - node to a behavior aggregate. - -3. Problem Description - - The manipulation of the DSCP to achieve the desired PHB by DiffServ- - capable nodes may conflict with TCP's use of the precedence field. - This conflict can potentially cause problems for TCP implementations - that conform to RFC 793. First, page 36 of RFC 793 states: - - If the connection is in any non-synchronized state (LISTEN, SYN- - SENT, SYN-RECEIVED), and the incoming segment acknowledges - something not yet sent (the segment carries an unacceptable ACK), - or if an incoming segment has a security level or compartment - which does not exactly match the level and compartment requested - for the connection, a reset is sent. If our SYN has not been - acknowledged and the precedence level of the incoming segment is - higher than the precedence level requested then either raise the - local precedence level (if allowed by the user and the system) or - send a reset; or if the precedence level of the incoming segment - is lower than the precedence level requested then continue as if - the precedence matched exactly (if the remote TCP cannot raise - the precedence level to match ours this will be detected in the - next segment it sends, and the connection will be terminated - then). If our SYN has been acknowledged (perhaps in this incoming - segment) the precedence level of the incoming segment must match - the local precedence level exactly, if it does not a reset must - be sent. - - This leads to Problem #1: For a precedence-aware TCP module, if - during TCP's synchronization process, the precedence fields of the - SYN and/or ACK packets are modified by the intermediate nodes, - - - -Xiao, et al. 
Standards Track [Page 3] - -RFC 2873 TCP and the IPv4 Precedence Field June 2000 - - - resulting in the received ACK packet having a different precedence - from the precedence picked by this TCP module, the TCP connection - cannot be established, even if both modules actually agree on an - identical precedence for the connection. - - Then, on page 37, RFC 793 states: - - If the connection is in a synchronized state (ESTABLISHED, FIN- - WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT), - security level, or compartment, or precedence which does not - exactly match the level, and compartment, and precedence - requested for the connection, a reset is sent and connection goes - to the CLOSED state. - - This leads to Problem #2: For a precedence-aware TCP module, if the - precedence field of a received segment from an established TCP - connection has been changed en route by the intermediate nodes so as - to be different from the precedence specified during the connection - setup, the TCP connection will be reset. - - Each of problems #1 and #2 has a mirroring problem. They cause TCP - connections that must be reset according to RFC 793 not to be reset. - - Problem #3: A TCP connection may be established between two TCP - modules that pick different precedence, because the precedence fields - of the SYN and ACK packets are modified by intermediate nodes, - resulting in both modules thinking that they are in agreement for the - precedence of the connection. - - Problem #4: A TCP connection has been established normally by two - TCP modules that pick the same precedence. But in the middle of the - data transmission, one of the TCP modules changes the precedence of - its segments. According to RFC 793, the TCP connection must be reset. - In a DiffServ-capable environment, if the precedence of the segments - is altered by intermediate nodes such that it retains the expected - value when arriving at the other TCP module, the connection will not - be reset. - -4. 
Proposed Modification to TCP - - The proposed modification to TCP is that TCP must ignore the - precedence of all received segments. More specifically: - - (1) In TCP's synchronization process, the TCP modules at both ends - must ignore the precedence fields of the SYN and SYN ACK packets. The - TCP connection will be established if all the conditions specified by - RFC 793 are satisfied except the precedence of the connection. - - - - -Xiao, et al. Standards Track [Page 4] - -RFC 2873 TCP and the IPv4 Precedence Field June 2000 - - - (2) After a connection is established, each end sends segments with - its desired precedence. The precedence picked by one end of the TCP - connection may be the same or may be different from the precedence - picked by the other end (because precedence is ignored during - connection setup time). The precedence fields may be changed by the - intermediate nodes too. In either case, the precedence of the - received packets will be ignored by the other end. The TCP connection - will not be reset in either case. - - Problems #1 and #2 are solved by this proposed modification. Problems - #3 and #4 become non-issues because TCP must ignore the precedence. - In a DiffServ-capable environment, the two cases described in - problems #3 and #4 should be allowed. - -5. Security Considerations - - A TCP implementation that terminates a connection upon receipt of any - segment with an incorrect precedence field, regardless of the - correctness of the sequence numbers in the segment's header, poses a - serious denial-of-service threat, as all an attacker must do to - terminate a connection is guess the port numbers and then send two - segments with different precedence values; one of them is certain to - terminate the connection. Accordingly, the change to TCP processing - proposed in this memo would yield a significant gain in terms of that - TCP implementation's resilience. 
- - On the other hand, the stricter processing rules of RFC 793 in - principle make TCP spoofing attacks more difficult, as the attacker - must not only guess the victim TCP's initial sequence number, but - also its precedence setting. - - Finally, the security issues of each PHB group are addressed in the - PHB group's specification [RFC2597, RFC2598]. - -6. Acknowledgments - - Our thanks to Al Smith for his careful review and comments. - - - - - - - - - - - - - - -Xiao, et al. Standards Track [Page 5] - -RFC 2873 TCP and the IPv4 Precedence Field June 2000 - - -7. References - - [RFC791] Postel, J., "Internet Protocol", STD 5, RFC 791, September - 1981. - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC1349] Almquist, P., "Type of Service in the Internet Protocol - Suite", RFC 1349, July 1992. - - [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 - (IPv6) Specification", RFC 2460, December 1998. - - [RFC2474] Nichols, K., Blake, S., Baker, F. and D. Black, "Definition - of the Differentiated Services Field (DS Field) in the IPv4 - and IPv6 Headers", RFC 2474, December 1998. - - [RFC2475] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. and - W. Weiss, "An Architecture for Differentiated Services", - RFC 2475, December 1998. - - [RFC2597] Heinanen, J., Baker, F., Weiss, W. and J. Wroclawski, - "Assured Forwarding PHB Group", RFC 2597, June 1999. - - [RFC2598] Jacobson, V., Nichols, K. and K. Poduri, "An Expedited - Forwarding PHB", RFC 2598, June 1999. - - - - - - - - - - - - - - - - - - - - - - - - -Xiao, et al. Standards Track [Page 6] - -RFC 2873 TCP and the IPv4 Precedence Field June 2000 - - -8. Authors' Addresses - - Xipeng Xiao - Global Crossing - 141 Caspian Court - Sunnyvale, CA 94089 - USA - - Phone: +1 408-543-4801 - EMail: xipeng@gblx.net - - - Alan Hannan - iVMG, Inc.
- 112 Falkirk Court - Sunnyvale, CA 94087 - USA - - Phone: +1 408-749-7084 - EMail: alan@ivmg.net - - - Edward Crabbe - Exodus Communications - 2650 San Tomas Expressway - Santa Clara, CA 95051 - USA - - Phone: +1 408-346-1544 - EMail: edc@explosive.net - - - Vern Paxson - ACIRI/ICSI - 1947 Center Street - Suite 600 - Berkeley, CA 94704-1198 - USA - - Phone: +1 510-666-2882 - EMail: vern@aciri.org - - - - - - - - - - -Xiao, et al. Standards Track [Page 7] - -RFC 2873 TCP and the IPv4 Precedence Field June 2000 - - -9. Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Xiao, et al. Standards Track [Page 8] - diff --git a/kernel/picotcp/RFC/rfc2883.txt b/kernel/picotcp/RFC/rfc2883.txt deleted file mode 100644 index da6b6ab..0000000 --- a/kernel/picotcp/RFC/rfc2883.txt +++ /dev/null @@ -1,955 +0,0 @@ - - - - - - -Network Working Group S. Floyd -Request for Comments: 2883 ACIRI -Category: Standards Track J. Mahdavi - Novell - M. Mathis - Pittsburgh Supercomputing Center - M. Podolsky - UC Berkeley - July 2000 - - - An Extension to the Selective Acknowledgement (SACK) Option for TCP - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. - -Abstract - - This note defines an extension of the Selective Acknowledgement - (SACK) Option [RFC2018] for TCP. RFC 2018 specified the use of the - SACK option for acknowledging out-of-sequence data not covered by - TCP's cumulative acknowledgement field. This note extends RFC 2018 - by specifying the use of the SACK option for acknowledging duplicate - packets. This note suggests that when duplicate packets are - received, the first block of the SACK option field can be used to - report the sequence numbers of the packet that triggered the - acknowledgement. This extension to the SACK option allows the TCP - sender to infer the order of packets received at the receiver, - allowing the sender to infer when it has unnecessarily retransmitted - a packet. 
A TCP sender could then use this information for more - robust operation in an environment of reordered packets [BPS99], ACK - loss, packet replication, and/or early retransmit timeouts. - -1. Conventions and Acronyms - - The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, - SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this - document, are to be interpreted as described in [B97]. - - - - -Floyd, et al. Standards Track [Page 1] - -RFC 2883 SACK Extension July 2000 - - -2. Introduction - - The Selective Acknowledgement (SACK) option defined in RFC 2018 is - used by the TCP data receiver to acknowledge non-contiguous blocks of - data not covered by the Cumulative Acknowledgement field. However, - RFC 2018 does not specify the use of the SACK option when duplicate - segments are received. This note specifies the use of the SACK - option when acknowledging the receipt of a duplicate packet [F99]. - We use the term D-SACK (for duplicate-SACK) to refer to a SACK block - that reports a duplicate segment. - - This document does not make any changes to TCP's use of the - cumulative acknowledgement field, or to the TCP receiver's decision - of *when* to send an acknowledgement packet. This document only - concerns the contents of the SACK option when an acknowledgement is - sent. - - This extension is compatible with current implementations of the SACK - option in TCP. That is, if one of the TCP end-nodes does not - implement this D-SACK extension and the other TCP end-node does, we - believe that this use of the D-SACK extension by one of the end nodes - will not introduce problems. - - The use of D-SACK does not require separate negotiation between a TCP - sender and receiver that have already negotiated SACK capability. - The absence of separate negotiation for D-SACK means that the TCP - receiver could send D-SACK blocks when the TCP sender does not - understand this extension to SACK. 
In this case, the TCP sender will - simply discard any D-SACK blocks, and process the other SACK blocks - in the SACK option field as it normally would. - - - - - - - - - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 2] - -RFC 2883 SACK Extension July 2000 - - -3. The Sack Option Format as defined in RFC 2018 - - The SACK option as defined in RFC 2018 is as follows: - - +--------+--------+ - | Kind=5 | Length | - +--------+--------+--------+--------+ - | Left Edge of 1st Block | - +--------+--------+--------+--------+ - | Right Edge of 1st Block | - +--------+--------+--------+--------+ - | | - / . . . / - | | - +--------+--------+--------+--------+ - | Left Edge of nth Block | - +--------+--------+--------+--------+ - | Right Edge of nth Block | - +--------+--------+--------+--------+ - - The Selective Acknowledgement (SACK) option in the TCP header - contains a number of SACK blocks, where each block specifies the left - and right edge of a block of data received at the TCP receiver. In - particular, a block represents a contiguous sequence space of data - received and queued at the receiver, where the "left edge" of the - block is the first sequence number of the block, and the "right edge" - is the sequence number immediately following the last sequence number - of the block. - - RFC 2018 implies that the first SACK block specify the segment that - triggered the acknowledgement. From RFC 2018, when the data receiver - chooses to send a SACK option, "the first SACK block ... MUST specify - the contiguous block of data containing the segment which triggered - this ACK, unless that segment advanced the Acknowledgment Number - field in the header." - - However, RFC 2018 does not address the use of the SACK option when - acknowledging a duplicate segment. For example, RFC 2018 specifies - that "each block represents received bytes of data that are - contiguous and isolated". 
RFC 2018 further specifies that "if sent - at all, SACK options SHOULD be included in all ACKs which do not ACK - the highest sequence number in the data receiver's queue." RFC 2018 - does not specify the use of the SACK option when a duplicate segment - is received, and the cumulative acknowledgement field in the ACK - acknowledges all of the data in the data receiver's queue. - - - - - - -Floyd, et al. Standards Track [Page 3] - -RFC 2883 SACK Extension July 2000 - - -4. Use of the SACK option for reporting a duplicate segment - - This section specifies the use of SACK blocks when the SACK option is - used in reporting a duplicate segment. When D-SACK is used, the - first block of the SACK option should be a D-SACK block specifying - the sequence numbers for the duplicate segment that triggers the - acknowledgement. If the duplicate segment is part of a larger block - of non-contiguous data in the receiver's data queue, then the - following SACK block should be used to specify this larger block. - Additional SACK blocks can be used to specify additional non- - contiguous blocks of data, as specified in RFC 2018. - - The guidelines for reporting duplicate segments are summarized below: - - (1) A D-SACK block is only used to report a duplicate contiguous - sequence of data received by the receiver in the most recent packet. - - (2) Each duplicate contiguous sequence of data received is reported - in at most one D-SACK block. (I.e., the receiver sends two identical - D-SACK blocks in subsequent packets only if the receiver receives two - duplicate segments.) - - (3) The left edge of the D-SACK block specifies the first sequence - number of the duplicate contiguous sequence, and the right edge of - the D-SACK block specifies the sequence number immediately following - the last sequence in the duplicate contiguous sequence. 
- - (4) If the D-SACK block reports a duplicate contiguous sequence from - a (possibly larger) block of data in the receiver's data queue above - the cumulative acknowledgement, then the second SACK block in that - SACK option should specify that (possibly larger) block of data. - - (5) Following the SACK blocks described above for reporting duplicate - segments, additional SACK blocks can be used for reporting additional - blocks of data, as specified in RFC 2018. - - Note that because each duplicate segment is reported in only one ACK - packet, information about that duplicate segment will be lost if that - ACK packet is dropped in the network. - -4.1 Reporting Full Duplicate Segments - - We illustrate these guidelines with three examples. In each example, - we assume that the data receiver has first received eight segments of - 500 bytes each, and has sent an acknowledgement with the cumulative - acknowledgement field set to 4000 (assuming the first sequence number - is zero). The D-SACK block is underlined in each example. - - - - -Floyd, et al. Standards Track [Page 4] - -RFC 2883 SACK Extension July 2000 - - -4.1.1. Example 1: Reporting a duplicate segment. - - Because several ACK packets are lost, the data sender retransmits - packet 3000-3499, and the data receiver subsequently receives a - duplicate segment with sequence numbers 3000-3499. The receiver - sends an acknowledgement with the cumulative acknowledgement field - set to 4000, and the first, D-SACK block specifying sequence numbers - 3000-3500. - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 3000-3499 3000-3499 3500 (ACK dropped) - 3500-3999 3500-3999 4000 (ACK dropped) - 3000-3499 3000-3499 4000, SACK=3000-3500 - --------- -4.1.2. Example 2: Reporting an out-of-order segment and a duplicate - segment. - - Following a lost data packet, the receiver receives an out-of-order - data segment, which triggers the SACK option as specified in RFC - 2018. 
Because of several lost ACK packets, the sender then - retransmits a data packet. The receiver receives the duplicate - packet, and reports it in the first, D-SACK block: - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 3000-3499 3000-3499 3500 (ACK dropped) - 3500-3999 3500-3999 4000 (ACK dropped) - 4000-4499 (data packet dropped) - 4500-4999 4500-4999 4000, SACK=4500-5000 (ACK dropped) - 3000-3499 3000-3499 4000, SACK=3000-3500, 4500-5000 - --------- - - - - - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 5] - -RFC 2883 SACK Extension July 2000 - - -4.1.3. Example 3: Reporting a duplicate of an out-of-order segment. - - Because of a lost data packet, the receiver receives two out-of-order - segments. The receiver next receives a duplicate segment for one of - these out-of-order segments: - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 3500-3999 3500-3999 4000 - 4000-4499 (data packet dropped) - 4500-4999 4500-4999 4000, SACK=4500-5000 - 5000-5499 5000-5499 4000, SACK=4500-5500 - (duplicated packet) - 5000-5499 4000, SACK=5000-5500, 4500-5500 - --------- -4.2. Reporting Partial Duplicate Segments - - It may be possible that a sender transmits a packet that includes one - or more duplicate sub-segments--that is, only part but not all of the - transmitted packet has already arrived at the receiver. This can - occur when the size of the sender's transmitted segments increases, - which can occur when the PMTU increases in the middle of a TCP - session, for example. The guidelines in Section 4 above apply to - reporting partial as well as full duplicate segments. This section - gives examples of these guidelines when reporting partial duplicate - segments. - - When the SACK option is used for reporting partial duplicate - segments, the first D-SACK block reports the first duplicate sub- - segment. 
If the data packet being acknowledged contains multiple - partial duplicate sub-segments, then only the first such duplicate - sub-segment is reported in the SACK option. We illustrate this with - the examples below. - -4.2.1. Example 4: Reporting a single duplicate subsegment. - - The sender increases the packet size from 500 bytes to 1000 bytes. - The receiver subsequently receives a 1000-byte packet containing one - 500-byte subsegment that has already been received and one which has - not. The receiver reports only the already received subsegment using - a single D-SACK block. - - - - - - - - - -Floyd, et al. Standards Track [Page 6] - -RFC 2883 SACK Extension July 2000 - - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 500-999 500-999 1000 - 1000-1499 (delayed) - 1500-1999 (data packet dropped) - 2000-2499 2000-2499 1000, SACK=2000-2500 - 1000-2000 1000-1499 1500, SACK=2000-2500 - 1000-2000 2500, SACK=1000-1500 - --------- - -4.2.2. Example 5: Two non-contiguous duplicate subsegments covered by - the cumulative acknowledgement. - - After the sender increases its packet size from 500 bytes to 1500 - bytes, the receiver receives a packet containing two non-contiguous - duplicate 500-byte subsegments which are less than the cumulative - acknowledgement field. The receiver reports the first such duplicate - segment in a single D-SACK block. - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 500-999 500-999 1000 - 1000-1499 (delayed) - 1500-1999 (data packet dropped) - 2000-2499 (delayed) - 2500-2999 (data packet dropped) - 3000-3499 3000-3499 1000, SACK=3000-3500 - 1000-2499 1000-1499 1500, SACK=3000-3500 - 2000-2499 1500, SACK=2000-2500, 3000-3500 - 1000-2499 2500, SACK=1000-1500, 3000-3500 - --------- - -4.2.3. Example 6: Two non-contiguous duplicate subsegments not covered - by the cumulative acknowledgement. 
- - This example is similar to Example 5, except that after the sender - increases the packet size, the receiver receives a packet containing - two non-contiguous duplicate subsegments which are above the - cumulative acknowledgement field, rather than below. The first, D- - SACK block reports the first duplicate subsegment, and the second, - SACK block reports the larger block of non-contiguous data that it - belongs to. - - - - - - - -Floyd, et al. Standards Track [Page 7] - -RFC 2883 SACK Extension July 2000 - - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 500-999 500-999 1000 - 1000-1499 (data packet dropped) - 1500-1999 (delayed) - 2000-2499 (data packet dropped) - 2500-2999 (delayed) - 3000-3499 (data packet dropped) - 3500-3999 3500-3999 1000, SACK=3500-4000 - 1000-1499 (data packet dropped) - 1500-2999 1500-1999 1000, SACK=1500-2000, 3500-4000 - 2000-2499 1000, SACK=2000-2500, 1500-2000, - 3500-4000 - 1500-2999 1000, SACK=1500-2000, 1500-3000, - --------- - 3500-4000 - -4.3. Interaction Between D-SACK and PAWS - - RFC 1323 [RFC1323] specifies an algorithm for Protection Against - Wrapped Sequence Numbers (PAWS). PAWS gives a method for - distinguishing between sequence numbers for new data, and sequence - numbers from a previous cycle through the sequence number space. - Duplicate segments might be detected by PAWS as belonging to a - previous cycle through the sequence number space. - - RFC 1323 specifies that for such packets, the receiver should do the - following: - - Send an acknowledgement in reply as specified in RFC 793 page 69, - and drop the segment. - - Since PAWS still requires sending an ACK, there is no harmful - interaction between PAWS and the use of D-SACK. The D-SACK block can - be included in the SACK option of the ACK, as outlined in Section 4, - independently of the use of PAWS by the TCP receiver, and - independently of the determination by PAWS of the validity or - invalidity of the data segment. 
- - TCP senders receiving D-SACK blocks should be aware that a segment - reported as a duplicate segment could possibly have been from a prior - cycle through the sequence number space. This is independent of the - use of PAWS by the TCP data receiver. We do not anticipate that this - will present significant problems for senders using D-SACK - information. - - - - - -Floyd, et al. Standards Track [Page 8] - -RFC 2883 SACK Extension July 2000 - - -5. Detection of Duplicate Packets - - This extension to the SACK option enables the receiver to accurately - report the reception of duplicate data. Because each receipt of a - duplicate packet is reported in only one ACK packet, the loss of a - single ACK can prevent this information from reaching the sender. In - addition, we note that the sender can not necessarily trust the - receiver to send it accurate information [SCWA99]. - - In order for the sender to check that the first (D)SACK block of an - acknowledgement in fact acknowledges duplicate data, the sender - should compare the sequence space in the first SACK block to the - cumulative ACK which is carried IN THE SAME PACKET. If the SACK - sequence space is less than this cumulative ACK, it is an indication - that the segment identified by the SACK block has been received more - than once by the receiver. An implementation MUST NOT compare the - sequence space in the SACK block to the TCP state variable snd.una - (which carries the total cumulative ACK), as this may result in the - wrong conclusion if ACK packets are reordered. - - If the sequence space in the first SACK block is greater than the - cumulative ACK, then the sender next compares the sequence space in - the first SACK block with the sequence space in the second SACK - block, if there is one. This comparison can determine if the first - SACK block is reporting duplicate data that lies above the cumulative - ACK. 
- - TCP implementations which follow RFC 2581 [RFC2581] could see - duplicate packets in each of the following four situations. This - document does not specify what action a TCP implementation should - take in these cases. The extension to the SACK option simply enables - the sender to detect each of these cases. Note that these four - conditions are not an exhaustive list of possible cases for duplicate - packets, but are representative of the most common/likely cases. - Subsequent documents will describe experimental proposals for sender - responses to the detection of unnecessary retransmits due to - reordering, lost ACKS, or early retransmit timeouts. - - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 9] - -RFC 2883 SACK Extension July 2000 - - -5.1. Replication by the network - - If a packet is replicated in the network, this extension to the SACK - option can identify this. For example: - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 500-999 500-999 1000 - 1000-1499 1000-1499 1500 - (replicated) - 1000-1499 1500, SACK=1000-1500 - --------- - - In this case, the second packet was replicated in the network. An - ACK containing a D-SACK block which is lower than its ACK field and - is not identical to a previously retransmitted segment is indicative - of a replication by the network. - - WITHOUT D-SACK: - - If D-SACK was not used and the last ACK was piggybacked on a data - packet, the sender would not know that a packet had been replicated - in the network. If D-SACK was not used and neither of the last two - ACKs was piggybacked on a data packet, then the sender could - reasonably infer that either some data packet *or* the final ACK - packet had been replicated in the network. The receipt of the D-SACK - packet gives the sender positive knowledge that this data packet was - replicated in the network (assuming that the receiver is not lying). 
- - RESEARCH ISSUES: - - The current SACK option already allows the sender to identify - duplicate ACKs that do not acknowledge new data, but the D-SACK - option gives the sender a stronger basis for inferring that a - duplicate ACK does not acknowledge new data. The knowledge that a - duplicate ACK does not acknowledge new data allows the sender to - refrain from using that duplicate ACKs to infer packet loss (e.g., - Fast Retransmit) or to send more data (e.g., Fast Recovery). - -5.2. False retransmit due to reordering - - If packets are reordered in the network such that a segment arrives - more than 3 packets out of order, TCP's Fast Retransmit algorithm - will retransmit the out-of-order packet. An example of this is shown - below: - - - - - -Floyd, et al. Standards Track [Page 10] - -RFC 2883 SACK Extension July 2000 - - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 500-999 500-999 1000 - 1000-1499 (delayed) - 1500-1999 1500-1999 1000, SACK=1500-2000 - 2000-2499 2000-2499 1000, SACK=1500-2500 - 2500-2999 2500-2999 1000, SACK=1500-3000 - 1000-1499 1000-1499 3000 - 1000-1499 3000, SACK=1000-1500 - --------- - - In this case, an ACK containing a SACK block which is lower than its - ACK field and identical to a previously retransmitted segment is - indicative of a significant reordering followed by a false - (unnecessary) retransmission. - - WITHOUT D-SACK: - - With the use of D-SACK illustrated above, the sender knows that - either the first transmission of segment 1000-1499 was delayed in the - network, or the first transmission of segment 1000-1499 was dropped - and the second transmission of segment 1000-1499 was duplicated. - Given that no other segments have been duplicated in the network, - this second option can be considered unlikely. 
- - Without the use of D-SACK, the sender would only know that either the - first transmission of segment 1000-1499 was delayed in the network, - or that either one of the data segments or the final ACK was - duplicated in the network. Thus, the use of D-SACK allows the sender - to more reliably infer that the first transmission of segment - 1000-1499 was not dropped. - - [AP99], [L99], and [LK00] note that the sender could unambiguously - detect an unnecessary retransmit with the use of the timestamp - option. [LK00] proposes a timestamp-based algorithm that minimizes - the penalty for an unnecessary retransmit. [AP99] proposes a - heuristic for detecting an unnecessary retransmit in an environment - with neither timestamps nor SACK. [L99] also proposes a two-bit - field as an alternate to the timestamp option for unambiguously - marking the first three retransmissions of a packet. A similar idea - was proposed in [ISO8073]. - - RESEARCH ISSUES: - - The use of D-SACK allows the sender to detect some cases (e.g., when - no ACK packets have been lost) when a Fast Retransmit was due to - packet reordering instead of packet loss. This allows the TCP sender - - - -Floyd, et al. Standards Track [Page 11] - -RFC 2883 SACK Extension July 2000 - - - to adjust the duplicate acknowledgment threshold, to prevent such - unnecessary Fast Retransmits in the future. Coupled with this, when - the sender determines, after the fact, that it has made an - unnecessary window reduction, the sender has the option of "undoing" - that reduction in the congestion window by resetting ssthresh to the - value of the old congestion window, and slow-starting until the - congestion window has reached that point. - - Any proposal for "undoing" a reduction in the congestion window would - have to address the possibility that the TCP receiver could be lying - in its reports of received packets [SCWA99]. - -5.3.
Retransmit Timeout Due to ACK Loss - - If an entire window of ACKs is lost, a timeout will result. An - example of this is given below: - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 500-999 500-999 1000 (ACK dropped) - 1000-1499 1000-1499 1500 (ACK dropped) - 1500-1999 1500-1999 2000 (ACK dropped) - 2000-2499 2000-2499 2500 (ACK dropped) - (timeout) - 500-999 500-999 2500, SACK=500-1000 - -------- - - In this case, all of the ACKs are dropped, resulting in a timeout. - This condition can be identified because the first ACK received - following the timeout carries a D-SACK block indicating duplicate - data was received. - - WITHOUT D-SACK: - - Without the use of D-SACK, the sender in this case would be unable to - decide that no data packets has been dropped. - - RESEARCH ISSUES: - - For a TCP that implements some form of ACK congestion control - [BPK97], this ability to distinguish between dropped data packets and - dropped ACK packets would be particularly useful. In this case, the - connection could implement congestion control for the return (ACK) - path independently from the congestion control on the forward (data) - path. - - - - - -Floyd, et al. Standards Track [Page 12] - -RFC 2883 SACK Extension July 2000 - - -5.4. Early Retransmit Timeout - - If the sender's RTO is too short, an early retransmission timeout can - occur when no packets have in fact been dropped in the network. An - example of this is given below: - - Transmitted Received ACK Sent - Segment Segment (Including SACK Blocks) - - 500-999 (delayed) - 1000-1499 (delayed) - 1500-1999 (delayed) - 2000-2499 (delayed) - (timeout) - 500-999 (delayed) - 500-999 1000 - 1000-1499 (delayed) - 1000-1499 1500 - ... - 1500-1999 2000 - 2000-2499 2500 - 500-999 2500, SACK=500-1000 - -------- - 1000-1499 2500, SACK=1000-1500 - --------- - ... - - In this case, the first packet is retransmitted following the - timeout. 
Subsequently, the original window of packets arrives at the - receiver, resulting in ACKs for these segments. Following this, the - retransmissions of these segments arrive, resulting in ACKs carrying - SACK blocks which identify the duplicate segments. - - This can be identified as an early retransmission timeout because the - ACK for byte 1000 is received after the timeout with no SACK - information, followed by an ACK which carries SACK information (500- - 999) indicating that the retransmitted segment had already been - received. - - WITHOUT D-SACK: - - If D-SACK was not used and one of the duplicate ACKs was piggybacked - on a data packet, the sender would not know how many duplicate - packets had been received. If D-SACK was not used and none of the - duplicate ACKs were piggybacked on a data packet, then the sender - would have sent N duplicate packets, for some N, and received N - duplicate ACKs. In this case, the sender could reasonably infer that - - - - -Floyd, et al. Standards Track [Page 13] - -RFC 2883 SACK Extension July 2000 - - - some data or ACK packet had been replicated in the network, or that - an early retransmission timeout had occurred (or that the receiver is - lying). - - RESEARCH ISSUES: - - After the sender determines that an unnecessary (i.e., early) - retransmit timeout has occurred, the sender could adjust parameters - for setting the RTO, to prevent more unnecessary retransmit timeouts. - Coupled with this, when the sender determines, after the fact, that - it has made an unnecessary window reduction, the sender has the - option of "undoing" that reduction in the congestion window. - -6. Security Considerations - - This document neither strengthens nor weakens TCP's current security - properties. - -7. Acknowledgements - - We would like to thank Mark Handley, Reiner Ludwig, and Venkat - Padmanabhan for conversations on these issues, and to thank Mark - Allman for helpful feedback on this document. - -8. 
References - - [AP99] Mark Allman and Vern Paxson, On Estimating End-to-End - Network Path Properties, SIGCOMM 99, August 1999. URL - "http://www.acm.org/sigcomm/sigcomm99/papers/session7- - 3.html". - - [BPS99] J.C.R. Bennett, C. Partridge, and N. Shectman, Packet - Reordering is Not Pathological Network Behavior, IEEE/ACM - Transactions on Networking, Vol. 7, No. 6, December 1999, - pp. 789-798. - - [BPK97] Hari Balakrishnan, Venkata Padmanabhan, and Randy H. Katz, - The Effects of Asymmetry on TCP Performance, Third ACM/IEEE - Mobicom Conference, Budapest, Hungary, Sep 1997. URL - "http://www.cs.berkeley.edu/~padmanab/ - index.html#Publications". - - [F99] Floyd, S., Re: TCP and out-of-order delivery, Message ID - <199902030027.QAA06775@owl.ee.lbl.gov> to the end-to-end- - interest mailing list, February 1999. URL - "http://www.aciri.org/floyd/notes/TCP_Feb99.email". - - - - - -Floyd, et al. Standards Track [Page 14] - -RFC 2883 SACK Extension July 2000 - - - [ISO8073] ISO/IEC, Information-processing systems - Open Systems - Interconnection - Connection Oriented Transport Protocol - Specification, International Standard ISO/IEC 8073, December - 1988. - - [L99] Reiner Ludwig, A Case for Flow Adaptive Wireless links, - Technical Report UCB//CSD-99-1053, May 1999. URL - "http://iceberg.cs.berkeley.edu/papers/Ludwig- - FlowAdaptive/". - - [LK00] Reiner Ludwig and Randy H. Katz, The Eifel Algorithm: - Making TCP Robust Against Spurious Retransmissions, SIGCOMM - Computer Communication Review, V. 30, N. 1, January 2000. - URL "http://www.acm.org/sigcomm/ccr/archive/ccr-toc/ccr- - toc-2000.html". - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for - High Performance", RFC 1323, May 1992. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, April 1996. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999.
- - [SCWA99] Stefan Savage, Neal Cardwell, David Wetherall, Tom - Anderson, TCP Congestion Control with a Misbehaving - Receiver, ACM Computer Communications Review, pp. 71-78, V. - 29, N. 5, October, 1999. URL - "http://www.acm.org/sigcomm/ccr/archive/ccr-toc/ccr-toc- - 99.html". - - - - - - - - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 15] - -RFC 2883 SACK Extension July 2000 - - -Authors' Addresses - - Sally Floyd - AT&T Center for Internet Research at ICSI (ACIRI) - - Phone: +1 510-666-6989 - EMail: floyd@aciri.org - URL: http://www.aciri.org/floyd/ - - - Jamshid Mahdavi - Novell - - Phone: 1-408-967-3806 - EMail: mahdavi@novell.com - - - Matt Mathis - Pittsburgh Supercomputing Center - - Phone: 412 268-3319 - EMail: mathis@psc.edu - URL: http://www.psc.edu/~mathis/ - - - Matthew Podolsky - UC Berkeley Electrical Engineering & Computer Science Dept. - - Phone: 510-649-8914 - EMail: podolsky@eecs.berkeley.edu - URL: http://www.eecs.berkeley.edu/~podolsky - - - - - - - - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 16] - -RFC 2883 SACK Extension July 2000 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 17] - diff --git a/kernel/picotcp/RFC/rfc2884.txt b/kernel/picotcp/RFC/rfc2884.txt deleted file mode 100644 index 1092157..0000000 --- a/kernel/picotcp/RFC/rfc2884.txt +++ /dev/null @@ -1,1011 +0,0 @@ - - - - - - -Network Working Group J. Hadi Salim -Request for Comments: 2884 Nortel Networks -Category: Informational U. Ahmed - Carleton University - July 2000 - - - Performance Evaluation of Explicit Congestion Notification (ECN) - in IP Networks - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. 
- -Abstract - - This memo presents a performance study of the Explicit Congestion - Notification (ECN) mechanism in the TCP/IP protocol using our - implementation on the Linux Operating System. ECN is an end-to-end - congestion avoidance mechanism proposed by [6] and incorporated into - RFC 2481[7]. We study the behavior of ECN for both bulk and - transactional transfers. Our experiments show that there is - improvement in throughput over NON ECN (TCP employing any of Reno, - SACK/FACK or NewReno congestion control) in the case of bulk - transfers and substantial improvement for transactional transfers. - - A more complete pdf version of this document is available at: - http://www7.nortel.com:8080/CTL/ecnperf.pdf - - This memo in its current revision is missing a lot of the visual - representations and experimental results found in the pdf version. - -1. Introduction - - In current IP networks, congestion management is left to the - protocols running on top of IP. An IP router when congested simply - drops packets. TCP is the dominant transport protocol today [26]. - TCP infers that there is congestion in the network by detecting - packet drops (RFC 2581). Congestion control algorithms [11] [15] [21] - are then invoked to alleviate congestion. TCP initially sends at a - higher rate (slow start) until it detects a packet loss. A packet - loss is inferred by the receipt of 3 duplicate ACKs or detected by a - - - -Salim & Ahmed Informational [Page 1] - -RFC 2884 ECN in IP Networks July 2000 - - - timeout. The sending TCP then moves into a congestion avoidance state - where it carefully probes the network by sending at a slower rate - (which goes up until another packet loss is detected). Traditionally - a router reacts to congestion by dropping a packet in the absence of - buffer space. This is referred to as Tail Drop. This method has a - number of drawbacks (outlined in Section 2). 
These drawbacks coupled - with the limitations of end-to-end congestion control have led to - interest in introducing smarter congestion control mechanisms in - routers. One such mechanism is Random Early Detection (RED) [9] - which detects incipient congestion and implicitly signals the - oversubscribing flow to slow down by dropping its packets. A RED- - enabled router detects congestion before the buffer overflows, based - on a running average queue size, and drops packets probabilistically - before the queue actually fills up. The probability of dropping a new - arriving packet increases as the average queue size increases above a - low water mark minth, towards higher water mark maxth. When the - average queue size exceeds maxth all arriving packets are dropped. - - An extension to RED is to mark the IP header instead of dropping - packets (when the average queue size is between minth and maxth; - above maxth arriving packets are dropped as before). Cooperating end - systems would then use this as a signal that the network is congested - and slow down. This is known as Explicit Congestion Notification - (ECN). In this paper we study an ECN implementation on Linux for - both the router and the end systems in a live network. The memo is - organized as follows. In Section 2 we give an overview of queue - management in routers. Section 3 gives an overview of ECN and the - changes required at the router and the end hosts to support ECN. - Section 4 defines the experimental testbed and the terminologies used - throughout this memo. Section 5 introduces the experiments that are - carried out, outlines the results and presents an analysis of the - results obtained. Section 6 concludes the paper. - -2. Queue Management in routers - - TCP's congestion control and avoidance algorithms are necessary and - powerful but are not enough to provide good service in all - circumstances since they treat the network as a black box. 
Some sort - of control is required from the routers to complement the end system - congestion control mechanisms. More detailed analysis is contained in - [19]. Queue management algorithms traditionally manage the length of - packet queues in the router by dropping packets only when the buffer - overflows. A maximum length for each queue is configured. The router - will accept packets till this maximum size is exceeded, at which - point it will drop incoming packets. New packets are accepted when - buffer space allows. This technique is known as Tail Drop. This - method has served the Internet well for years, but has several - drawbacks. Since all arriving packets (from all flows) are dropped - - - -Salim & Ahmed Informational [Page 2] - -RFC 2884 ECN in IP Networks July 2000 - - - when the buffer overflows, this interacts badly with the congestion - control mechanism of TCP. A cycle is formed with a burst of drops - after the maximum queue size is exceeded, followed by a period of - underutilization at the router as end systems back off. End systems - then increase their windows simultaneously up to a point where a - burst of drops happens again. This phenomenon is called Global - Synchronization. It leads to poor link utilization and lower overall - throughput [19]. Another problem with Tail Drop is that a single - connection or a few flows could monopolize the queue space, in some - circumstances. This results in a lock out phenomenon leading to - synchronization or other timing effects [19]. Lastly, one of the - major drawbacks of Tail Drop is that queues remain full for long - periods of time. One of the major goals of queue management is to - reduce the steady state queue size [19]. Other queue management - techniques include random drop on full and drop front on full [13]. - -2.1. Active Queue Management - - Active queue management mechanisms detect congestion before the queue - overflows and provide an indication of this congestion to the end - nodes [7].
With this approach TCP does not have to rely only on - buffer overflow as the indication of congestion since notification - happens before serious congestion occurs. One such active management - technique is RED. - -2.1.1. Random Early Detection - - Random Early Detection (RED) [9] is a congestion avoidance mechanism - implemented in routers which works on the basis of active queue - management. RED addresses the shortcomings of Tail Drop. A RED - router signals incipient congestion to TCP by dropping packets - probabilistically before the queue runs out of buffer space. This - drop probability is dependent on a running average queue size to - avoid any bias against bursty traffic. A RED router randomly drops - arriving packets, with the result that the probability of dropping a - packet belonging to a particular flow is approximately proportional - to the flow's share of bandwidth. Thus, if the sender is using - relatively more bandwidth it gets penalized by having more of its - packets dropped. RED operates by maintaining two levels of - thresholds minimum (minth) and maximum (maxth). It drops a packet - probabilistically if and only if the average queue size lies between - the minth and maxth thresholds. If the average queue size is above - the maximum threshold, the arriving packet is always dropped. When - the average queue size is between the minimum and the maximum - threshold, each arriving packet is dropped with probability pa, where - pa is a function of the average queue size. As the average queue - length varies between minth and maxth, pa increases linearly towards - a configured maximum drop probability, maxp. Beyond maxth, the drop - - - -Salim & Ahmed Informational [Page 3] - -RFC 2884 ECN in IP Networks July 2000 - - - probability is 100%. Dropping packets in this way ensures that when - some subset of the source TCP packets get dropped and they invoke - congestion avoidance algorithms that will ease the congestion at the - gateway. 
Since the dropping is distributed across flows, the problem - of global synchronization is avoided. - -3. Explicit Congestion Notification - - Explicit Congestion Notification is an extension proposed to RED - which marks a packet instead of dropping it when the average queue - size is between minth and maxth [7]. Since ECN marks packets before - congestion actually occurs, this is useful for protocols like TCP - that are sensitive to even a single packet loss. Upon receipt of a - congestion marked packet, the TCP receiver informs the sender (in the - subsequent ACK) about incipient congestion which will in turn trigger - the congestion avoidance algorithm at the sender. ECN requires - support from both the router as well as the end hosts, i.e. the end - hosts TCP stack needs to be modified. Packets from flows that are not - ECN capable will continue to be dropped by RED (as was the case - before ECN). - -3.1. Changes at the router - - Router side support for ECN can be added by modifying current RED - implementations. For packets from ECN capable hosts, the router marks - the packets rather than dropping them (if the average queue size is - between minth and maxth). It is necessary that the router identifies - that a packet is ECN capable, and should only mark packets that are - from ECN capable hosts. This uses two bits in the IP header. The ECN - Capable Transport (ECT) bit is set by the sender end system if both - the end systems are ECN capable (for a unicast transport, only if - both end systems are ECN-capable). In TCP this is confirmed in the - pre-negotiation during the connection setup phase (explained in - Section 3.2). 
Packets encountering congestion are marked by the - router using the Congestion Experienced (CE) (if the average queue - size is between minth and maxth) on their way to the receiver end - system (from the sender end system), with a probability proportional - to the average queue size following the procedure used in RED - (RFC2309) routers. Bits 10 and 11 in the IPV6 header are proposed - respectively for the ECT and CE bits. Bits 6 and 7 of the IPV4 header - DSCP field are also specified for experimental purposes for the ECT - and CE bits respectively. - -3.2. Changes at the TCP Host side - - The proposal to add ECN to TCP specifies two new flags in the - reserved field of the TCP header. Bit 9 in the reserved field of the - TCP header is designated as the ECN-Echo (ECE) flag and Bit 8 is - - - -Salim & Ahmed Informational [Page 4] - -RFC 2884 ECN in IP Networks July 2000 - - - designated as the Congestion Window Reduced (CWR) flag. These two - bits are used both for the initializing phase in which the sender and - the receiver negotiate the capability and the desire to use ECN, as - well as for the subsequent actions to be taken in case there is - congestion experienced in the network during the established state. - - There are two main changes that need to be made to add ECN to TCP to - an end system and one extension to a router running RED. - - 1. In the connection setup phase, the source and destination TCPs - have to exchange information about their desire and/or capability to - use ECN. This is done by setting both the ECN-Echo flag and the CWR - flag in the SYN packet of the initial connection phase by the sender; - on receipt of this SYN packet, the receiver will set the ECN-Echo - flag in the SYN-ACK response. Once this agreement has been reached, - the sender will thereon set the ECT bit in the IP header of data - packets for that flow, to indicate to the network that it is capable - and willing to participate in ECN. 
The ECT bit is set on all packets - other than pure ACK's. - - 2. When a router has decided from its active queue management - mechanism, to drop or mark a packet, it checks the IP-ECT bit in the - packet header. It sets the CE bit in the IP header if the IP-ECT bit - is set. When such a packet reaches the receiver, the receiver - responds by setting the ECN-Echo flag (in the TCP header) in the next - outgoing ACK for the flow. The receiver will continue to do this in - subsequent ACKs until it receives from the sender an indication that - it (the sender) has responded to the congestion notification. - - 3. Upon receipt of this ACK, the sender triggers its congestion - avoidance algorithm by halving its congestion window, cwnd, and - updating its congestion window threshold value ssthresh. Once it has - taken these appropriate steps, the sender sets the CWR bit on the - next data outgoing packet to tell the receiver that it has reacted to - the (receiver's) notification of congestion. The receiver reacts to - the CWR by halting the sending of the congestion notifications (ECE) - to the sender if there is no new congestion in the network. - - Note that the sender reaction to the indication of congestion in the - network (when it receives an ACK packet that has the ECN-Echo flag - set) is equivalent to the Fast Retransmit/Recovery algorithm (when - there is a congestion loss) in NON-ECN-capable TCP i.e. the sender - halves the congestion window cwnd and reduces the slow start - threshold ssthresh. Fast Retransmit/Recovery is still available for - ECN capable stacks for responding to three duplicate acknowledgments. - - - - - - -Salim & Ahmed Informational [Page 5] - -RFC 2884 ECN in IP Networks July 2000 - - -4. Experimental setup - - For testing purposes we have added ECN to the Linux TCP/IP stack, - kernels version 2.0.32. 2.2.5, 2.3.43 (there were also earlier - revisions of 2.3 which were tested). 
The 2.0.32 implementation - conforms to RFC 2481 [7] for the end systems only. We have also - modified the code in the 2.1,2.2 and 2.3 cases for the router portion - as well as end system to conform to the RFC. An outdated version of - the 2.0 code is available at [18]. Note Linux version 2.0.32 - implements TCP Reno congestion control while kernels >= 2.2.0 default - to New Reno but will opt for a SACK/FACK combo when the remote end - understands SACK. Our initial tests were carried out with the 2.0 - kernel at the end system and 2.1 (pre 2.2) for the router part. The - majority of the test results here apply to the 2.0 tests. We did - repeat these tests on a different testbed (move from Pentium to - Pentium-II class machines)with faster machines for the 2.2 and 2.3 - kernels, so the comparisons on the 2.0 and 2.2/3 are not relative. - - We have updated this memo release to reflect the tests against SACK - and New Reno. - -4.1. Testbed setup - - ----- ---- - | ECN | | ECN | - | ON | | OFF | - data direction ---->> ----- ---- - | | - server | | - ---- ------ ------ | | - | | | R1 | | R2 | | | - | | -----| | ---- | | ---------------------- - ---- ------ ^ ------ | - ^ | - | ----- - congestion point ___| | C | - | | - ----- - - The figure above shows our test setup. - - All the physical links are 10Mbps ethernet. Using Class Based - Queuing (CBQ) [22], packets from the data server are constricted to a - 1.5Mbps pipe at the router R1. Data is always retrieved from the - server towards the clients labelled , "ECN ON", "ECN OFF", and "C". - Since the pipe from the server is 10Mbps, this creates congestion at - the exit from the router towards the clients for competing flows. The - machines labeled "ECN ON" and "ECN OFF" are running the same version - - - -Salim & Ahmed Informational [Page 6] - -RFC 2884 ECN in IP Networks July 2000 - - - of Linux and have exactly the same hardware configuration. 
The server - is always ECN capable (and can handle NON ECN flows as well using the - standard congestion algorithms). The machine labeled "C" is used to - create congestion in the network. Router R2 acts as a path-delay - controller. With it we adjust the RTT the clients see. Router R1 - has RED implemented in it and has capability for supporting ECN - flows. The path-delay router is a PC running the Nistnet [16] - package on a Linux platform. The latency of the link for the - experiments was set to be 20 millisecs. - -4.2. Validating the Implementation - - We spent time validating that the implementation was conformant to - the specification in RFC 2481. To do this, the popular tcpdump - sniffer [24] was modified to show the packets being marked. We - visually inspected tcpdump traces to validate the conformance to the - RFC under a lot of different scenarios. We also modified tcptrace - [25] in order to plot the marked packets for visualization and - analysis. - - Both tcpdump and tcptrace revealed that the implementation was - conformant to the RFC. - -4.3. Terminology used - - This section presents background terminology used in the next few - sections. - - * Congesting flows: These are TCP flows that are started in the - background so as to create congestion from R1 towards R2. We use the - laptop labeled "C" to introduce congesting flows. Note that "C" as is - the case with the other clients retrieves data from the server. - - * Low, Moderate and High congestion: For the case of low congestion - we start two congesting flows in the background, for moderate - congestion we start five congesting flows and for the case of high - congestion we start ten congesting flows in the background. - - * Competing flows: These are the flows that we are interested in. - They are either ECN TCP flows from/to "ECN ON" or NON ECN TCP flows - from/to "ECN OFF". 
- - * Maximum drop rate: This is the RED parameter that sets the maximum - probability of a packet being marked at the router. This corresponds - to maxp as explained in Section 2.1. - - - - - - -Salim & Ahmed Informational [Page 7] - -RFC 2884 ECN in IP Networks July 2000 - - - Our tests were repeated for varying levels of congestion with varying - maximum drop rates. The results are presented in the subsequent - sections. - - * Low, Medium and High drop probability: We use the term low - probability to mean a drop probability maxp of 0.02, medium - probability for 0.2 and high probability for 0.5. We also - experimented with drop probabilities of 0.05, 0.1 and 0.3. - - * Goodput: We define goodput as the effective data rate as observed - by the user, i.e., if we transmitted 4 data packets in which two of - them were retransmitted packets, the efficiency is 50% and the - resulting goodput is 2*packet size/time taken to transmit. - - * RED Region: When the router's average queue size is between minth - and maxth we denote that we are operating in the RED region. - -4.4. RED parameter selection - - In our initial testing we noticed that as we increase the number of - congesting flows the RED queue degenerates into a simple Tail Drop - queue, i.e., the average queue exceeds the maximum threshold most of - the time. Note that this phenomenon has also been observed by [5] - who proposes a dynamic solution to alleviate it by adjusting the - packet dropping probability "maxp" based on the past history of the - average queue size. Hence, it is necessary that in the course of our - experiments the router operate in the RED region, i.e., we have to - make sure that the average queue is maintained between minth and - maxth. If this is not maintained, then the queue acts like a Tail - Drop queue and the advantages of ECN diminish. Our goal is to - validate ECN's benefits when used with RED at the router.
To ensure - that we were operating in the RED region we monitored the average - queue size and the actual queue size in times of low, moderate and - high congestion and fine-tuned the RED parameters such that the - average queue zones around the RED region before running the - experiment proper. Our results are, therefore, not influenced by - operating in the wrong RED region. - -5. The Experiments - - We start by making sure that the background flows do not bias our - results by computing the fairness index [12] in Section 5.1. We - proceed to carry out the experiments for bulk transfer presenting the - results and analysis in Section 5.2. In Section 5.3 the results for - transactional transfers along with analysis is presented. More - details on the experimental results can be found in [27]. - - - - - -Salim & Ahmed Informational [Page 8] - -RFC 2884 ECN in IP Networks July 2000 - - -5.1. Fairness - - In the course of the experiments we wanted to make sure that our - choice of the type of background flows does not bias the results that - we collect. Hence we carried out some tests initially with both ECN - and NON ECN flows as the background flows. We repeated the - experiments for different drop probabilities and calculated the - fairness index [12]. We also noticed (when there were equal number - of ECN and NON ECN flows) that the number of packets dropped for the - NON ECN flows was equal to the number of packets marked for the ECN - flows, showing thereby that the RED algorithm was fair to both kind - of flows. - - Fairness index: The fairness index is a performance metric described - in [12]. Jain [12] postulates that the network is a multi-user - system, and derives a metric to see how fairly each user is treated. - He defines fairness as a function of the variability of throughput - across users. 
For a given set of user throughputs (x1, x2...xn), the - fairness index to the set is defined as follows: - - f(x1,x2,.....,xn) = square((sum[i=1..n]xi))/(n*sum[i=1..n]square(xi)) - - The fairness index always lies between 0 and 1. A value of 1 - indicates that all flows got exactly the same throughput. Each of - the tests was carried out 10 times to gain confidence in our results. - To compute the fairness index we used FTP to generate traffic. - - Experiment details: At time t = 0 we start 2 NON ECN FTP sessions in - the background to create congestion. At time t=20 seconds we start - two competing flows. We note the throughput of all the flows in the - network and calculate the fairness index. The experiment was carried - out for various maximum drop probabilities and for various congestion - levels. The same procedure is repeated with the background flows as - ECN. The fairness index was fairly constant in both the cases when - the background flows were ECN and NON ECN indicating that there was - no bias when the background flows were either ECN or NON ECN. - - Max Fairness Fairness - Drop With BG With BG - Prob flows ECN flows NON ECN - - 0.02 0.996888 0.991946 - 0.05 0.995987 0.988286 - 0.1 0.985403 0.989726 - 0.2 0.979368 0.983342 - - - - - - -Salim & Ahmed Informational [Page 9] - -RFC 2884 ECN in IP Networks July 2000 - - - With the observation that the nature of background flows does not - alter the results, we proceed by using the background flows as NON - ECN for the rest of the experiments. - -5.2. Bulk transfers - - The metric we chose for bulk transfer is end user throughput. - - Experiment Details: All TCP flows used are RENO TCP. For the case of - low congestion we start 2 FTP flows in the background at time 0. Then - after about 20 seconds we start the competing flows, one data - transfer to the ECN machine and the second to the NON ECN machine. - The size of the file used is 20MB. 
For the case of moderate - congestion we start 5 FTP flows in the background and for the case of - high congestion we start 10 FTP flows in the background. We repeat - the experiments for various maximum drop rates each repeated for a - number of sets. - - Observation and Analysis: - - We make three key observations: - - 1) As the congestion level increases, the relative advantage for ECN - increases but the absolute advantage decreases (expected, since there - are more flows competing for the same link resource). ECN still does - better than NON ECN even under high congestion. Inferring a sample - from the collected results: at maximum drop probability of 0.1, for - example, the relative advantage of ECN increases from 23% to 50% as - the congestion level increases from low to high. - - 2) Maintaining congestion levels and varying the maximum drop - probability (MDP) reveals that the relative advantage of ECN - increases with increasing MDP. As an example, for the case of high - congestion as we vary the drop probability from 0.02 to 0.5 the - relative advantage of ECN increases from 10% to 60%. - - 3) There were hardly any retransmissions for ECN flows (except the - occasional packet drop in a minority of the tests for the case of - high congestion and low maximum drop probability). - - We analyzed tcpdump traces for NON ECN with the help of tcptrace and - observed that there were hardly any retransmits due to timeouts. - (Retransmits due to timeouts are inferred by counting the number of 3 - DUPACKS retransmit and subtracting them from the total recorded - number of retransmits). This means that over a long period of time - (as is the case of long bulk transfers), the data-driven loss - recovery mechanism of the Fast Retransmit/Recovery algorithm is very - effective.
The algorithm for ECN on congestion notification from ECE - - - -Salim & Ahmed Informational [Page 10] - -RFC 2884 ECN in IP Networks July 2000 - - - is the same as that for a Fast Retransmit for NON ECN. Since both are - operating in the RED region, ECN barely gets any advantage over NON - ECN from the signaling (packet drop vs. marking). - - It is clear, however, from the results that ECN flows benefit in bulk - transfers. We believe that the main advantage of ECN for bulk - transfers is that less time is spent recovering (whereas NON ECN - spends time retransmitting), and timeouts are avoided altogether. - [23] has shown that even with RED deployed, TCP RENO could suffer - from multiple packet drops within the same window of data, likely to - lead to multiple congestion reactions or timeouts (these problems are - alleviated by ECN). However, while TCP Reno has performance problems - with multiple packets dropped in a window of data, New Reno and SACK - have no such problems. - - Thus, for scenarios with very high levels of congestion, the - advantages of ECN for TCP Reno flows could be more dramatic than the - advantages of ECN for NewReno or SACK flows. An important - observation to make from our results is that we do not notice - multiple drops within a single window of data. Thus, we would expect - that our results are not heavily influenced by Reno's performance - problems with multiple packets dropped from a window of data. We - repeated these tests with ECN patched newer Linux kernels. As - mentioned earlier these kernels would use a SACK/FACK combo with a - fallback to New Reno. SACK can be selectively turned off (defaulting - to New Reno). Our results indicate that ECN still improves - performance for the bulk transfers. More results are available in the - pdf version[27]. 
As in 1) above, maintaining a maximum drop - probability of 0.1 and increasing the congestion level, it is - observed that ECN-SACK improves performance from about 5% at low - congestion to about 15% at high congestion. In the scenario where - high congestion is maintained and the maximum drop probability is - moved from 0.02 to 0.5, the relative advantage of ECN-SACK improves - from 10% to 40%. Although this numbers are lower than the ones - exhibited by Reno, they do reflect the improvement that ECN offers - even in the presence of robust recovery mechanisms such as SACK. - -5.3. Transactional transfers - - We model transactional transfers by sending a small request and - getting a response from a server before sending the next request. To - generate transactional transfer traffic we use Netperf [17] with the - CRR (Connect Request Response) option. As an example let us assume - that we are retrieving a small file of say 5 - 20 KB, then in effect - we send a small request to the server and the server responds by - sending us the file. The transaction is complete when we receive the - complete file. To gain confidence in our results we carry the - simulation for about one hour. For each test there are a few thousand - - - -Salim & Ahmed Informational [Page 11] - -RFC 2884 ECN in IP Networks July 2000 - - - of these requests and responses taking place. Although not exactly - modeling HTTP 1.0 traffic, where several concurrent sessions are - opened, Netperf-CRR is nevertheless a close approximation. Since - Netperf-CRR waits for one connection to complete before opening the - next one (0 think time), that single connection could be viewed as - the slowest response in the set of the opened concurrent sessions (in - HTTP). 
The transactional data sizes were selected based on [2] which - indicates that the average web transaction was around 8 - 10 KB; the - smaller (5KB) size was selected to guesstimate the size of - transactional processing that may become prevalent with policy - management schemes in the diffserv [4] context. Using Netperf we are - able to initiate these kinds of transactional transfers for a variable - length of time. The main metric of interest in this case is the - transaction rate, which is recorded by Netperf. - - * Define Transaction rate as: The number of requests and complete - responses for a particular requested size that we are able to do per - second. For example if our request is of 1KB and the response is 5KB - then we define the transaction rate as the number of such complete - transactions that we can accomplish per second. - - Experiment Details: Similar to the case of bulk transfers we start - the background FTP flows to introduce the congestion in the network - at time 0. About 20 seconds later we start the transactional - transfers and run each test for three minutes. We record the - transactions per second that are complete. We repeat the test for - about an hour and plot the various transactions per second, averaged - out over the runs. The experiment is repeated for various maximum - drop probabilities, file sizes and various levels of congestion. - - Observation and Analysis - - There are three key observations: - - 1) As congestion increases (with fixed drop probability) the relative - advantage for ECN increases (again the absolute advantage does not - increase since more flows are sharing the same bandwidth). For - example, from the results, if we consider the 5KB transactional flow, - as we increase the congestion from medium congestion (5 congesting - flows) to high congestion (10 congesting flows) for a maximum drop - probability of 0.1 the relative gain for ECN increases from 42% to - 62%.
- - 2) Maintaining the congestion level while adjusting the maximum drop - probability indicates that the relative advantage for ECN flows - increase. From the case of high congestion for the 5KB flow we - - - - - -Salim & Ahmed Informational [Page 12] - -RFC 2884 ECN in IP Networks July 2000 - - - observe that the number of transactions per second increases from 0.8 - to 2.2 which corresponds to an increase in relative gain for ECN of - 20% to 140%. - - 3) As the transactional data size increases, ECN's advantage - diminishes because the probability of recovering from a Fast - Retransmit increases for NON ECN. ECN, therefore, has a huge - advantage as the transactional data size gets smaller as is observed - in the results. This can be explained by looking at TCP recovery - mechanisms. NON ECN in the short flows depends, for recovery, on - congestion signaling via receiving 3 duplicate ACKs, or worse by a - retransmit timer expiration, whereas ECN depends mostly on the TCP- - ECE flag. This is by design in our experimental setup. [3] shows - that most of the TCP loss recovery in fact happens in timeouts for - short flows. The effectiveness of the Fast Retransmit/Recovery - algorithm is limited by the fact that there might not be enough data - in the pipe to elicit 3 duplicate ACKs. TCP RENO needs at least 4 - outstanding packets to recover from losses without going into a - timeout. For 5KB (4 packets for MTU of 1500Bytes) a NON ECN flow will - always have to wait for a retransmit timeout if any of its packets - are lost. ( This timeout could only have been avoided if the flow had - used an initial window of four packets, and the first of the four - packets was the packet dropped). We repeated these experiments with - the kernels implementing SACK/FACK and New Reno algorithms. Our - observation was that there was hardly any difference with what we saw - with Reno. 
For example in the case of SACK-ECN enabling: maintaining - the maximum drop probability to 0.1 and increasing the congestion - level for the 5KB transaction we noticed that the relative gain for - the ECN enabled flows increases from 47-80%. If we maintain the - congestion level for the 5KB transactions and increase the maximum - drop probabilities instead, we notice that SACKs performance - increases from 15%-120%. It is fair to comment that the difference - in the testbeds (different machines, same topology) might have - contributed to the results; however, it is worth noting that the - relative advantage of the SACK-ECN is obvious. - -6. Conclusion - - ECN enhancements improve on both bulk and transactional TCP traffic. - The improvement is more obvious in short transactional type of flows - (popularly referred to as mice). - - * Because less retransmits happen with ECN, it means less traffic on - the network. Although the relative amount of data retransmitted in - our case is small, the effect could be higher when there are more - contributing end systems. The absence of retransmits also implies an - improvement in the goodput. This becomes very important for scenarios - - - - -Salim & Ahmed Informational [Page 13] - -RFC 2884 ECN in IP Networks July 2000 - - - where bandwidth is expensive such as in low bandwidth links. This - implies also that ECN lends itself well to applications that require - reliability but would prefer to avoid unnecessary retransmissions. - - * The fact that ECN avoids timeouts by getting faster notification - (as opposed to traditional packet dropping inference from 3 duplicate - ACKs or, even worse, timeouts) implies less time is spent during - error recovery - this also improves goodput. - - * ECN could be used to help in service differentiation where the end - user is able to "probe" for their target rate faster. 
Assured - forwarding [1] in the diffserv working group at the IETF proposes - using RED with varying drop probabilities as a service - differentiation mechanism. It is possible that multiple packets - within a single window in TCP RENO could be dropped even in the - presence of RED, likely leading into timeouts [23]. ECN end systems - ignore multiple notifications, which help in countering this scenario - resulting in improved goodput. The ECN end system also ends up - probing the network faster (to reach an optimal bandwidth). [23] also - notes that RENO is the most widely deployed TCP implementation today. - - It is clear that the advent of policy management schemes introduces - new requirements for transactional type of applications, which - constitute a very short query and a response in the order of a few - packets. ECN provides advantages to transactional traffic as we have - shown in the experiments. - -7. Acknowledgements - - We would like to thank Alan Chapman, Ioannis Lambadaris, Thomas Kunz, - Biswajit Nandy, Nabil Seddigh, Sally Floyd, and Rupinder Makkar for - their helpful feedback and valuable suggestions. - -8. Security Considerations - - Security considerations are as discussed in section 9 of RFC 2481. - -9. References - - [1] Heinanen, J., Finland, T., Baker, F., Weiss, W. and J. - Wroclawski, "Assured Forwarding PHB Group", RFC 2597, June 1999. - - [2] B.A. Mat. "An empirical model of HTTP network traffic." In - proceedings INFOCOMM'97. - - - - - - - -Salim & Ahmed Informational [Page 14] - -RFC 2884 ECN in IP Networks July 2000 - - - [3] Balakrishnan H., Padmanabhan V., Seshan S., Stemn M. and Randy - H. Katz, "TCP Behavior of a busy Internet Server: Analysis and - Improvements", Proceedings of IEEE Infocom, San Francisco, CA, - USA, March '98 - http://nms.lcs.mit.edu/~hari/papers/infocom98.ps.gz - - [4] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. and W. 
- Weiss, "An Architecture for Differentiated Services", RFC 2475, - December 1998. - - [5] W. Feng, D. Kandlur, D. Saha, K. Shin, "Techniques for - Eliminating Packet Loss in Congested TCP/IP Networks", U. - Michigan CSE-TR-349-97, November 1997. - - [6] S. Floyd. "TCP and Explicit Congestion Notification." ACM - Computer Communications Review, 24, October 1994. - - [7] Ramakrishnan, K. and S. Floyd, "A Proposal to add Explicit - Congestion Notification (ECN) to IP", RFC 2481, January 1999. - - [8] Kevin Fall, Sally Floyd, "Comparisons of Tahoe, RENO and Sack - TCP", Computer Communications Review, V. 26 N. 3, July 1996, - pp. 5-21 - - [9] S. Floyd and V. Jacobson. "Random Early Detection Gateways for - Congestion Avoidance". IEEE/ACM Transactions on Networking, - 3(1), August 1993. - - [10] E. Hashem. "Analysis of random drop for gateway congestion - control." Rep. LCS TR-465, Lab. for Comput. Sci., M.I.T., 1989. - - [11] V. Jacobson. "Congestion Avoidance and Control." In Proceedings - of SIGCOMM '88, Stanford, CA, August 1988. - - [12] Raj Jain, "The art of computer systems performance analysis", - John Wiley and sons QA76.9.E94J32, 1991. - - [13] T. V. Lakshman, Arnie Neidhardt, Teunis Ott, "The Drop From - Front Strategy in TCP Over ATM and Its Interworking with Other - Control Features", Infocom 96, MA28.1. - - [14] P. Mishra and H. Kanakia. "A hop by hop rate based congestion - control scheme." Proc. SIGCOMM '92, pp. 112-123, August 1992. - - [15] Floyd, S. and T. Henderson, "The NewReno Modification to TCP's - Fast Recovery Algorithm", RFC 2582, April 1999.
- - - - - -Salim & Ahmed Informational [Page 15] - -RFC 2884 ECN in IP Networks July 2000 - - - [16] The NIST Network Emulation Tool - http://www.antd.nist.gov/itg/nistnet/ - - [17] The network performance tool - http://www.netperf.org/netperf/NetperfPage.html - - [18] ftp://ftp.ee.lbl.gov/ECN/ECN-package.tgz - - [19] Braden, B., Clark, D., Crowcroft, J., Davie, B., Deering, S., - Estrin, D., Floyd, S., Jacobson, V., Minshall, G., Partridge, - C., Peterson, L., Ramakrishnan, K., Shenker, S., Wroclawski, J. - and L. Zhang, "Recommendations on Queue Management and - Congestion Avoidance in the Internet", RFC 2309, April 1998. - - [20] K. K. Ramakrishnan and R. Jain. "A Binary feedback scheme for - congestion avoidance in computer networks." ACM Trans. Comput. - Syst.,8(2):158-181, 1990. - - [21] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October 1996. - - [22] S. Floyd and V. Jacobson, "Link sharing and Resource Management - Models for packet Networks", IEEE/ACM Transactions on - Networking, Vol. 3 No.4, August 1995. - - [23] Prasad Bagal, Shivkumar Kalyanaraman, Bob Packer, "Comparative - study of RED, ECN and TCP Rate Control". - http://www.packeteer.com/technology/Pdf/packeteer-final.pdf - - [24] tcpdump, the protocol packet capture & dumper program. - ftp://ftp.ee.lbl.gov/tcpdump.tar.Z - - [25] TCP dump file analysis tool: - http://jarok.cs.ohiou.edu/software/tcptrace/tcptrace.html - - [26] Thompson K., Miller, G.J., Wilder R., "Wide-Area Internet - Traffic Patterns and Characteristics". IEEE Networks Magazine, - November/December 1997. - - [27] http://www7.nortel.com:8080/CTL/ecnperf.pdf - - - - - - - - - - - -Salim & Ahmed Informational [Page 16] - -RFC 2884 ECN in IP Networks July 2000 - - -10. Authors' Addresses - - Jamal Hadi Salim - Nortel Networks - 3500 Carling Ave - Ottawa, ON, K2H 8E9 - Canada - - EMail: hadi@nortelnetworks.com - - - Uvaiz Ahmed - Dept. 
of Systems and Computer Engineering - Carleton University - Ottawa - Canada - - EMail: ahmed@sce.carleton.ca - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Salim & Ahmed Informational [Page 17] - -RFC 2884 ECN in IP Networks July 2000 - - -11. Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. 
- - - - - - - - - - - - - - - - - - - -Salim & Ahmed Informational [Page 18] - diff --git a/kernel/picotcp/RFC/rfc2914.txt b/kernel/picotcp/RFC/rfc2914.txt deleted file mode 100644 index f48e1d3..0000000 --- a/kernel/picotcp/RFC/rfc2914.txt +++ /dev/null @@ -1,955 +0,0 @@ - - - - - - -Network Working Group S. Floyd -Request for Comments: 2914 ACIRI -BCP: 41 September 2000 -Category: Best Current Practice - - - Congestion Control Principles - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. - -Abstract - - The goal of this document is to explain the need for congestion - control in the Internet, and to discuss what constitutes correct - congestion control. One specific goal is to illustrate the dangers - of neglecting to apply proper congestion control. A second goal is - to discuss the role of the IETF in standardizing new congestion - control protocols. - -1. Introduction - - This document draws heavily from earlier RFCs, in some cases - reproducing entire sections of the text of earlier documents - [RFC2309, RFC2357]. We have also borrowed heavily from earlier - publications addressing the need for end-to-end congestion control - [FF99]. - -2. Current standards on congestion control - - IETF standards concerning end-to-end congestion control focus either - on specific protocols (e.g., TCP [RFC2581], reliable multicast - protocols [RFC2357]) or on the syntax and semantics of communications - between the end nodes and routers about congestion information (e.g., - Explicit Congestion Notification [RFC2481]) or desired quality-of- - service (diff-serv)). 
The role of end-to-end congestion control is - also discussed in an Informational RFC on "Recommendations on Queue - Management and Congestion Avoidance in the Internet" [RFC2309]. RFC - 2309 recommends the deployment of active queue management mechanisms - in routers, and the continuation of design efforts towards mechanisms - - - - -Floyd, ed. Best Current Practice [Page 1] - -RFC 2914 Congestion Control Principles September 2000 - - - in routers to deal with flows that are unresponsive to congestion - notification. We freely borrow from RFC 2309 some of their general - discussion of end-to-end congestion control. - - In contrast to the RFCs discussed above, this document is a more - general discussion of the principles of congestion control. One of - the keys to the success of the Internet has been the congestion - avoidance mechanisms of TCP. While TCP is still the dominant - transport protocol in the Internet, it is not ubiquitous, and there - are an increasing number of applications that, for one reason or - another, choose not to use TCP. Such traffic includes not only - multicast traffic, but unicast traffic such as streaming multimedia - that does not require reliability; and traffic such as DNS or routing - messages that consist of short transfers deemed critical to the - operation of the network. Much of this traffic does not use any form - of either bandwidth reservations or end-to-end congestion control. - The continued use of end-to-end congestion control by best-effort - traffic is critical for maintaining the stability of the Internet. - - This document also discusses the general role of the IETF in the - standardization of new congestion control protocols. - - The discussion of congestion control principles for differentiated - services or integrated services is not addressed in this document. 
- Some categories of integrated or differentiated services include a - guarantee by the network of end-to-end bandwidth, and as such do not - require end-to-end congestion control mechanisms. - -3. The development of end-to-end congestion control. - -3.1. Preventing congestion collapse. - - The Internet protocol architecture is based on a connectionless end- - to-end packet service using the IP protocol. The advantages of its - connectionless design, flexibility and robustness, have been amply - demonstrated. However, these advantages are not without cost: - careful design is required to provide good service under heavy load. - In fact, lack of attention to the dynamics of packet forwarding can - result in severe service degradation or "Internet meltdown". This - phenomenon was first observed during the early growth phase of the - Internet of the mid 1980s [RFC896], and is technically called - "congestion collapse". - - The original specification of TCP [RFC793] included window-based flow - control as a means for the receiver to govern the amount of data sent - by the sender. This flow control was used to prevent overflow of the - receiver's data buffer space available for that connection. [RFC793] - - - - -Floyd, ed. Best Current Practice [Page 2] - -RFC 2914 Congestion Control Principles September 2000 - - - reported that segments could be lost due either to errors or to - network congestion, but did not include dynamic adjustment of the - flow-control window in response to congestion. - - The original fix for Internet meltdown was provided by Van Jacobson. - Beginning in 1986, Jacobson developed the congestion avoidance - mechanisms that are now required in TCP implementations [Jacobson88, - RFC 2581]. These mechanisms operate in the hosts to cause TCP - connections to "back off" during congestion. We say that TCP flows - are "responsive" to congestion signals (i.e., dropped packets) from - the network. 
It is these TCP congestion avoidance algorithms that - prevent the congestion collapse of today's Internet. - - However, that is not the end of the story. Considerable research has - been done on Internet dynamics since 1988, and the Internet has - grown. It has become clear that the TCP congestion avoidance - mechanisms [RFC2581], while necessary and powerful, are not - sufficient to provide good service in all circumstances. In addition - to the development of new congestion control mechanisms [RFC2357], - router-based mechanisms are in development that complement the - endpoint congestion avoidance mechanisms. - - A major issue that still needs to be addressed is the potential for - future congestion collapse of the Internet due to flows that do not - use responsible end-to-end congestion control. RFC 896 [RFC896] - suggested in 1984 that gateways should detect and `squelch' - misbehaving hosts: "Failure to respond to an ICMP Source Quench - message, though, should be regarded as grounds for action by a - gateway to disconnect a host. Detecting such failure is non-trivial - but is a worthwhile area for further research." Current papers - still propose that routers detect and penalize flows that are not - employing acceptable end-to-end congestion control [FF99]. - -3.2. Fairness - - In addition to a concern about congestion collapse, there is a - concern about `fairness' for best-effort traffic. Because TCP "backs - off" during congestion, a large number of TCP connections can share a - single, congested link in such a way that bandwidth is shared - reasonably equitably among similarly situated flows. The equitable - sharing of bandwidth among flows depends on the fact that all flows - are running compatible congestion control algorithms. For TCP, this - means congestion control algorithms conformant with the current TCP - specification [RFC793, RFC1122, RFC2581]. 
- - The issue of fairness among competing flows has become increasingly - important for several reasons. First, using window scaling - [RFC1323], individual TCPs can use high bandwidth even over high- - - - -Floyd, ed. Best Current Practice [Page 3] - -RFC 2914 Congestion Control Principles September 2000 - - - propagation-delay paths. Second, with the growth of the web, - Internet users increasingly want high-bandwidth and low-delay - communications, rather than the leisurely transfer of a long file in - the background. The growth of best-effort traffic that does not use - TCP underscores this concern about fairness between competing best- - effort traffic in times of congestion. - - The popularity of the Internet has caused a proliferation in the - number of TCP implementations. Some of these may fail to implement - the TCP congestion avoidance mechanisms correctly because of poor - implementation [RFC2525]. Others may deliberately be implemented - with congestion avoidance algorithms that are more aggressive in - their use of bandwidth than other TCP implementations; this would - allow a vendor to claim to have a "faster TCP". The logical - consequence of such implementations would be a spiral of increasingly - aggressive TCP implementations, or increasingly aggressive transport - protocols, leading back to the point where there is effectively no - congestion avoidance and the Internet is chronically congested. - - There is a well-known way to achieve more aggressive performance - without even changing the transport protocol, by changing the level - of granularity: open multiple connections to the same place, as has - been done in the past by some Web browsers. Thus, instead of a - spiral of increasingly aggressive transport protocols, we would - instead have a spiral of increasingly aggressive web browsers, or - increasingly aggressive applications. 
- - This raises the issue of the appropriate granularity of a "flow", - where we define a `flow' as the level of granularity appropriate for - the application of both fairness and congestion control. From RFC - 2309: "There are a few `natural' answers: 1) a TCP or UDP connection - (source address/port, destination address/port); 2) a - source/destination host pair; 3) a given source host or a given - destination host. We would guess that the source/destination host - pair gives the most appropriate granularity in many circumstances. - The granularity of flows for congestion management is, at least in - part, a policy question that needs to be addressed in the wider IETF - community." - - Again borrowing from RFC 2309, we use the term "TCP-compatible" for a - flow that behaves under congestion like a flow produced by a - conformant TCP. A TCP-compatible flow is responsive to congestion - notification, and in steady-state uses no more bandwidth than a - conformant TCP running under comparable conditions (drop rate, RTT, - MTU, etc.) - - - - - - -Floyd, ed. Best Current Practice [Page 4] - -RFC 2914 Congestion Control Principles September 2000 - - - It is convenient to divide flows into three classes: (1) TCP- - compatible flows, (2) unresponsive flows, i.e., flows that do not - slow down when congestion occurs, and (3) flows that are responsive - but are not TCP-compatible. The last two classes contain more - aggressive flows that pose significant threats to Internet - performance, as we discuss below. - - In addition to steady-state fairness, the fairness of the initial - slow-start is also a concern. One concern is the transient effect on - other flows of a flow with an overly-aggressive slow-start procedure. - Slow-start performance is particularly important for the many flows - that are short-lived, and only have a small amount of data to - transfer. - -3.3. Optimizing performance regarding throughput, delay, and loss. 
- - In addition to the prevention of congestion collapse and concerns - about fairness, a third reason for a flow to use end-to-end - congestion control can be to optimize its own performance regarding - throughput, delay, and loss. In some circumstances, for example in - environments of high statistical multiplexing, the delay and loss - rate experienced by a flow are largely independent of its own sending - rate. However, in environments with lower levels of statistical - multiplexing or with per-flow scheduling, the delay and loss rate - experienced by a flow is in part a function of the flow's own sending - rate. Thus, a flow can use end-to-end congestion control to limit - the delay or loss experienced by its own packets. We would note, - however, that in an environment like the current best-effort - Internet, concerns regarding congestion collapse and fairness with - competing flows limit the range of congestion control behaviors - available to a flow. - -4. The role of the standards process - - The standardization of a transport protocol includes not only - standardization of aspects of the protocol that could affect - interoperability (e.g., information exchanged by the end-nodes), but - also standardization of mechanisms deemed critical to performance - (e.g., in TCP, reduction of the congestion window in response to a - packet drop). At the same time, implementation-specific details and - other aspects of the transport protocol that do not affect - interoperability and do not significantly interfere with performance - do not require standardization. Areas of TCP that do not require - standardization include the details of TCP's Fast Recovery procedure - after a Fast Retransmit [RFC2582]. The appendix uses examples from - TCP to discuss in more detail the role of the standards process in - the development of congestion control. - - - - -Floyd, ed. Best Current Practice [Page 5] - -RFC 2914 Congestion Control Principles September 2000 - - -4.1. 
The development of new transport protocols. - - In addition to addressing the danger of congestion collapse, the - standardization process for new transport protocols takes care to - avoid a congestion control `arms race' among competing protocols. As - an example, in RFC 2357 [RFC2357] the TSV Area Directors and their - Directorate outline criteria for the publication as RFCs of - Internet-Drafts on reliable multicast transport protocols. From - [RFC2357]: "A particular concern for the IETF is the impact of - reliable multicast traffic on other traffic in the Internet in times - of congestion, in particular the effect of reliable multicast traffic - on competing TCP traffic.... The challenge to the IETF is to - encourage research and implementations of reliable multicast, and to - enable the needs of applications for reliable multicast to be met as - expeditiously as possible, while at the same time protecting the - Internet from the congestion disaster or collapse that could result - from the widespread use of applications with inappropriate reliable - multicast mechanisms." - - The list of technical criteria that must be addressed by RFCs on new - reliable multicast transport protocols include the following: "Is - there a congestion control mechanism? How well does it perform? When - does it fail? Note that congestion control mechanisms that operate - on the network more aggressively than TCP will face a great burden of - proof that they don't threaten network stability." - - It is reasonable to expect that these concerns about the effect of - new transport protocols on competing traffic will apply not only to - reliable multicast protocols, but to unreliable unicast, reliable - unicast, and unreliable multicast traffic as well. - -4.2. 
Application-level issues that affect congestion control - - The specific issue of a browser opening multiple connections to the - same destination has been addressed by RFC 2616 [RFC2616], which - states in Section 8.1.4 that "Clients that use persistent connections - SHOULD limit the number of simultaneous connections that they - maintain to a given server. A single-user client SHOULD NOT maintain - more than 2 connections with any server or proxy." - -4.3. New developments in the standards process - - The most obvious developments in the IETF that could affect the - evolution of congestion control are the development of integrated and - differentiated services [RFC2212, RFC2475] and of Explicit Congestion - Notification (ECN) [RFC2481]. However, other less dramatic - developments are likely to affect congestion control as well. - - - - -Floyd, ed. Best Current Practice [Page 6] - -RFC 2914 Congestion Control Principles September 2000 - - - One such effort is that to construct Endpoint Congestion Management - [BS00], to enable multiple concurrent flows from a sender to the same - receiver to share congestion control state. By allowing multiple - connections to the same destination to act as one flow in terms of - end-to-end congestion control, a Congestion Manager could allow - individual connections slow-starting to take advantage of previous - information about the congestion state of the end-to-end path. - Further, the use of a Congestion Manager could remove the congestion - control dangers of multiple flows being opened between the same - source/destination pair, and could perhaps be used to allow a browser - to open many simultaneous connections to the same destination. - -5. A description of congestion collapse - - This section discusses congestion collapse from undelivered packets - in some detail, and shows how unresponsive flows could contribute to - congestion collapse in the Internet. This section draws heavily on - material from [FF99]. 
- - Informally, congestion collapse occurs when an increase in the - network load results in a decrease in the useful work done by the - network. As discussed in Section 3, congestion collapse was first - reported in the mid 1980s [RFC896], and was largely due to TCP - connections unnecessarily retransmitting packets that were either in - transit or had already been received at the receiver. We call the - congestion collapse that results from the unnecessary retransmission - of packets classical congestion collapse. Classical congestion - collapse is a stable condition that can result in throughput that is - a small fraction of normal [RFC896]. Problems with classical - congestion collapse have generally been corrected by the timer - improvements and congestion control mechanisms in modern - implementations of TCP [Jacobson88]. - - A second form of potential congestion collapse occurs due to - undelivered packets. Congestion collapse from undelivered packets - arises when bandwidth is wasted by delivering packets through the - network that are dropped before reaching their ultimate destination. - This is probably the largest unresolved danger with respect to - congestion collapse in the Internet today. Different scenarios can - result in different degrees of congestion collapse, in terms of the - fraction of the congested links' bandwidth used for productive work. - The danger of congestion collapse from undelivered packets is due - primarily to the increasing deployment of open-loop applications not - using end-to-end congestion control. Even more destructive would be - best-effort applications that *increase* their sending rate in - response to an increased packet drop rate (e.g., automatically using - an increased level of FEC). - - - - -Floyd, ed. 
Best Current Practice [Page 7] - -RFC 2914 Congestion Control Principles September 2000 - - - Table 1 gives the results from a scenario with congestion collapse - from undelivered packets, where scarce bandwidth is wasted by packets - that never reach their destination. The simulation uses a scenario - with three TCP flows and one UDP flow competing over a congested 1.5 - Mbps link. The access links for all nodes are 10 Mbps, except that - the access link to the receiver of the UDP flow is 128 Kbps, only 9% - of the bandwidth of shared link. When the UDP source rate exceeds - 128 Kbps, most of the UDP packets will be dropped at the output port - to that final link. - - UDP - Arrival UDP TCP Total - Rate Goodput Goodput Goodput - -------------------------------------- - 0.7 0.7 98.5 99.2 - 1.8 1.7 97.3 99.1 - 2.6 2.6 96.0 98.6 - 5.3 5.2 92.7 97.9 - 8.8 8.4 87.1 95.5 - 10.5 8.4 84.8 93.2 - 13.1 8.4 81.4 89.8 - 17.5 8.4 77.3 85.7 - 26.3 8.4 64.5 72.8 - 52.6 8.4 38.1 46.4 - 58.4 8.4 32.8 41.2 - 65.7 8.4 28.5 36.8 - 75.1 8.4 19.7 28.1 - 87.6 8.4 11.3 19.7 - 105.2 8.4 3.4 11.8 - 131.5 8.4 2.4 10.7 - - Table 1. A simulation with three TCP flows and one UDP flow. - - Table 1 shows the UDP arrival rate from the sender, the UDP goodput - (defined as the bandwidth delivered to the receiver), the TCP goodput - (as delivered to the TCP receivers), and the aggregate goodput on the - congested 1.5 Mbps link. Each rate is given as a fraction of the - bandwidth of the congested link. As the UDP source rate increases, - the TCP goodput decreases roughly linearly, and the UDP goodput is - nearly constant. Thus, as the UDP flow increases its offered load, - its only effect is to hurt the TCP and aggregate goodput. On the - congested link, the UDP flow ultimately `wastes' the bandwidth that - could have been used by the TCP flow, and reduces the goodput in the - network as a whole down to a small fraction of the bandwidth of the - congested link. - - - - - - -Floyd, ed. 
Best Current Practice [Page 8] - -RFC 2914 Congestion Control Principles September 2000 - - - The simulations in Table 1 illustrate both unfairness and congestion - collapse. As [FF99] discusses, compatible congestion control is not - the only way to provide fairness; per-flow scheduling at the - congested routers is an alternative mechanism at the routers that - guarantees fairness. However, as discussed in [FF99], per-flow - scheduling can not be relied upon to prevent congestion collapse. - - There are only two alternatives for eliminating the danger of - congestion collapse from undelivered packets. The first alternative - for preventing congestion collapse from undelivered packets is the - use of effective end-to-end congestion control by the end nodes. - More specifically, the requirement would be that a flow avoid a - pattern of significant losses at links downstream from the first - congested link on the path. (Here, we would consider any link a - `congested link' if any flow is using bandwidth that would otherwise - be used by other traffic on the link.) Given that an end-node is - generally unable to distinguish between a path with one congested - link and a path with multiple congested links, the most reliable way - for a flow to avoid a pattern of significant losses at a downstream - congested link is for the flow to use end-to-end congestion control, - and reduce its sending rate in the presence of loss. - - A second alternative for preventing congestion collapse from - undelivered packets would be a guarantee by the network that packets - accepted at a congested link in the network will be delivered all the - way to the receiver [RFC2212, RFC2475]. 
We note that the choice - between the first alternative of end-to-end congestion control and - the second alternative of end-to-end bandwidth guarantees does not - have to be an either/or decision; congestion collapse can be - prevented by the use of effective end-to-end congestion by some of - the traffic, and the use of end-to-end bandwidth guarantees from the - network for the rest of the traffic. - -6. Forms of end-to-end congestion control - - This document has discussed concerns about congestion collapse and - about fairness with TCP for new forms of congestion control. This - does not mean, however, that concerns about congestion collapse and - fairness with TCP necessitate that all best-effort traffic deploy - congestion control based on TCP's Additive-Increase Multiplicative- - Decrease (AIMD) algorithm of reducing the sending rate in half in - response to each packet drop. This section separately discusses the - implications of these two concerns of congestion collapse and - fairness with TCP. - - - - - - - -Floyd, ed. Best Current Practice [Page 9] - -RFC 2914 Congestion Control Principles September 2000 - - -6.1. End-to-end congestion control for avoiding congestion collapse. - - The avoidance of congestion collapse from undelivered packets - requires that flows avoid a scenario of a high sending rate, multiple - congested links, and a persistent high packet drop rate at the - downstream link. Because congestion collapse from undelivered - packets consists of packets that waste valuable bandwidth only to be - dropped downstream, this form of congestion collapse is not possible - in an environment where each flow traverses only one congested link, - or where only a small number of packets are dropped at links - downstream of the first congested link. Thus, any form of congestion - control that successfully avoids a high sending rate in the presence - of a high packet drop rate should be sufficient to avoid congestion - collapse from undelivered packets. 
- - We would note that the addition of Explicit Congestion Notification - (ECN) to the IP architecture would not, in and of itself, remove the - danger of congestion collapse for best-effort traffic. ECN allows - routers to set a bit in packet headers as an indication of congestion - to the end-nodes, rather than being forced to rely on packet drops to - indicate congestion. However, with ECN, packet-marking would replace - packet-dropping only in times of moderate congestion. In particular, - when congestion is heavy, and a router's buffers overflow, the router - has no choice but to drop arriving packets. - -6.2. End-to-end congestion control for fairness with TCP. - - The concern expressed in [RFC2357] about fairness with TCP places a - significant though not crippling constraint on the range of viable - end-to-end congestion control mechanisms for best-effort traffic. An - environment with per-flow scheduling at all congested links would - isolate flows from each other, and eliminate the need for congestion - control mechanisms to be TCP-compatible. An environment with - differentiated services, where flows marked as belonging to a certain - diff-serv class would be scheduled in isolation from best-effort - traffic, could allow the emergence of an entire diff-serv class of - traffic where congestion control was not required to be TCP- - compatible. Similarly, a pricing-controlled environment, or a diff- - serv class with its own pricing paradigm, could supercede the concern - about fairness with TCP. However, for the current Internet - environment, where other best-effort traffic could compete in a FIFO - queue with TCP traffic, the absence of fairness with TCP could lead - to one flow `starving out' another flow in a time of high congestion, - as was illustrated in Table 1 above. - - However, the list of TCP-compatible congestion control procedures is - not limited to AIMD with the same increase/ decrease parameters as - TCP. 
Other TCP-compatible congestion control procedures include - - - -Floyd, ed. Best Current Practice [Page 10] - -RFC 2914 Congestion Control Principles September 2000 - - - rate-based variants of AIMD; AIMD with different sets of - increase/decrease parameters that give the same steady-state - behavior; equation-based congestion control where the sender adjusts - its sending rate in response to information about the long-term - packet drop rate; layered multicast where receivers subscribe and - unsubscribe from layered multicast groups; and possibly other forms - that we have not yet begun to consider. - -7. Acknowledgements - - Much of this document draws directly on previous RFCs addressing - end-to-end congestion control. This attempts to be a summary of - ideas that have been discussed for many years, and by many people. - In particular, acknowledgement is due to the members of the End-to- - End Research Group, the Reliable Multicast Research Group, and the - Transport Area Directorate. This document has also benefited from - discussion and feedback from the Transport Area Working Group. - Particular thanks are due to Mark Allman for feedback on an earlier - version of this document. - -8. References - - [BS00] Balakrishnan H. and S. Seshan, "The Congestion Manager", - Work in Progress. - - [DMKM00] Dawkins, S., Montenegro, G., Kojo, M. and V. Magret, - "End-to-end Performance Implications of Slow Links", - Work in Progress. - - [FF99] Floyd, S. and K. Fall, "Promoting the Use of End-to-End - Congestion Control in the Internet", IEEE/ACM - Transactions on Networking, August 1999. URL - http://www.aciri.org/floyd/end2end-paper.html - - [HPF00] Handley, M., Padhye, J. and S. Floyd, "TCP Congestion - Window Validation", RFC 2861, June 2000. - - [Jacobson88] V. Jacobson, Congestion Avoidance and Control, ACM - SIGCOMM '88, August 1988. - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. 
- - [RFC896] Nagle, J., "Congestion Control in IP/TCP", RFC 896, - January 1984. - - [RFC1122] Braden, R., Ed., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - - -Floyd, ed. Best Current Practice [Page 11] - -RFC 2914 Congestion Control Principles September 2000 - - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2212] Shenker, S., Partridge, C. and R. Guerin, "Specification - of Guaranteed Quality of Service", RFC 2212, September - 1997. - - [RFC2309] Braden, R., Clark, D., Crowcroft, J., Davie, B., - Deering, S., Estrin, D., Floyd, S., Jacobson, V., - Minshall, G., Partridge, C., Peterson, L., Ramakrishnan, - K.K., Shenker, S., Wroclawski, J., and L. Zhang, - "Recommendations on Queue Management and Congestion - Avoidance in the Internet", RFC 2309, April 1998. - - [RFC2357] Mankin, A., Romanow, A., Bradner, S. and V. Paxson, - "IETF Criteria for Evaluating Reliable Multicast - Transport and Application Protocols", RFC 2357, June - 1998. - - [RFC2414] Allman, M., Floyd, S. and C. Partridge, "Increasing - TCP's Initial Window", RFC 2414, September 1998. - - [RFC2475] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. - and W. Weiss, "An Architecture for Differentiated - Services", RFC 2475, December 1998. - - [RFC2481] Ramakrishnan K. and S. Floyd, "A Proposal to add - Explicit Congestion Notification (ECN) to IP", RFC 2481, - January 1999. - - [RFC2525] Paxson, V., Allman, M., Dawson, S., Fenner, W., Griner, - J., Heavens, I., Lahey, K., Semke, J. and B. Volz, - "Known TCP Implementation Problems", RFC 2525, March - 1999. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2582] Floyd, S. and T. 
Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., - Masinter, L., Leach, P. and T. Berners-Lee, "Hypertext - Transfer Protocol -- HTTP/1.1", RFC 2616, June 1999. - - - - -Floyd, ed. Best Current Practice [Page 12] - -RFC 2914 Congestion Control Principles September 2000 - - - [SCWA99] S. Savage, N. Cardwell, D. Wetherall, and T. Anderson, - TCP Congestion Control with a Misbehaving Receiver, ACM - Computer Communications Review, October 1999. - - [TCPB98] Hari Balakrishnan, Venkata N. Padmanabhan, Srinivasan - Seshan, Mark Stemm, and Randy H. Katz, TCP Behavior of a - Busy Internet Server: Analysis and Improvements, IEEE - Infocom, March 1998. Available from: - "http://www.cs.berkeley.edu/~hari/papers/infocom98.ps.gz". - - [TCPF98] Dong Lin and H.T. Kung, TCP Fast Recovery Strategies: - Analysis and Improvements, IEEE Infocom, March 1998. - Available from: - "http://www.eecs.harvard.edu/networking/papers/infocom- - tcp-final-198.pdf". - -9. TCP-Specific issues - - In this section we discuss some of the particulars of TCP congestion - control, to illustrate a realization of the congestion control - principles, including some of the details that arise when - incorporating them into a production transport protocol. - -9.1. Slow-start. - - The TCP sender can not open a new connection by sending a large burst - of data (e.g., a receiver's advertised window) all at once. The TCP - sender is limited by a small initial value for the congestion window. - During slow-start, the TCP sender can increase its sending rate by at - most a factor of two in one roundtrip time. Slow-start ends when - congestion is detected, or when the sender's congestion window is - greater than the slow-start threshold ssthresh. 
- - An issue that potentially affects global congestion control, and - therefore has been explicitly addressed in the standards process, - includes an increase in the value of the initial window - [RFC2414,RFC2581]. - - Issues that have not been addressed in the standards process, and are - generally considered not to require standardization, include such - issues as the use (or non-use) of rate-based pacing, and mechanisms - for ending slow-start early, before the congestion window reaches - ssthresh. Such mechanisms result in slow-start behavior that is as - conservative or more conservative than standard TCP. - - - - - - - -Floyd, ed. Best Current Practice [Page 13] - -RFC 2914 Congestion Control Principles September 2000 - - -9.2. Additive Increase, Multiplicative Decrease. - - In the absence of congestion, the TCP sender increases its congestion - window by at most one packet per roundtrip time. In response to a - congestion indication, the TCP sender decreases its congestion window - by half. (More precisely, the new congestion window is half of the - minimum of the congestion window and the receiver's advertised - window.) - - An issue that potentially affects global congestion control, and - therefore would be likely to be explicitly addressed in the standards - process, would include a proposed addition of congestion control for - the return stream of `pure acks'. - - An issue that has not been addressed in the standards process, and is - generally not considered to require standardization, would be a - change to the congestion window to apply as an upper bound on the - number of bytes presumed to be in the pipe, instead of applying as a - sliding window starting from the cumulative acknowledgement. 
- (Clearly, the receiver's advertised window applies as a sliding - window starting from the cumulative acknowledgement field, because - packets received above the cumulative acknowledgement field are held - in TCP's receive buffer, and have not been delivered to the - application. However, the congestion window applies to the number of - packets outstanding in the pipe, and does not necessarily have to - include packets that have been received out-of-order by the TCP - receiver.) - -9.3. Retransmit timers. - - The TCP sender sets a retransmit timer to infer that a packet has - been dropped in the network. When the retransmit timer expires, the - sender infers that a packet has been lost, sets ssthresh to half of - the current window, and goes into slow-start, retransmitting the lost - packet. If the retransmit timer expires because no acknowledgement - has been received for a retransmitted packet, the retransmit timer is - also "backed-off", doubling the value of the next retransmit timeout - interval. - - An issue that potentially affects global congestion control, and - therefore would be likely to be explicitly addressed in the standards - process, might include a modified mechanism for setting the - retransmit timer that could significantly increase the number of - retransmit timers that expire prematurely, when the acknowledgement - has not yet arrived at the sender, but in fact no packets have been - dropped. This could be of concern to the Internet standards process - - - - - -Floyd, ed. Best Current Practice [Page 14] - -RFC 2914 Congestion Control Principles September 2000 - - - because retransmit timers that expire prematurely could lead to an - increase in the number of packets unnecessarily transmitted on a - congested link. - -9.4. Fast Retransmit and Fast Recovery. - - After seeing three duplicate acknowledgements, the TCP sender infers - a packet loss. 
The TCP sender sets ssthresh to half of the current - window, reduces the congestion window to at most half of the previous - window, and retransmits the lost packet. - - An issue that potentially affects global congestion control, and - therefore would be likely to be explicitly addressed in the standards - process, might include a proposal (if there was one) for inferring a - lost packet after only one or two duplicate acknowledgements. If - poorly designed, such a proposal could lead to an increase in the - number of packets unnecessarily transmitted on a congested path. - - An issue that has not been addressed in the standards process, and - would not be expected to require standardization, would be a proposal - to send a "new" or presumed-lost packet in response to a duplicate or - partial acknowledgement, if allowed by the congestion window. An - example of this would be sending a new packet in response to a single - duplicate acknowledgement, to keep the `ack clock' going in case no - further acknowledgements would have arrived. Such a proposal is an - example of a beneficial change that does not involve interoperability - and does not affect global congestion control, and that therefore - could be implemented by vendors without requiring the intervention of - the IETF standards process. (This issue has in fact been addressed - in [DMKM00], which suggests that "researchers may wish to experiment - with injecting new traffic into the network when duplicate - acknowledgements are being received, as described in [TCPB98] and - [TCPF98]." - -9.5. Other aspects of TCP congestion control. - - Other aspects of TCP congestion control that have not been discussed - in any of the sections above include TCP's recovery from an idle or - application-limited period [HPF00]. - -10. Security Considerations - - This document has been about the risks associated with congestion - control, or with the absence of congestion control. 
Section 3.2 - discusses the potentials for unfairness if competing flows don't use - compatible congestion control mechanisms, and Section 5 considers the - dangers of congestion collapse if flows don't use end-to-end - congestion control. - - - -Floyd, ed. Best Current Practice [Page 15] - -RFC 2914 Congestion Control Principles September 2000 - - - Because this document does not propose any specific congestion - control mechanisms, it is also not necessary to present specific - security measures associated with congestion control. However, we - would note that there are a range of security considerations - associated with congestion control that should be considered in IETF - documents. - - For example, individual congestion control mechanisms should be as - robust as possible to the attempts of individual end-nodes to subvert - end-to-end congestion control [SCWA99]. This is a particular concern - in multicast congestion control, because of the far-reaching - distribution of the traffic and the greater opportunities for - individual receivers to fail to report congestion. - - RFC 2309 also discussed the potential dangers to the Internet of - unresponsive flows, that is, flows that don't reduce their sending - rate in the presence of congestion, and describes the need for - mechanisms in the network to deal with flows that are unresponsive to - congestion notification. We would note that there is still a need - for research, engineering, measurement, and deployment in these - areas. - - Because the Internet aggregates very large numbers of flows, the risk - to the whole infrastructure of subverting the congestion control of a - few individual flows is limited. Rather, the risk to the - infrastructure would come from the widespread deployment of many - end-nodes subverting end-to-end congestion control. 
- -AUTHOR'S ADDRESS - - Sally Floyd - AT&T Center for Internet Research at ICSI (ACIRI) - - Phone: +1 (510) 642-4274 x189 - EMail: floyd@aciri.org - URL: http://www.aciri.org/floyd/ - - - - - - - - - - - - - - - -Floyd, ed. Best Current Practice [Page 16] - -RFC 2914 Congestion Control Principles September 2000 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Floyd, ed. 
Best Current Practice [Page 17] - diff --git a/kernel/picotcp/RFC/rfc2923.txt b/kernel/picotcp/RFC/rfc2923.txt deleted file mode 100644 index 2ac3f3a..0000000 --- a/kernel/picotcp/RFC/rfc2923.txt +++ /dev/null @@ -1,843 +0,0 @@ - - - - - - -Network Working Group K. Lahey -Request for Comments: 2923 dotRocket, Inc. -Category: Informational September 2000 - - - TCP Problems with Path MTU Discovery - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. - -Abstract - - This memo catalogs several known Transmission Control Protocol (TCP) - implementation problems dealing with Path Maximum Transmission Unit - Discovery (PMTUD), including the long-standing black hole problem, - stretch acknowlegements (ACKs) due to confusion between Maximum - Segment Size (MSS) and segment size, and MSS advertisement based on - PMTU. - -1. Introduction - - This memo catalogs several known TCP implementation problems dealing - with Path MTU Discovery [RFC1191], including the long-standing black - hole problem, stretch ACKs due to confusion between MSS and segment - size, and MSS advertisement based on PMTU. The goal in doing so is - to improve conditions in the existing Internet by enhancing the - quality of current TCP/IP implementations. - - While Path MTU Discovery (PMTUD) can be used with any upper-layer - protocol, it is most commonly used by TCP; this document does not - attempt to treat problems encountered by other upper-layer protocols. - Path MTU Discovery for IPv6 [RFC1981] treats only IPv6-dependent - issues, but not the TCP issues brought up in this document. - - Each problem is defined as follows: - - Name of Problem - The name associated with the problem. In this memo, the name is - given as a subsection heading. 
- - - - - -Lahey Informational [Page 1] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - Classification - One or more problem categories for which the problem is - classified: "congestion control", "performance", "reliability", - "non-interoperation -- connectivity failure". - - Description - A definition of the problem, succinct but including necessary - background material. - - Significance - A brief summary of the sorts of environments for which the problem - is significant. - - Implications - Why the problem is viewed as a problem. - - Relevant RFCs - The RFCs defining the TCP specification with which the problem - conflicts. These RFCs often qualify behavior using terms such as - MUST, SHOULD, MAY, and others written capitalized. See RFC 2119 - for the exact interpretation of these terms. - - Trace file demonstrating the problem - One or more ASCII trace files demonstrating the problem, if - applicable. - - Trace file demonstrating correct behavior - One or more examples of how correct behavior appears in a trace, - if applicable. - - References - References that further discuss the problem. - - How to detect - How to test an implementation to see if it exhibits the problem. - This discussion may include difficulties and subtleties associated - with causing the problem to manifest itself, and with interpreting - traces to detect the presence of the problem (if applicable). - - How to fix - For known causes of the problem, how to correct the - implementation. - - - - - - - - - -Lahey Informational [Page 2] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - -2. Known implementation problems - -2.1. - - Name of Problem - Black Hole Detection - - Classification - Non-interoperation -- connectivity failure - - Description - A host performs Path MTU Discovery by sending out as large a - packet as possible, with the Don't Fragment (DF) bit set in the IP - header. 
If the packet is too large for a router to forward on to - a particular link, the router must send an ICMP Destination - Unreachable -- Fragmentation Needed message to the source address. - The host then adjusts the packet size based on the ICMP message. - - As was pointed out in [RFC1435], routers don't always do this - correctly -- many routers fail to send the ICMP messages, for a - variety of reasons ranging from kernel bugs to configuration - problems. Firewalls are often misconfigured to suppress all ICMP - messages. IPsec [RFC2401] and IP-in-IP [RFC2003] tunnels - shouldn't cause these sorts of problems, if the implementations - follow the advice in the appropriate documents. - - PMTUD, as documented in [RFC1191], fails when the appropriate ICMP - messages are not received by the originating host. The upper- - layer protocol continues to try to send large packets and, without - the ICMP messages, never discovers that it needs to reduce the - size of those packets. Its packets are disappearing into a PMTUD - black hole. - - Significance - When PMTUD fails due to the lack of ICMP messages, TCP will also - completely fail under some conditions. - - Implications - This failure is especially difficult to debug, as pings and some - interactive TCP connections to the destination host work. Bulk - transfers fail with the first large packet and the connection - eventually times out. - - These situations can almost always be blamed on a misconfiguration - within the network, which should be corrected. However it seems - inappropriate for some TCP implementations to suffer - - - - - -Lahey Informational [Page 3] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - interoperability failures over paths which do not affect other TCP - implementations (i.e. those without PMTUD). This creates a market - disincentive for deploying TCP implementation with PMTUD enabled. - - Relevant RFCs - RFC 1191 describes Path MTU Discovery. 
RFC 1435 provides an early - description of these sorts of problems. - - Trace file demonstrating the problem - Made using tcpdump [Jacobson89] recording at an intermediate host. - - 20:12:11.951321 A > B: S 1748427200:1748427200(0) - win 49152 - 20:12:11.951829 B > A: S 1001927984:1001927984(0) - ack 1748427201 win 16384 - 20:12:11.955230 A > B: . ack 1 win 49152 (DF) - 20:12:11.959099 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:12:13.139074 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:12:16.188685 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:12:22.290483 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:12:34.491856 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:12:58.896405 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:13:47.703184 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:14:52.780640 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:15:57.856037 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:17:02.932431 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:18:08.009337 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:19:13.090521 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:20:18.168066 A > B: . 1:1461(1460) ack 1 win 49152 (DF) - 20:21:23.242761 A > B: R 1461:1461(0) ack 1 win 49152 (DF) - - The short SYN packet has no trouble traversing the network, due to - its small size. Similarly, ICMP echo packets used to diagnose - connectivity problems will succeed. - - Large data packets fail to traverse the network. Eventually the - connection times out. This can be especially confusing when the - application starts out with a very small write, which succeeds, - following up with many large writes, which then fail. - - Trace file demonstrating correct behavior - - Made using tcpdump recording at an intermediate host. 
- - 16:48:42.659115 A > B: S 271394446:271394446(0) - win 8192 (DF) - 16:48:42.672279 B > A: S 2837734676:2837734676(0) - ack 271394447 win 16384 - - - -Lahey Informational [Page 4] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - 16:48:42.676890 A > B: . ack 1 win 8760 (DF) - 16:48:42.870574 A > B: . 1:1461(1460) ack 1 win 8760 (DF) - 16:48:42.871799 A > B: . 1461:2921(1460) ack 1 win 8760 (DF) - 16:48:45.786814 A > B: . 1:1461(1460) ack 1 win 8760 (DF) - 16:48:51.794676 A > B: . 1:1461(1460) ack 1 win 8760 (DF) - 16:49:03.808912 A > B: . 1:537(536) ack 1 win 8760 - 16:49:04.016476 B > A: . ack 537 win 16384 - 16:49:04.021245 A > B: . 537:1073(536) ack 1 win 8760 - 16:49:04.021697 A > B: . 1073:1609(536) ack 1 win 8760 - 16:49:04.120694 B > A: . ack 1609 win 16384 - 16:49:04.126142 A > B: . 1609:2145(536) ack 1 win 8760 - - In this case, the sender sees four packets fail to traverse the - network (using a two-packet initial send window) and turns off - PMTUD. All subsequent packets have the DF flag turned off, and - the size set to the default value of 536 [RFC1122]. - - References - This problem has been discussed extensively on the tcp-impl - mailing list; the name "black hole" has been in use for many - years. - - How to detect - This shows up as a TCP connection which hangs (fails to make - progress) until closed by timeout (this often manifests itself as - a connection that connects and starts to transfer, then eventually - terminates after 15 minutes with zero bytes transfered). This is - particularly annoying with an application like ftp, which will - work perfectly while it uses small packets for control - information, and then fail on bulk transfers. - - A series of ICMP echo packets will show that the two end hosts are - still capable of passing packets, a series of MTU-sized ICMP echo - packets will show some fragmentation, and a series of MTU-sized - ICMP echo packets with DF set will fail. 
This can be confusing - for network engineers trying to diagnose the problem. - - There are several traceroute implementations that do PMTUD, and - can demonstrate the problem. - - How to fix - TCP should notice that the connection is timing out. After - several timeouts, TCP should attempt to send smaller packets, - perhaps turning off the DF flag for each packet. If this - succeeds, it should continue to turn off PMTUD for the connection - for some reasonable period of time, after which it should probe - again to try to determine if the path has changed. - - - - -Lahey Informational [Page 5] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - Note that, under IPv6, there is no DF bit -- it is implicitly on - at all times. Fragmentation is not allowed in routers, only at - the originating host. Fortunately, the minimum supported MTU for - IPv6 is 1280 octets, which is significantly larger than the 68 - octet minimum in IPv4. This should make it more reasonable for - IPv6 TCP implementations to fall back to 1280 octet packets, when - IPv4 implementations will probably have to turn off DF to respond - to black hole detection. - - Ideally, the ICMP black holes should be fixed when they are found. - - If hosts start to implement black hole detection, it may be that - these problems will go unnoticed and unfixed. This is especially - unfortunate, since detection can take several seconds each time, - and these delays could result in a significant, hidden degradation - of performance. Hosts that implement black hole detection should - probably log detected black holes, so that they can be fixed. - -2.2. - - Name of Problem - Stretch ACK due to PMTUD - - Classification - Congestion Control / Performance - - Description - When a naively implemented TCP stack communicates with a PMTUD - equipped stack, it will try to generate an ACK for every second - full-sized segment. 
If it determines the full-sized segment based - on the advertised MSS, this can degrade badly in the face of - PMTUD. - - The PMTU can wind up being a small fraction of the advertised MSS; - in this case, an ACK would be generated only very infrequently. - - Significance - - Stretch ACKs have a variety of unfortunate effects, more fully - outlined in [RFC2525]. Most of these have to do with encouraging - a more bursty connection, due to the infrequent arrival of ACKs. - They can also impede congestion window growth. - - Implications - - The complete implications of stretch ACKs are outlined in - [RFC2525]. - - - - -Lahey Informational [Page 6] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - Relevant RFCs - RFC 1122 outlines the requirements for frequency of ACK - generation. [RFC2581] expands on this and clarifies that delayed - ACK is a SHOULD, not a MUST. - - Trace file demonstrating it - - Made using tcpdump recording at an intermediate host. The - timestamp options from all but the first two packets have been - removed for clarity. - - 18:16:52.976657 A > B: S 3183102292:3183102292(0) win 16384 - (DF) - 18:16:52.979580 B > A: S 2022212745:2022212745(0) ack 3183102293 win - 49152 (DF) - 18:16:52.979738 A > B: . ack 1 win 17248 (DF) - 18:16:52.982473 A > B: . 1:4301(4300) ack 1 win 17248 (DF) - 18:16:52.982557 C > A: icmp: B unreachable - - need to frag (mtu 1500)! (DF) - 18:16:52.985839 B > A: . ack 1 win 32768 (DF) - 18:16:54.129928 A > B: . 1:1449(1448) ack 1 win 17248 (DF) - . - . - . - 18:16:58.507078 A > B: . 1463941:1465389(1448) ack 1 win 17248 (DF) - 18:16:58.507200 A > B: . 1465389:1466837(1448) ack 1 win 17248 (DF) - 18:16:58.507326 A > B: . 1466837:1468285(1448) ack 1 win 17248 (DF) - 18:16:58.507439 A > B: . 1468285:1469733(1448) ack 1 win 17248 (DF) - 18:16:58.524763 B > A: . ack 1452357 win 32768 (DF) - 18:16:58.524986 B > A: . ack 1461045 win 32768 (DF) - 18:16:58.525138 A > B: . 
1469733:1471181(1448) ack 1 win 17248 (DF) - 18:16:58.525268 A > B: . 1471181:1472629(1448) ack 1 win 17248 (DF) - 18:16:58.525393 A > B: . 1472629:1474077(1448) ack 1 win 17248 (DF) - 18:16:58.525516 A > B: . 1474077:1475525(1448) ack 1 win 17248 (DF) - 18:16:58.525642 A > B: . 1475525:1476973(1448) ack 1 win 17248 (DF) - 18:16:58.525766 A > B: . 1476973:1478421(1448) ack 1 win 17248 (DF) - 18:16:58.526063 A > B: . 1478421:1479869(1448) ack 1 win 17248 (DF) - 18:16:58.526187 A > B: . 1479869:1481317(1448) ack 1 win 17248 (DF) - 18:16:58.526310 A > B: . 1481317:1482765(1448) ack 1 win 17248 (DF) - 18:16:58.526432 A > B: . 1482765:1484213(1448) ack 1 win 17248 (DF) - 18:16:58.526561 A > B: . 1484213:1485661(1448) ack 1 win 17248 (DF) - 18:16:58.526671 A > B: . 1485661:1487109(1448) ack 1 win 17248 (DF) - 18:16:58.537944 B > A: . ack 1478421 win 32768 (DF) - 18:16:58.538328 A > B: . 1487109:1488557(1448) ack 1 win 17248 (DF) - - - - - - - -Lahey Informational [Page 7] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - Note that the interval between ACKs is significantly larger than two - times the segment size; it works out to be almost exactly two times - the advertised MSS. This transfer was long enough that it could be - verified that the stretch ACK was not the result of lost ACK packets. - - Trace file demonstrating correct behavior - - Made using tcpdump recording at an intermediate host. The timestamp - options from all but the first two packets have been removed for - clarity. - - 18:13:32.287965 A > B: S 2972697496:2972697496(0) - win 16384 (DF) - 18:13:32.290785 B > A: S 245639054:245639054(0) - ack 2972697497 win 34496 (DF) - 18:13:32.290941 A > B: . ack 1 win 17248 (DF) - 18:13:32.293774 A > B: . 1:4313(4312) ack 1 win 17248 (DF) - 18:13:32.293856 C > A: icmp: B unreachable - - need to frag (mtu 1500)! (DF) - 18:13:33.637338 A > B: . 1:1461(1460) ack 1 win 17248 (DF) - . - . - . - 18:13:35.561691 A > B: . 
1514021:1515481(1460) ack 1 win 17248 (DF) - 18:13:35.561814 A > B: . 1515481:1516941(1460) ack 1 win 17248 (DF) - 18:13:35.561938 A > B: . 1516941:1518401(1460) ack 1 win 17248 (DF) - 18:13:35.562059 A > B: . 1518401:1519861(1460) ack 1 win 17248 (DF) - 18:13:35.562174 A > B: . 1519861:1521321(1460) ack 1 win 17248 (DF) - 18:13:35.564008 B > A: . ack 1481901 win 64680 (DF) - 18:13:35.564383 A > B: . 1521321:1522781(1460) ack 1 win 17248 (DF) - 18:13:35.564499 A > B: . 1522781:1524241(1460) ack 1 win 17248 (DF) - 18:13:35.615576 B > A: . ack 1484821 win 64680 (DF) - 18:13:35.615646 B > A: . ack 1487741 win 64680 (DF) - 18:13:35.615716 B > A: . ack 1490661 win 64680 (DF) - 18:13:35.615784 B > A: . ack 1493581 win 64680 (DF) - 18:13:35.615856 B > A: . ack 1496501 win 64680 (DF) - 18:13:35.615952 A > B: . 1524241:1525701(1460) ack 1 win 17248 (DF) - 18:13:35.615966 B > A: . ack 1499421 win 64680 (DF) - 18:13:35.616088 A > B: . 1525701:1527161(1460) ack 1 win 17248 (DF) - 18:13:35.616105 B > A: . ack 1502341 win 64680 (DF) - 18:13:35.616211 A > B: . 1527161:1528621(1460) ack 1 win 17248 (DF) - 18:13:35.616228 B > A: . ack 1505261 win 64680 (DF) - 18:13:35.616327 A > B: . 1528621:1530081(1460) ack 1 win 17248 (DF) - 18:13:35.616349 B > A: . ack 1508181 win 64680 (DF) - 18:13:35.616448 A > B: . 1530081:1531541(1460) ack 1 win 17248 (DF) - 18:13:35.616565 A > B: . 1531541:1533001(1460) ack 1 win 17248 (DF) - 18:13:35.616891 A > B: . 1533001:1534461(1460) ack 1 win 17248 (DF) - - - - -Lahey Informational [Page 8] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - In this trace, an ACK is generated for every two segments that - arrive. (The segment size is slightly larger in this trace, even - though the source hosts are the same, because of the lack of - timestamp options in this trace.) - - How to detect - This condition can be observed in a packet trace when the advertised - MSS is significantly larger than the actual PMTU of a connection. 
- - How to fix Several solutions for this problem have been proposed: - - A simple solution is to ACK every other packet, regardless of size. - This has the drawback of generating large numbers of ACKs in the face - of lots of very small packets; this shows up with applications like - the X Window System. - - A slightly more complex solution would monitor the size of incoming - segments and try to determine what segment size the sender is using. - This requires slightly more state in the receiver, but has the - advantage of making receiver silly window syndrome avoidance - computations more accurate [RFC813]. - -2.3. - - Name of Problem - Determining MSS from PMTU - - Classification - Performance - - Description - The MSS advertised at the start of a connection should be based on - the MTU of the interfaces on the system. (For efficiency and other - reasons this may not be the largest MSS possible.) Some systems use - PMTUD determined values to determine the MSS to advertise. - - This results in an advertised MSS that is smaller than the largest - MTU the system can receive. - - Significance - The advertised MSS is an indication to the remote system about the - largest TCP segment that can be received [RFC879]. If this value is - too small, the remote system will be forced to use a smaller segment - size when sending, purely because the local system found a particular - PMTU earlier. - - - - - - -Lahey Informational [Page 9] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - Given the asymmetric nature of many routes on the Internet - [Paxson97], it seems entirely possible that the return PMTU is - different from the sending PMTU. Limiting the segment size in this - way can reduce performance and frustrate the PMTUD algorithm. - - Even if the route was symmetric, setting this artificially lowered - limit on segment size will make it impossible to probe later to - determine if the PMTU has changed. 
- - Implications - The whole point of PMTUD is to send as large a segment as possible. - If long-running connections cannot successfully probe for larger - PMTU, then potential performance gains will be impossible to realize. - This destroys the whole point of PMTUD. - - Relevant RFCs RFC 1191. [RFC879] provides a complete discussion of - MSS calculations and appropriate values. Note that this practice - does not violate any of the specifications in these RFCs. - - Trace file demonstrating it - This trace was made using tcpdump running on an intermediate host. - Host A initiates two separate consecutive connections, A1 and A2, to - host B. Router C is the location of the MTU bottleneck. As usual, - TCP options are removed from all non-SYN packets. - - 22:33:32.305912 A1 > B: S 1523306220:1523306220(0) - win 8760 (DF) - 22:33:32.306518 B > A1: S 729966260:729966260(0) - ack 1523306221 win 16384 - 22:33:32.310307 A1 > B: . ack 1 win 8760 (DF) - 22:33:32.323496 A1 > B: P 1:1461(1460) ack 1 win 8760 (DF) - 22:33:32.323569 C > A1: icmp: 129.99.238.5 unreachable - - need to frag (mtu 1024) (DF) (ttl 255, id 20666) - 22:33:32.783694 A1 > B: . 1:985(984) ack 1 win 8856 (DF) - 22:33:32.840817 B > A1: . ack 985 win 16384 - 22:33:32.845651 A1 > B: . 1461:2445(984) ack 1 win 8856 (DF) - 22:33:32.846094 B > A1: . ack 985 win 16384 - 22:33:33.724392 A1 > B: . 985:1969(984) ack 1 win 8856 (DF) - 22:33:33.724893 B > A1: . ack 2445 win 14924 - 22:33:33.728591 A1 > B: . 2445:2921(476) ack 1 win 8856 (DF) - 22:33:33.729161 A1 > B: . ack 1 win 8856 (DF) - 22:33:33.840758 B > A1: . ack 2921 win 16384 - - [...] - - 22:33:34.238659 A1 > B: F 7301:8193(892) ack 1 win 8856 (DF) - 22:33:34.239036 B > A1: . ack 8194 win 15492 - 22:33:34.239303 B > A1: F 1:1(0) ack 8194 win 16384 - - - -Lahey Informational [Page 10] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - 22:33:34.242971 A1 > B: . 
ack 2 win 8856 (DF) - 22:33:34.454218 A2 > B: S 1523591299:1523591299(0) - win 8856 (DF) - 22:33:34.454617 B > A2: S 732408874:732408874(0) - ack 1523591300 win 16384 - 22:33:34.457516 A2 > B: . ack 1 win 8856 (DF) - 22:33:34.470683 A2 > B: P 1:985(984) ack 1 win 8856 (DF) - 22:33:34.471144 B > A2: . ack 985 win 16384 - 22:33:34.476554 A2 > B: . 985:1969(984) ack 1 win 8856 (DF) - 22:33:34.477580 A2 > B: P 1969:2953(984) ack 1 win 8856 (DF) - - [...] - - Notice that the SYN packet for session A2 specifies an MSS of 984. - - Trace file demonstrating correct behavior - - As before, this trace was made using tcpdump running on an - intermediate host. Host A initiates two separate consecutive - connections, A1 and A2, to host B. Router C is the location of the - MTU bottleneck. As usual, TCP options are removed from all non-SYN - packets. - - 22:36:58.828602 A1 > B: S 3402991286:3402991286(0) win 32768 - (DF) - 22:36:58.844040 B > A1: S 946999880:946999880(0) - ack 3402991287 win 16384 - - 22:36:58.848058 A1 > B: . ack 1 win 32768 (DF) - 22:36:58.851514 A1 > B: P 1:1025(1024) ack 1 win 32768 (DF) - 22:36:58.851584 C > A1: icmp: 129.99.238.5 unreachable - - need to frag (mtu 1024) (DF) - 22:36:58.855885 A1 > B: . 1:969(968) ack 1 win 32768 (DF) - 22:36:58.856378 A1 > B: . 969:985(16) ack 1 win 32768 (DF) - 22:36:59.036309 B > A1: . ack 985 win 16384 - 22:36:59.039255 A1 > B: FP 985:1025(40) ack 1 win 32768 (DF) - 22:36:59.039623 B > A1: . ack 1026 win 16344 - 22:36:59.039828 B > A1: F 1:1(0) ack 1026 win 16384 - 22:36:59.043037 A1 > B: . ack 2 win 32768 (DF) - 22:37:01.436032 A2 > B: S 3404812097:3404812097(0) win 32768 - (DF) - 22:37:01.436424 B > A2: S 949814769:949814769(0) - ack 3404812098 win 16384 - - 22:37:01.440147 A2 > B: . ack 1 win 32768 (DF) - 22:37:01.442736 A2 > B: . 
1:969(968) ack 1 win 32768 (DF) - - - -Lahey Informational [Page 11] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - - 22:37:01.442894 A2 > B: P 969:985(16) ack 1 win 32768 (DF) - 22:37:01.443283 B > A2: . ack 985 win 16384 - 22:37:01.446068 A2 > B: P 985:1025(40) ack 1 win 32768 (DF) - 22:37:01.446519 B > A2: . ack 1025 win 16384 - 22:37:01.448465 A2 > B: F 1025:1025(0) ack 1 win 32768 (DF) - 22:37:01.448837 B > A2: . ack 1026 win 16384 - 22:37:01.449007 B > A2: F 1:1(0) ack 1026 win 16384 - 22:37:01.452201 A2 > B: . ack 2 win 32768 (DF) - - Note that the same MSS was used for both session A1 and session A2. - - How to detect - This can be detected using a packet trace of two separate - connections; the first should invoke PMTUD; the second should start - soon enough after the first that the PMTU value does not time out. - - How to fix - The MSS should be determined based on the MTUs of the interfaces on - the system, as outlined in [RFC1122] and [RFC1191]. - -3. Security Considerations - - The one security concern raised by this memo is that ICMP black holes - are often caused by over-zealous security administrators who block - all ICMP messages. It is vitally important that those who design and - deploy security systems understand the impact of strict filtering on - upper-layer protocols. The safest web site in the world is worthless - if most TCP implementations cannot transfer data from it. It would - be far nicer to have all of the black holes fixed rather than fixing - all of the TCP implementations. - -4. Acknowledgements - - Thanks to Mark Allman, Vern Paxson, and Jamshid Mahdavi for generous - help reviewing the document, and to Matt Mathis for early suggestions - of various mechanisms that can cause PMTUD black holes, as well as - review. The structure for describing TCP problems, and the early - description of that structure is from [RFC2525]. 
Special thanks to - Amy Bock, who helped perform the PMTUD tests which discovered these - bugs. - - - - - - - - - - - -Lahey Informational [Page 12] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - -5. References - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC1122] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC813] Clark, D., "Window and Acknowledgement Strategy in TCP", - RFC 813, July 1982. - - [Jacobson89] V. Jacobson, C. Leres, and S. McCanne, tcpdump, June - 1989, ftp.ee.lbl.gov - - [RFC1435] Knowles, S., "IESG Advice from Experience with Path MTU - Discovery", RFC 1435, March 1993. - - [RFC1191] Mogul, J. and S. Deering, "Path MTU discovery", RFC - 1191, November 1990. - - [RFC1981] McCann, J., Deering, S. and J. Mogul, "Path MTU - Discovery for IP version 6", RFC 1981, August 1996. - - [Paxson96] V. Paxson, "End-to-End Routing Behavior in the - Internet", IEEE/ACM Transactions on Networking (5), - pp.~601-615, Oct. 1997. - - [RFC2525] Paxon, V., Allman, M., Dawson, S., Fenner, W., Griner, - J., Heavens, I., Lahey, K., Semke, I. and B. Volz, - "Known TCP Implementation Problems", RFC 2525, March - 1999. - - [RFC879] Postel, J., "The TCP Maximum Segment Size and Related - Topics", RFC 879, November 1983. - - [RFC2001] Stevens, W., "TCP Slow Start, Congestion Avoidance, Fast - Retransmit, and Fast Recovery Algorithms", RFC 2001, - January 1997. - - - - - - - - - - - - - -Lahey Informational [Page 13] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - -6. Author's Address - - Kevin Lahey - dotRocket, Inc. - 1901 S. 
Bascom Ave., Suite 300 - Campbell, CA 95008 - USA - - Phone: +1 408-371-8977 x115 - email: kml@dotrocket.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Lahey Informational [Page 14] - -RFC 2923 TCP Problems with Path MTU Discovery September 2000 - - -7. Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. 
- - - - - - - - - - - - - - - - - - - -Lahey Informational [Page 15] - diff --git a/kernel/picotcp/RFC/rfc2988.txt b/kernel/picotcp/RFC/rfc2988.txt deleted file mode 100644 index 2815ac8..0000000 --- a/kernel/picotcp/RFC/rfc2988.txt +++ /dev/null @@ -1,451 +0,0 @@ - - - - - - -Network Working Group V. Paxson -Request for Comments: 2988 ACIRI -Category: Standards Track M. Allman - NASA GRC/BBN - November 2000 - - - Computing TCP's Retransmission Timer - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2000). All Rights Reserved. - -Abstract - - This document defines the standard algorithm that Transmission - Control Protocol (TCP) senders are required to use to compute and - manage their retransmission timer. It expands on the discussion in - section 4.2.3.1 of RFC 1122 and upgrades the requirement of - supporting the algorithm from a SHOULD to a MUST. - -1 Introduction - - The Transmission Control Protocol (TCP) [Pos81] uses a retransmission - timer to ensure data delivery in the absence of any feedback from the - remote data receiver. The duration of this timer is referred to as - RTO (retransmission timeout). RFC 1122 [Bra89] specifies that the - RTO should be calculated as outlined in [Jac88]. - - This document codifies the algorithm for setting the RTO. In - addition, this document expands on the discussion in section 4.2.3.1 - of RFC 1122 and upgrades the requirement of supporting the algorithm - from a SHOULD to a MUST. RFC 2581 [APS99] outlines the algorithm TCP - uses to begin sending after the RTO expires and a retransmission is - sent. 
This document does not alter the behavior outlined in RFC 2581 - [APS99]. - - - - - - - -Paxson & Allman Standards Track [Page 1] - -RFC 2988 Computing TCP's Retransmission Timer November 2000 - - - In some situations it may be beneficial for a TCP sender to be more - conservative than the algorithms detailed in this document allow. - However, a TCP MUST NOT be more aggressive than the following - algorithms allow. - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in [Bra97]. - -2 The Basic Algorithm - - To compute the current RTO, a TCP sender maintains two state - variables, SRTT (smoothed round-trip time) and RTTVAR (round-trip - time variation). In addition, we assume a clock granularity of G - seconds. - - The rules governing the computation of SRTT, RTTVAR, and RTO are as - follows: - - (2.1) Until a round-trip time (RTT) measurement has been made for a - segment sent between the sender and receiver, the sender SHOULD - set RTO <- 3 seconds (per RFC 1122 [Bra89]), though the - "backing off" on repeated retransmission discussed in (5.5) - still applies. - - Note that some implementations may use a "heartbeat" timer - that in fact yield a value between 2.5 seconds and 3 - seconds. Accordingly, a lower bound of 2.5 seconds is also - acceptable, providing that the timer will never expire - faster than 2.5 seconds. Implementations using a heartbeat - timer with a granularity of G SHOULD not set the timer below - 2.5 + G seconds. - - (2.2) When the first RTT measurement R is made, the host MUST set - - SRTT <- R - RTTVAR <- R/2 - RTO <- SRTT + max (G, K*RTTVAR) - - where K = 4. 
- - (2.3) When a subsequent RTT measurement R' is made, a host MUST set - - RTTVAR <- (1 - beta) * RTTVAR + beta * |SRTT - R'| - SRTT <- (1 - alpha) * SRTT + alpha * R' - - - - - - -Paxson & Allman Standards Track [Page 2] - -RFC 2988 Computing TCP's Retransmission Timer November 2000 - - - The value of SRTT used in the update to RTTVAR is its value - before updating SRTT itself using the second assignment. That - is, updating RTTVAR and SRTT MUST be computed in the above - order. - - The above SHOULD be computed using alpha=1/8 and beta=1/4 (as - suggested in [JK88]). - - After the computation, a host MUST update - RTO <- SRTT + max (G, K*RTTVAR) - - (2.4) Whenever RTO is computed, if it is less than 1 second then the - RTO SHOULD be rounded up to 1 second. - - Traditionally, TCP implementations use coarse grain clocks to - measure the RTT and trigger the RTO, which imposes a large - minimum value on the RTO. Research suggests that a large - minimum RTO is needed to keep TCP conservative and avoid - spurious retransmissions [AP99]. Therefore, this - specification requires a large minimum RTO as a conservative - approach, while at the same time acknowledging that at some - future point, research may show that a smaller minimum RTO is - acceptable or superior. - - (2.5) A maximum value MAY be placed on RTO provided it is at least 60 - seconds. - -3 Taking RTT Samples - - TCP MUST use Karn's algorithm [KP87] for taking RTT samples. That - is, RTT samples MUST NOT be made using segments that were - retransmitted (and thus for which it is ambiguous whether the reply - was for the first instance of the packet or a later instance). The - only case when TCP can safely take RTT samples from retransmitted - segments is when the TCP timestamp option [JBB92] is employed, since - the timestamp option removes the ambiguity regarding which instance - of the data segment triggered the acknowledgment. 
- - Traditionally, TCP implementations have taken one RTT measurement at - a time (typically once per RTT). However, when using the timestamp - option, each ACK can be used as an RTT sample. RFC 1323 [JBB92] - suggests that TCP connections utilizing large congestion windows - should take many RTT samples per window of data to avoid aliasing - effects in the estimated RTT. A TCP implementation MUST take at - least one RTT measurement per RTT (unless that is not possible per - Karn's algorithm). - - - - - -Paxson & Allman Standards Track [Page 3] - -RFC 2988 Computing TCP's Retransmission Timer November 2000 - - - For fairly modest congestion window sizes research suggests that - timing each segment does not lead to a better RTT estimator [AP99]. - Additionally, when multiple samples are taken per RTT the alpha and - beta defined in section 2 may keep an inadequate RTT history. A - method for changing these constants is currently an open research - question. - -4 Clock Granularity - - There is no requirement for the clock granularity G used for - computing RTT measurements and the different state variables. - However, if the K*RTTVAR term in the RTO calculation equals zero, - the variance term MUST be rounded to G seconds (i.e., use the - equation given in step 2.3). - - RTO <- SRTT + max (G, K*RTTVAR) - - Experience has shown that finer clock granularities (<= 100 msec) - perform somewhat better than more coarse granularities. - - Note that [Jac88] outlines several clever tricks that can be used to - obtain better precision from coarse granularity timers. These - changes are widely implemented in current TCP implementations. - -5 Managing the RTO Timer - - An implementation MUST manage the retransmission timer(s) in such a - way that a segment is never retransmitted too early, i.e. less than - one RTO after the previous transmission of that segment. 
- - The following is the RECOMMENDED algorithm for managing the - retransmission timer: - - (5.1) Every time a packet containing data is sent (including a - retransmission), if the timer is not running, start it running - so that it will expire after RTO seconds (for the current value - of RTO). - - (5.2) When all outstanding data has been acknowledged, turn off the - retransmission timer. - - (5.3) When an ACK is received that acknowledges new data, restart the - retransmission timer so that it will expire after RTO seconds - (for the current value of RTO). - - - - - - - -Paxson & Allman Standards Track [Page 4] - -RFC 2988 Computing TCP's Retransmission Timer November 2000 - - - When the retransmission timer expires, do the following: - - (5.4) Retransmit the earliest segment that has not been acknowledged - by the TCP receiver. - - (5.5) The host MUST set RTO <- RTO * 2 ("back off the timer"). The - maximum value discussed in (2.5) above may be used to provide an - upper bound to this doubling operation. - - (5.6) Start the retransmission timer, such that it expires after RTO - seconds (for the value of RTO after the doubling operation - outlined in 5.5). - - Note that after retransmitting, once a new RTT measurement is - obtained (which can only happen when new data has been sent and - acknowledged), the computations outlined in section 2 are performed, - including the computation of RTO, which may result in "collapsing" - RTO back down after it has been subject to exponential backoff - (rule 5.5). - - Note that a TCP implementation MAY clear SRTT and RTTVAR after - backing off the timer multiple times as it is likely that the - current SRTT and RTTVAR are bogus in this situation. Once SRTT and - RTTVAR are cleared they should be initialized with the next RTT - sample taken per (2.2) rather than using (2.3). - -6 Security Considerations - - This document requires a TCP to wait for a given interval before - retransmitting an unacknowledged segment. 
An attacker could cause a - TCP sender to compute a large value of RTO by adding delay to a - timed packet's latency, or that of its acknowledgment. However, - the ability to add delay to a packet's latency often coincides with - the ability to cause the packet to be lost, so it is difficult to - see what an attacker might gain from such an attack that could cause - more damage than simply discarding some of the TCP connection's - packets. - - The Internet to a considerable degree relies on the correct - implementation of the RTO algorithm (as well as those described in - RFC 2581) in order to preserve network stability and avoid - congestion collapse. An attacker could cause TCP endpoints to - respond more aggressively in the face of congestion by forging - acknowledgments for segments before the receiver has actually - received the data, thus lowering RTO to an unsafe value. But to do - so requires spoofing the acknowledgments correctly, which is - difficult unless the attacker can monitor traffic along the path - between the sender and the receiver. In addition, even if the - - - -Paxson & Allman Standards Track [Page 5] - -RFC 2988 Computing TCP's Retransmission Timer November 2000 - - - attacker can cause the sender's RTO to reach too small a value, it - appears the attacker cannot leverage this into much of an attack - (compared to the other damage they can do if they can spoof packets - belonging to the connection), since the sending TCP will still back - off its timer in the face of an incorrectly transmitted packet's - loss due to actual congestion. - -Acknowledgments - - The RTO algorithm described in this memo was originated by Van - Jacobson in [Jac88]. - -References - - [AP99] Allman, M. and V. Paxson, "On Estimating End-to-End Network - Path Properties", SIGCOMM 99. - - [APS99] Allman, M., Paxson V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. 
- - [Bra89] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [Bra97] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [Jac88] Jacobson, V., "Congestion Avoidance and Control", Computer - Communication Review, vol. 18, no. 4, pp. 314-329, Aug. 1988. - - [JK88] Jacobson, V. and M. Karels, "Congestion Avoidance and - Control", ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z. - - [KP87] Karn, P. and C. Partridge, "Improving Round-Trip Time - Estimates in Reliable Transport Protocols", SIGCOMM 87. - - [Pos81] Postel, J., "Transmission Control Protocol", STD 7, RFC 793, - September 1981. - - - - - - - - - - - - - - -Paxson & Allman Standards Track [Page 6] - -RFC 2988 Computing TCP's Retransmission Timer November 2000 - - -Author's Addresses - - Vern Paxson - ACIRI / ICSI - 1947 Center Street - Suite 600 - Berkeley, CA 94704-1198 - - Phone: 510-666-2882 - Fax: 510-643-7684 - EMail: vern@aciri.org - http://www.aciri.org/vern/ - - - Mark Allman - NASA Glenn Research Center/BBN Technologies - Lewis Field - 21000 Brookpark Rd. MS 54-2 - Cleveland, OH 44135 - - Phone: 216-433-6586 - Fax: 216-433-8705 - EMail: mallman@grc.nasa.gov - http://roland.grc.nasa.gov/~mallman - - - - - - - - - - - - - - - - - - - - - - - - - - - -Paxson & Allman Standards Track [Page 7] - -RFC 2988 Computing TCP's Retransmission Timer November 2000 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2000). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Paxson & Allman Standards Track [Page 8] - diff --git a/kernel/picotcp/RFC/rfc3042.txt b/kernel/picotcp/RFC/rfc3042.txt deleted file mode 100644 index a190655..0000000 --- a/kernel/picotcp/RFC/rfc3042.txt +++ /dev/null @@ -1,507 +0,0 @@ - - - - - - -Network Working Group M. Allman -Request for Comments: 3042 NASA GRC/BBN -Category: Standards Track H. Balakrishnan - MIT - S. Floyd - ACIRI - January 2001 - - - Enhancing TCP's Loss Recovery Using Limited Transmit - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. 
- -Copyright Notice - - Copyright (C) The Internet Society (2001). All Rights Reserved. - -Abstract - - This document proposes a new Transmission Control Protocol (TCP) - mechanism that can be used to more effectively recover lost segments - when a connection's congestion window is small, or when a large - number of segments are lost in a single transmission window. The - "Limited Transmit" algorithm calls for sending a new data segment in - response to each of the first two duplicate acknowledgments that - arrive at the sender. Transmitting these segments increases the - probability that TCP can recover from a single lost segment using the - fast retransmit algorithm, rather than using a costly retransmission - timeout. Limited Transmit can be used both in conjunction with, and - in the absence of, the TCP selective acknowledgment (SACK) mechanism. - -1 Introduction - - A number of researchers have observed that TCP's loss recovery - strategies do not work well when the congestion window at a TCP - sender is small. This can happen, for instance, because there is - only a limited amount of data to send, or because of the limit - imposed by the receiver-advertised window, or because of the - constraints imposed by end-to-end congestion control over a - connection with a small bandwidth-delay product - [Riz96,Mor97,BPS+98,Bal98,LK98]. When a TCP detects a missing - segment, it enters a loss recovery phase using one of two methods. - - - -Allman, et al. Standards Track [Page 1] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - - First, if an acknowledgment (ACK) for a given segment is not received - in a certain amount of time a retransmission timeout occurs and the - segment is resent [RFC793,PA00]. Second, the "Fast Retransmit" - algorithm resends a segment when three duplicate ACKs arrive at the - sender [Jac88,RFC2581]. 
However, because duplicate ACKs from the - receiver are also triggered by packet reordering in the Internet, the - TCP sender waits for three duplicate ACKs in an attempt to - disambiguate segment loss from packet reordering. Once in a loss - recovery phase, a number of techniques can be used to retransmit lost - segments, including slow start-based recovery or Fast Recovery - [RFC2581], NewReno [RFC2582], and loss recovery based on selective - acknowledgments (SACKs) [RFC2018,FF96]. - - TCP's retransmission timeout (RTO) is based on measured round-trip - times (RTT) between the sender and receiver, as specified in [PA00]. - To prevent spurious retransmissions of segments that are only delayed - and not lost, the minimum RTO is conservatively chosen to be 1 - second. Therefore, it behooves TCP senders to detect and recover - from as many losses as possible without incurring a lengthy timeout - when the connection remains idle. However, if not enough duplicate - ACKs arrive from the receiver, the Fast Retransmit algorithm is never - triggered---this situation occurs when the congestion window is small - or if a large number of segments in a window are lost. For instance, - consider a congestion window (cwnd) of three segments. If one - segment is dropped by the network, then at most two duplicate ACKs - will arrive at the sender. Since three duplicate ACKs are required - to trigger Fast Retransmit, a timeout will be required to resend the - dropped packet. - - [BPS+97] found that roughly 56% of retransmissions sent by a busy web - server were sent after the RTO expires, while only 44% were handled - by Fast Retransmit. In addition, only 4% of the RTO-based - retransmissions could have been avoided with SACK, which of course - has to continue to disambiguate reordering from genuine loss. In - contrast, using the technique outlined in this document and in - [Bal98], 25% of the RTO-based retransmissions in that dataset would - have likely been avoided. 
- - The next section of this document outlines small changes to TCP - senders that will decrease the reliance on the retransmission timer, - and thereby improve TCP performance when Fast Retransmit is not - triggered. These changes do not adversely affect the performance of - TCP nor interact adversely with other connections, in other - circumstances. - - - - - - - -Allman, et al. Standards Track [Page 2] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - -1.1 Terminology - - In this document, he key words "MUST", "MUST NOT", "REQUIRED", - "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", - AND "OPTIONAL" are to be interpreted as described in RFC 2119 [1] and - indicate requirement levels for protocols. - -2 The Limited Transmit Algorithm - - When a TCP sender has previously unsent data queued for transmission - it SHOULD use the Limited Transmit algorithm, which calls for a TCP - sender to transmit new data upon the arrival of the first two - consecutive duplicate ACKs when the following conditions are - satisfied: - - * The receiver's advertised window allows the transmission of the - segment. - - * The amount of outstanding data would remain less than or equal - to the congestion window plus 2 segments. In other words, the - sender can only send two segments beyond the congestion window - (cwnd). - - The congestion window (cwnd) MUST NOT be changed when these new - segments are transmitted. Assuming that these new segments and the - corresponding ACKs are not dropped, this procedure allows the sender - to infer loss using the standard Fast Retransmit threshold of three - duplicate ACKs [RFC2581]. This is more robust to reordered packets - than if an old packet were retransmitted on the first or second - duplicate ACK. 
- - Note: If the connection is using selective acknowledgments [RFC2018], - the data sender MUST NOT send new segments in response to duplicate - ACKs that contain no new SACK information, as a misbehaving receiver - can generate such ACKs to trigger inappropriate transmission of data - segments. See [SCWA99] for a discussion of attacks by misbehaving - receivers. - - Limited Transmit follows the "conservation of packets" congestion - control principle [Jac88]. Each of the first two duplicate ACKs - indicate that a segment has left the network. Furthermore, the - sender has not yet decided that a segment has been dropped and - therefore has no reason to assume that the current congestion control - state is inaccurate. Therefore, transmitting segments does not - deviate from the spirit of TCP's congestion control principles. - - - - - - -Allman, et al. Standards Track [Page 3] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - - [BPS99] shows that packet reordering is not a rare network event. - [RFC2581] does not provide for sending of data on the first two - duplicate ACKs that arrive at the sender. This causes a burst of - segments to be sent when an ACK for new data does arrive following - packet reordering. Using Limited Transmit, data packets will be - clocked out by incoming ACKs and therefore transmission will not be - as bursty. - - Note: Limited Transmit is implemented in the ns simulator [NS]. - Researchers wishing to investigate this mechanism further can do so - by enabling "singledup_" for the given TCP connection. - -3 Related Work - - Deployment of Explicit Congestion Notification (ECN) [Flo94,RFC2481] - may benefit connections with small congestion window sizes [SA00]. - ECN provides a method for indicating congestion to the end-host - without dropping segments. 
While some segment drops may still occur, - ECN may allow TCP to perform better with small congestion window - sizes because the sender can avoid many of the Fast Retransmits and - Retransmit Timeouts that would otherwise have been needed to detect - dropped segments [SA00]. - - When ECN-enabled TCP traffic competes with non-ECN-enabled TCP - traffic, ECN-enabled traffic can receive up to 30% higher goodput. - For bulk transfers, the relative performance benefit of ECN is - greatest when on average each flow has 3-4 outstanding packets during - each round-trip time [ZQ00]. This should be a good estimate for the - performance impact of a flow using Limited Transmit, since both ECN - and Limited Transmit reduce the reliance on the retransmission timer - for signaling congestion. - - The Rate-Halving congestion control algorithm [MSML99] uses a form of - limited transmit, as it calls for transmitting a data segment on - every second duplicate ACK that arrives at the sender. The algorithm - decouples the decision of what to send from the decision of when to - send. However, similar to Limited Transmit the algorithm will always - send a new data segment on the second duplicate ACK that arrives at - the sender. - -4 Security Considerations - - The additional security implications of the changes proposed in this - document, compared to TCP's current vulnerabilities, are minimal. - The potential security issues come from the subversion of end-to-end - congestion control from "false" duplicate ACKs, where a "false" - duplicate ACK is a duplicate ACK that does not actually acknowledge - new data received at the TCP receiver. False duplicate ACKs could - - - -Allman, et al. Standards Track [Page 4] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - - result from duplicate ACKs that are themselves duplicated in the - network, or from misbehaving TCP receivers that send false duplicate - ACKs to subvert end-to-end congestion control [SCWA99,RFC2581]. 
- - When the TCP data receiver has agreed to use the SACK option, the TCP - data sender has fairly strong protection against false duplicate - ACKs. In particular, with SACK, a duplicate ACK that acknowledges - new data arriving at the receiver reports the sequence numbers of - that new data. Thus, with SACK, the TCP sender can verify that an - arriving duplicate ACK acknowledges data that the TCP sender has - actually sent, and for which no previous acknowledgment has been - received, before sending new data as a result of that acknowledgment. - For further protection, the TCP sender could keep a record of packet - boundaries for transmitted data packets, and recognize at most one - valid acknowledgment for each packet (e.g., the first acknowledgment - acknowledging the receipt of all of the sequence numbers in that - packet). - - One could imagine some limited protection against false duplicate - ACKs for a non-SACK TCP connection, where the TCP sender keeps a - record of the number of packets transmitted, and recognizes at most - one acknowledgment per packet to be used for triggering the sending - of new data. However, this accounting of packets transmitted and - acknowledged would require additional state and extra complexity at - the TCP sender, and does not seem necessary. - - The most important protection against false duplicate ACKs comes from - the limited potential of duplicate ACKs in subverting end-to-end - congestion control. There are two separate cases to consider: when - the TCP sender receives less than a threshold number of duplicate - ACKs, and when the TCP sender receives at least a threshold number of - duplicate ACKs. In the latter case a TCP with Limited Transmit will - behave essentially the same as a TCP without Limited Transmit in that - the congestion window will be halved and a loss recovery period will - be initiated. 
- - When a TCP sender receives less than a threshold number of duplicate - ACKs a misbehaving receiver could send two duplicate ACKs after each - regular ACK. One might imagine that the TCP sender would send at - three times its allowed sending rate. However, using Limited - Transmit as outlined in section 2 the sender is only allowed to - exceed the congestion window by less than the duplicate ACK threshold - (of three segments), and thus would not send a new packet for each - duplicate ACK received. - - - - - - - -Allman, et al. Standards Track [Page 5] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - -Acknowledgments - - Bill Fenner, Jamshid Mahdavi and the Transport Area Working Group - provided valuable feedback on an early version of this document. - -References - - [Bal98] Hari Balakrishnan. Challenges to Reliable Data Transport - over Heterogeneous Wireless Networks. Ph.D. Thesis, - University of California at Berkeley, August 1998. - - [BPS+97] Hari Balakrishnan, Venkata Padmanabhan, Srinivasan Seshan, - Mark Stemm, and Randy Katz. TCP Behavior of a Busy Web - Server: Analysis and Improvements. Technical Report - UCB/CSD-97-966, August 1997. Available from - http://nms.lcs.mit.edu/~hari/papers/csd-97-966.ps. (Also - in Proc. IEEE INFOCOM Conf., San Francisco, CA, March - 1998.) - - [BPS99] Jon Bennett, Craig Partridge, Nicholas Shectman. Packet - Reordering is Not Pathological Network Behavior. IEEE/ACM - Transactions on Networking, December 1999. - - [FF96] Kevin Fall, Sally Floyd. Simulation-based Comparisons of - Tahoe, Reno, and SACK TCP. ACM Computer Communication - Review, July 1996. - - [Flo94] Sally Floyd. TCP and Explicit Congestion Notification. - ACM Computer Communication Review, October 1994. - - [Jac88] Van Jacobson. Congestion Avoidance and Control. ACM - SIGCOMM 1988. - - [LK98] Dong Lin, H.T. Kung. TCP Fast Recovery Strategies: - Analysis and Improvements. Proceedings of InfoCom, March - 1998. 
- - [MSML99] Matt Mathis, Jeff Semke, Jamshid Mahdavi, Kevin Lahey. The - Rate Halving Algorithm, 1999. URL: - http://www.psc.edu/networking/rate_halving.html. - - [Mor97] Robert Morris. TCP Behavior with Many Flows. Proceedings - of the Fifth IEEE International Conference on Network - Protocols. October 1997. - - [NS] Ns network simulator. URL: http://www.isi.edu/nsnam/. - - - - - -Allman, et al. Standards Track [Page 6] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - - [PA00] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [Riz96] Luigi Rizzo. Issues in the Implementation of Selective - Acknowledgments for TCP. January, 1996. URL: - http://www.iet.unipi.it/~luigi/selack.ps - - [SA00] Hadi Salim, J. and U. Ahmed, "Performance Evaluation of - Explicit Congestion Notification (ECN) in IP Networks", RFC - 2884, July 2000. - - [SCWA99] Stefan Savage, Neal Cardwell, David Wetherall, Tom - Anderson. TCP Congestion Control with a Misbehaving - Receiver. ACM Computer Communications Review, October - 1999. - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October 1996. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2481] Ramakrishnan, K. and S. Floyd, "A Proposal to Add Explicit - Congestion Notification (ECN) to IP", RFC 2481, January - 1999. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2582] Floyd, S. and T. Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [ZQ00] Yin Zhang and Lili Qiu, Understanding the End-to-End - Performance Impact of RED in a Heterogeneous Environment, - Cornell CS Technical Report 2000-1802, July 2000. 
URL - http://www.cs.cornell.edu/yzhang/papers.htm. - - - - - - - - - - - - -Allman, et al. Standards Track [Page 7] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - -Authors' Addresses - - Mark Allman - NASA Glenn Research Center/BBN Technologies - Lewis Field - 21000 Brookpark Rd. MS 54-5 - Cleveland, OH 44135 - - Phone: +1-216-433-6586 - Fax: +1-216-433-8705 - EMail: mallman@grc.nasa.gov - http://roland.grc.nasa.gov/~mallman - - - Hari Balakrishnan - Laboratory for Computer Science - 545 Technology Square - Massachusetts Institute of Technology - Cambridge, MA 02139 - - EMail: hari@lcs.mit.edu - http://nms.lcs.mit.edu/~hari/ - - - Sally Floyd - AT&T Center for Internet Research at ICSI (ACIRI) - 1947 Center St, Suite 600 - Berkeley, CA 94704 - - Phone: +1-510-666-2989 - EMail: floyd@aciri.org - http://www.aciri.org/floyd/ - - - - - - - - - - - - - - - - - - - -Allman, et al. Standards Track [Page 8] - -RFC 3042 Enhancing TCP Loss Recovery January 2001 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2001). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. 
- - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Allman, et al. Standards Track [Page 9] - diff --git a/kernel/picotcp/RFC/rfc3124.txt b/kernel/picotcp/RFC/rfc3124.txt deleted file mode 100644 index db57bc3..0000000 --- a/kernel/picotcp/RFC/rfc3124.txt +++ /dev/null @@ -1,1235 +0,0 @@ - - - - - - -Network Working Group H. Balakrishnan -Request for Comments: 3124 MIT LCS -Category: Standards Track S. Seshan - CMU - June 2001 - - - The Congestion Manager - - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2001). All Rights Reserved. - -Abstract - - This document describes the Congestion Manager (CM), an end-system - module that: - - (i) Enables an ensemble of multiple concurrent streams from a sender - destined to the same receiver and sharing the same congestion - properties to perform proper congestion avoidance and control, and - - (ii) Allows applications to easily adapt to network congestion. - -1. 
Conventions used in this document: - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in RFC-2119 [Bradner97]. - - STREAM - - A group of packets that all share the same source and destination - IP address, IP type-of-service, transport protocol, and source and - destination transport-layer port numbers. - - - - - - - -Balakrishnan, et. al. Standards Track [Page 1] - -RFC 3124 The Congestion Manager June 2001 - - - MACROFLOW - - A group of CM-enabled streams that all use the same congestion - management and scheduling algorithms, and share congestion state - information. Currently, streams destined to different receivers - belong to different macroflows. Streams destined to the same - receiver MAY belong to different macroflows. When the Congestion - Manager is in use, streams that experience identical congestion - behavior and use the same congestion control algorithm SHOULD - belong to the same macroflow. - - APPLICATION - - Any software module that uses the CM. This includes user-level - applications such as Web servers or audio/video servers, as well - as in-kernel protocols such as TCP [Postel81] that use the CM for - congestion control. - - WELL-BEHAVED APPLICATION - - An application that only transmits when allowed by the CM and - accurately accounts for all data that it has sent to the receiver - by informing the CM using the CM API. - - PATH MAXIMUM TRANSMISSION UNIT (PMTU) - - The size of the largest packet that the sender can transmit - without it being fragmented en route to the receiver. It includes - the sizes of all headers and data except the IP header. - - CONGESTION WINDOW (cwnd) - - A CM state variable that modulates the amount of outstanding data - between sender and receiver. 
- - OUTSTANDING WINDOW (ownd) - - The number of bytes that has been transmitted by the source, but - not known to have been either received by the destination or lost - in the network. - - INITIAL WINDOW (IW) - - The size of the sender's congestion window at the beginning of a - macroflow. - - - - - - -Balakrishnan, et. al. Standards Track [Page 2] - -RFC 3124 The Congestion Manager June 2001 - - - DATA TYPE SYNTAX - - We use "u64" for unsigned 64-bit, "u32" for unsigned 32-bit, "u16" - for unsigned 16-bit, "u8" for unsigned 8-bit, "i32" for signed - 32-bit, "i16" for signed 16-bit quantities, "float" for IEEE - floating point values. The type "void" is used to indicate that - no return value is expected from a call. Pointers are referred to - using "*" syntax, following C language convention. - - We emphasize that all the API functions described in this document - are "abstract" calls and that conformant CM implementations may - differ in specific implementation details. - -2. Introduction - - The framework described in this document integrates congestion - management across all applications and transport protocols. The CM - maintains congestion parameters (available aggregate and per-stream - bandwidth, per-receiver round-trip times, etc.) and exports an API - that enables applications to learn about network characteristics, - pass information to the CM, share congestion information with each - other, and schedule data transmissions. This document focuses on - applications and transport protocols with their own independent per- - byte or per-packet sequence number information, and does not require - modifications to the receiver protocol stack. However, the receiving - application must provide feedback to the sending application about - received packets and losses, and the latter is expected to use the CM - API to update CM state. This document does not address networks with - reservations or service differentiation. 
- - The CM is an end-system module that enables an ensemble of multiple - concurrent streams to perform stable congestion avoidance and - control, and allows applications to easily adapt their transmissions - to prevailing network conditions. It integrates congestion - management across all applications and transport protocols. It - maintains congestion parameters (available aggregate and per-stream - bandwidth, per-receiver round-trip times, etc.) and exports an API - that enables applications to learn about network characteristics, - pass information to the CM, share congestion information with each - other, and schedule data transmissions. When the CM is used, all - data transmissions subject to the CM must be done with the explicit - consent of the CM via this API to ensure proper congestion behavior. - - Systems MAY choose to use CM, and if so they MUST follow this - specification. - - This document focuses on applications and networks where the - following conditions hold: - - - -Balakrishnan, et. al. Standards Track [Page 3] - -RFC 3124 The Congestion Manager June 2001 - - - 1. Applications are well-behaved with their own independent - per-byte or per-packet sequence number information, and use the - CM API to update internal state in the CM. - - 2. Networks are best-effort without service discrimination or - reservations. In particular, it does not address situations - where different streams between the same pair of hosts traverse - paths with differing characteristics. - - The Congestion Manager framework can be extended to support - applications that do not provide their own feedback and to - differentially-served networks. These extensions will be addressed - in later documents. - - The CM is motivated by two main goals: - - (i) Enable efficient multiplexing. 
Increasingly, the trend on the - Internet is for unicast data senders (e.g., Web servers) to transmit - heterogeneous types of data to receivers, ranging from unreliable - real-time streaming content to reliable Web pages and applets. As a - result, many logically different streams share the same path between - sender and receiver. For the Internet to remain stable, each of - these streams must incorporate control protocols that safely probe - for spare bandwidth and react to congestion. Unfortunately, these - concurrent streams typically compete with each other for network - resources, rather than share them effectively. Furthermore, they do - not learn from each other about the state of the network. Even if - they each independently implement congestion control (e.g., a group - of TCP connections each implementing the algorithms in [Jacobson88, - Allman99]), the ensemble of streams tends to be more aggressive in - the face of congestion than a single TCP connection implementing - standard TCP congestion control and avoidance [Balakrishnan98]. - - (ii) Enable application adaptation to congestion. Increasingly, - popular real-time streaming applications run over UDP using their own - user-level transport protocols for good application performance, but - in most cases today do not adapt or react properly to network - congestion. By implementing a stable control algorithm and exposing - an adaptation API, the CM enables easy application adaptation to - congestion. Applications adapt the data they transmit to the current - network conditions. - - The CM framework builds on recent work on TCP control block sharing - [Touch97], integrated TCP congestion control (TCP-Int) - [Balakrishnan98] and TCP sessions [Padmanabhan98]. [Touch97] - advocates the sharing of some of the state in the TCP control block - to improve transient transport performance and describes sharing - across an ensemble of TCP connections. [Balakrishnan98], - - - -Balakrishnan, et. al. 
Standards Track [Page 4] - -RFC 3124 The Congestion Manager June 2001 - - - [Padmanabhan98], and [Eggert00] describe several experiments that - quantify the benefits of sharing congestion state, including improved - stability in the face of congestion and better loss recovery. - Integrating loss recovery across concurrent connections significantly - improves performance because losses on one connection can be detected - by noticing that later data sent on another connection has been - received and acknowledged. The CM framework extends these ideas in - two significant ways: (i) it extends congestion management to non-TCP - streams, which are becoming increasingly common and often do not - implement proper congestion management, and (ii) it provides an API - for applications to adapt their transmissions to current network - conditions. For an extended discussion of the motivation for the CM, - its architecture, API, and algorithms, see [Balakrishnan99]; for a - description of an implementation and performance results, see - [Andersen00]. - - The resulting end-host protocol architecture at the sender is shown - in Figure 1. The CM helps achieve network stability by implementing - stable congestion avoidance and control algorithms that are "TCP- - friendly" [Mahdavi98] based on algorithms described in [Allman99]. - However, it does not attempt to enforce proper congestion behavior - for all applications (but it does not preclude a policer on the host - that performs this task). Note that while the policer at the end- - host can use CM, the network has to be protected against compromises - to the CM and the policer at the end hosts, a task that requires - router machinery [Floyd99a]. We do not address this issue further in - this document. - - - - - - - - - - - - - - - - - - - - - - - - -Balakrishnan, et. al. 
Standards Track [Page 5] - -RFC 3124 The Congestion Manager June 2001 - - - |--------| |--------| |--------| |--------| |--------------| - | HTTP | | FTP | | RTP 1 | | RTP 2 | | | - |--------| |--------| |--------| |--------| | | - | | | ^ | ^ | | - | | | | | | | Scheduler | - | | | | | | |---| | | - | | | |-------|--+->| | | | - | | | | | |<--| | - v v v v | | |--------------| - |--------| |--------| |-------------| | | ^ - | TCP 1 | | TCP 2 | | UDP 1 | | A | | - |--------| |--------| |-------------| | | | - ^ | ^ | | | | |--------------| - | | | | | | P |-->| | - | | | | | | | | | - |---|------+---|--------------|------->| | | Congestion | - | | | | I | | | - v v v | | | Controller | - |-----------------------------------| | | | | - | IP |-->| | | | - |-----------------------------------| | | |--------------| - |---| - - Figure 1 - - The key components of the CM framework are (i) the API, (ii) the - congestion controller, and (iii) the scheduler. The API is (in part) - motivated by the requirements of application-level framing (ALF) - [Clark90], and is described in Section 4. The CM internals (Section - 5) include a congestion controller (Section 5.1) and a scheduler to - orchestrate data transmissions between concurrent streams in a - macroflow (Section 5.2). The congestion controller adjusts the - aggregate transmission rate between sender and receiver based on its - estimate of congestion in the network. It obtains feedback about its - past transmissions from applications themselves via the API. The - scheduler apportions available bandwidth amongst the different - streams within each macroflow and notifies applications when they are - permitted to send data. This document focuses on well-behaved - applications; a future one will describe the sender-receiver protocol - and header formats that will handle applications that do not - incorporate their own feedback to the CM. - -3. 
CM API - - By convention, the IETF does not treat Application Programming - Interfaces as standards track. However, it is considered important - to have the CM API and CM algorithm requirements in one coherent - document. The following section on the CM API uses the terms MUST, - - - -Balakrishnan, et. al. Standards Track [Page 6] - -RFC 3124 The Congestion Manager June 2001 - - - SHOULD, etc., but the terms are meant to apply within the context of - an implementation of the CM API. The section does not apply to - congestion control implementations in general, only to those - implementations offering the CM API. - - Using the CM API, streams can determine their share of the available - bandwidth, request and have their data transmissions scheduled, - inform the CM about successful transmissions, and be informed when - the CM's estimate of path bandwidth changes. Thus, the CM frees - applications from having to maintain information about the state of - congestion and available bandwidth along any path. - - The function prototypes below follow standard C language convention. - We emphasize that these API functions are abstract calls and - conformant CM implementations may differ in specific details, as long - as equivalent functionality is provided. - - When a new stream is created by an application, it passes some - information to the CM via the cm_open(stream_info) API call. - Currently, stream_info consists of the following information: (i) the - source IP address, (ii) the source port, (iii) the destination IP - address, (iv) the destination port, and (v) the IP protocol number. - -3.1 State maintenance - - 1. Open: All applications MUST call cm_open(stream_info) before - using the CM API. This returns a handle, cm_streamid, for the - application to use for all further CM API invocations for that - stream. If the returned cm_streamid is -1, then the cm_open() - failed and that stream cannot use the CM. 
- - All other calls to the CM for a stream use the cm_streamid - returned from the cm_open() call. - - 2. Close: When a stream terminates, the application SHOULD invoke - cm_close(cm_streamid) to inform the CM about the termination - of the stream. - - 3. Packet size: cm_mtu(cm_streamid) returns the estimated PMTU of - the path between sender and receiver. Internally, this - information SHOULD be obtained via path MTU discovery - [Mogul90]. It MAY be statically configured in the absence of - such a mechanism. - - - - - - - - -Balakrishnan, et. al. Standards Track [Page 7] - -RFC 3124 The Congestion Manager June 2001 - - -3.2 Data transmission - - The CM accommodates two types of adaptive senders, enabling - applications to dynamically adapt their content based on prevailing - network conditions, and supporting ALF-based applications. - - 1. Callback-based transmission. The callback-based transmission API - puts the stream in firm control of deciding what to transmit at each - point in time. To achieve this, the CM does not buffer any data; - instead, it allows streams the opportunity to adapt to unexpected - network changes at the last possible instant. Thus, this enables - streams to "pull out" and repacketize data upon learning about any - rate change, which is hard to do once the data has been buffered. - The CM must implement a cm_request(i32 cm_streamid) call for streams - wishing to send data in this style. After some time, depending on - the rate, the CM MUST invoke a callback using cmapp_send(), which is - a grant for the stream to send up to PMTU bytes. The callback-style - API is the recommended choice for ALF-based streams. Note that - cm_request() does not take the number of bytes or MTU-sized units as - an argument; each call to cm_request() is an implicit request for - sending up to PMTU bytes. The CM MAY provide an alternate interface, - cm_request(int k). 
The cmapp_send callback for this request is - granted the right to send up to k PMTU sized segments. Section 4.3 - discusses the time duration for which the transmission grant is - valid, while Section 5.2 describes how these requests are scheduled - and callbacks made. - - 2. Synchronous-style. The above callback-based API accommodates a - class of ALF streams that are "asynchronous." Asynchronous - transmitters do not transmit based on a periodic clock, but do so - triggered by asynchronous events like file reads or captured frames. - On the other hand, there are many streams that are "synchronous" - transmitters, which transmit periodically based on their own internal - timers (e.g., an audio sender that sends at a constant sampling - rate). While CM callbacks could be configured to periodically - interrupt such transmitters, the transmit loop of such applications - is less affected if they retain their original timer-based loop. In - addition, it complicates the CM API to have a stream express the - periodicity and granularity of its callbacks. Thus, the CM MUST - export an API that allows such streams to be informed of changes in - rates using the cmapp_update(u64 newrate, u32 srtt, u32 rttdev) - callback function, where newrate is the new rate in bits per second - for this stream, srtt is the current smoothed round trip time - estimate in microseconds, and rttdev is the smoothed linear deviation - in the round-trip time estimate calculated using the same algorithm - as in TCP [Paxson00]. The newrate value reports an instantaneous - rate calculated, for example, by taking the ratio of cwnd and srtt, - and dividing by the fraction of that ratio allocated to the stream. - - - -Balakrishnan, et. al. Standards Track [Page 8] - -RFC 3124 The Congestion Manager June 2001 - - - In response, the stream MUST adapt its packet size or change its - timer interval to conform to (i.e., not exceed) the allowed rate. Of - course, it may choose not to use all of this rate.
Note that the CM - is not on the data path of the actual transmission. - - To avoid unnecessary cmapp_update() callbacks that the application - will only ignore, the CM MUST provide a cm_thresh(float - rate_downthresh, float rate_upthresh, float rtt_downthresh, float - rtt_upthresh) function that a stream can use at any stage in its - execution. In response, the CM SHOULD invoke the callback only when - the rate decreases to less than (rate_downthresh * lastrate) or - increases to more than (rate_upthresh * lastrate), where lastrate is - the rate last notified to the stream, or when the round-trip time - changes correspondingly by the requisite thresholds. This - information is used as a hint by the CM, in the sense the - cmapp_update() can be called even if these conditions are not met. - - The CM MUST implement a cm_query(i32 cm_streamid, u64* rate, u32* - srtt, u32* rttdev) to allow an application to query the current CM - state. This sets the rate variable to the current rate estimate in - bits per second, the srtt variable to the current smoothed round-trip - time estimate in microseconds, and rttdev to the mean linear - deviation. If the CM does not have valid estimates for the - macroflow, it fills in negative values for the rate, srtt, and - rttdev. - - Note that a stream can use more than one of the above transmission - APIs at the same time. In particular, the knowledge of sustainable - rate is useful for asynchronous streams as well as synchronous ones; - e.g., an asynchronous Web server disseminating images using TCP may - use cmapp_send() to schedule its transmissions and cmapp_update() to - decide whether to send a low-resolution or high-resolution image. A - TCP implementation using the CM is described in Section 6.1.1, where - the benefit of the cm_request() callback API for TCP will become - apparent. - - The reader will notice that the basic CM API does not provide an - interface for buffered congestion-controlled transmissions. 
This is - intentional, since this transmission mode can be implemented using - the callback-based primitive. Section 6.1.2 describes how - congestion-controlled UDP sockets may be implemented using the CM - API. - -3.3 Application notification - - When a stream receives feedback from receivers, it MUST use - cm_update(i32 cm_streamid, u32 nrecd, u32 nlost, u8 lossmode, i32 - rtt) to inform the CM about events such as congestion losses, - - - -Balakrishnan, et. al. Standards Track [Page 9] - -RFC 3124 The Congestion Manager June 2001 - - - successful receptions, type of loss (timeout event, Explicit - Congestion Notification [Ramakrishnan99], etc.) and round-trip time - samples. The nrecd parameter indicates how many bytes were - successfully received by the receiver since the last cm_update call, - while the nlost parameter identifies how many bytes - were lost during the same time period. The rtt value indicates the - round-trip time measured during the transmission of these bytes. The - rtt value must be set to -1 if no valid round-trip sample was - obtained by the application. The lossmode parameter provides an - indicator of how a loss was detected. A value of CM_NO_FEEDBACK - indicates that the application has received no feedback for all its - outstanding data, and is reporting this to the CM. For example, a - TCP that has experienced a timeout would use this parameter to inform - the CM of this. A value of CM_LOSS_FEEDBACK indicates that the - application has experienced some loss, which it believes to be due to - congestion, but not all outstanding data has been lost. For example, - a TCP segment loss detected using duplicate (selective) - acknowledgments or other data-driven techniques fits this category. - A value of CM_EXPLICIT_CONGESTION indicates that the receiver echoed - an explicit congestion notification message. Finally, a value of - CM_NO_CONGESTION indicates that no congestion-related loss has - occurred.
The lossmode parameter MUST be reported as a bit-vector - where the bits correspond to CM_NO_FEEDBACK, CM_LOSS_FEEDBACK, - CM_EXPLICIT_CONGESTION, and CM_NO_CONGESTION. Note that over links - (paths) that experience losses for reasons other than congestion, an - application SHOULD inform the CM of losses, with the CM_NO_CONGESTION - field set. - - cm_notify(i32 cm_streamid, u32 nsent) MUST be called when data is - transmitted from the host (e.g., in the IP output routine) to inform - the CM that nsent bytes were just transmitted on a given stream. - This allows the CM to update its estimate of the number of - outstanding bytes for the macroflow and for the stream. - - A cmapp_send() grant from the CM to an application is valid only for - an expiration time, equal to the larger of the round-trip time and an - implementation-dependent threshold communicated as an argument to the - cmapp_send() callback function. The application MUST NOT send data - based on this callback after this time has expired. Furthermore, if - the application decides not to send data after receiving this - callback, it SHOULD call cm_notify(stream_info, 0) to allow the CM to - permit other streams in the macroflow to transmit data. The CM - congestion controller MUST be robust to applications forgetting to - invoke cm_notify(stream_info, 0) correctly, or applications that - crash or disappear after having made a cm_request() call. - - - - - - -Balakrishnan, et. al. Standards Track [Page 10] - -RFC 3124 The Congestion Manager June 2001 - - -3.4 Querying - - If applications wish to learn about per-stream available bandwidth - and round-trip time, they can use the CM's cm_query(i32 cm_streamid, - i64* rate, i32* srtt, i32* rttdev) call, which fills in the desired - quantities. If the CM does not have valid estimates for the - macroflow, it fills in negative values for the rate, srtt, and - rttdev. 
- -3.5 Sharing granularity - - One of the decisions the CM needs to make is the granularity at which - a macroflow is constructed, by deciding which streams belong to the - same macroflow and share congestion information. The API provides - two functions that allow applications to decide which of their - streams ought to belong to the same macroflow. - - cm_getmacroflow(i32 cm_streamid) returns a unique i32 macroflow - identifier. cm_setmacroflow(i32 cm_macroflowid, i32 cm_streamid) - sets the macroflow of the stream cm_streamid to cm_macroflowid. If - the cm_macroflowid that is passed to cm_setmacroflow() is -1, then a - new macroflow is constructed and this is returned to the caller. - Each call to cm_setmacroflow() overrides the previous macroflow - association for the stream, should one exist. - - The default suggested aggregation method is to aggregate by - destination IP address; i.e., all streams to the same destination - address are aggregated to a single macroflow by default. The - cm_getmacroflow() and cm_setmacroflow() calls can then be used to - change this as needed. We do note that there are some cases where - this may not be optimal, even over best-effort networks. For - example, when a group of receivers are behind a NAT device, the - sender will see them all as one address. If the hosts behind the NAT - are in fact connected over different bottleneck links, some of those - hosts could see worse performance than before. It is possible to - detect such hosts when using delay and loss estimates, although the - specific mechanisms for doing so are beyond the scope of this - document. - - The objective of this interface is to set up sharing of groups not - sharing policy of relative weights of streams in a macroflow. The - latter requires the scheduler to provide an interface to set sharing - policy. 
However, because we want to support many different - schedulers (each of which may need different information to set - policy), we do not specify a complete API to the scheduler (but see - - - - - - -Balakrishnan, et. al. Standards Track [Page 11] - -RFC 3124 The Congestion Manager June 2001 - - - Section 5.2). A later guideline document is expected to describe a - few simple schedulers (e.g., weighted round-robin, hierarchical - scheduling) and the API they export to provide relative - prioritization. - -4. CM internals - - This section describes the internal components of the CM. It - includes a Congestion Controller and a Scheduler, with well-defined, - abstract interfaces exported by them. - -4.1 Congestion controller - - Associated with each macroflow is a congestion control algorithm; the - collection of all these algorithms comprises the congestion - controller of the CM. The control algorithm decides when and how - much data can be transmitted by a macroflow. It uses application - notifications (Section 4.3) from concurrent streams on the same - macroflow to build up information about the congestion state of the - network path used by the macroflow. - - The congestion controller MUST implement a "TCP-friendly" [Mahdavi98] - congestion control algorithm. Several macroflows MAY (and indeed, - often will) use the same congestion control algorithm but each - macroflow maintains state about the network used by its streams. - - The congestion control module MUST implement the following abstract - interfaces. We emphasize that these are not directly visible to - applications; they are within the context of a macroflow, and are - different from the CM API functions of Section 4. - - - void query(u64 *rate, u32 *srtt, u32 *rttdev): This function - returns the estimated rate (in bits per second) and smoothed - round trip time (in microseconds) for the macroflow. 
- - - void notify(u32 nsent): This function MUST be used to notify the - congestion control module whenever data is sent by an - application. The nsent parameter indicates the number of bytes - just sent by the application. - - - void update(u32 nsent, u32 nrecd, u32 rtt, u32 lossmode): This - function is called whenever any of the CM streams associated with - a macroflow identifies that data has reached the receiver or has - been lost en route. The nrecd parameter indicates the number of - bytes that have just arrived at the receiver. The nsent - parameter is the sum of the number of bytes just received and the - - - - - -Balakrishnan, et. al. Standards Track [Page 12] - -RFC 3124 The Congestion Manager June 2001 - - - number of bytes identified as lost en route. The rtt parameter is - the estimated round trip time in microseconds during the - transfer. The lossmode parameter provides an indicator of how a - loss was detected (section 4.3). - - Although these interfaces are not visible to applications, the - congestion controller MUST implement these abstract interfaces to - provide for modular inter-operability with different separately- - developed schedulers. - - The congestion control module MUST also call the associated - scheduler's schedule function (section 5.2) when it believes that the - current congestion state allows an MTU-sized packet to be sent. - -4.2 Scheduler - - While it is the responsibility of the congestion control module to - determine when and how much data can be transmitted, it is the - responsibility of a macroflow's scheduler module to determine which - of the streams should get the opportunity to transmit data. - - The Scheduler MUST implement the following interfaces: - - - void schedule(u32 num_bytes): When the congestion control module - determines that data can be sent, the schedule() routine MUST be - called with no more than the number of bytes that can be sent. 
- In turn, the scheduler MAY call the cmapp_send() function that CM - applications must provide. - - - float query_share(i32 cm_streamid): This call returns the - described stream's share of the total bandwidth available to the - macroflow. This call combined with the query call of the - congestion controller provides the information to satisfy an - application's cm_query() request. - - - void notify(i32 cm_streamid, u32 nsent): This interface is used - to notify the scheduler module whenever data is sent by a CM - application. The nsent parameter indicates the number of bytes - just sent by the application. - - The Scheduler MAY implement many additional interfaces. As - experience with CM schedulers increases, future documents may - make additions and/or changes to some parts of the scheduler - API. - - - - - - - -Balakrishnan, et. al. Standards Track [Page 13] - -RFC 3124 The Congestion Manager June 2001 - - -5. Examples - -5.1 Example applications - - This section describes three possible uses of the CM API by - applications. We describe two asynchronous applications---an - implementation of a TCP sender and an implementation of congestion- - controlled UDP sockets, and a synchronous application---a streaming - audio server. More details of these applications and CM - implementation optimizations for efficient operation are described in - [Andersen00]. - - All applications that use the CM MUST incorporate feedback from the - receiver. For example, it must periodically (typically once or twice - per round trip time) determine how many of its packets arrived at the - receiver. When the source gets this feedback, it MUST use - cm_update() to inform the CM of this new information. This results - in the CM updating ownd and may result in the CM changing its - estimates and calling cmapp_update() of the streams of the macroflow. 
- - The protocols in this section are examples and suggestions for - implementation, rather than requirements for any conformant - implementation. - -5.1.1 TCP - - A TCP implementation that uses CM should use the cmapp_send() - callback API. TCP only identifies which data it should send upon the - arrival of an acknowledgement or expiration of a timer. As a result, - it requires tight control over when and if new data or - retransmissions are sent. - - When TCP either connects to or accepts a connection from another - host, it performs a cm_open() call to associate the TCP connection - with a cm_streamid. - - Once a connection is established, the CM is used to control the - transmission of outgoing data. The CM eliminates the need for - tracking and reacting to congestion in TCP, because the CM and its - transmission API ensure proper congestion behavior. Loss recovery is - still performed by TCP based on fast retransmissions and recovery as - well as timeouts. In addition, TCP is also modified to have its own - outstanding window (tcp_ownd) estimate. Whenever data segments are - sent from its cmapp_send() callback, TCP updates its tcp_ownd value. - The ownd variable is also updated after each cm_update() call. TCP - also maintains a count of the number of outstanding segments - (pkt_cnt). At any time, TCP can calculate the average packet size - (avg_pkt_size) as tcp_ownd/pkt_cnt. The avg_pkt_size is used by TCP - - - -Balakrishnan, et. al. Standards Track [Page 14] - -RFC 3124 The Congestion Manager June 2001 - - - to help estimate the amount of outstanding data. Note that this is - not needed if the SACK option is used on the connection, since this - information is explicitly available. - - The TCP output routines are modified as follows: - - 1. All congestion window (cwnd) checks are removed. - - 2. When application data is available, the TCP output routines - perform all non-congestion checks (Nagle algorithm, receiver- - advertised window check, etc).
If these checks pass, the output - routine queues the data and calls cm_request() for the stream. - - 3. If incoming data or timers result in a loss being detected, the - retransmission is also placed in a queue and cm_request() is - called for the stream. - - 4. The cmapp_send() callback for TCP is set to an output routine. - If any retransmission is enqueued, the routine outputs the - retransmission. Otherwise, the routine outputs as much new data - as the TCP connection state allows. However, the cmapp_send() - never sends more than a single segment per call. This routine - arranges for the other output computations to be done, such as - header and options computations. - - The IP output routine on the host calls cm_notify() when the packets - are actually sent out. Because it does not know which cm_streamid is - responsible for the packet, cm_notify() takes the stream_info as - argument (see Section 4 for what the stream_info should contain). - Because cm_notify() reports the IP payload size, TCP keeps track of - the total header size and incorporates these updates. - - The TCP input routines are modified as follows: - - 1. RTT estimation is done as normal using either timestamps or - Karn's algorithm. Any rtt estimate that is generated is passed to - CM via the cm_update call. - - 2. All cwnd and slow start threshold (ssthresh) updates are - removed. - - 3. Upon the arrival of an ack for new data, TCP computes the value - of in_flight (the amount of data in flight) as snd_max-ack-1 - (i.e., MAX Sequence Sent - Current Ack - 1). TCP then calls - cm_update(streamid, tcp_ownd - in_flight, 0, CM_NO_CONGESTION, - rtt). - - - - - -Balakrishnan, et. al. Standards Track [Page 15] - -RFC 3124 The Congestion Manager June 2001 - - - 4. Upon the arrival of a duplicate acknowledgement, TCP must check - its dupack count (dup_acks) to determine its action. If dup_acks - < 3, the TCP does nothing. 
If dup_acks == 3, TCP assumes that a - packet was lost and that at least 3 packets arrived to generate - these duplicate acks. Therefore, it calls cm_update(streamid, 4 * - avg_pkt_size, 3 * avg_pkt_size, CM_LOSS_FEEDBACK, rtt). The - average packet size is used since the acknowledgments do not - indicate exactly how much data has reached the other end. Most - TCP implementations interpret a duplicate ACK as an indication - that a full MSS has reached its destination. Once a new ACK is - received, these TCP sender implementations may resynchronize with - TCP receiver. The CM API does not provide a mechanism for TCP to - pass information from this resynchronization. Therefore, TCP can - only infer the arrival of an avg_pkt_size amount of data from each - duplicate ack. TCP also enqueues a retransmission of the lost - segment and calls cm_request(). If dup_acks > 3, TCP assumes that - a packet has reached the other end and caused this ack to be sent. - As a result, it calls cm_update(streamid, avg_pkt_size, - avg_pkt_size, CM_NO_CONGESTION, rtt). - - 5. Upon the arrival of a partial acknowledgment (one that does not - exceed the highest segment transmitted at the time the loss - occurred, as defined in [Floyd99b]), TCP assumes that a packet was - lost and that the retransmitted packet has reached the recipient. - Therefore, it calls cm_update(streamid, 2 * avg_pkt_size, - avg_pkt_size, CM_NO_CONGESTION, rtt). CM_NO_CONGESTION is used - since the loss period has already been reported. TCP also - enqueues a retransmission of the lost segment and calls - cm_request(). - - When the TCP retransmission timer expires, the sender identifies that - a segment has been lost and calls cm_update(streamid, avg_pkt_size, - 0, CM_NO_FEEDBACK, 0) to signify that no feedback has been received - from the receiver and that one segment is sure to have "left the - pipe." TCP also enqueues a retransmission of the lost segment and - calls cm_request(). 
- -5.1.2 Congestion-controlled UDP - - Congestion-controlled UDP is a useful CM application, which we - describe in the context of Berkeley sockets [Stevens94]. They - provide the same functionality as standard Berkeley UDP sockets, but - instead of immediately sending the data from the kernel packet queue - to lower layers for transmission, the buffered socket implementation - makes calls to the API exported by the CM inside the kernel and gets - callbacks from the CM. When a CM UDP socket is created, it is bound - to a particular stream. Later, when data is added to the packet - queue, cm_request() is called on the stream associated with the - - - -Balakrishnan, et. al. Standards Track [Page 16] - -RFC 3124 The Congestion Manager June 2001 - - - socket. When the CM schedules this stream for transmission, it calls - udp_ccappsend() in the UDP module. This function transmits one MTU - from the packet queue, and schedules the transmission of any - remaining packets. The in-kernel implementation of the CM UDP API - should not require any additional data copies and should support all - standard UDP options. Modifying existing applications to use - congestion-controlled UDP requires the implementation of a new socket - option on the socket. To work correctly, the sender must obtain - feedback about congestion. This can be done in at least two ways: - (i) the UDP receiver application can provide feedback to the sender - application, which will inform the CM of network conditions using - cm_update(); (ii) the UDP receiver implementation can provide - feedback to the sending UDP. Note that this latter alternative - requires changes to the receiver's network stack and the sender UDP - cannot assume that all receivers support this option without explicit - negotiation. - -5.1.3 Audio server - - A typical audio application often has access to the sample in a - multitude of data rates and qualities. 
The objective of the - application is then to deliver the highest possible quality of audio - (typically the highest data rate) its clients. The selection of - which version of audio to transmit should be based on the current - congestion state of the network. In addition, the source will want - audio delivered to its users at a consistent sampling rate. As a - result, it must send data a regular rate, minimizing delaying - transmissions and reducing buffering before playback. To meet these - requirements, this application can use the synchronous sender API - (Section 4.2). - - When the source first starts, it uses the cm_query() call to get an - initial estimate of network bandwidth and delay. If some other - streams on that macroflow have already been active, then it gets an - initial estimate that is valid; otherwise, it gets negative values, - which it ignores. It then chooses an encoding that does not exceed - these estimates (or, in the case of an invalid estimate, uses - application-specific initial values) and begins transmitting data. - The application also implements the cmapp_update() callback. When - the CM determines that network characteristics have changed, it calls - the application's cmapp_update() function and passes it a new rate - and round-trip time estimate. The application must change its choice - of audio encoding to ensure that it does not exceed these new - estimates. - - - - - - - -Balakrishnan, et. al. Standards Track [Page 17] - -RFC 3124 The Congestion Manager June 2001 - - -5.2 Example congestion control module - - To illustrate the responsibilities of a congestion control module, - the following describes some of the actions of a simple TCP-like - congestion control module that implements Additive Increase - Multiplicative Decrease congestion control (AIMD_CC): - - - query(): AIMD_CC returns the current congestion window (cwnd) - divided by the smoothed rtt (srtt) as its bandwidth estimate. 
It - returns the smoothed rtt estimate as srtt. - - - notify(): AIMD_CC adds the number of bytes sent to its - outstanding data window (ownd). - - - update(): AIMD_CC subtracts nsent from ownd. If the value of rtt - is non-zero, AIMD_CC updates srtt using the TCP srtt calculation. - If the update indicates that data has been lost, AIMD_CC sets - cwnd to 1 MTU if the loss_mode is CM_NO_FEEDBACK and to cwnd/2 - (with a minimum of 1 MTU) if the loss_mode is CM_LOSS_FEEDBACK or - CM_EXPLICIT_CONGESTION. AIMD_CC also sets its internal ssthresh - variable to cwnd/2. If no loss had occurred, AIMD_CC mimics TCP - slow start and linear growth modes. It increments cwnd by nsent - when cwnd < ssthresh (bounded by a maximum of ssthresh-cwnd) and - by nsent * MTU/cwnd when cwnd > ssthresh. - - - When cwnd or ownd are updated and indicate that at least one MTU - may be transmitted, AIMD_CC calls the CM to schedule a - transmission. - -5.3 Example Scheduler Module - - To clarify the responsibilities of a scheduler module, the following - describes some of the actions of a simple round robin scheduler - module (RR_sched): - - - schedule(): RR_sched schedules as many streams as possible in round - robin fashion. - - - query_share(): RR_sched returns 1/(number of streams in macroflow). - - - notify(): RR_sched does nothing. Round robin scheduling is not - affected by the amount of data sent. - -6. Security Considerations - - The CM provides many of the same services that the congestion control - in TCP provides. As such, it is vulnerable to many of the same - security problems. For example, incorrect reports of losses and - - - -Balakrishnan, et. al. Standards Track [Page 18] - -RFC 3124 The Congestion Manager June 2001 - - - transmissions will give the CM an inaccurate picture of the network's - congestion state. By giving CM a high estimate of congestion, an - attacker can degrade the performance observed by applications. 
For - example, a stream on a host can arbitrarily slow down any other - stream on the same macroflow, a form of denial of service. - - The more dangerous form of attack occurs when an application gives - the CM a low estimate of congestion. This would cause CM to be - overly aggressive and allow data to be sent much more quickly than - sound congestion control policies would allow. - - [Touch97] describes a number of the security problems that arise with - congestion information sharing. An additional vulnerability (not - covered by [Touch97])) occurs because applications have access - through the CM API to control shared state that will affect other - applications on the same computer. For instance, a poorly designed, - possibly a compromised, or intentionally malicious UDP application - could misuse cm_update() to cause starvation and/or too-aggressive - behavior of others in the macroflow. - -7. References - - [Allman99] Allman, M. and Paxson, V., "TCP Congestion - Control", RFC 2581, April 1999. - - [Andersen00] Balakrishnan, H., System Support for Bandwidth - Management and Content Adaptation in Internet - Applications, Proc. 4th Symp. on Operating Systems - Design and Implementation, San Diego, CA, October - 2000. Available from - http://nms.lcs.mit.edu/papers/cm-osdi2000.html - - [Balakrishnan98] Balakrishnan, H., Padmanabhan, V., Seshan, S., - Stemm, M., and Katz, R., "TCP Behavior of a Busy - Web Server: Analysis and Improvements," Proc. IEEE - INFOCOM, San Francisco, CA, March 1998. - - [Balakrishnan99] Balakrishnan, H., Rahul, H., and Seshan, S., "An - Integrated Congestion Management Architecture for - Internet Hosts," Proc. ACM SIGCOMM, Cambridge, MA, - September 1999. - - [Bradner96] Bradner, S., "The Internet Standards Process --- - Revision 3", BCP 9, RFC 2026, October 1996. - - [Bradner97] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - - - -Balakrishnan, et. al. 
Standards Track [Page 19] - -RFC 3124 The Congestion Manager June 2001 - - - [Clark90] Clark, D. and Tennenhouse, D., "Architectural - Consideration for a New Generation of Protocols", - Proc. ACM SIGCOMM, Philadelphia, PA, September - 1990. - - [Eggert00] Eggert, L., Heidemann, J., and Touch, J., "Effects - of Ensemble TCP," ACM Computer Comm. Review, - January 2000. - - [Floyd99a] Floyd, S. and Fall, K.," Promoting the Use of End- - to-End Congestion Control in the Internet," - IEEE/ACM Trans. on Networking, 7(4), August 1999, - pp. 458-472. - - [Floyd99b] Floyd, S. and T. Henderson,"The New Reno - Modification to TCP's Fast Recovery Algorithm," RFC - 2582, April 1999. - - [Jacobson88] Jacobson, V., "Congestion Avoidance and Control," - Proc. ACM SIGCOMM, Stanford, CA, August 1988. - - [Mahdavi98] Mahdavi, J. and Floyd, S., "The TCP Friendly - Website," - http://www.psc.edu/networking/tcp_friendly.html - - [Mogul90] Mogul, J. and S. Deering, "Path MTU Discovery," RFC - 1191, November 1990. - - [Padmanabhan98] Padmanabhan, V., "Addressing the Challenges of Web - Data Transport," PhD thesis, Univ. of California, - Berkeley, December 1998. - - [Paxson00] Paxson, V. and M. Allman, "Computing TCP's - Retransmission Timer", RFC 2988, November 2000. - - [Postel81] Postel, J., Editor, "Transmission Control - Protocol", STD 7, RFC 793, September 1981. - - [Ramakrishnan99] Ramakrishnan, K. and Floyd, S., "A Proposal to Add - Explicit Congestion Notification (ECN) to IP," RFC - 2481, January 1999. - - - [Stevens94] Stevens, W., TCP/IP Illustrated, Volume 1. - Addison-Wesley, Reading, MA, 1994. - - [Touch97] Touch, J., "TCP Control Block Interdependence", RFC - 2140, April 1997. - - - -Balakrishnan, et. al. Standards Track [Page 20] - -RFC 3124 The Congestion Manager June 2001 - - -8. Acknowledgments - - We thank David Andersen, Deepak Bansal, and Dorothy Curtis for their - work on the CM design and implementation. 
We thank Vern Paxson for - his detailed comments, feedback, and patience, and Sally Floyd, Mark - Handley, and Steven McCanne for useful feedback on the CM - architecture. Allison Mankin and Joe Touch provided several useful - comments on previous drafts of this document. - -9. Authors' Addresses - - Hari Balakrishnan - Laboratory for Computer Science - 200 Technology Square - Massachusetts Institute of Technology - Cambridge, MA 02139 - - EMail: hari@lcs.mit.edu - Web: http://nms.lcs.mit.edu/~hari/ - - - Srinivasan Seshan - School of Computer Science - Carnegie Mellon University - 5000 Forbes Ave. - Pittsburgh, PA 15213 - - EMail: srini@cmu.edu - Web: http://www.cs.cmu.edu/~srini/ - - - - - - - - - - - - - - - - - - - - - - -Balakrishnan, et. al. Standards Track [Page 21] - -RFC 3124 The Congestion Manager June 2001 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2001). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Balakrishnan, et. al. Standards Track [Page 22] - diff --git a/kernel/picotcp/RFC/rfc3135.txt b/kernel/picotcp/RFC/rfc3135.txt deleted file mode 100644 index 1138e09..0000000 --- a/kernel/picotcp/RFC/rfc3135.txt +++ /dev/null @@ -1,2523 +0,0 @@ - - - - - - -Network Working Group J. Border -Request for Comments: 3135 Hughes Network Systems -Category: Informational M. Kojo - University of Helsinki - J. Griner - NASA Glenn Research Center - G. Montenegro - Sun Microsystems, Inc. - Z. Shelby - University of Oulu - June 2001 - - - Performance Enhancing Proxies Intended to Mitigate Link-Related - Degradations - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2001). All Rights Reserved. - -Abstract - - This document is a survey of Performance Enhancing Proxies (PEPs) - often employed to improve degraded TCP performance caused by - characteristics of specific link environments, for example, in - satellite, wireless WAN, and wireless LAN environments. Different - types of Performance Enhancing Proxies are described as well as the - mechanisms used to improve performance. Emphasis is put on proxies - operating with TCP. 
In addition, motivations for their development - and use are described along with some of the consequences of using - them, especially in the context of the Internet. - -Table of Contents - - 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3 - 2. Types of Performance Enhancing Proxies . . . . . . . . . . . . 4 - 2.1 Layering . . . . . . . . . . . . . . . . . . . . . . . . . . . 4 - 2.1.1 Transport Layer PEPs . . . . . . . . . . . . . . . . . . . . 5 - 2.1.2 Application Layer PEPs . . . . . . . . . . . . . . . . . . . 5 - 2.2 Distribution . . . . . . . . . . . . . . . . . . . . . . . . . 6 - 2.3 Implementation Symmetry . . . . . . . . . . . . . . . . . . . 6 - 2.4 Split Connections . . . . . . . . . . . . . . . . . . . . . . 7 - - - -Border, et al. Informational [Page 1] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - 2.5 Transparency . . . . . . . . . . . . . . . . . . . . . . . . . 8 - 3. PEP Mechanisms . . . . . . . . . . . . . . . . . . . . . . . . 9 - 3.1 TCP ACK Handling . . . . . . . . . . . . . . . . . . . . . . . 9 - 3.1.1 TCP ACK Spacing . . . . . . . . . . . . . . . . . . . . . . 9 - 3.1.2 Local TCP Acknowledgements . . . . . . . . . . . . . . . . . 9 - 3.1.3 Local TCP Retransmissions . . . . . . . . . . . . . . . . . 9 - 3.1.4 TCP ACK Filtering and Reconstruction . . . . . . . . . . . . 10 - 3.2 Tunneling . . . . . . . . . . . . . . . . . . . . . . . . . . 10 - 3.3 Compression . . . . . . . . . . . . . . . . . . . . . . . . . 10 - 3.4 Handling Periods of Link Disconnection with TCP . . . . . . . 11 - 3.5 Priority-based Multiplexing . . . . . . . . . . . . . . . . . 12 - 3.6 Protocol Booster Mechanisms . . . . . . . . . . . . . . . . . 13 - 4. Implications of Using PEPs . . . . . . . . . . . . . . . . . . 14 - 4.1 The End-to-end Argument . . . . . . . . . . . . . . . . . . . 14 - 4.1.1 Security . . . . . . . . . . . . . . . . . . . . . . . . . . 14 - 4.1.1.1 Security Implications . . . . . . . . . . . . . . . . . . 
15 - 4.1.1.2 Security Implication Mitigations . . . . . . . . . . . . . 16 - 4.1.1.3 Security Research Related to PEPs . . . . . . . . . . . . 16 - 4.1.2 Fate Sharing . . . . . . . . . . . . . . . . . . . . . . . . 16 - 4.1.3 End-to-end Reliability . . . . . . . . . . . . . . . . . . . 17 - 4.1.4 End-to-end Failure Diagnostics . . . . . . . . . . . . . . . 19 - 4.2 Asymmetric Routing . . . . . . . . . . . . . . . . . . . . . . 19 - 4.3 Mobile Hosts . . . . . . . . . . . . . . . . . . . . . . . . . 20 - 4.4 Scalability . . . . . . . . . . . . . . . . . . . . . . . . . 20 - 4.5 Other Implications of Using PEPs . . . . . . . . . . . . . . . 21 - 5. PEP Environment Examples . . . . . . . . . . . . . . . . . . . 21 - 5.1 VSAT Environments . . . . . . . . . . . . . . . . . . . . . . 21 - 5.1.1 VSAT Network Characteristics . . . . . . . . . . . . . . . . 22 - 5.1.2 VSAT Network PEP Implementations . . . . . . . . . . . . . . 23 - 5.1.3 VSAT Network PEP Motivation . . . . . . . . . . . . . . . . 24 - 5.2 W-WAN Environments . . . . . . . . . . . . . . . . . . . . . . 25 - 5.2.1 W-WAN Network Characteristics . . . . . . . . . . . . . . . 25 - 5.2.2 W-WAN PEP Implementations . . . . . . . . . . . . . . . . . 26 - 5.2.2.1 Mowgli System . . . . . . . . . . . . . . . . . . . . . . 26 - 5.2.2.2 Wireless Application Protocol (WAP) . . . . . . . . . . . 28 - 5.2.3 W-WAN PEP Motivation . . . . . . . . . . . . . . . . . . . . 29 - 5.3 W-LAN Environments . . . . . . . . . . . . . . . . . . . . . . 30 - 5.3.1 W-LAN Network Characteristics . . . . . . . . . . . . . . . 30 - 5.3.2 W-LAN PEP Implementations: Snoop . . . . . . . . . . . . . . 31 - 5.3.3 W-LAN PEP Motivation . . . . . . . . . . . . . . . . . . . . 33 - 6. Security Considerations . . . . . . . . . . . . . . . . . . . . 34 - 7. IANA Considerations . . . . . . . . . . . . . . . . . . . . . . 34 - 8. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 34 - 9. References . . . . . . . . . . . . . . . . . . . . . . . . 
. . 35 - 10. Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . 39 - Appendix A - PEP Terminology Summary . . . . . . . . . . . . . . . 41 - Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . 45 - - - - -Border, et al. Informational [Page 2] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -1. Introduction - - The Transmission Control Protocol [RFC0793] (TCP) is used as the - transport layer protocol by many Internet and intranet applications. - However, in certain environments, TCP and other higher layer protocol - performance is limited by the link characteristics of the - environment. - - This document is a survey of Performance Enhancing Proxy (PEP) - performance migitigation techniques. A PEP is used to improve the - performance of the Internet protocols on network paths where native - performance suffers due to characteristics of a link or subnetwork on - the path. This document is informational and does not make - recommendations about using PEPs or not using them. Distinct - standards track recommendations for the performance mitigation of TCP - over links with high error rates, links with low bandwidth, and so - on, have been developed or are in development by the Performance - Implications of Link Characteristics WG (PILC) [PILCWEB]. - - Link design choices may have a significant influence on the - performance and efficiency of the Internet. However, not all link - characteristics, for example, high latency, can be compensated for by - choices in the link layer design. And, the cost of compensating for - some link characteristics may be prohibitive for some technologies. - The techniques surveyed here are applied to existing link - technologies. When new link technologies are designed, they should - be designed so that these techniques are not required, if at all - possible. - - This document does not advocate the use of PEPs in any general case. 
- On the contrary, we believe that the end-to-end principle in - designing Internet protocols should be retained as the prevailing - approach and PEPs should be used only in specific environments and - circumstances where end-to-end mechanisms providing similar - performance enhancements are not available. In any environment where - one might consider employing a PEP for improved performance, an end - user (or, in some cases, the responsible network administrator) - should be aware of the PEP and the choice of employing PEP - functionality should be under the control of the end user, especially - if employing the PEP would interfere with end-to-end usage of IP - layer security mechanisms or otherwise have undesirable implications - in some circumstances. This would allow the user to choose end-to- - end IP at all times but, of course, without the performance - enhancements that employing the PEP may yield. - - This survey does not make recommendations, for or against, with - respect to using PEPs. Standards track recommendations have been or - are being developed within the IETF for individual link - - - -Border, et al. Informational [Page 3] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - characteristics, e.g., links with high error rates, links with low - bandwidth, links with asymmetric bandwidth, etc., by the Performance - Implications of Link Characteristics WG (PILC) [PILCWEB]. - - The remainder of this document is organized as follows. Section 2 - provides an overview of different kinds of PEP implementations. - - Section 3 discusses some of the mechanisms which PEPs may employ in - order to improve performance. Section 4 discusses some of the - implications with respect to using PEPs, especially in the context of - the global Internet. 
Finally, Section 5 discusses some example - environments where PEPs are used: satellite very small aperture - terminal (VSAT) environments, mobile wireless WAN (W-WAN) - environments and wireless LAN (W-LAN) environments. A summary of PEP - terminology is included in an appendix (Appendix A). - -2. Types of Performance Enhancing Proxies - - There are many types of Performance Enhancing Proxies. Different - types of PEPs are used in different environments to overcome - different link characteristics which affect protocol performance. - Note that enhancing performance is not necessarily limited in scope - to throughput. Other performance related aspects, like usability of - a link, may also be addressed. For example, [M-TCP] addresses the - issue of keeping TCP connections alive during periods of - disconnection in wireless networks. - - The following sections describe some of the key characteristics which - differentiate different types of PEPs. - -2.1 Layering - - In principle, a PEP implementation may function at any protocol layer - but typically it functions at one or two layers only. In this - document we focus on PEP implementations that function at the - transport layer or at the application layer as such PEPs are most - commonly used to enhance performance over links with problematic - characteristics. A PEP implementation may also operate below the - network layer, that is, at the link layer, but this document pays - only little attention to such PEPs as link layer mechanisms can be - and typically are implemented transparently to network and higher - layers, requiring no modifications to protocol operation above the - link layer. It should also be noted that some PEP implementations - operate across several protocol layers by exploiting the protocol - information and possibly modifying the protocol operation at more - than one layer. For such a PEP it may be difficult to define at - which layer(s) it exactly operates on. - - - - -Border, et al. 
Informational [Page 4] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -2.1.1 Transport Layer PEPs - - Transport layer PEPs operate at the transport level. They may be - aware of the type of application being carried by the transport layer - but, at most, only use this information to influence their behavior - with respect to the transport protocol; they do not modify the - application protocol in any way, but let the application protocol - operate end-to-end. Most transport layer PEP implementations - interact with TCP. Such an implementation is called a TCP - Performance Enhancing Proxy (TCP PEP). For example, in an - environment where ACKs may bunch together causing undesirable data - segment bursts, a TCP PEP may be used to simply modify the ACK - spacing in order to improve performance. On the other hand, in an - environment with a large bandwidth*delay product, a TCP PEP may be - used to alter the behavior of the TCP connection by generating local - acknowledgments to TCP data segments in order to improve the - connection's throughput. - - The term TCP spoofing is sometimes used synonymously for TCP PEP - functionality. However, the term TCP spoofing more accurately - describes the characteristic of intercepting a TCP connection in the - middle and terminating the connection as if the interceptor is the - intended destination. While this is a characteristic of many TCP PEP - implementations, it is not a characteristic of all TCP PEP - implementations. - -2.1.2 Application Layer PEPs - - Application layer PEPs operate above the transport layer. Today, - different kinds of application layer proxies are widely used in the - Internet. 
Such proxies include Web caches and relay Mail Transfer - Agents (MTA) and they typically try to improve performance or service - availability and reliability in general and in a way which is - applicable in any environment but they do not necessarily include any - optimizations that are specific to certain link characteristics. - - Application layer PEPs, on the other hand, can be implemented to - improve application protocol as well as transport layer performance - with respect to a particular application being used with a particular - type of link. An application layer PEP may have the same - functionality as the corresponding regular proxy for the same - application (e.g., relay MTA or Web caching proxy) but extended with - link-specific optimizations of the application protocol operation. - - Some application protocols employ extraneous round trips, overly - verbose headers and/or inefficient header encoding which may have a - significant impact on performance, in particular, with long delay and - slow links. This unnecessary overhead can be reduced, in general or - - - -Border, et al. Informational [Page 5] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - for a particular type of link, by using an application layer PEP in - an intermediate node. Some examples of application layer PEPs which - have been shown to improve performance on slow wireless WAN links are - described in [LHKR96] and [CTC+97]. - -2.2 Distribution - - A PEP implementation may be integrated, i.e., it comprises a single - PEP component implemented within a single node, or distributed, i.e., - it comprises two or more PEP components, typically implemented in - multiple nodes. An integrated PEP implementation represents a single - point at which performance enhancement is applied. For example, a - single PEP component might be implemented to provide impedance - matching at the point where wired and wireless links meet. 
- - A distributed PEP implementation is generally used to surround a - particular link for which performance enhancement is desired. For - example, a PEP implementation for a satellite connection may be - distributed between two PEPs located at each end of the satellite - link. - -2.3 Implementation Symmetry - - A PEP implementation may be symmetric or asymmetric. Symmetric PEPs - use identical behavior in both directions, i.e., the actions taken by - the PEP occur independent from which interface a packet is received. - Asymmetric PEPs operate differently in each direction. The direction - can be defined in terms of the link (e.g., from a central site to a - remote site) or in terms of protocol traffic (e.g., the direction of - TCP data flow, often called the TCP data channel, or the direction of - TCP ACK flow, often called the TCP ACK channel). An asymmetric PEP - implementation is generally used at a point where the characteristics - of the links on each side of the PEP differ or with asymmetric - protocol traffic. For example, an asymmetric PEP might be placed at - the intersection of wired and wireless networks or an asymmetric - application layer PEP might be used for the request-reply type of - HTTP traffic. A PEP implementation may also be both symmetric and - asymmetric at the same time with regard to different mechanisms it - employs. (PEP mechanisms are described in Section 3.) - - Whether a PEP implementation is symmetric or asymmetric is - independent of whether the PEP implementation is integrated or - distributed. In other words, a distributed PEP implementation might - operate symmetrically at each end of a link (i.e., the two PEPs - function identically). On the other hand, a distributed PEP - implementation might operate asymmetrically, with a different PEP - implementation at each end of the link. Again, this usually is used - with asymmetric links. For example, for a link with an asymmetric - - - -Border, et al. 
Informational [Page 6] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - amount of bandwidth available in each direction, the PEP on the end - of the link forwarding traffic in the direction with a large amount - of bandwidth might focus on locally acknowledging TCP traffic in - order to use the available bandwidth. At the same time, the PEP on - the end of the link forwarding traffic in the direction with very - little bandwidth might focus on reducing the amount of TCP - acknowledgement traffic being forwarded across the link (to keep the - link from congesting). - -2.4 Split Connections - - A split connection TCP implementation terminates the TCP connection - received from an end system and establishes a corresponding TCP - connection to the other end system. In a distributed PEP - implementation, this is typically done to allow the use of a third - connection between two PEPs optimized for the link. This might be a - TCP connection optimized for the link or it might be another - protocol, for example, a proprietary protocol running on top of UDP. - Also, the distributed implementation might use a separate connection - between the proxies for each TCP connection or it might multiplex the - data from multiple TCP connections across a single connection between - the PEPs. - - In an integrated PEP split connection TCP implementation, the PEP - again terminates the connection from one end system and originates a - separate connection to the other end system. [I-TCP] documents an - example of a single PEP split connection implementation. - - Many integrated PEPs use a split connection implementation in order - to address a mismatch in TCP capabilities between two end systems. - For example, the TCP window scaling option [RFC1323] can be used to - extend the maximum amount of TCP data which can be "in flight" (i.e., - sent and awaiting acknowledgement). This is useful for filling a - link which has a high bandwidth*delay product. 
If one end system is - capable of using scaled TCP windows but the other is not, the end - system which is not capable can set up its connection with a PEP on - its side of the high bandwidth*delay link. The split connection PEP - then sets up a TCP connection with window scaling over the link to - the other end system. - - Split connection TCP implementations can effectively leverage TCP - performance enhancements optimal for a particular link but which - cannot necessarily be employed safely over the global Internet. - - Note that using split connection PEPs does not necessarily exclude - simultaneous use of IP for end-to-end connectivity. If a split - connection is managed per application or per connection and is under - the control of the end user, the user can decide whether a particular - - - -Border, et al. Informational [Page 7] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - TCP connection or application makes use of the split connection PEP - or whether it operates end-to-end. When a PEP is employed on a last - hop link, the end user control is relatively easy to implement. - - In effect, application layer proxies for TCP-based applications are - split connection TCP implementations with end systems using PEPs as a - service related to a particular application. Therefore, all - transport (TCP) layer enhancements that are available with split - connection TCP implementations can also be employed with application - layer PEPs in conjunction with application layer enhancements. - -2.5 Transparency - - Another key characteristic of a PEP is its degree of transparency. - PEPs may operate totally transparently to the end systems, transport - endpoints, and/or applications involved (in a connection), requiring - no modifications to the end systems, transport endpoints, or - applications. - - On the other hand, a PEP implementation may require modifications to - both ends in order to be used. 
In between, a PEP implementation may - require modifications to only one of the ends involved. Either of - these kind of PEP implementations is non-transparent, at least to the - layer requiring modification. - - It is sometimes useful to think of the degree of transparency of a - PEP implementation at four levels, transparency with respect to the - end systems (network-layer transparent PEP), transparency with - respect to the transport endpoints (transport-layer transparent PEP), - transparency with respect to the applications (application-layer - transparent PEP) and transparency with respect to the users. For - example, a user who subscribes to a satellite Internet access service - may be aware that the satellite terminal is providing a performance - enhancing service even though the TCP/IP stack and the applications - in the user's PC are not aware of the PEP which implements it. - - Note that the issue of transparency is not the same as the issue of - maintaining end-to-end semantics. For example, a PEP implementation - which simply uses a TCP ACK spacing mechanism maintains the end-to- - end semantics of the TCP connection while a split connection TCP PEP - implementation may not. Yet, both can be implemented transparently - to the transport endpoints at both ends. The implications of not - maintaining the end-to-end semantics, in particular the end-to-end - semantics of TCP connections, are discussed in Section 4. - - - - - - - -Border, et al. Informational [Page 8] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -3. PEP Mechanisms - - An obvious key characteristic of a PEP implementation is the - mechanism(s) it uses to improve performance. Some examples of PEP - mechanisms are described in the following subsections. A PEP - implementation might implement more than one of these mechanisms. - -3.1 TCP ACK Handling - - Many TCP PEP implementations are based on TCP ACK manipulation. 
The - handling of TCP acknowledgments can differ significantly between - different TCP PEP implementations. The following subsections - describe various TCP ACK handling mechanisms. Many implementations - combine some of these mechanisms and possibly employ some additional - mechanisms as well. - -3.1.1 TCP ACK Spacing - - In environments where ACKs tend to bunch together, ACK spacing is - used to smooth out the flow of TCP acknowledgments traversing a link. - This improves performance by eliminating bursts of TCP data segments - that the TCP sender would send due to back-to-back arriving TCP - acknowledgments [BPK97]. - -3.1.2 Local TCP Acknowledgements - - In some PEP implementations, TCP data segments received by the PEP - are locally acknowledged by the PEP. This is very useful over - network paths with a large bandwidth*delay product as it speeds up - TCP slow start and allows the sending TCP to quickly open up its - congestion window. Local (negative) acknowledgments are often also - employed to trigger local (and faster) error recovery on links with - significant error rates. (See Section 3.1.3.) - - Local acknowledgments are automatically employed with split - connection TCP implementations. When local acknowledgments are used, - the burden falls upon the TCP PEP to recover any data which is - dropped after the PEP acknowledges it. - -3.1.3 Local TCP Retransmissions - - A TCP PEP may locally retransmit data segments lost on the path - between the TCP PEP and the receiving end system, thus aiming at - faster recovery from lost data. In order to achieve this the TCP PEP - may use acknowledgments arriving from the end system that receives - the TCP data segments, along with appropriate timeouts, to determine - - - - - -Border, et al. Informational [Page 9] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - when to locally retransmit lost data. 
TCP PEPs sending local - acknowledgments to the sending end system are required to employ - local retransmissions towards the receiving end system. - - Some PEP implementations perform local retransmissions even though - they do not use local acknowledgments to alter TCP connection - performance. Basic Snoop [SNOOP] is a well know example of such a - PEP implementation. Snoop caches TCP data segments it receives and - forwards and then monitors the end-to-end acknowledgments coming from - the receiving TCP end system for duplicate acknowledgments (DUPACKs). - When DUPACKs are received, Snoop locally retransmits the lost TCP - data segments from its cache, suppressing the DUPACKs flowing to the - sending TCP end system until acknowledgments for new data are - received. The Snoop system also implements an option to employ local - negative acknowledgments to trigger local TCP retransmissions. This - can be achieved, for example, by applying TCP selective - acknowledgments locally on the error-prone link. (See Section 5.3 - for details.) - -3.1.4 TCP ACK Filtering and Reconstruction - - On paths with highly asymmetric bandwidth the TCP ACKs flowing in the - low-speed direction may get congested if the asymmetry ratio is high - enough. The ACK filtering and reconstruction mechanism addresses - this by filtering the ACKs on one side of the link and reconstructing - the deleted ACKs on the other side of the link. The mechanism and - the issue of dealing with TCP ACK congestion with highly asymmetric - links are discussed in detail in [RFC2760] and in [BPK97]. - -3.2 Tunneling - - A Performance Enhancing Proxy may encapsulate messages to carry the - messages across a particular link or to force messages to traverse a - particular path. A PEP at the other end of the encapsulation tunnel - removes the tunnel wrappers before final delivery to the receiving - end system. 
A tunnel might be used by a distributed split connection - TCP implementation as the means for carrying the connection between - the distributed PEPs. A tunnel might also be used to support forcing - TCP connections which use asymmetric routing to go through the end - points of a distributed PEP implementation. - -3.3 Compression - - Many PEP implementations include support for one or more forms of - compression. In some PEP implementations, compression may even be - the only mechanism used for performance improvement. Compression - reduces the number of bytes which need to be sent across a link. - This is useful in general and can be very important for bandwidth - - - -Border, et al. Informational [Page 10] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - limited links. Benefits of using compression include improved link - efficiency and higher effective link utilization, reduced latency and - improved interactive response time, decreased overhead and reduced - packet loss rate over lossy links. - - Where appropriate, link layer compression is used. TCP and IP header - compression are also frequently used with PEP implementations. - [RFC1144] describes a widely deployed method for compressing TCP - headers. Other header compression algorithms are described in - [RFC2507], [RFC2508] and [RFC2509]. - - Payload compression is also desirable and is increasing in importance - with today's increased emphasis on Internet security. Network (IP) - layer (and above) security mechanisms convert IP payloads into random - bit streams which defeat applicable link layer compression mechanisms - by removing or hiding redundant "information." Therefore, - compression of the payload needs to be applied before security - mechanisms are applied. [RFC2393] defines a framework where common - compression algorithms can be applied to arbitrary IP segment - payloads. However, [RFC2393] compression is not always applicable. 
- Many types of IP payloads (e.g., images, audio, video and "zipped" - files being transferred) are already compressed. And, when security - mechanisms such as TLS [RFC2246] are applied above the network (IP) - layer, the data is already encrypted (and possibly also compressed), - again removing or hiding any redundancy in the payload. The - resulting additional transport or network layer compression will - compact only headers, which are small, and possibly already covered - by separate compression algorithms of their own. - - With application layer PEPs one can employ application-specific - compression. Typically an application-specific (or content-specific) - compression mechanism is much more efficient than any generic - compression mechanism. For example, a distributed Web PEP - implementation may implement more efficient binary encoding of HTTP - headers, or a PEP can employ lossy compression that reduces the image - quality of online-images on Web pages according to end user - instructions, thus reducing the number of bytes transferred over a - slow link and consequently the response time perceived by the user - [LHKR96]. - -3.4 Handling Periods of Link Disconnection with TCP - - Periods of link disconnection or link outages are very common with - some wireless links. During these periods, a TCP sender does not - receive the expected acknowledgments. Upon expiration of the - retransmit timer, this causes TCP to close its congestion window with - all of the related drawbacks. A TCP PEP may monitor the traffic - coming from the TCP sender towards the TCP receiver behind the - - - -Border, et al. Informational [Page 11] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - disconnected link. The TCP PEP retains the last ACK, so that it can - shut down the TCP sender's window by sending the last ACK with a - window set to zero. Thus, the TCP sender will go into persist mode. 
- - To make this work in both directions with an integrated TCP PEP - implementation, the TCP receiver behind the disconnected link must be - aware of the current state of the connection and, in the event of a - disconnection, it must be capable of freezing all timers. [M-TCP] - implements such operation. Another possibility is that the - disconnected link is surrounded by a distributed PEP pair. - - In split connection TCP implementations, a period of link - disconnection can easily be hidden from the end host on the other - side of the PEP thus precluding the TCP connection from breaking even - if the period of link disconnection lasts a very long time; if the - TCP PEP cannot forward data due to link disconnection, it stops - receiving data. Normal TCP flow control then prevents the TCP sender - from sending more than the TCP advertised window allowed by the PEP. - Consequently, the PEP and its counterpart behind the disconnected - link can employ a modified TCP version which retains the state and - all unacknowledged data segments across the period of disconnection - and then performs local recovery as the link is reconnected. The - period of link disconnection may or may not be hidden from the - application and user, depending upon what application the user is - using the TCP connection for. - -3.5 Priority-based Multiplexing - - Implementing priority-based multiplexing of data over a slow and - expensive link may significantly improve the performance and - usability of the link for selected applications or connections. - - A user behind a slow link would experience the link more feasible to - use in case of simultaneous data transfers, if urgent data transfers - (e.g., interactive connections) could have shorter response time - (better performance) than less urgent background transfers. 
If the - interactive connections transmit enough data to keep the slow link - fully utilized, it might be necessary to fully suspend the background - transfers for awhile to ensure timely delivery for the interactive - connections. - - In flight TCP segments of an end-to-end TCP connection (with low - priority) cannot be delayed for a long time. Otherwise, the TCP - timer at the sending end would expire, resulting in suboptimal - performance. However, this kind of operation can be controlled in - conjunction with a split connection TCP PEP by assigning different - priorities for different connections (or applications). A split - connection PEP implementation allows the PEP in an intermediate node - - - -Border, et al. Informational [Page 12] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - to delay the data delivery of a lower-priority TCP flow for an - unlimited period of time by simply rescheduling the order in which it - forwards data of different flows to the destination host behind the - slow link. This does not have a negative impact on the delayed TCP - flow as normal TCP flow control takes care of suspending the flow - between the TCP sender and the PEP, when the PEP is not forwarding - data for the flow, and resumes it once the PEP decides to continue - forwarding data for the flow. This can further be assisted, if the - protocol stacks on both sides of the slow link implement priority - based scheduling of connections. - - With such a PEP implementation, along with user-controlled - priorities, the user can assign higher priority for selected - interactive connection(s) and have much shorter response time for the - selected connection(s), even if there are simultaneous low priority - bulk data transfers which in regular end-to-end operation would - otherwise eat the available bandwidth of the slow link almost - completely. 
These low priority bulk data transfers would then - proceed nicely during the idle periods of interactive connections, - allowing the user to keep the slow and expensive link (e.g., wireless - WAN) fully utilized. - - Other priority-based mechanisms may be applied on shared wireless - links with more than two terminals. With shared wireless mediums - becoming a weak link in Internet QoS architectures, many may turn to - PEPs to provide extra priority levels across a shared wireless medium - [SHEL00]. These PEPs are distributed on all nodes of the shared - wireless medium. For example, in an 802.11 WLAN this PEP is - implemented in the access point (base station) and each mobile host. - One PEP then uses distributed queuing techniques to coordinate - traffic classes of all nodes. This is also sometimes called subnet - bandwidth management. See [BBKT97] for an example of queuing - techniques which can be used to achieve this. This technique can be - implemented either above or below the IP layer. Priority treatment - can typically be specified either by the user or by marking the - (IPv4) ToS or (IPv6) Traffic Class IP header field. - -3.6 Protocol Booster Mechanisms - - Work in [FMSBMR98] shows a range of other possible PEP mechanisms - called protocol boosters. Some of these mechanisms are specific to - UDP flows. For example, a PEP may apply asymmetrical methods such as - extra UDP error detection. Since the 16 bit UDP checksum is - optional, it is typically not computed. However, for links with - errors, the checksum could be beneficial. This checksum can be added - to outgoing UDP packets by a PEP. - - - - - -Border, et al. Informational [Page 13] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - Symmetrical mechanisms have also been developed. A Forward Erasure - Correction (FZC) mechanism can be used with real-time and multicast - traffic. The encoding PEP adds a parity packet over a block of - packets. 
Upon reception, the parity is removed and missing data is - regenerated. A jitter control mechanism can be implemented at the - expense of extra latency. A sending PEP can add a timestamp to - outgoing packets. The receiving PEP then delays packets in order to - reproduce the correct interval. - -4. Implications of Using PEPs - - The following sections describe some of the implications of using - Performance Enhancing Proxies. - -4.1 The End-to-end Argument - - As indicated in [RFC1958], the end-to-end argument [SRC84] is one of - the architectural principles of the Internet. The basic argument is - that, as a first principle, certain required end-to-end functions can - only be correctly performed by the end systems themselves. Most of - the potential negative implications associated with using PEPs are - related to the possibility of breaking the end-to-end semantics of - connections. This is one of the main reasons why PEPs are not - recommended for general use. - - As indicated in Section 2.5, not all PEP implementations break the - end-to-end semantics of connections. Correctly designed PEPs do not - attempt to replace any application level end-to-end function, but - only attempt to add performance optimizations to a subpath of the - end-to-end path between the application endpoints. Doing this can be - consistent with the end-to-end argument. However, a user or network - administrator adding a PEP to his network configuration should be - aware of the potential end-to-end implications related to the - mechanisms being used by the particular PEP implementation. - -4.1.1 Security - - In most cases, security applied above the transport layer can be used - with PEPs, especially transport layer PEPs. However, today, only a - limited number of applications include support for the use of - transport (or higher) layer security. 
Network (IP) layer security - (IPsec) [RFC2401], on the other hand, can generally be used by any - application, transparently to the application. - - - - - - - - -Border, et al. Informational [Page 14] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -4.1.1.1 Security Implications - - The most detrimental negative implication of breaking the end-to-end - semantics of a connection is that it disables end-to-end use of - IPsec. In general, a user or network administrator must choose - between using PEPs and using IPsec. If IPsec is employed end-to-end, - PEPs that are implemented on intermediate nodes in the network cannot - examine the transport or application headers of IP packets because - encryption of IP packets via IPsec's ESP header (in either transport - or tunnel mode) renders the TCP header and payload unintelligible to - the PEPs. Without being able to examine the transport or application - headers, a PEP may not function optimally or at all. - - If a PEP implementation is non-transparent to the users and the users - trust the PEP in the middle, IPsec can be used separately between - each end system and PEP. However, in most cases this is an - undesirable or unacceptable alternative as the end systems cannot - trust PEPs in general. In addition, this is not as secure as end- - to-end security. (For example, the traffic is exposed in the PEP - when it is decrypted to be processed.) And, it can lead to - potentially misleading security level assumptions by the end systems. - If the two end systems negotiate different levels of security with - the PEP, the end system which negotiated the stronger level of - security may not be aware that a lower level of security is being - provided for part of the connection. 
The PEP could be implemented to - prevent this from happening by being smart enough to force the same - level of security to each end system but this increases the - complexity of the PEP implementation (and still is not as secure as - end-to-end security). - - With a transparent PEP implementation, it is difficult for the end - systems to trust the PEP because they may not be aware of its - existence. Even if the user is aware of the PEP, setting up - acceptable security associations with the PEP while maintaining the - PEP's transparent nature is problematic (if not impossible). - - Note that even when a PEP implementation does not break the end-to- - end semantics of a connection, the PEP implementation may not be able - to function in the presence of IPsec. For example, it is difficult - to do ACK spacing if the PEP cannot reliably determine which IP - packets contain ACKs of interest. In any case, the authors are - currently not aware of any PEP implementations, transparent or non- - transparent, which provide support for end-to-end IPsec, except in a - case where the PEPs are implemented on the end hosts. - - - - - - - -Border, et al. Informational [Page 15] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -4.1.1.2 Security Implication Mitigations - - There are some steps which can be taken to allow the use of IPsec and - PEPs to coexist. If an end user can select the use of IPsec for some - traffic and not for other traffic, PEP processing can be applied to - the traffic sent without IPsec. Of course, the user must then do - without security for this traffic or provide security for the traffic - via other means (for example, by using transport layer security). - However, even when this is possible, significant complexity may need - to be added to the configuration of the end system. - - Another alternative is to implement IPsec between the two PEPs of a - distributed PEP implementation. This at least protects the traffic - between the two PEPs. 
(The issue of trusting the PEPs does not - change.) In the case where the PEP implementation is not transparent - to the user, (assuming that the user trusts the PEPs,) the user can - configure his end system to use the PEPs as the end points of an - IPsec tunnel. And, an IPsec tunnel could even potentially be used - between the end system and a PEP to protect traffic on this part of - the path. But, all of this adds complexity. And, it still does not - eliminate the risk of the traffic being exposed in the PEP itself as - the traffic is received from one IPsec tunnel, processed and then - forwarded (even if forwarded through another IPsec tunnel). - -4.1.1.3 Security Research Related to PEPs - - There is research underway investigating the possibility of changing - the implementation of IPsec to be more friendly to the use of PEPs. - One approach being actively looked at is the use of multi-layer IP - security. [Zhang00] describes a method which allows TCP headers to - be encrypted as one layer (with the PEPs in the path of the TCP - connections included in the security associations used to encrypt the - TCP headers) while the TCP payload is encrypted end-to-end as a - separate layer. This still involves trusting the PEP, but to a much - lesser extent. However, a drawback to this approach is that it adds - a significant amount of complexity to the IP security implementation. - Given the existing complexity of IPsec, this drawback is a serious - impediment to the standardization of the multi-layer IP security idea - and it is very unlikely that this approach will be adopted as a - standard any time soon. Therefore, relying on this type of approach - will likely involve the use of non-standard protocols (and the - associated risk of doing so). - -4.1.2 Fate Sharing - - Another important aspect of the end-to-end argument is fate sharing. 
- If a failure occurs in the network, the ability of the connection to - survive the failure depends upon how much state is being maintained - - - -Border, et al. Informational [Page 16] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - on behalf of the connection in the network and whether the state is - self-healing. If no connection specific state resides in the network - or such state is self-healing as in case of regular end-to-end - operation, then a failure in the network will break the connection - only if there is no alternate path through the network between the - end systems. And, if there is no path, both end systems can detect - this. However, if the connection depends upon some state being - stored in the network (e.g., in a PEP), then a failure in the network - (e.g., the node containing a PEP crashes) causes this state to be - lost, forcing the connection to terminate even if an alternate path - through the network exists. - - The importance of this aspect of the end-to-end argument with respect - to PEPs is dependent upon both the PEP implementation and upon the - types of applications being used. Sometimes coincidentally but more - often by design, PEPs are used in environments where there is no - alternate path between the end systems and, therefore, a failure of - the intermediate node containing a PEP would result in the - termination of the connection in any case. And, even when this is - not the case, the risk of losing the connection in the case of - regular end-to-end operation may exist as the connection could break - for some other reason, for example, a long enough link outage of a - last-hop wireless link to the end host. Therefore, users may choose - to accept the risk of a PEP crashing in order to take advantage of - the performance gains offered by the PEP implementation. 
The - important thing is that accepting the risk should be under the - control of the user (i.e., the user should always have the option to - choose end-to-end operation) and, if the user chooses to use the PEP, - the user should be aware of the implications that a PEP failure has - with respect to the applications being used. - -4.1.3 End-to-end Reliability - - Another aspect of the end-to-end argument is that of acknowledging - the receipt of data end-to-end in order to achieve reliable end-to- - end delivery of data. An application aiming at reliable end-to-end - delivery must implement an end-to-end check and recovery at the - application level. According to the end-to-end argument, this is the - only possibility to correctly implement reliable end-to-end - operation. Otherwise the application violates the end-to-end - argument. This also means that a correctly designed application can - never fully rely on the transport layer (e.g., TCP) or any other - communication subsystem to provide reliable end-to-end delivery. - - First, a TCP connection may break down for some reason and result in - lost data that must be recovered at the application level. Second, - the checksum provided by TCP may be considered inadequate, resulting - in undetected (by TCP) data corruption [Pax99] and requiring an - - - -Border, et al. Informational [Page 17] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - application level check for data corruption. Third, a TCP - acknowledgement only indicates that data was delivered to the TCP - implementation on the other end system. It does not guarantee that - the data was delivered to the application layer on the other end - system. Therefore, a well designed application must use an - application layer acknowledgement to ensure end-to-end delivery of - application layer data. 
Note that this does not diminish the value - of a reliable transport protocol (i.e., TCP) as such a protocol - allows efficient implementation of several essential functions (e.g., - congestion control) for an application. - - If a PEP implementation acknowledges application data prematurely - (before the PEP receives an application ACK from the other endpoint), - end-to-end reliability cannot be guaranteed. Typically, application - layer PEPs do not acknowledge data prematurely, i.e., the PEP does - not send an application ACK to the sender until it receives an - application ACK from the receiver. And, transport layer PEP - implementations, including TCP PEPs, generally do not interfere with - end-to-end application layer acknowledgments as they let applications - operate end-to-end. However, the user and/or network administrator - employing the PEP must understand how it operates in order to - understand the risks related to end-to-end reliability. - - Some Internet applications do not necessarily operate end-to-end in - their regular operation, thus abandoning any end-to-end reliability - guarantee. For example, Internet email delivery often operates via - relay Mail Transfer Agents, that is, relay Simple Mail Transfer - Protocol (SMTP) servers. An originating MTA (SMTP server) sends the - mail message to a relay MTA that receives the mail message, stores it - in non-volatile storage (e.g., on disk) and then sends an application - level acknowledgement. The relay MTA then takes "full - responsibility" for delivering the mail message to the destination - SMTP server (maybe via another relay MTA); it tries to forward the - message for a relatively long time (typically around 5 days). This - scheme does not give a 100% guarantee of email delivery, but - reliability is considered "good enough". 
- - An application layer PEP for this kind of an application may - acknowledge application data (e.g., mail message) without essentially - decreasing reliability, as long as the PEP operates according to the - same procedure as the regular proxy (e.g., relay MTA). Again, as - indicated above, the user and/or network administrator employing such - a PEP needs to understand how it operates in order to understand the - reliability risks associated with doing so. - - - - - - - -Border, et al. Informational [Page 18] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -4.1.4 End-to-end Failure Diagnostics - - Another aspect of the end-to-end argument is the ability to support - end-to-end failure diagnostics when problems are encountered. If a - network problem occurs which breaks a connection, the end points of - the connection will detect the failure via timeouts. However, the - existence of a PEP in between the two end points could delay - (sometimes significantly) the detection of the failure by one or both - of the end points. (Of course, some PEPs are intentionally designed - to hide these types of failures as described in Section 3.4.) The - implications of delayed detection of a failed connection depend on - the applications being used. Possibilities range from no impact at - all (or just minor annoyance to the end user) all the way up to - impacting mission critical business functions by delaying switchovers - to alternate communications paths. - - In addition, tools used to debug connection failures may be affected - by the use of a PEP. For example, PING (described in [RFC792] and - [RFC2151]) is often used to test for connectivity. But, because PING - is based on ICMP instead of TCP (i.e., it is implemented using ICMP - Echo and Reply commands at the network layer), it is possible that - the configuration of the network might route PING traffic around the - PEP. 
Thus, PING could indicate that an end-to-end path exists - between two hosts when it does not actually exist for TCP traffic. - Even when the PING traffic does go through the PEP, the diagnostics - indications provided by the PING traffic are altered. For example, - if the PING traffic goes transparently through the PEP, PING does not - provide any indication that the PEP exists and since the PING traffic - is not being subjected to the same processing as TCP traffic, it may - not necessarily provide an accurate indication of the network delay - being experienced by TCP traffic. On the other hand, if the PEP - terminates the PING and responds to it on behalf of the end host, - then the PING provides information only on the connectivity to the - PEP. Traceroute (also described in [RFC2151]) is similarly affected - by the presence of the PEP. - -4.2 Asymmetric Routing - - Deploying a PEP implementation usually requires that traffic to and - from the end hosts is routed through the intermediate node(s) where - PEPs reside. With some networks, this cannot be accomplished, or it - might require that the intermediate node is located several hops away - from the target link edge which in turn is impractical in many cases - and may result in non-optimal routing. - - - - - - - -Border, et al. Informational [Page 19] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - Note that this restriction does not apply to all PEP implementations. - For example, a PEP which is simply doing ACK spacing only needs to - see one direction of the traffic flow (the direction in which the - ACKs are flowing). ACK spacing can be done without seeing the actual - flow of data. - -4.3 Mobile Hosts - - In environments where a PEP implementation is used to serve mobile - hosts, additional problems may be encountered because PEP related - state information may need to be transferred to a new PEP node during - a handoff. - - When a mobile host moves, it is subject to handovers. 
If the - intermediate node and home for the serving PEP changes due to - handover, any state information that the PEP maintains and is - required for continuous operation must be transferred to the new - intermediate node to ensure continued operation of the connection. - This requires extra work and overhead and may not be possible to - perform fast enough, especially if the host moves frequently over - cell boundaries of a wireless network. If the mobile host moves to - another IP network, routing to and from the mobile host may need to - be changed to traverse a new PEP node. - - Today, mobility implications with respect to using PEPs are more - significant to W-LAN networks than to W-WAN networks. Currently, a - W-WAN base station typically does not provide the mobile host with - the connection point to the wireline Internet. (A W-WAN base station - may not even have an IP stack.) Instead, the W-WAN network takes - care of mobility with the connection point to the wireline Internet - remaining unchanged while the mobile host moves. Thus, PEP state - handover is not currently required in most W-WAN networks when the - host moves. However, this is generally not true in W-LAN networks - and, even in the case of W-WAN networks, the user and/or network - administrator using a PEP needs to be cognizant of how the W-WAN base - stations and the PEP work in case W-WAN PEP state handoff becomes - necessary in the future. - -4.4 Scalability - - Because a PEP typically processes packet information above the IP - layer, a PEP requires more processing power per packet than a router. - Therefore, PEPs will always be (at least) one step behind routers in - terms of the total throughput they can support. (Processing above - the IP layer is also more difficult to implement in hardware.) In - addition, since most PEP implementations require per connection - state, PEP memory requirements are generally significantly higher - - - - -Border, et al. 
Informational [Page 20] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - than with a router. Therefore, a PEP implementation may have a limit - on the number of connections which it can support whereas a router - has no such limitation. - - Increased processing power and memory requirements introduce - scalability issues with respect to the use of PEPs. Placement of a - PEP on a high speed link or a link which supports a large number of - connections may require network topology changes beyond just - inserting the PEP into the path of the traffic. For example, if a - PEP can only handle half of the traffic on a link, multiple PEPs may - need to be used in parallel, adding complexity to the network - configuration to divide the traffic between the PEPs. - -4.5 Other Implications of Using PEPs - - This document describes some significant implications with respect to - using Performance Enhancing Proxies. However, the list of - implications provided in this document is not necessarily exhaustive. - Some examples of other potential implications related to using PEPs - include the use of PEPs in multi-homing environments and the use of - PEPs with respect to Quality of Service (QoS) transparency. For - example, there may be potential interaction with the priority-based - multiplexing mechanism described in Section 3.5 and the use of - differentiated services [RFC2475]. Therefore, users and network - administrators who wish to deploy a PEP should look not only at the - implications described in this document but also at the overall - impact (positive and negative) that the PEP will have on their - applications and network infrastructure, both initially and in the - future when new applications are added and/or changes in the network - infrastructure are required. - -5. PEP Environment Examples - - The following sections describe examples of environments where PEP is - currently used to improve performance. 
The examples are provided to - illustrate the use of the various PEP types and PEP mechanisms - described earlier in the document and to help illustrate the - motivation for their development and use. - -5.1 VSAT Environments - - Today, VSAT networks are implemented with geosynchronous satellites. - VSAT data networks are typically implemented using a star topology. - A large hub earth station is located at the center of the star with - VSATs used at the remote sites of the network. Data is sent from the - hub to the remote sites via an outroute. Data is sent from the - remote sites to the hub via one or more inroutes. VSATs represent an - environment with highly asymmetric links, with an outroute typically - - - -Border, et al. Informational [Page 21] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - much larger than an inroute. (Multiple inroutes can be used with - each outroute but any particular VSAT only has access to a single - inroute at a time, making the link asymmetric.) - - VSAT networks are generally used to implement private networks (i.e., - intranets) for enterprises (e.g., corporations) with geographically - dispersed sites. VSAT networks are rarely, if ever, used to - implement Internet connectivity except at the edge of the Internet - (i.e., as the last hop). Connection to the Internet for the VSAT - network is usually implemented at the VSAT network hub site using - appropriate firewall and (when necessary) NAT [RFC2663] devices. - -5.1.1 VSAT Network Characteristics - - With respect to TCP performance, VSAT networks exhibit the following - subset of the satellite characteristics documented in [RFC2488]: - - Long feedback loops - - Propagation delay from a sender to a receiver in a geosynchronous - satellite network can range from 240 to 280 milliseconds, - depending on where the sending and receiving sites are in the - satellite footprint. This makes the round trip time just due to - propagation delay at least 480 milliseconds. 
Queueing delay and - delay due to shared channel access methods can sometimes increase - the total delay up to on the order of a few seconds. - - Large bandwidth*delay products - - VSAT networks can support capacity ranging from a few kilobits per - second up to multiple megabits per second. When combined with the - relatively long round trip time, TCP needs to keep a large number - of packets "in flight" in order to fully utilize the satellite - link. - - Asymmetric capacity - - As indicated above, the outroute of a VSAT network is usually - significantly larger than an inroute. Even though multiple - inroutes can be used within a network, a given VSAT can only - access one inroute at a time. Therefore, the incoming (outroute) - and outgoing (inroute) capacity for a VSAT is often very - asymmetric. As outroute capacity has increased in recent years, - ratios of 400 to 1 or greater are becoming more and more common. - With a TCP maximum segment size of 1460 bytes and delayed - acknowledgments [RFC1122] in use, the ratio of IP packet bytes for - data to IP packet bytes for ACKs is only (3000 to 40) 75 to 1. - - - - -Border, et al. Informational [Page 22] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - Thus, inroute capacity for carrying ACKs can have a significant - impact on TCP performance. (The issue of asymmetric link impact - on TCP performance is described in more detail in [BPK97].) - - With respect to the other satellite characteristics listed in - [RFC2488], VSAT networks typically do not suffer from intermittent - connectivity or variable round trip times. Also, VSAT networks - generally include a significant amount of error correction coding. - This makes the bit error rate very low during clear sky conditions, - approaching the bit error rate of a typical terrestrial network. 
In - severe weather, the bit error rate may increase significantly but - such conditions are rare (when looked at from an overall network - availability point of view) and VSAT networks are generally - engineered to work during these conditions but not to optimize - performance during these conditions. - -5.1.2 VSAT Network PEP Implementations - - Performance Enhancing Proxies implemented for VSAT networks generally - focus on improving throughput (for applications such as FTP and HTTP - web page retrievals). To a lesser degree, PEP implementations also - work to improve interactive response time for small transactions. - - There is not a dominant PEP implementation used with VSAT networks. - Each VSAT network vendor tends to implement their own version of PEP - functionality, integrated with the other features of their VSAT - product. [HNS] and [SPACENET] describe VSAT products with integrated - PEP capabilities. There are also third party PEP implementations - designed to be used with VSAT networks. These products run on nodes - external to the VSAT network at the hub and remote sites. NettGain - [FLASH] and Venturi [FOURELLE] are examples of such products. VSAT - network PEP implementations generally share the following - characteristics: - - - They focus on improving TCP performance; - - - They use an asymmetric distributed implementation; - - - They use a split connection approach with local acknowledgments - and local retransmissions; - - - They support some form of compression to reduce the amount of - bandwidth required (with emphasis on saving inroute bandwidth). - - The key differentiators between VSAT network PEP implementations are: - - - The maximum throughput they attempt to support (mainly a - function of the amount of buffer space they use); - - - -Border, et al. Informational [Page 23] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - - The protocol used over the satellite link. 
Some implementations - use a modified version of TCP while others use a proprietary - protocol running on top of UDP; - - - The type of compression used. Third party VSAT network PEP - implementations generally focus on application (e.g., HTTP) - specific compression algorithms while PEP implementations - integrated into the VSAT network generally focus on link - specific compression. - - PEP implementations integrated into a VSAT product are generally - transparent to the end systems. Third party PEP implementations used - with VSAT networks usually require configuration changes in the - remote site end systems to route TCP packets to the remote site - proxies but do not require changes to the hub site end systems. In - some cases, the PEP implementation is actually integrated - transparently into the end system node itself, using a "bump in the - stack" approach. In all cases, the use of a PEP is non-transparent - to the user, i.e., the user is aware when a PEP implementation is - being used to boost performance. - -5.1.3 VSAT Network PEP Motivation - - VSAT networks, since the early stages of their deployment, have - supported the use of local termination of a protocol (e.g., SDLC and - X.25) on each side of the satellite link to hide the satellite link - from the applications using the protocol. Therefore, when LAN - capabilities were added to VSAT networks, VSAT customers expected - and, in fact, demanded, the use of similar techniques for improving - the performance of IP based traffic, in particular TCP traffic. - - As indicated in Section 5.1, VSAT networks are primarily used to - implement intranets with Internet connectivity limited to and closely - controlled at the hub site of the VSAT network. Therefore, VSAT - customers are not as affected (or at least perceive that they are not - as affected) by the Internet related implications of using PEPs as - are other technologies. 
Instead, what is more important to VSAT - customers is the optimization of the network. And, VSAT customers, - in general, prefer that the optimization of the network be done by - the network itself rather than by implementing changes (such as - enabling the TCP scaled window option) to their own equipment. VSAT - customers prefer to optimize their end system configuration for local - communications related to their local mission critical functions and - let the VSAT network hide the presence of the satellite link as much - as possible. VSAT network vendors have also been able to use PEP - functionality to provide value added "services" to their customers - such as extending the useful life of older equipment which - includes older, "non-modern" TCP stacks. - - - -Border, et al. Informational [Page 24] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - Of course, as the line between intranets and the Internet continues - to fade, the implications of using PEPs start to become more - significant for VSAT networks. For example, twelve years ago - security was not a major concern because the equipment cost related - to being able to intercept VSAT traffic was relatively high. Now, as - technology has advanced, the cost is much less prohibitive. - Therefore, because the use of PEP functionality in VSAT networks - prevents the use of IPsec, customers must rely on the use of higher - layer security mechanisms such as TLS or on proprietary security - mechanisms implemented in the VSAT networks themselves (since - currently many applications are incapable of making (or simply don't - make) use of the standardized higher layer security mechanisms). - This, in turn, affects the cost of the VSAT network as well as - affects the ability of the customers to make use of Internet based - capabilities. - -5.2 W-WAN Environments - - In mobile wireless WAN (W-WAN) environments the wireless link is - typically used as the last-hop link to the end user.
W-WANs include - such networks as GSM [GSM], GPRS [GPRS],[BW97], CDPD [CDPD], IS-95 - [CDMA], RichoNet, and PHS. Many of these networks, but not all, have - been designed to provide mobile telephone voice service in the first - place but include data services as well or they evolve from a mobile - telephone network. - -5.2.1 W-WAN Network Characteristics - - W-WAN links typically exhibit some combination of the following link - characteristics: - - - low bandwidth (with some links the available bandwidth might be - as low as a few hundred bits/sec) - - - high latency (minimum round-trip delay close to one second is - not exceptional) - - - high BER resulting in frame or packet losses, or long variable - delays due to local link-layer error recovery - - - some W-WAN links have a lot of internal buffer space which tend - to accumulate data, thus resulting in increased round-trip - delay due to long (and variable) queuing delays - - - on some W-WAN links the users may share common channels for - their data packet delivery which, in turn, may cause unexpected - delays to the packet delivery of a user due to simultaneous use - of the same channel resources by the other users - - - -Border, et al. Informational [Page 25] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - - unexpected link disconnections (or intermittent link outages) - may occur frequently and the period of disconnection may last a - very long time - - - (re)setting the link-connection up may take a long time - (several tens of seconds or even minutes) - - - the W-WAN network typically takes care of terminal mobility: - the connection point to the Internet is retained while the user - moves with the mobile host - - - the use of most W-WAN links is expensive. Many of the service - providers apply time-based charging. 
- -5.2.2 W-WAN PEP Implementations - - Performance Enhancing Proxies implemented for W-WAN environments - generally focus on improving the interactive response time but at the - same time aim at improving throughput, mainly by reducing the - transfer volume over the inherently slow link in various ways. To - achieve this, typically enhancements are applied at almost all - protocol layers. - -5.2.2.1 Mowgli System - - The Mowgli system [KRA94] is one of the early approaches to address - the challenges induced by the problematic characteristics of low - bandwidth W-WAN links. - - The indirect approach used in Mowgli is not limited to a single layer - as in many other split connection approaches, but it involves all - protocol layers. The basic architecture is based on split TCP (UDP - is also supported) together with full support for application layer - proxies with a distributed PEP approach. An application layer proxy - pair may be added between a client and server, the agent (local - proxy) on a mobile host and the proxy on an intermediate node that - provides the mobile host with the connection to the wireline - Internet. Such a pair may be either explicit or fully transparent to - the applications, but it is, at all times, under end-user control - thus allowing the user to select the traffic that traverses through - the PEP implementation and choose end-to-end IP for other traffic. - - In order to allow running legacy applications unmodified and without - recompilation, the socket layer implementation on the mobile host is - slightly modified to connect the applications, which are configured - to traverse through the PEP, to a local agent while retaining the - original TCP/IP socket semantics. Two types of application layer - agent-proxy pairs can be configured for mobile host application use. - - - -Border, et al. 
Informational [Page 26] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - A generic pair can be used with any application and it simply - provides split transport service with some optional generic - enhancements like compression. An application-specific pair can be - tailored for any application or a group of applications that are able - to take leverage on the same kind of enhancements. A good example of - enhancements achieved with an application-specific proxy pair is the - Mowgli WWW system that improves significantly the user perceived - response time of Web browsing mainly by reducing the transfer volume - and the number of round trips over the wireless link [LAKLR95], - [LHKR96]. - - Mowgli provides also an option to replace the TCP/IP core protocols - on the last-hop link with a custom protocol that is tuned for low- - bandwidth W-WAN links [KRLKA97]. This protocol was designed to - provide the same transport service with similar semantics as regular - TCP and UDP provide, but use a different protocol implementation that - can freely apply any appropriate protocol mechanisms without being - constrained by the current TCP/IP packet format or protocol - operation. As this protocol is required to operate over a single - logical link only, it could partially combine the protocol control - information and protocol operation of the link, network, and - transport layers. In addition, the protocol can operate on top of - various link services, for example on top of different raw link - services, on top of PPP, on top of IP, or even on top of a single TCP - connection using it as a link service and implementing "TCP - multiplexing" over it. In all other cases, except when the protocol - is configured to operate on top of raw (wireless) link service, IP - may co-exist with the custom protocol allowing simultaneous end-to- - end IP delivery for the traffic not traversing through the PEP - implementation.
- - Furthermore, the custom protocol can be run in different operation - modes which turn on or off certain protocol functions depending on - the underlying link service. For example, if the underlying link - service provides reliable data delivery, the checksum and the - window-based error recovery can be turned off, thus reducing the - protocol overhead; only a very simple recovery mechanism is needed to - allow recovery from an unexpected link disconnection. Therefore, the - protocol design was able to use extremely efficient header encoding - (only 1-3 bytes per packet in a typical case), reduce the number of - round trips significantly, and various features that are useful with - low-bandwidth W-WAN links were easy to add. Such features include - suspending the protocol operation over the periods of link - disconnection or link outage together with fast start once the link - becomes operational again, priority-based multiplexing of user data - over the W-WAN link thus offering link capacity to interactive - - - - - -Border, et al. Informational [Page 27] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - applications in a timely manner even in presence of bandwidth- - intensive background transfers, and link-level flow control to - prevent data from accumulating into the W-WAN link internal buffers. - - If desired, regular TCP/IP transport, possibly with corresponding - protocol modifications in TCP (and UDP) that would tune it more - suitable for W-WAN links, can be employed on the last-hop link. - -5.2.2.2 Wireless Application Protocol (WAP) - - The Mowgli system was designed to support mobile hosts that are - attached to the Internet over constrained links, but did not address - the specific challenges with low-end mobile devices. Many mobile - wireless devices are power, memory, and processing constrained, and - the communication links to these devices have lower bandwidth and - less stable connections. 
These limitations led designers to develop - the Wireless Application Protocol (WAP) that specifies an application - framework and network protocols intended to work across differing - narrowband wireless network technologies bringing Internet content - and advanced data services to low-end digital cellular phones and - other mobile wireless terminals, such as pagers and PDAs. - - The WAP model consists of a WAP client (mobile terminal), a WAP - proxy, and an origin server. It requires a WAP proxy between the WAP - client and the server on the Internet. WAP uses a layered, scalable - architecture [WAPARCH], specifying the following five protocol layers - to be used between the terminal and the proxy: Application Layer - (WAE) [WAPWAE], Session Layer (WSP) [WAPWSP], Transaction Layer (WTP) - [WAPWTP], Security Layer (WTLS) [WAPWTLS], and Transport Layer (WDP) - [WAPWDP]. Standard Internet protocols are used between the proxy and - the origin server. If the origin server includes WAP proxy - functionality, it is called a WAP Server. - - In a typical scenario, a WAP client sends an encoded WAP request to a - WAP proxy. The WAP proxy translates the WAP request into a WWW - (HTTP) request, performing the required protocol conversions, and - submits this request to a standard web server on the Internet. After - the web server responds to the WAP proxy, the response is encoded - into a more compact binary format to decrease the size of the data - over the air. This encoded response is forwarded to the WAP client - [WAPPROXY]. - - WAP operates over a variety of bearer datagram services. When - communicating over these bearer services, the WAP transport layer - (WDP) is always used between the WAP client and WAP proxy and it - provides port addressed datagram service to the higher WAP layers. - If the bearer service supports IP (e.g., GSM-CSD, GSM-GPRS, IS-136, - CDPD), UDP is used as the datagram protocol. However, if the bearer - - - -Border, et al. 
Informational [Page 28] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - service does not support IP (e.g., GSM-SMS, GSM-USSD, GSM Cell - Broadcast, CDMS-SMS, TETRA-SDS), WDP implements the required datagram - protocol as an adaptation layer between the bearer network and the - protocol stack. - - The use of the other layers depends on the port number. WAP has - registered a set of well-known ports with IANA. The port number - selected by the application for communication between a WAP client - and proxy defines the other layers to be used at each end. The - security layer, WTLS, provides privacy, data integrity and - authentication. Its functionality is similar to TLS 1.0 [RFC2246] - extended with datagram support, optimized handshake and dynamic key - refreshing. If the origin server includes WAP proxy functionality, - it might be used to facilitate the end-to-end security solutions, - otherwise it provides security between the mobile terminal and the - proxy. - - The transaction layer, WTP, is message based without connection - establishment and tear down. It supports three types of transaction - classes: an unconfirmed request (unidirectional), a reliable - (confirmed) request (unidirectional), and a reliable (confirmed) - request-reply transaction. Data is carried in the first packet and - 3-way handshake is eliminated to reduce latencies. In addition - acknowledgments, retransmission, and flow control are provided. It - allows more than one outstanding transaction at a time. It handles - the bearer dependence of a transfer, e.g., selects timeout values and - packet sizes according to the bearer. Unfortunately, WTP uses fixed - retransmission timers and does not include congestion control, which - is a potential problem area as the use of WAP increases [RFC3002]. 
- - The session layer, WSP, supports binary encoded HTTP 1.1 with some - extensions such as long living session with suspend/resume facility - and state handling, header caching, and push facility. On top of the - architecture is the application environment (WAE). - -5.2.3 W-WAN PEP Motivation - - As indicated in Section 5.2.1, W-WAN networks typically offer very - low bandwidth connections with high latency and relatively frequent - periods of link disconnection and they usually are expensive to use. - Therefore, the transfer volume and extra round-trips, such as those - associated with TCP connection setup and teardown, must be reduced - and the slow W-WAN link should be efficiently shielded from excess - traffic and global (wired) Internet congestion to make Internet - access usable and economical. Furthermore, interactive traffic must - be transmitted in a timely manner even if there are other - simultaneous bandwidth intensive (background) transfers and during - the periods with connectivity the link must be kept fully utilized - - - -Border, et al. Informational [Page 29] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - due to expensive use. In addition, the (long) periods of link - disconnection must not abort active (bulk data) transfers, if an - end-user so desires. - - As (all) applications cannot be made mobility/W-WAN aware in short - time frame or maybe ever, support for mobile W-WAN use should be - implemented in a way which allows most applications, at least those - running on fixed Internet hosts, to continue their operation - unmodified. - -5.3 W-LAN Environments - - Wireless LANs (W-LAN) are typically organized in a cellular topology - where an access point with a W-LAN transceiver controls a single - cell. A cell is defined in terms of the coverage area of the base - station. The access points are directly connected to the wired - network. 
The access point in each of the cells is responsible for - forwarding packets to and from the hosts located in the cell. Often - the hosts with W-LAN transceivers are mobile. When such a mobile - host moves from one cell to another cell, the responsibility for - forwarding packets between the wired network and the mobile host must - be transferred to the access point of the new cell. This is known as - a handoff. Many W-LAN systems also support an operation mode - enabling ad-hoc networking. In this mode access points are not - necessarily needed, but hosts with W-LAN transceiver can communicate - directly with the other hosts within the transceiver's transmission - range. - -5.3.1 W-LAN Network Characteristics - - Current wireless LANs typically provide link bandwidth from 1 Mbps to - 11 Mbps. In the future, wide deployment of higher bandwidths up to - 54 Mbps or even higher can be expected. The round-trip delay with - wireless LANs is on the order of a few milliseconds or tens of - milliseconds. Examples of W-LANs include IEEE 802.11, HomeRF, and - Hiperlan. Wireless personal area networks (WPAN) such as Bluetooth - can use the same PEP techniques. - - Wireless LANs are error-prone due to bit errors, collisions and link - outages. In addition, consecutive packet losses may also occur - during handoffs. Most W-LAN MAC protocols perform low level - retransmissions. This feature shields upper layers from most losses. - However, unavoidable losses, retransmission latency and link outages - still affect upper layers. TCP performance over W-LANs or a network - path involving a W-LAN link is likely to suffer from these effects. - - - - - - -Border, et al. Informational [Page 30] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - As TCP wrongly interprets these packet losses to be network - congestion, the TCP sender reduces its congestion window and is often - forced to timeout in order to recover from the consecutive losses.
- The result is often unacceptably poor end-to-end performance. - -5.3.2 W-LAN PEP Implementations: Snoop - - Berkeley's Snoop protocol [SNOOP] is a TCP-specific approach in which - a TCP-aware module, a Snoop agent, is deployed at the W-LAN base - station that acts as the last-hop router to the mobile host. Snoop - aims at retaining the TCP end-to-end semantics. The Snoop agent - monitors every packet that passes through the base station in either - direction and maintains soft state for each TCP connection. The - Snoop agent is an asymmetric PEP implementation as it operates - differently on TCP data and ACK channels as well as on the uplink - (from the mobile host) and downlink (to the mobile host) TCP - segments. - - For a data transfer to a mobile host, the Snoop agent caches - unacknowledged TCP data segments which it forwards to the TCP - receiver and monitors the corresponding ACKs. It does two things: - - 1. Retransmits any lost data segments locally by using local timers - and TCP duplicate ACKs to identify packet loss, instead of waiting - for the TCP sender to do so end-to-end. - - 2. Suppresses the duplicate ACKs on their way from the mobile host - back to the sender, thus avoiding fast retransmit and congestion - avoidance at the latter. - - Suppressing the duplicate ACKs is required to avoid unnecessary fast - retransmits by the TCP sender as the Snoop agent retransmits a packet - locally. Consider a system that employs the Snoop agent and a TCP - sender S that sends packets to receiver R via a base station BS. - Assume that S sends packets A, B, C, D, E (in that order) which are - forwarded by BS to the wireless receiver R. Assume the first - transmission of packet B is lost due to errors on the wireless link. - In this case, R receives packets A, C, D, E and B (in that order). - Receipt of packets C, D and E trigger duplicate ACKs. 
When S - receives three duplicate ACKs, it triggers fast retransmit (which - results in a retransmission, as well as reduction of the congestion - window). The Snoop agent also retransmits B locally, when it - receives three duplicate ACKs. The fast retransmit at S occurs - despite the local retransmit on the wireless link, degrading - throughput. Snoop deals with this problem by dropping TCP duplicate - ACKs appropriately at BS. - - - - - -Border, et al. Informational [Page 31] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - For a data transfer from a mobile host, the Snoop agent detects the - packet losses on the wireless link by monitoring the data segments it - forwards. It then employs either Negative Acknowledgements (NAK) - locally or Explicit Loss Notifications (ELN) to inform the mobile - sender that the packet loss was not related to congestion, thus - allowing the sender to retransmit without triggering normal - congestion control procedures. To implement this, changes at the - mobile host are required. - - When a Snoop agent uses NAKs to inform the TCP sender of the packet - losses on the wireless link, one possibility to implement them is - using the Selective Acknowledgment (SACK) option of TCP [RFC2018]. - This requires enabling SACK processing at the mobile host. The Snoop - agent sends a TCP SACK, when it detects a hole in the transmission - sequence from the mobile host or when it has not received any new - packets from the mobile host for a certain time period. This - approach relies on the advisory nature of the SACKs: the mobile - sender is advised to retransmit the missing segments indicated by - SACK, but it must not assume successful end-to-end delivery of the - segments acknowledged with SACK as these segments might get lost - later in the path to the receiver. Instead, the sender must wait for - a cumulative ACK to arrive. 
- - When the ELN mechanism is used to inform the mobile sender of the - packet losses, Snoop uses one of the 'unreserved' bits in the TCP - header for ELN [SNOOPELN]. The Snoop agent keeps track of the holes - that correspond to segments lost over the wireless link. When a - (duplicate) ACK corresponding to a hole in the sequence space arrives - from the TCP receiver, the Snoop agent sets the ELN bit on the ACK to - indicate that the loss is unrelated to congestion and then forwards - the ACK to the TCP sender. When the sender receives a certain number - of (duplicate) ACKs with ELN (a configurable variable at the mobile - host, e.g., two), it retransmits the missing segment without - performing any congestion control measures. - - The ELN mechanism using one of the six bits reserved for future use - in the TCP header is dangerous as it exercises checks that might not - be correctly implemented in TCP stacks, and may expose bugs. - - A scheme such as Snoop is needed only if the possibility of a fast - retransmit due to wireless errors is non-negligible. In particular, - if the wireless link uses link-layer recovery for lost data, then - this scheme is not beneficial. Also, if the TCP window tends to stay - smaller than four segments, for example, due to congestion related - losses on the wired network, the probability that the Snoop agent - will have an opportunity to locally retransmit a lost packet is - small. This is because at least three duplicate ACKs are needed to - trigger the local retransmission, but due to small window the Snoop - - - -Border, et al. Informational [Page 32] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - agent may not be able to forward three new packets after the lost - packet and thus induce the required three duplicate ACKs. - Conversely, when the TCP window is large enough, Snoop can provide - significant performance improvement (compared with standard TCP).
- - In order to alleviate the problem with small TCP windows, Snoop - proposes a solution in which a TCP sender is allowed to transmit a - new data segment for each duplicate ACK it receives as long as the - number of duplicate ACKs is less than the threshold for TCP fast - retransmission (three duplicate ACKs). If the new segment reaches - the receiver, it will generate another duplicate ACK which, in turn, - allows the sender to transmit yet another data segment. This - continues until enough duplicate ACKs have accumulated to trigger TCP - fast retransmission. This proposal is the same as the "Limited - Transmit" proposal [RFC3042] that has recently been forwarded to the - standards track. However, to be able to benefit from this solution, - it needs to be deployed on TCP senders and therefore it is not ready - for use in a short time frame. - - Snoop requires the intermediate node (base station) to examine and - operate on the traffic between the mobile host and the other end host - on the wired Internet. Hence, Snoop does not work if the IP traffic - is encrypted. Possible solutions involve: - - - making the Snoop agent a party to the security association - between the client and the server; - - - IPsec tunneling mode, terminated at the Snooping base station. - - However, these techniques require that users trust base stations. - - Snoop also requires that both the data and the corresponding ACKs - traverse the same base station. Furthermore, the Snoop agent may - duplicate efforts by the link layer as it retransmits the TCP data - segments "at the transport layer" across the wireless link. (Snoop - has been described by its designers as a TCP-aware link layer. This - is the right approach: the link and network layers can be much more - aware of each other than strict layering suggests.) - -5.3.3 W-LAN PEP Motivation - - Wireless LANs suffer from an error prone wireless channel.
Errors - can typically be considered bursty and channel conditions may change - rapidly from mobility and environmental changes. Packets are dropped - from bit errors or during handovers. Periods of link outage can also - be experienced. Although the typical MAC performs retransmissions, - dropped packets, outages and retransmission latency still can have - serious performance implications for IP performance, especially TCP. - - - -Border, et al. Informational [Page 33] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - PEPs can be used to alleviate problems caused by packet losses, - protect TCP from link outages, and to add priority multiplexing. - Techniques such as Snoop are integrally implemented in access points, - while priority and compression schemes are distributed across the W- - LAN. - -6. Security Considerations - - The use of Performance Enhancing Proxies introduces several issues - which impact security. First, (as described in detail in Section - 4.1.1,) using PEPs and using IPsec is generally mutually exclusive. - Unless the PEP is also both capable and trusted to be the endpoint of - an IPsec tunnel (and the use of an IPsec tunnel is deemed good enough - security for the applicable threat model), a user or network - administrator must choose between improved performance and network - layer security. In some cases, transport (or higher) layer security - can be used in conjunction with a PEP to mitigate the impact of not - having network layer security. But, support by applications for the - use of transport (or higher) layer security is far from ubiquitous. - - Additionally, the PEP itself needs to be protected from attack. - First, even when IPsec tunnels are used with the PEP, the PEP - represents a point in the network where traffic is exposed. And, the - placement of a PEP in the network makes it an ideal platform from - which to launch a denial of service or man in the middle attack. 
- (Also, taking the PEP out of action is a potential denial of service - attack itself.) Therefore, the PEP must be protected (e.g., by a - firewall) or must protect itself from improper access by an attacker - just like any other device which resides in a network. - -7. IANA Considerations - - This document is an informational overview document and, as such, - does not introduce new nor modify existing name or number spaces - managed by IANA. - -8. Acknowledgements - - This document grew out of the Internet-Draft "TCP Performance - Enhancing Proxy Terminology", RFC 2757 "Long Thin Networks", and work - done in the IETF TCPSAT working group. The authors are indebted to - the active members of the PILC working group. In particular, Joe - Touch and Mark Allman gave us invaluable feedback on various aspects - of the document and Magdolna Gerendai provided us with essential help - on the WAP example. - - - - - - -Border, et al. Informational [Page 34] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -9. References - - [BBKT97] P. Bhagwat, P. Bhattacharya, A. Krishma, S.K. Tripathi, - "Using channel state dependent packet scheduling to - improve TCP throughput over wireless LANs," ACM Wireless - Networks, March 1997, pp. 91 - 102. Available at: - http://www.acm.org/pubs - /articles/journals/wireless/1997-3-1/p91-bhagwat/p91- - bhagwat.pdf - - [BPK97] H. Balakrishnan, V.N. Padmanabhan, R.H. Katz, "The - Effects of Asymmetry on TCP Performance," Proc. ACM/IEEE - Mobicom, Budapest, Hungary, September 1997. - - [BW97] G. Brasche, B. Walke, "Concepts, Services, and Protocols - of the New GSM Phase 2+ general Packet Radio Service," - IEEE Communications Magazine, Vol. 35, No. 8, August - 1997. - - [CDMA] Electronic Industry Alliance (EIA)/Telecommunications - Industry Association (TIA), IS-95: Mobile Station-Base - Station Compatibility Standard for Dual-Mode Wideband - Spread Spectrum Cellular System, 1993. 
- - [CDPD] Wireless Data Forum, CDPD System Specification, Release - 1.1, 1995. - - [CTC+97] H. Chang, C. Tait, N. Cohen, M. Shapiro, S. Mastrianni, - R. Floyd, B. Housel, D. Lindquist, "Web Browsing in a - Wireless Environment: Disconnected and Asynchronous - Operation in ARTour Web Express," Proc. MobiCom'97, - Budapest, Hungary, September 1997. - - [FMSBMR98] D.C. Feldmeier, A.J. McAuley, J.M. Smith, D.S. Bakin, - W.S. Marcus, T.M. Raleigh, "Protocol Boosters," IEEE - Journal on Selected Areas of Communication, Vol. 16, No. - 3, April 1998. - - [FLASH] Flash Networks Ltd., performance boosting products - technology vendor based in Holmdel, New Jersey. Website - at http://www.flashnetworks.com. - - [FOURELLE] Fourelle Systems, performance boosting products - technology vendor based in Santa Clara, California. - Website at http://www.fourelle.com. - - [GPRS] ETSI, "General Packet Radio Service (GPRS): Service - Description, Stage 2," GSM03.60, v.6.1.1, August 1998. - - - -Border, et al. Informational [Page 35] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - [GSM] M. Rahnema, "Overview of the GSM system and protocol - architecture," IEEE Communications Magazine, Vol. 31, No. - 4, pp. 92-100, April 1993. - - [HNS] Hughes Network Systems, Inc., VSAT technology vendor - based in Germantown, Maryland. Website at - http://www.hns.com. - - [I-TCP] A. Bakre, B.R. Badrinath, "I-TCP: Indirect TCP for Mobile - Hosts," Proc. 15th International Conference on - Distributed Computing Systems (ICDCS), May 1995. - - [KRA94] M. Kojo, K. Raatikainen, T. Alanko, "Connecting Mobile - Workstations to the Internet over a Digital Cellular - Telephone Network," Proc. Workshop on Mobile and Wireless - Information Systems (MOBIDATA), Rutgers University, NJ, - November 1994. Revised version published in Mobile - Computing, pp. 253-270, Kluwer, 1996. - - [KRLKA97] M. Kojo, K. Raatikainen, M. Liljeberg, J. Kiiskinen, T. 
- Alanko, "An Efficient Transport Service for Slow Wireless - Telephone Links," IEEE Journal on Selected Areas of - Communication, Vol. 15, No. 7, September 1997. - - [LAKLR95] M. Liljeberg, T. Alanko, M. Kojo, H. Laamanen, K. - Raatikainen, "Optimizing World-Wide Web for Weakly- - Connected Mobile Workstations: An Indirect Approach," - Proc. of the 2nd Int. Workshop on Services in Distributed - and Networked Environments, Whistler, Canada, pp. 132- - 139, June 1995. - - [LHKR96] M. Liljeberg, H. Helin, M. Kojo, K. Raatikainen, "Mowgli - WWW Software: Improved Usability of WWW in Mobile WAN - Environments," Proc. IEEE Global Internet 1996 - Conference, London, UK, November 1996. - - [M-TCP] K. Brown, S. Singh, "M-TCP: TCP for Mobile Cellular - Networks," ACM Computer Communications Review Volume - 27(5), 1997. Available at - ftp://ftp.ece.orst.edu/pub/singh/papers/mtcp.ps.gz. - - [Pax99] V. Paxson, "End-to-End Internet Packet Dynamics," - IEEE/ACM Transactions on Networking, Vol. 7, No. 3, 1999, - pp. 277-292. - - [PILCWEB] http://pilc.grc.nasa.gov. - - - - - -Border, et al. Informational [Page 36] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - [RFC0792] Postel, J., "Internet Control Message Protocol", STD 5, - RFC 792, September 1981. - - [RFC0793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC1122] Braden, R., "Requirements for Internet Hosts -- - Communications Layers", STD 3, RFC 1122, October 1989. - - [RFC1144] Jacobson, V., "Compressing TCP/IP Headers for Low-Speed - Serial Links", RFC 1144, February 1990. - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [RFC1958] Carpenter, B., "Architectural Principles of the - Internet", RFC 1958, June 1996. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October - 1996. - - [RFC2151] Kessler, G. and S. 
Shepard, "A Primer On Internet and - TCP/IP Tools and Utilities", FYI 30, RFC 2151, June 1997. - - [RFC2246] Dierk, T. and E. Allen, "TLS Protocol Version 1," RFC - 2246, January 1999. - - [RFC2393] Shacham, A., Monsour, R., Pereira, R. and M. Thomas, "IP - Payload Compression Protocol (IPcomp)", RFC 2393, - December 1998. - - [RFC2401] Kent, S., and R. Atkinson, "Security Architecture for the - Internet Protocol", RFC 2401, November 1998. - - [RFC2475] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. - and W. Weiss, "An Architecture for Differentiated - Services", RFC 2475, December 1998. - - [RFC2488] Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP - Over Satellite Channels using Standard Mechanisms", BCP - 28, RFC 2488, January 1999. - - [RFC2507] Degermark, M., Nordgren, B. and S. Pink, "IP Header - Compression", RFC 2507, February 1999. - - - - - - -Border, et al. Informational [Page 37] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - [RFC2508] Casner, S. and V. Jacobson, "Compressing IP/UDP/RTP - Headers for Low-Speed Serial Links", RFC 2508, February - 1999. - - [RFC2509] Engan, M., Casner, S. and C. Bormann, "IP Header - Compression over PPP", RFC 2509, February 1999. - - [RFC2663] Srisuresh, P. and Y. Holdrege, "IP Network Address - Translator (NAT) Terminology and Considerations", RFC - 2663, August 1999. - - [RFC2760] Allman, M., Dawkins, S., Glover, D., Griner, J., - Henderson, T., Heidemann, J., Kruse, H., Ostermann, S., - Scott, K., Semke, J., Touch, J. and D. Tran, "Ongoing TCP - Research Related to Satellites", RFC 2760, February 2000. - - [RFC3002] Mitzel, D., "Overview of 2000 IAB Wireless - Internetworking Workshop", RFC 3002, December 2000. - - [RFC3042] Allman, M., Balakrishnan, H. and S. Floyd, "Enhancing - TCP's Loss Recovery Using Limited Transmit", RFC 3042, - January 2001. - - [SHEL00] Z. Shelby, T. Saarinen, P. Mahonen, D. Melpignano, A. - Marshall, L. 
Munoz, "Wireless IPv6 Networks - WINE," IST - Mobile Summit, Ireland, October 2000. - - [SNOOP] H. Balakrishnan, S. Seshan, E. Amir, R. Katz, "Improving - TCP/IP Performance over Wireless Networks," Proc. 1st ACM - Conference on Mobile Communications and Networking - (Mobicom), Berkeley, California, November 1995. - - [SNOOPELN] H. Balakrishnan, R. Katz, "Explicit Loss Notification and - Wireless Web Performance," Proc. IEEE Globecom 1998, - Internet Mini-Conference, Sydney, Australia, November - 1998. - - [SPACENET] Spacenet, VSAT technology vendor based in Mclean, - Virginia. Website at http://www.spacenet.com. - - [SRC84] J.H. Saltzer, D.P. Reed, D.D. Clark, "End-To-End - Arguments in System Design," ACM TOCS, Vol. 2, No. 4, pp. - 277-288, November 1984. - - [WAPARCH] Wireless Application Protocol Architecture Specification, - April 1998, http://www.wapforum.org. - - - - - -Border, et al. Informational [Page 38] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - [WAPPROXY] Wireless Application Protocol Push Proxy Gateway Service - Specification, August 1999, http://www.wapforum.org. - - [WAPWAE] Wireless Application Protocol Wireless Application - Environment Overview, March 2000, - http://www.wapforum.org. - - [WAPWDP] Wireless Application Protocol Wireless Datagram Protocol - Specification, February 2000, http://www.wapforum.org. - - [WAPWSP] Wireless Application Protocol Wireless Session Protocol - Specification, May 2000, http://www.wapforum.org. - - [WAPWTLS] Wireless Application Protocol Wireless Transport Layer - Security Specification, February 2000, - http://www.wapforum.org. - - [WAPWTP] Wireless Application Protocol Wireless Transaction - Protocol Specification, February 2000, - http://www.wapforum.org. - - [Zhang00] Y. Zhang, B. Singh, "A Multi-Layer IPsec Protocol," Proc. - proceedings of 9th USENIX Security Symposium, Denver, - Colorado, August 2000. Available at - http://www.wins.hrl.com/people/ygz/papers/usenix00.html. - -10. 
Authors' Addresses - - Questions about this document may be directed to: - - John Border - Hughes Network Systems - 11717 Exploration Lane - Germantown, Maryland 20876 - - Phone: +1-301-548-6819 - Fax: +1-301-548-1196 - EMail: border@hns.com - - - - - - - - - - - - - -Border, et al. Informational [Page 39] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - Markku Kojo - Department of Computer Science - University of Helsinki - P.O. Box 26 (Teollisuuskatu 23) - FIN-00014 HELSINKI - Finland - - Phone: +358-9-1914-4179 - Fax: +358-9-1914-4441 - EMail: kojo@cs.helsinki.fi - - - Jim Griner - NASA Glenn Research Center - MS: 54-5 - 21000 Brookpark Orad - Cleveland, Ohio 44135-3191 - - Phone: +1-216-433-5787 - Fax: +1-216-433-8705 - EMail: jgriner@grc.nasa.gov - - - Gabriel Montenegro - Sun Microsystems Laboratories, Europe - 29, chemin du Vieux Chene - 38240 Meylan, FRANCE - - Phone: +33 476 18 80 45 - EMail: gab@sun.com - - - Zach Shelby - University of Oulu - Center for Wireless Communications - PO Box 4500 - FIN-90014 - Finland - - Phone: +358-40-779-6297 - EMail: zach.shelby@ee.oulu.fi - - - - - - - - - - -Border, et al. Informational [Page 40] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -Appendix A - PEP Terminology Summary - - This appendix provides a summary of terminology frequently used - during discussion of Performance Enhancing Proxies. (In some cases, - these terms have different meanings from their non-PEP related - usage.) - - ACK filtering - - Removing acknowledgments to prevent congestion of a low speed - link, usually used with paths which include a highly asymmetric - link. Sometimes also called ACK reduction. See Section 3.1.4. - - ACK spacing - - Delayed forwarding of acknowledgments in order to space them - appropriately, for example, to help minimize the burstiness of - TCP data. See Section 3.1.1. - - application layer PEP - - A Performance Enhancing Proxy operating above the transport - layer. 
May be aimed at improving application or transport - protocol performance (or both). Described in detail in Section - 2.1.2. - - asymmetric link - - A link which has different rates for the forward channel (used for - data segments) and the back (or return) channel (used for ACKs). - - available bandwidth - - The total capacity of a link available to carry information at any - given time. May be lower than the raw bandwidth due to competing - traffic. - - bandwidth utilization - - The actual amount of information delivered over a link in a given - period, usually expressed as a percent of the raw bandwidth of - the link. - - gateway - - Has several meanings with respect to PEPs, depending on context: - - - An access point to a particular link; - - - -Border, et al. Informational [Page 41] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - - A device capable of initiating and terminating connections - on - - behalf of a user or end system (e.g., a firewall or proxy). - - Not necessarily, but could be, a router. - - in flight (data) - - Data sent but not yet acknowledged. More precisely, data sent for - which the sender has not yet received the acknowledgement. - - link layer PEP - - A Performance Enhancing Proxy operating below the network layer. - - local acknowledgement - - The generation of acknowledgments by an entity in the path - between two end systems in order to allow the sending system to - transmit more data without waiting for end-to-end - acknowledgments. Described (in the context of TCP) in Section - 3.1.2. - - performance enhancing proxy - - An entity in the network acting on behalf of an end system or user - (with or without the knowledge of the end system or user) in order - to enhance protocol performance. Section 2 describes various - types of performance enhancing proxies. Section 3 describes the - mechanisms performance enhancing proxies use to improve - performance. 
- - raw bandwidth - - The total capacity of an unloaded link available to carry - information. - - Snoop - - A TCP-aware link layer developed for wireless packet radio and - cellular networks. It works by caching segments at a wireless - base station. If the base station sees duplicate acknowledgments - for a segment that it has cached, it retransmits the missing - segment while suppressing the duplicate acknowledgement stream - being forwarded back to the sender until the wireless receiver - starts to acknowledge new data. Described in detail in Section - 5.3.2 and [SNOOP]. - - - -Border, et al. Informational [Page 42] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - split connection - - A connection that has been terminated before reaching the intended - destination end system in order to initiate another connection - towards the end system. This allows the use of different - connection characteristics for different parts of the path of - the originally intended connection. See Section 2.4. - - TCP PEP - - A Performance Enhancing Proxy operating at the transport layer - with TCP. Aimed at improving TCP performance. - - TCP splitting - - Using one or more split TCP connections to improve TCP - performance. - - TCP spoofing - - Sometimes used as a synonym for TCP PEP. More accurately, TCP - spoofing refers to using transparent (to the TCP stacks in the - end systems) mechanisms to improve TCP performance. See Section - 2.1.1. - - transparent - - In the context of a PEP, transparent refers to not requiring - changes to be made to the end systems, transport endpoints - and/or applications involved in a connection. See Section 2.5 - for a more detailed explanation. - - transport layer PEP - - A Performance Enhancing Proxy operating at the transport layer. - Described in detail in Section 2.1.1. - - tunneling - - In the context of PEPs, tunneling refers to the process of - wrapping a packet for transmission over a particular link - between two PEPs. 
See Section 3.2. - - - - - - - - - -Border, et al. Informational [Page 43] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - - WAP - - The Wireless Application Protocol specifies an application - framework and network protocols intended to work across - differing narrow-band wireless network technologies. See - Section 5.2.2.2. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Border, et al. Informational [Page 44] - -RFC 3135 PILC - Performance Enhancing Proxies June 2001 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2001). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Border, et al. Informational [Page 45] - diff --git a/kernel/picotcp/RFC/rfc3150.txt b/kernel/picotcp/RFC/rfc3150.txt deleted file mode 100644 index aab7cf0..0000000 --- a/kernel/picotcp/RFC/rfc3150.txt +++ /dev/null @@ -1,955 +0,0 @@ - - - - - - -Network Working Group S. Dawkins -Request for Comments: 3150 G. Montenegro -BCP: 48 M . Kojo -Category: Best Current Practice V. Magret - July 2001 - - - End-to-end Performance Implications of Slow Links - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2001). All Rights Reserved. - -Abstract - - This document makes performance-related recommendations for users of - network paths that traverse "very low bit-rate" links. - - "Very low bit-rate" implies "slower than we would like". This - recommendation may be useful in any network where hosts can saturate - available bandwidth, but the design space for this recommendation - explicitly includes connections that traverse 56 Kb/second modem - links or 4.8 Kb/second wireless access links - both of which are - widely deployed. - - This document discusses general-purpose mechanisms. Where - application-specific mechanisms can outperform the relevant general- - purpose mechanism, we point this out and explain why. - - This document has some recommendations in common with RFC 2689, - "Providing integrated services over low-bitrate links", especially in - areas like header compression. This document focuses more on - traditional data applications for which "best-effort delivery" is - appropriate. - - - - - - - - - - - -Dawkins, et al. 
Best Current Practice [Page 1] - -RFC 3150 PILC - Slow Links July 2001 - - -Table of Contents - - 1.0 Introduction ................................................. 2 - 2.0 Description of Optimizations ................................. 3 - 2.1 Header Compression Alternatives ...................... 3 - 2.2 Payload Compression Alternatives ..................... 5 - 2.3 Choosing MTU sizes ................................... 5 - 2.4 Interactions with TCP Congestion Control [RFC2581] ... 6 - 2.5 TCP Buffer Auto-tuning ............................... 9 - 2.6 Small Window Effects ................................. 10 - 3.0 Summary of Recommended Optimizations ......................... 10 - 4.0 Topics For Further Work ...................................... 12 - 5.0 Security Considerations ...................................... 12 - 6.0 IANA Considerations .......................................... 13 - 7.0 Acknowledgements ............................................. 13 - 8.0 References ................................................... 13 - Authors' Addresses ............................................... 16 - Full Copyright Statement ......................................... 17 - -1.0 Introduction - - The Internet protocol stack was designed to operate in a wide range - of link speeds, and has met this design goal with only a limited - number of enhancements (for example, the use of TCP window scaling as - described in "TCP Extensions for High Performance" [RFC1323] for - very-high-bandwidth connections). - - Pre-World Wide Web application protocols tended to be either - interactive applications sending very little data (e.g., Telnet) or - bulk transfer applications that did not require interactive response - (e.g., File Transfer Protocol, Network News). The World Wide Web has - given us traffic that is both interactive and often "bulky", - including images, sound, and video. 
- - The World Wide Web has also popularized the Internet, so that there - is significant interest in accessing the Internet over link speeds - that are much "slower" than typical office network speeds. In fact, - a significant proportion of the current Internet users is connected - to the Internet over a relatively slow last-hop link. In future, the - number of such users is likely to increase rapidly as various mobile - devices are foreseen to be attached to the Internet over slow - wireless links. - - In order to provide the best interactive response for these "bulky" - transfers, implementors may wish to minimize the number of bits - actually transmitted over these "slow" connections. There are two - - - - - -Dawkins, et al. Best Current Practice [Page 2] - -RFC 3150 PILC - Slow Links July 2001 - - - areas that can be considered - compressing the bits that make up the - overhead associated with the connection, and compressing the bits - that make up the payload being transported over the connection. - - In addition, implementors may wish to consider TCP receive window - settings and queuing mechanisms as techniques to improve performance - over low-speed links. While these techniques do not involve protocol - changes, they are included in this document for completeness. - -2.0 Description of Optimizations - - This section describes optimizations which have been suggested for - use in situations where hosts can saturate their links. The next - section summarizes recommendations about the use of these - optimizations.
- -2.1 Header Compression Alternatives - - Mechanisms for TCP and IP header compression defined in [RFC1144, - RFC2507, RFC2508, RFC2509, RFC3095] provide the following benefits: - - - Improve interactive response time - - - Decrease header overhead (for a typical dialup MTU of 296 - bytes, the overhead of TCP/IP headers can decrease from about - 13 percent with typical 40-byte headers to 1-1.5 percent with - 3-5 byte compressed headers, for most packets). This - enables use of small packets for delay-sensitive low data-rate - traffic and good line efficiency for bulk data even with small - segment sizes (for reasons to use a small MTU on slow links, - see section 2.3) - - - Many slow links today are wireless and tend to be significantly - lossy. Header compression reduces packet loss rate over lossy - links (simply because shorter transmission times expose packets - to fewer events that cause loss). - - [RFC1144] header compression is a Proposed Standard for TCP Header - compression that is widely deployed. Unfortunately it is vulnerable - on lossy links, because even a single bit error results in loss of - synchronization between the compressor and decompressor. It uses TCP - timeouts to detect a loss of such synchronization, but these errors - result in loss of data (up to a full TCP window), delay of a full - RTO, and unnecessary slow-start. - - - - - - - -Dawkins, et al. Best Current Practice [Page 3] - -RFC 3150 PILC - Slow Links July 2001 - - - A more recent header compression proposal [RFC2507] includes an - explicit request for retransmission of an uncompressed packet to - allow resynchronization without waiting for a TCP timeout (and - executing congestion avoidance procedures). This works much better - on links with lossy characteristics. - - The above scheme ceases to perform well under conditions as extreme - as those of many cellular links (error conditions of 1e-3 or 1e-2 and - round trip times over 100 ms.).
For these cases, the 'Robust Header - Compression' working group has developed ROHC [RFC3095]. Extensions - of ROHC to support compression of TCP headers are also under - development. - - [RFC1323] defines a "TCP Timestamp" option, used to prevent - "wrapping" of the TCP sequence number space on high-speed links, and - to improve TCP RTT estimates by providing unambiguous TCP roundtrip - timings. Use of TCP timestamps prevents header compression, because - the timestamps are sent as TCP options. This means that each - timestamped header has TCP options that differ from the previous - header, and headers with changed TCP options are always sent - uncompressed. In addition, timestamps do not seem to have much of an - impact on RTO estimation [AlPa99]. - - Nevertheless, the ROHC working group is developing schemes to - compress TCP headers, including options such as timestamps and - selective acknowledgements. - - Recommendation: Implement [RFC2507], in particular as it relates to - IPv4 tunnels and Minimal Encapsulation for Mobile IP, as well as TCP - header compression for lossy links and links that reorder packets. - PPP capable devices should implement "IP Header Compression over PPP" - [RFC2509]. Robust Header Compression [RFC3095] is recommended for - extremely slow links with very high error rates (see above), but - implementors should judge if its complexity is justified (perhaps by - the cost of the radio frequency resources). - - [RFC1144] header compression should only be enabled when operating - over reliable "slow" links. - - Use of TCP Timestamps [RFC1323] is not recommended with these - connections, because it complicates header compression. Even though - the Robust Header Compression (ROHC) working group is developing - specifications to remedy this, those mechanisms are not yet fully - developed nor deployed, and may not be generally justifiable. 
- Furthermore, connections traversing "slow" links do not require - protection against TCP sequence-number wrapping. - - - - - -Dawkins, et al. Best Current Practice [Page 4] - -RFC 3150 PILC - Slow Links July 2001 - - -2.2 Payload Compression Alternatives - - Compression of IP payloads is also desirable on "slow" network links. - "IP Payload Compression Protocol (IPComp)" [RFC2393] defines a - framework where common compression algorithms can be applied to - arbitrary IP segment payloads. - - IP payload compression is something of a niche optimization. It is - necessary because IP-level security converts IP payloads to random - bitstreams, defeating commonly-deployed link-layer compression - mechanisms which are faced with payloads that have no redundant - "information" that can be more compactly represented. - - However, many IP payloads are already compressed (images, audio, - video, "zipped" files being transferred), or are already encrypted - above the IP layer (e.g., SSL [SSL]/TLS [RFC2246]). These payloads - will not "compress" further, limiting the benefit of this - optimization. - - For uncompressed HTTP payload types, HTTP/1.1 [RFC2616] also includes - Content-Encoding and Accept-Encoding headers, supporting a variety of - compression algorithms for common compressible MIME types like - text/plain. This leaves only the HTTP headers themselves - uncompressed. - - In general, application-level compression can often outperform - IPComp, because of the opportunity to use compression dictionaries - based on knowledge of the specific data being compressed. - - Extensive use of application-level compression techniques will reduce - the need for IPComp, especially for WWW users. - - Recommendation: IPComp may optionally be implemented. - -2.3 Choosing MTU Sizes - - There are several points to keep in mind when choosing an MTU for - low-speed links. 
- - First, if a full-length MTU occupies a link for longer than the - delayed ACK timeout (typically 200 milliseconds, but may be up to 500 - milliseconds), this timeout will cause an ACK to be generated for - every segment, rather than every second segment, as occurs with most - implementations of the TCP delayed ACK algorithm. - - - - - - - -Dawkins, et al. Best Current Practice [Page 5] - -RFC 3150 PILC - Slow Links July 2001 - - - Second, "relatively large" MTUs, which take human-perceptible amounts - of time to be transmitted into the network, create human-perceptible - delays in other flows using the same link. [RFC1144] considers - 100-200 millisecond delays as human-perceptible. The convention of - choosing 296-byte MTUs (with header compression enabled) for dialup - access is a compromise that limits the maximum link occupancy delay - with full-length MTUs close to 200 milliseconds on 9.6 Kb/second - links. - - Third, on last-hop links using a larger link MTU size, and therefore - larger MSS, would allow a TCP sender to increase its congestion - window faster in bytes than when using a smaller MTU size (and a - smaller MSS). However, with a smaller MTU size, and a smaller MSS - size, the congestion window, when measured in segments, increases - more quickly than it would with a larger MSS size. Connections using - smaller MSS sizes are more likely to be able to send enough segments - to generate three duplicate acknowledgements, triggering fast - retransmit/fast recovery when packet losses are encountered. Hence, - a smaller MTU size is useful for slow links with lossy - characteristics. - - Fourth, using a smaller MTU size also decreases the queuing delay of - a TCP flow (and thereby RTT) compared to use of larger MTU size with - the same number of packets in a queue. 
This means that a TCP flow - using a smaller segment size and traversing a slow link is able to - inflate the congestion window (in number of segments) to a larger - value while experiencing the same queuing delay. - - Finally, some networks charge for traffic on a per-packet basis, not - on a per-kilobyte basis. In these cases, connections using a larger - MTU may be charged less than connections transferring the same number - of bytes using a smaller MTU. - - Recommendation: If it is possible to do so, MTUs should be chosen - that do not monopolize network interfaces for human-perceptible - amounts of time, and implementors should not chose MTUs that will - occupy a network interface for significantly more than 100-200 - milliseconds. - -2.4 Interactions with TCP Congestion Control [RFC2581] - - In many cases, TCP connections that traverse slow links have the slow - link as an "access" link, with higher-speed links in use for most of - the connection path. One common configuration might be a laptop - computer using dialup access to a terminal server (a last-hop - router), with an HTTP server on a high-speed LAN "behind" the - terminal server. - - - - -Dawkins, et al. Best Current Practice [Page 6] - -RFC 3150 PILC - Slow Links July 2001 - - - In this case, the HTTP server may be able to place packets on its - directly-attached high-speed LAN at a higher rate than the last-hop - router can forward them on the low-speed link. When the last-hop - router falls behind, it will be unable to buffer the traffic intended - for the low-speed link, and will become a point of congestion and - begin to drop the excess packets. In particular, several packets may - be dropped in a single transmission window when initial slow start - overshoots the last-hop router buffer. - - Although packet loss is occurring, it isn't detected at the TCP - sender until one RTT time after the router buffer space is exhausted - and the first packet is dropped. 
This late congestion signal allows - the congestion window to increase up to double the size it was at the - time the first packet was dropped at the router. - - If the link MTU is large enough to take more than the delayed ACK - timeout interval to transmit a packet, an ACK is sent for every - segment and the congestion window is doubled in a single RTT. If a - smaller link MTU is in use and delayed ACKs can be utilized, the - congestion window increases by a factor of 1.5 in one RTT. In both - cases the sender continues transmitting packets well beyond the - congestion point of the last-hop router, resulting in multiple packet - losses in a single window. - - The self-clocking nature of TCP's slow start and congestion avoidance - algorithms prevent this buffer overrun from continuing. In addition, - these algorithms allow senders to "probe" for available bandwidth - - cycling through an increasing rate of transmission until loss occurs, - followed by a dramatic (50-percent) drop in transmission rate. This - happens when a host directly connected to a low-speed link offers an - advertised window that is unrealistically large for the low-speed - link. During the congestion avoidance phase the peer host continues - to probe for available bandwidth, trying to fill the advertised - window, until packet loss occurs. - - The same problems may also exist when a sending host is directly - connected to a slow link as most slow links have some local buffer in - the link interface. This link interface buffer is subject to - overflow exactly in the same way as the last-hop router buffer. - - When a last-hop router with a small number of buffers per outbound - link is used, the first buffer overflow occurs earlier than it would - if the router had a larger number of buffers. Subsequently with a - smaller number of buffers the periodic packet losses occur more - frequently during congestion avoidance, when the sender probes for - available bandwidth. - - - - - -Dawkins, et al. 
Best Current Practice [Page 7] - -RFC 3150 PILC - Slow Links July 2001 - - - The most important responsibility of router buffers is to absorb - bursts. Too few buffers (for example, only three buffers per - outbound link as described in [RFC2416]) means that routers will - overflow their buffer pools very easily and are unlikely to absorb - even a very small burst. When a larger number of router buffers are - allocated per outbound link, the buffer space does not overflow as - quickly but the buffers are still likely to become full due to TCP's - default behavior. A larger number of router buffers leads to longer - queuing delays and a longer RTT. - - If router queues become full before congestion is signaled or remain - full for long periods of time, this is likely to result in "lock- - out", where a single connection or a few connections occupy the - router queue space, preventing other connections from using the link - [RFC2309], especially when a tail drop queue management discipline is - being used. - - Therefore, it is essential to have a large enough number of buffers - in routers to be able to absorb data bursts, but keep the queues - normally small. In order to achieve this it has been recommended in - [RFC2309] that an active queue management mechanism, like Random - Early Detection (RED) [RED93], should be implemented in all Internet - routers, including the last-hop routers in front of a slow link. It - should also be noted that RED requires a sufficiently large number of - router buffers to work properly. In addition, the appropriate - parameters of RED on a last-hop router connected to a slow link will - likely deviate from the defaults recommended. - - Active queue management mechanism do not eliminate packet drops but, - instead, drop packets at earlier stage to solve the full-queue - problem for flows that are responsive to packet drops as congestion - signal. 
Hosts that are directly connected to low-speed links may - limit the receive windows they advertise in order to lower or - eliminate the number of packet drops in a last-hop router. When - doing so one should, however, take care that the advertised window is - large enough to allow full utilization of the last-hop link capacity - and to allow triggering fast retransmit, when a packet loss is - encountered. This recommendation takes two forms: - - - Modern operating systems use relatively large default TCP receive - buffers compared to what is required to fully utilize the link - capacity of low-speed links. Users should be able to choose the - default receive window size in use - typically a system-wide - parameter. (This "choice" may be as simple as "dial-up access/LAN - access" on a dialog box - this would accommodate many environments - without requiring hand-tuning by experienced network engineers.) - - - - - -Dawkins, et al. Best Current Practice [Page 8] - -RFC 3150 PILC - Slow Links July 2001 - - - - Application developers should not attempt to manually manage - network bandwidth using socket buffer sizes. Only in very rare - circumstances will an application actually know both the bandwidth - and delay of a path and be able to choose a suitably low (or high) - value for the socket buffer size to obtain good network - performance. - - This recommendation is not a general solution for any network path - that might involve a slow link. Instead, this recommendation is - applicable in environments where the host "knows" it is always - connected to other hosts via "slow links". For hosts that may - connect to other host over a variety of links (e.g., dial-up laptop - computers with LAN-connected docking stations), buffer auto-tuning - for the receive buffer is a more reasonable recommendation, and is - discussed below. 
- -2.5 TCP Buffer Auto-tuning - - [SMM98] recognizes a tension between the desire to allocate "large" - TCP buffers, so that network paths are fully utilized, and a desire - to limit the amount of memory dedicated to TCP buffers, in order to - efficiently support large numbers of connections to hosts over - network paths that may vary by six orders of magnitude. - - The technique proposed is to dynamically allocate TCP buffers, based - on the current congestion window, rather than attempting to - preallocate TCP buffers without any knowledge of the network path. - - This proposal results in receive buffers that are appropriate for the - window sizes in use, and send buffers large enough to contain two - windows of segments, so that SACK and fast recovery can recover - losses without forcing the connection to use lengthy retransmission - timeouts. - - While most of the motivation for this proposal is given from a - server's perspective, hosts that connect using multiple interfaces - with markedly-different link speeds may also find this kind of - technique useful. This is true in particular with slow links, which - are likely to dominate the end-to-end RTT. If the host is connected - only via a single slow link interface at a time, it is fairly easy to - (dynamically) adjust the receive window (and thus the advertised - window) to a value appropriate for the slow last-hop link with known - bandwidth and delay characteristics. - - Recommendation: If a host is sometimes connected via a slow link but - the host is also connected using other interfaces with markedly- - different link speeds, it may use receive buffer auto-tuning to - adjust the advertised window to an appropriate value. - - - -Dawkins, et al. 
Best Current Practice [Page 9] - -RFC 3150 PILC - Slow Links July 2001 - - -2.6 Small Window Effects - - If a TCP connection stabilizes with a congestion window of only a few - segments (as could be expected on a "slow" link), the sender isn't - sending enough segments to generate three duplicate acknowledgements, - triggering fast retransmit and fast recovery. This means that a - retransmission timeout is required to repair the loss - dropping the - TCP connection to a congestion window with only one segment. - - [TCPB98] and [TCPF98] observe that (in studies of network trace - datasets) it is relatively common for TCP retransmission timeouts to - occur even when some duplicate acknowledgements are being sent. The - challenge is to use these duplicate acknowledgements to trigger fast - retransmit/fast recovery without injecting traffic into the network - unnecessarily - and especially not injecting traffic in ways that - will result in instability. - - The "Limited Transmit" algorithm [RFC3042] suggests sending a new - segment when the first and second duplicate acknowledgements are - received, so that the receiver is more likely to be able to continue - to generate duplicate acknowledgements until the TCP retransmit - threshold is reached, triggering fast retransmit and fast recovery. - When the congestion window is small, this is very useful in assisting - fast retransmit and fast recovery to recover from a packet loss - without using a retransmission timeout. We note that a maximum of - two additional new segments will be sent before the receiver sends - either a new acknowledgement advancing the window or two additional - duplicate acknowledgements, triggering fast retransmit/fast recovery, - and that these new segments will be acknowledgement-clocked, not - back-to-back. - - Recommendation: Limited Transmit should be implemented in all hosts. 
- -3.0 Summary of Recommended Optimizations - - This section summarizes our recommendations regarding the previous - standards-track mechanisms, for end nodes that are connected via a - slow link. - - Header compression should be implemented. [RFC1144] header - compression can be enabled over robust network links. [RFC2507] - should be used over network connections that are expected to - experience loss due to corruption as well as loss due to congestion. - For extremely lossy and slow links, implementors should evaluate ROHC - [RFC3095] as a potential solution. [RFC1323] TCP timestamps must be - turned off because (1) their protection against TCP sequence number - wrapping is unjustified for slow links, and (2) they complicate TCP - header compression. - - - -Dawkins, et al. Best Current Practice [Page 10] - -RFC 3150 PILC - Slow Links July 2001 - - - IP Payload Compression [RFC2393] should be implemented, although - compression at higher layers of the protocol stack (for example [RFC - 2616]) may make this mechanism less useful. - - For HTTP/1.1 environments, [RFC2616] payload compression should be - implemented and should be used for payloads that are not already - compressed. - - Implementors should choose MTUs that don't monopolize network - interfaces for more than 100-200 milliseconds, in order to limit the - impact of a single connection on all other connections sharing the - network interface. - - Use of active queue management is recommended on last-hop routers - that provide Internet access to host behind a slow link. In - addition, number of router buffers per slow link should be large - enough to absorb concurrent data bursts from more than a single flow. - To absorb concurrent data bursts from two or three TCP senders with a - typical data burst of three back-to-back segments per sender, at - least six (6) or nine (9) buffers are needed. Effective use of - active queue management is likely to require even larger number of - buffers. 
- - Implementors should consider the possibility that a host will be - directly connected to a low-speed link when choosing default TCP - receive window sizes. - - Application developers should not attempt to manually manage network - bandwidth using socket buffer sizes as only in very rare - circumstances an application will be able to choose a suitable value - for the socket buffer size to obtain good network performance. - - Limited Transmit [RFC3042] should be implemented in all end hosts as - it assists in triggering fast retransmit when congestion window is - small. - - All of the mechanisms described above are stable standards-track RFCs - (at Proposed Standard status, as of this writing). - - In addition, implementors may wish to consider TCP buffer auto- - tuning, especially when the host system is likely to be used with a - wide variety of access link speeds. This is not a standards-track - TCP mechanism but, as it is an operating system implementation issue, - it does not need to be standardized. - - Of the above mechanisms, only Header Compression (for IP and TCP) may - cease to work in the presence of end-to-end IPSEC. However, - [RFC3095] does allow compressing the ESP header. - - - -Dawkins, et al. Best Current Practice [Page 11] - -RFC 3150 PILC - Slow Links July 2001 - - -4.0 Topics For Further Work - - In addition to the standards-track mechanisms discussed above, there - are still opportunities to improve performance over low-speed links. - - "Sending fewer bits" is an obvious response to slow link speeds. The - now-defunct HTTP-NG proposal [HTTP-NG] replaced the text-based HTTP - header representation with a binary representation for compactness. - However, HTTP-NG is not moving forward and HTTP/1.1 is not being - enhanced to include a more compact HTTP header representation. 
- Instead, the Wireless Application Protocol (WAP) Forum has opted for - the XML-based Wireless Session Protocol [WSP], which includes a - compact header encoding mechanism. - - It would be nice to agree on a more compact header representation - that will be used by all WWW communities, not only the wireless WAN - community. Indeed, general XML content encodings have been proposed - [Millau], although they are not yet widely adopted. - - We note that TCP options which change from segment to segment - effectively disable header compression schemes deployed today, - because there's no way to indicate that some fields in the header are - unchanged from the previous segment, while other fields are not. The - Robust Header Compression working group is developing such schemes - for TCP options such as timestamps and selective acknowledgements. - Hopefully, documents subsequent to [RFC3095] will define such - specifications. - - Another effort worth following is that of 'Delta Encoding'. Here, - clients that request a slightly modified version of some previously - cached resource would receive a succinct description of the - differences, rather than the entire resource [HTTP-DELTA]. - -5.0 Security Considerations - - All recommendations included in this document are stable standards- - track RFCs (at Proposed Standard status, as of this writing) or - otherwise do not suggest any changes to any protocol. With the - exception of Van Jacobson compression [RFC1144] and [RFC2507, - RFC2508, RFC2509], all other mechanisms are applicable to TCP - connections protected by end-to-end IPSec. This includes ROHC - [RFC3095], albeit partially, because even though it can compress the - outermost ESP header to some extent, encryption still renders any - payload data uncompressible (including any subsequent protocol - headers). - - - - - - -Dawkins, et al. 
Best Current Practice [Page 12] - -RFC 3150 PILC - Slow Links July 2001 - - -6.0 IANA Considerations - - This document is a pointer to other, existing IETF standards. There - are no new IANA considerations. - -7.0 Acknowledgements - - This recommendation has grown out of "Long Thin Networks" [RFC2757], - which in turn benefited from work done in the IETF TCPSAT working - group. - -8.0 References - - [AlPa99] Mark Allman and Vern Paxson, "On Estimating End-to-End - Network Path Properties", in ACM SIGCOMM 99 Proceedings, - 1999. - - [HTTP-DELTA] J. Mogul, et al., "Delta encoding in HTTP", Work in - Progress. - - [HTTP-NG] Mike Spreitzer, Bill Janssen, "HTTP 'Next Generation'", - 9th International WWW Conference, May, 2000. Also - available as: http://www.www9.org/w9cdrom/60/60.html - - [Millau] Marc Girardot, Neel Sundaresan, "Millau: an encoding - format for efficient representation and exchange of XML - over the Web", 9th International WWW Conference, May, - 2000. Also available as: - http://www.www9.org/w9cdrom/154/154.html - - [PAX97] Paxson, V., "End-to-End Internet Packet Dynamics", 1997, - in SIGCOMM 97 Proceedings, available as: - http://www.acm.org/sigcomm/ccr/archive/ccr-toc/ccr-toc- - 97.html - - [RED93] Floyd, S., and Jacobson, V., Random Early Detection - gateways for Congestion Avoidance, IEEE/ACM Transactions - on Networking, V.1 N.4, August 1993, pp. 397-413. Also - available from http://ftp.ee.lbl.gov/floyd/red.html. - - [RFC1144] Jacobson, V., "Compressing TCP/IP Headers for Low-Speed - Serial Links", RFC 1144, February 1990. - - - - - - - - - -Dawkins, et al. Best Current Practice [Page 13] - -RFC 3150 PILC - Slow Links July 2001 - - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [RFC2246] Dierks, T. and C. Allen, "The TLS Protocol: Version - 1.0", RFC 2246, January 1999. 
- - [RFC2309] Braden, R., Clark, D., Crowcroft, J., Davie, B., - Deering, S., Estrin, D., Floyd, S., Jacobson, V., - Minshall, G., Partridge, C., Peterson, L., Ramakrishnan, - K., Shenker, S., Wroclawski, J. and L. Zhang, - "Recommendations on Queue Management and Congestion - Avoidance in the Internet", RFC 2309, April 1998. - - [RFC2393] Shacham, A., Monsour, R., Pereira, R. and M. Thomas, "IP - Payload Compression Protocol (IPComp)", RFC 2393, - December 1998. - - [RFC2401] Kent, S. and R. Atkinson, "Security Architecture for the - Internet Protocol", RFC 2401, November 1998. - - [RFC2416] Shepard, T. and C. Partridge, "When TCP Starts Up With - Four Packets Into Only Three Buffers", RFC 2416, - September 1998. - - [RFC2507] Degermark, M., Nordgren, B. and S. Pink, "IP Header - Compression", RFC 2507, February 1999. - - [RFC2508] Casner, S. and V. Jacobson. "Compressing IP/UDP/RTP - Headers for Low-Speed Serial Links", RFC 2508, February - 1999. - - [RFC2509] Engan, M., Casner, S. and C. Bormann, "IP Header - Compression over PPP", RFC 2509, February 1999. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., - Masinter, L., Leach, P. and T. Berners-Lee, "Hypertext - Transfer Protocol -- HTTP/1.1", RFC 2616, June 1999. - - [RFC2757] Montenegro, G., Dawkins, S., Kojo, M., Magret, V., and - N. Vaidya, "Long Thin Networks", RFC 2757, January 2000. - - [RFC3042] Allman, M., Balakrishnan, H. and S. Floyd, "Enhancing - TCP's Loss Recovery Using Limited Transmit", RFC 3042, - January 2001. - - - - -Dawkins, et al. Best Current Practice [Page 14] - -RFC 3150 PILC - Slow Links July 2001 - - - [RFC3095] Bormann, C., Burmeister, C., Degermark, M., Fukushima, - H., Hannu, H., Jonsson, L-E., Hakenberg, R., Koren, T., - Le, K., Liu, Z., Martensson, A., Miyazaki, A., Svanbro, - K., Wiebke, T., Yoshimura, T. and H. 
Zheng, "RObust - Header Compression (ROHC): Framework and four Profiles: - RTP, UDP ESP and uncompressed", RFC 3095, July 2001. - - [SMM98] Jeffrey Semke, Matthew Mathis, and Jamshid Mahdavi, - "Automatic TCP Buffer Tuning", in ACM SIGCOMM 98 - Proceedings 1998. Available from - http://www.acm.org/sigcomm/sigcomm98/tp/abs_26.html. - - [SSL] Alan O. Freier, Philip Karlton, Paul C. Kocher, The SSL - Protocol: Version 3.0, March 1996. (Expired Internet- - Draft, available from - http://home.netscape.com/eng/ssl3/ssl-toc.html) - - [TCPB98] Hari Balakrishnan, Venkata N. Padmanabhan, Srinivasan - Seshan, Mark Stemm, Randy H. Katz, "TCP Behavior of a - Busy Internet Server: Analysis and Improvements", IEEE - Infocom, March 1998. Available from: - http://www.cs.berkeley.edu/~hari/papers/infocom98.ps.gz - - [TCPF98] Dong Lin and H.T. Kung, "TCP Fast Recovery Strategies: - Analysis and Improvements", IEEE Infocom, March 1998. - Available from: - http://www.eecs.harvard.edu/networking/papers/ infocom- - tcp-final-198.pdf - - [WSP] Wireless Application Protocol Forum, "WAP Wireless - Session Protocol Specification", approved 4 May, 2000, - available from - http://www1.wapforum.org/tech/documents/WAP-203-WSP- - 20000504-a.pdf. (informative reference). - - - - - - - - - - - - - - - - - -Dawkins, et al. Best Current Practice [Page 15] - -RFC 3150 PILC - Slow Links July 2001 - - -Authors' Addresses - - Questions about this document may be directed to: - - Spencer Dawkins - Fujitsu Network Communications - 2801 Telecom Parkway - Richardson, Texas 75082 - - Phone: +1-972-479-3782 - EMail: spencer.dawkins@fnc.fujitsu.com - - - Gabriel Montenegro - Sun Microsystems Laboratories, Europe - 29, chemin du Vieux Chene - 38240 Meylan, FRANCE - - Phone: +33 476 18 80 45 - EMail: gab@sun.com - - - Markku Kojo - Department of Computer Science - University of Helsinki - P.O. 
Box 26 (Teollisuuskatu 23) - FIN-00014 HELSINKI - Finland - - Phone: +358-9-1914-4179 - Fax: +358-9-1914-4441 - EMail: kojo@cs.helsinki.fi - - - Vincent Magret - Alcatel Internetworking, Inc. - 26801 W. Agoura road - Calabasas, CA, 91301 - - Phone: +1 818 878 4485 - EMail: vincent.magret@alcatel.com - - - - - - - - - - -Dawkins, et al. Best Current Practice [Page 16] - -RFC 3150 PILC - Slow Links July 2001 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2001). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. 
- - - - - - - - - - - - - - - - - - - -Dawkins, et al. Best Current Practice [Page 17] - diff --git a/kernel/picotcp/RFC/rfc3155.txt b/kernel/picotcp/RFC/rfc3155.txt deleted file mode 100644 index 3dd5e91..0000000 --- a/kernel/picotcp/RFC/rfc3155.txt +++ /dev/null @@ -1,899 +0,0 @@ - - - - - - -Network Working Group S. Dawkins -Request for Comments: 3155 G. Montenegro -BCP: 50 M. Kojo -Category: Best Current Practice V. Magret - N. Vaidya - August 2001 - - - End-to-end Performance Implications of Links with Errors - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2001). All Rights Reserved. - -Abstract - - This document discusses the specific TCP mechanisms that are - problematic in environments with high uncorrected error rates, and - discusses what can be done to mitigate the problems without - introducing intermediate devices into the connection. - -Table of Contents - - 1.0 Introduction ............................................. 2 - 1.1 Should you be reading this recommendation? ........... 3 - 1.2 Relationship of this recommendation to PEPs ........... 4 - 1.3 Relationship of this recommendation to Link Layer - Mechanisms............................................. 4 - 2.0 Errors and Interactions with TCP Mechanisms .............. 5 - 2.1 Slow Start and Congestion Avoidance [RFC2581] ......... 5 - 2.2 Fast Retransmit and Fast Recovery [RFC2581] ........... 6 - 2.3 Selective Acknowledgements [RFC2018, RFC2883] ......... 7 - 3.0 Summary of Recommendations ............................... 8 - 4.0 Topics For Further Work .................................. 9 - 4.1 Achieving, and maintaining, large windows ............. 10 - 5.0 Security Considerations .................................. 
11 - 6.0 IANA Considerations ...................................... 11 - 7.0 Acknowledgements ......................................... 11 - References ................................................... 11 - Authors' Addresses ........................................... 14 - Full Copyright Statement ..................................... 16 - - - - -Dawkins, et al. Best Current Practice [Page 1] - -RFC 3155 PILC - Links with Errors August 2001 - - -1.0 Introduction - - The rapidly-growing Internet is being accessed by an increasingly - wide range of devices over an increasingly wide variety of links. At - least some of these links do not provide the degree of reliability - that hosts expect, and this expansion into unreliable links causes - some Internet protocols, especially TCP [RFC793], to perform poorly. - - Specifically, TCP congestion control [RFC2581], while appropriate for - connections that lose traffic primarily because of congestion and - buffer exhaustion, interacts badly with uncorrected errors when TCP - connections traverse links with high uncorrected error rates. The - result is that sending TCPs may spend an excessive amount of time - waiting for acknowledgement that do not arrive, and then, although - these losses are not due to congestion-related buffer exhaustion, the - sending TCP transmits at substantially reduced traffic levels as it - probes the network to determine "safe" traffic levels. - - This document does not address issues with other transport protocols, - for example, UDP. - - Congestion avoidance in the Internet is based on an assumption that - most packet losses are due to congestion. TCP's congestion avoidance - strategy treats the absence of acknowledgement as a congestion - signal. This has worked well since it was introduced in 1988 [VJ- - DCAC], because most links and subnets have relatively low error rates - in normal operation, and congestion is the primary cause of loss in - these environments. 
However, links and subnets that do not enjoy low - uncorrected error rates are becoming more prevalent in parts of the - Internet. In particular, these include terrestrial and satellite - wireless links. Users relying on traffic traversing these links may - see poor performance because their TCP connections are spending - excessive time in congestion avoidance and/or slow start procedures - triggered by packet losses due to transmission errors. - - The recommendations in this document aim at improving utilization of - available path capacity over such high error-rate links in ways that - do not threaten the stability of the Internet. - - Applications use TCP in very different ways, and these have - interactions with TCP's behavior [RFC2861]. Nevertheless, it is - possible to make some basic assumptions about TCP flows. - Accordingly, the mechanisms discussed here are applicable to all uses - of TCP, albeit in varying degrees according to different scenarios - (as noted where appropriate). - - - - - - -Dawkins, et al. Best Current Practice [Page 2] - -RFC 3155 PILC - Links with Errors August 2001 - - - This recommendation is based on the explicit assumption that major - changes to the entire installed base of routers and hosts are not a - practical possibility. This constrains any changes to hosts that are - directly affected by errored links. - -1.1 Should you be reading this recommendation? - - All known subnetwork technologies provide an "imperfect" subnetwork - service - the bit error rate is non-zero. But there's no obvious way - for end stations to tell the difference between packets discarded due - to congestion and losses due to transmission errors. - - If a directly-attached subnetwork is reporting transmission errors to - a host, these reports matter, but we can't rely on explicit - transmission error reports to both hosts. - - Another way of deciding if a subnetwork should be considered to have - a "high error rate" is by appealing to mathematics. 
- - An approximate formula for the TCP Reno response function is given in - [PFTK98]: - - s - T = -------------------------------------------------- - RTT*sqrt(2p/3) + tRTO*(3*sqrt(3p/8))*p*(1 + 32p**2) - - where - - T = the sending rate in bytes per second - s = the packet size in bytes - RTT = round-trip time in seconds - tRTO = TCP retransmit timeout value in seconds - p = steady-state packet loss rate - - If one plugs in an observed packet loss rate, does the math and then - sees predicted bandwidth utilization that is greater than the link - speed, the connection will not benefit from recommendations in this - document, because the level of packet losses being encountered won't - affect the ability of TCP to utilize the link. If, however, the - predicted bandwidth is less than the link speed, packet losses are - affecting the ability of TCP to utilize the link. - - If further investigation reveals a subnetwork with significant - transmission error rates, the recommendations in this document will - improve the ability of TCP to utilize the link. - - - - - - -Dawkins, et al. Best Current Practice [Page 3] - -RFC 3155 PILC - Links with Errors August 2001 - - - A few caveats are in order, when doing this calculation: - - (1) the RTT is the end-to-end RTT, not the link RTT. - (2) Max(1.0, 4*RTT) can be substituted as a simplification for - tRTO. - (3) losses may be bursty - a loss rate measured over an interval - that includes multiple bursty loss events may understate the - impact of these loss events on the sending rate. - -1.2 Relationship of this recommendation to PEPs - - This document discusses end-to-end mechanisms that do not require - TCP-level awareness by intermediate nodes. This places severe - limitations on what the end nodes can know about the nature of losses - that are occurring between the end nodes. Attempts to apply - heuristics to distinguish between congestion and transmission error - have not been successful [BV97, BV98, BV98a]. 
This restriction is - relaxed in an informational document on Performance Enhancing Proxies - (PEPs) [RFC3135]. Because PEPs can be placed on boundaries where - network characteristics change dramatically, PEPs have an additional - opportunity to improve performance over links with uncorrected - errors. - - However, generalized use of PEPs contravenes the end-to-end principle - and is highly undesirable given their deleterious implications, which - include the following: lack of fate sharing (a PEP adds a third point - of failure besides the endpoints themselves), end-to-end reliability - and diagnostics, preventing end-to-end security (particularly network - layer security such as IPsec), mobility (handoffs are much more - complex because state must be transferred), asymmetric routing (PEPs - typically require being on both the forward and reverse paths of a - connection), scalability (PEPs add more state to maintain), QoS - transparency and guarantees. - - Not every type of PEP has all the drawbacks listed above. - Nevertheless, the use of PEPs may have very serious consequences - which must be weighed carefully. - -1.3 Relationship of this recommendation to Link Layer Mechanisms - - This recommendation is for use with TCP over subnetwork technologies - (link layers) that have already been deployed. Subnetworks that are - intended to carry Internet protocols, but have not been completely - specified are the subject of a best common practices (BCP) document - which has been developed or is under development by the Performance - - - - - - -Dawkins, et al. Best Current Practice [Page 4] - -RFC 3155 PILC - Links with Errors August 2001 - - - Implications of Link Characteristics WG (PILC) [PILC-WEB]. This last - document is aimed at designers who still have the opportunity to - reduce the number of uncorrected errors TCP will encounter. 
- -2.0 Errors and Interactions with TCP Mechanisms - - A TCP sender adapts its use of network path capacity based on - feedback from the TCP receiver. As TCP is not able to distinguish - between losses due to congestion and losses due to uncorrected - errors, it is not able to accurately determine available path - capacity in the presence of significant uncorrected errors. - -2.1 Slow Start and Congestion Avoidance [RFC2581] - - Slow Start and Congestion Avoidance [RFC2581] are essential to the - current stability of the Internet. These mechanisms were designed to - accommodate networks that do not provide explicit congestion - notification. Although experimental mechanisms such as [RFC2481] are - moving in the direction of explicit congestion notification, the - effect of ECN on ECN-aware TCPs is essentially the same as the effect - of implicit congestion notification through congestion-related loss, - except that ECN provides this notification before packets are lost, - and must then be retransmitted. - - TCP connections experiencing high error rates on their paths interact - badly with Slow Start and with Congestion Avoidance, because high - error rates make the interpretation of losses ambiguous - the sender - cannot know whether detected losses are due to congestion or to data - corruption. TCP makes the "safe" choice and assumes that the losses - are due to congestion. - - - Whenever sending TCPs receive three out-of-order - acknowledgement, they assume the network is mildly congested - and invoke fast retransmit/fast recovery (described below). - - - Whenever TCP's retransmission timer expires, the sender assumes - that the network is congested and invokes slow start. - - - Less-reliable link layers often use small link MTUs. This - slows the rate of increase in the sender's window size during - slow start, because the sender's window is increased in units - of segments. Small link MTUs alone don't improve reliability. 
- Path MTU discovery [RFC1191] must also be used to prevent - fragmentation. Path MTU discovery allows the most rapid - opening of the sender's window size during slow start, but a - number of round trips may still be required to open the window - completely. - - - - -Dawkins, et al. Best Current Practice [Page 5] - -RFC 3155 PILC - Links with Errors August 2001 - - - Recommendation: Any standards-conformant TCP will implement Slow - Start and Congestion Avoidance, which are MUSTs in STD 3 [RFC1122]. - Recommendations in this document will not interfere with these - mechanisms. - -2.2 Fast Retransmit and Fast Recovery [RFC2581] - - TCP provides reliable delivery of data as a byte-stream to an - application, so that when a segment is lost (whether due to either - congestion or transmission loss), the receiver TCP implementation - must wait to deliver data to the receiving application until the - missing data is received. The receiver TCP implementation detects - missing segments by segments arriving with out-of-order sequence - numbers. - - TCPs should immediately send an acknowledgement when data is received - out-of-order [RFC2581], providing the next expected sequence number - with no delay, so that the sender can retransmit the required data as - quickly as possible and the receiver can resume delivery of data to - the receiving application. When an acknowledgement carries the same - expected sequence number as an acknowledgement that has already been - sent for the last in-order segment received, these acknowledgement - are called "duplicate ACKs". - - Because IP networks are allowed to reorder packets, the receiver may - send duplicate acknowledgments for segments that arrive out of order - due to routing changes, link-level retransmission, etc. When a TCP - sender receives three duplicate ACKs, fast retransmit [RFC2581] - allows it to infer that a segment was lost. 
The sender retransmits - what it considers to be this lost segment without waiting for the - full retransmission timeout, thus saving time. - - After a fast retransmit, a sender halves its congestion window and - invokes the fast recovery [RFC2581] algorithm, whereby it invokes - congestion avoidance from a halved congestion window, but does not - invoke slow start from a one-segment congestion window as it would do - after a retransmission timeout. As the sender is still receiving - dupacks, it knows the receiver is receiving packets sent, so the full - reduction after a timeout when no communication has been received is - not called for. This relatively safe optimization also saves time. - - It is important to be realistic about the maximum throughput that TCP - can have over a connection that traverses a high error-rate link. In - general, TCP will increase its congestion window beyond the delay- - bandwidth product. TCP's congestion avoidance strategy is additive- - increase, multiplicative-decrease, which means that if additional - errors are encountered before the congestion window recovers - completely from a 50-percent reduction, the effect can be a "downward - - - -Dawkins, et al. Best Current Practice [Page 6] - -RFC 3155 PILC - Links with Errors August 2001 - - - spiral" of the congestion window due to additional 50-percent - reductions. Even using Fast Retransmit/Fast Recovery, the sender - will halve the congestion window each time a window contains one or - more segments that are lost, and will re-open the window by one - additional segment for each congestion window's worth of - acknowledgement received. 
- - If a connection's path traverses a link that loses one or more - segments during this recovery period, the one-half reduction takes - place again, this time on a reduced congestion window - and this - downward spiral will continue to hold the congestion window below - path capacity until the connection is able to recover completely by - additive increase without experiencing loss. - - Of course, no downward spiral occurs if the error rate is constantly - high and the congestion window always remains small; the - multiplicative-increase "slow start" will be exited early, and the - congestion window remains low for the duration of the TCP connection. - In links with high error rates, the TCP window may remain rather - small for long periods of time. - - Not all causes of small windows are related to errors. For example, - HTTP/1.0 commonly closes TCP connections to indicate boundaries - between requested resources. This means that these applications are - constantly closing "trained" TCP connections and opening "untrained" - TCP connections which will execute slow start, beginning with one or - two segments. This can happen even with HTTP/1.1, if webmasters - configure their HTTP/1.1 servers to close connections instead of - waiting to see if the connection will be useful again. - - A small window - especially a window of less than four segments - - effectively prevents the sender from taking advantage of Fast - Retransmits. Moreover, efficient recovery from multiple losses - within a single window requires adoption of new proposals (NewReno - [RFC2582]). - - Recommendation: Implement Fast Retransmit and Fast Recovery at this - time. This is a widely-implemented optimization and is currently at - Proposed Standard level. [RFC2488] recommends implementation of Fast - Retransmit/Fast Recovery in satellite environments. 
- -2.3 Selective Acknowledgements [RFC2018, RFC2883] - - Selective Acknowledgements [RFC2018] allow the repair of multiple - segment losses per window without requiring one (or more) round-trips - per loss. - - - - - -Dawkins, et al. Best Current Practice [Page 7] - -RFC 3155 PILC - Links with Errors August 2001 - - - [RFC2883] proposes a minor extension to SACK that allows receiving - TCPs to provide more information about the order of delivery of - segments, allowing "more robust operation in an environment of - reordered packets, ACK loss, packet replication, and/or early - retransmit timeouts". Unless explicitly stated otherwise, in this - document, "Selective Acknowledgements" (or "SACK") refers to the - combination of [RFC2018] and [RFC2883]. - - Selective acknowledgments are most useful in LFNs ("Long Fat - Networks") because of the long round trip times that may be - encountered in these environments, according to Section 1.1 of - [RFC1323], and are especially useful if large windows are required, - because there is a higher probability of multiple segment losses per - window. - - On the other hand, if error rates are generally low but occasionally - higher due to channel conditions, TCP will have the opportunity to - increase its window to larger values during periods of improved - channel conditions between bursts of errors. When bursts of errors - occur, multiple losses within a window are likely to occur. In this - case, SACK would provide benefits in speeding the recovery and - preventing unnecessary reduction of the window size. - - Recommendation: Implement SACK as specified in [RFC2018] and updated - by [RFC2883], both Proposed Standards. In cases where SACK cannot be - enabled for both sides of a connection, TCP senders may use NewReno - [RFC2582] to better handle partial ACKs and multiple losses within a - single window. 
- -3.0 Summary of Recommendations - - The Internet does not provide a widely-available loss feedback - mechanism that allows TCP to distinguish between congestion loss and - transmission error. Because congestion affects all traffic on a path - while transmission loss affects only the specific traffic - encountering uncorrected errors, avoiding congestion has to take - precedence over quickly repairing transmission errors. This means - that the best that can be achieved without new feedback mechanisms is - minimizing the amount of time that is spent unnecessarily in - congestion avoidance. - - The Fast Retransmit/Fast Recovery mechanism allows quick repair of - loss without giving up the safety of congestion avoidance. In order - for Fast Retransmit/Fast Recovery to work, the window size must be - large enough to force the receiver to send three duplicate - acknowledgments before the retransmission timeout interval expires, - forcing full TCP slow-start. - - - - -Dawkins, et al. Best Current Practice [Page 8] - -RFC 3155 PILC - Links with Errors August 2001 - - - Selective Acknowledgements (SACK) extend the benefit of Fast - Retransmit/Fast Recovery to situations where multiple segment losses - in the window need to be repaired more quickly than can be - accomplished by executing Fast Retransmit for each segment loss, only - to discover the next segment loss. - - These mechanisms are not limited to wireless environments. They are - usable in all environments. - -4.0 Topics For Further Work - - "Limited Transmit" [RFC3042] has been specified as an optimization - extending Fast Retransmit/Fast Recovery for TCP connections with - small congestion windows that will not trigger three duplicate - acknowledgments. This specification is deemed safe, and it also - provides benefits for TCP connections that experience a large amount - of packet (data or ACK) loss. Implementors should evaluate this - standards track specification for TCP in loss environments. 
- - Delayed Duplicate Acknowledgements [MV97, VMPM99] attempts to prevent - TCP-level retransmission when link-level retransmission is still in - progress, adding additional traffic to the network. This proposal is - worthy of additional study, but is not recommended at this time, - because we don't know how to calculate appropriate amounts of delay - for an arbitrary network topology. - - It is not possible to use explicit congestion notification [RFC2481] - as a surrogate for explicit transmission error notification (no - matter how much we wish it was!). Some mechanism to provide explicit - notification of transmission error would be very helpful. This might - be more easily provided in a PEP environment, especially when the PEP - is the "first hop" in a connection path, because current checksum - mechanisms do not distinguish between transmission error to a payload - and transmission error to the header. Furthermore, if the header is - damaged, sending explicit transmission error notification to the - right endpoint is problematic. - - Losses that take place on the ACK stream, especially while a TCP is - learning network characteristics, can make the data stream quite - bursty (resulting in losses on the data stream, as well). Several - ways of limiting this burstiness have been proposed, including TCP - transmit pacing at the sender and ACK rate control within the - network. - - "Appropriate Byte Counting" (ABC) [ALL99], has been proposed as a way - of opening the congestion window based on the number of bytes that - have been successfully transfered to the receiver, giving more - appropriate behavior for application protocols that initiate - - - -Dawkins, et al. Best Current Practice [Page 9] - -RFC 3155 PILC - Links with Errors August 2001 - - - connections with relatively short packets. 
For SMTP [RFC2821], for - instance, the client might send a short HELO packet, a short MAIL - packet, one or more short RCPT packets, and a short DATA packet - - followed by the entire mail body sent as maximum-length packets. An - ABC TCP sender would not use ACKs for each of these short packets to - increase the congestion window to allow additional full-length - packets. ABC is worthy of additional study, but is not recommended - at this time, because ABC can lead to increased burstiness when - acknowledgments are lost. - -4.1 Achieving, and maintaining, large windows - - The recommendations described in this document will aid TCPs in - injecting packets into ERRORed connections as fast as possible - without destabilizing the Internet, and so optimizing the use of - available bandwidth. - - In addition to these TCP-level recommendations, there is still - additional work to do at the application level, especially with the - dominant application protocol on the World Wide Web, HTTP. - - HTTP/1.0 (and earlier versions) closes TCP connections to signal a - receiver that all of a requested resource had been transmitted. - Because WWW objects tend to be small in size [MOGUL], TCPs carrying - HTTP/1.0 traffic experience difficulty in "training" on available - path capacity (a substantial portion of the transfer has already - happened by the time TCP exits slow start). - - Several HTTP modifications have been introduced to improve this - interaction with TCP ("persistent connections" in HTTP/1.0, with - improvements in HTTP/1.1 [RFC2616]). For a variety of reasons, many - HTTP interactions are still HTTP/1.0-style - relatively short-lived. 
- - Proposals which reuse TCP congestion information across connections, - like TCP Control Block Interdependence [RFC2140], or the more recent - Congestion Manager [BS00] proposal, will have the effect of making - multiple parallel connections impact the network as if they were a - single connection, "trained" after a single startup transient. These - proposals are critical to the long-term stability of the Internet, - because today's users always have the choice of clicking on the - "reload" button in their browsers and cutting off TCP's exponential - backoff - replacing connections which are building knowledge of the - available bandwidth with connections with no knowledge at all. - - - - - - - - -Dawkins, et al. Best Current Practice [Page 10] - -RFC 3155 PILC - Links with Errors August 2001 - - -5.0 Security Considerations - - A potential vulnerability introduced by Fast Retransmit/Fast Recovery - is (as pointed out in [RFC2581]) that an attacker may force TCP - connections to grind to a halt, or, more dangerously, behave more - aggressively. The latter possibility may lead to congestion - collapse, at least in some regions of the network. - - Selective acknowledgments is believed to neither strengthen nor - weaken TCP's current security properties [RFC2018]. - - Given that the recommendations in this document are performed on an - end-to-end basis, they continue working even in the presence of end- - to-end IPsec. This is in direct contrast with mechanisms such as - PEP's which are implemented in intermediate nodes (section 1.2). - -6.0 IANA Considerations - - This document is a pointer to other, existing IETF standards. There - are no new IANA considerations. - -7.0 Acknowledgements - - This recommendation has grown out of RFC 2757, "Long Thin Networks", - which was in turn based on work done in the IETF TCPSAT working - group. The authors are indebted to the active members of the PILC - working group. 
In particular, Mark Allman and Lloyd Wood gave us - copious and insightful feedback, and Dan Grossman and Jamshid Mahdavi - provided text replacements. - -References - - [ALL99] M. Allman, "TCP Byte Counting Refinements," ACM Computer - Communication Review, Volume 29, Number 3, July 1999. - http://www.acm.org/sigcomm/ccr/archive/ccr-toc/ccr-toc- - 99.html - - [BS00] Balakrishnan, H. and S. Seshan, "The Congestion Manager", - RFC 3124, June 2001. - - [BV97] S. Biaz and N. Vaidya, "Using End-to-end Statistics to - Distinguish Congestion and Corruption Losses: A Negative - Result," Texas A&M University, Technical Report 97-009, - August 18, 1997. - - - - - - - -Dawkins, et al. Best Current Practice [Page 11] - -RFC 3155 PILC - Links with Errors August 2001 - - - [BV98] S. Biaz and N. Vaidya, "Sender-Based heuristics for - Distinguishing Congestion Losses from Wireless - Transmission Losses," Texas A&M University, Technical - Report 98-013, June 1998. - - [BV98a] S. Biaz and N. Vaidya, "Discriminating Congestion Losses - from Wireless Losses using Inter-Arrival Times at the - Receiver," Texas A&M University, Technical Report 98-014, - June 1998. - - [MOGUL] "The Case for Persistent-Connection HTTP", J. C. Mogul, - Research Report 95/4, May 1995. Available as - http://www.research.digital.com/wrl/techreports/abstracts/ - 95.4.html - - [MV97] M. Mehta and N. Vaidya, "Delayed Duplicate- - Acknowledgements: A Proposal to Improve Performance of - TCP on Wireless Links," Texas A&M University, December 24, - 1997. Available at - http://www.cs.tamu.edu/faculty/vaidya/mobile.html - - [PILC-WEB] http://pilc.grc.nasa.gov/ - - [PFTK98] Padhye, J., Firoiu, V., Towsley, D. and J.Kurose, "TCP - Throughput: A simple model and its empirical validation", - SIGCOMM Symposium on Communications Architectures and - Protocols, August 1998. - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. 
- - [RFC2821] Klensin, J., Editor, "Simple Mail Transfer Protocol", RFC - 2821, April 2001. - - [RFC1122] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC1191] Mogul J., and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - [RFC1323] Jacobson, V., Braden, R. and D. Borman. "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. - - [RFC2140] Touch, J., "TCP Control Block Interdependence", RFC 2140, - April 1997. - - - -Dawkins, et al. Best Current Practice [Page 12] - -RFC 3155 PILC - Links with Errors August 2001 - - - [RFC2309] Braden, B., Clark, D., Crowcrfot, J., Davie, B., Deering, - S., Estrin, D., Floyd, S., Jacobson, V., Minshall, G., - Partridge, C., Peterson, L., Ramakrishnan, K., Shecker, - S., Wroclawski, J. and L, Zhang, "Recommendations on Queue - Management and Congestion Avoidance in the Internet", RFC - 2309, April 1998. - - [RFC2481] Ramakrishnan K. and S. Floyd, "A Proposal to add Explicit - Congestion Notification (ECN) to IP", RFC 2481, January - 1999. - - [RFC2488] Allman, M., Glover, D. and L. Sanchez. "Enhancing TCP Over - Satellite Channels using Standard Mechanisms", BCP 28, RFC - 2488, January 1999. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2582] Floyd, S. and T. Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., - Masinter, L., Leach P. and T. Berners-Lee, "Hypertext - Transfer Protocol -- HTTP/1.1", RFC 2616, June 1999. - - [RFC2861] Handley, H., Padhye, J. and S., Floyd, "TCP Congestion - Window Validation", RFC 2861, June 2000. - - [RFC2883] Floyd, S., Mahdavi, M., Mathis, M. and M. 
Podlosky, "An - Extension to the Selective Acknowledgement (SACK) Option - for TCP", RFC 2883, August 1999. - - [RFC2923] Lahey, K., "TCP Problems with Path MTU Discovery", RFC - 2923, September 2000. - - [RFC3042] Allman, M., Balakrishnan, H. and S. Floyd, "Enhancing - TCP's Loss Recovery Using Limited Transmit", RFC 3042, - January, 2001. - - [RFC3135] Border, J., Kojo, M., Griner, J., Montenegro, G. and Z. - Shelby, "Performance Enhancing Proxies Intended to - Mitigate Link-Related Degradations", RFC 3135, June 2001. - - [VJ-DCAC] Jacobson, V., "Dynamic Congestion Avoidance / Control" e- - mail dated February 11, 1988, available from - http://www.kohala.com/~rstevens/vanj.88feb11.txt - - - - - -Dawkins, et al. Best Current Practice [Page 13] - -RFC 3155 PILC - Links with Errors August 2001 - - - [VMPM99] N. Vaidya, M. Mehta, C. Perkins, and G. Montenegro, - "Delayed Duplicate Acknowledgements: A TCP-Unaware - Approach to Improve Performance of TCP over Wireless," - Technical Report 99-003, Computer Science Dept., Texas A&M - University, February 1999. Also, to appear in Journal of - Wireless Communications and Wireless Computing (Special - Issue on Reliable Transport Protocols for Mobile - Computing). - -Authors' Addresses - - Questions about this document may be directed to: - - Spencer Dawkins - Fujitsu Network Communications - 2801 Telecom Parkway - Richardson, Texas 75082 - - Phone: +1-972-479-3782 - EMail: spencer.dawkins@fnc.fujitsu.com - - - Gabriel E. Montenegro - Sun Microsystems - Laboratories, Europe - 29, chemin du Vieux Chene - 38240 Meylan - FRANCE - - Phone: +33 476 18 80 45 - EMail: gab@sun.com - - - Markku Kojo - Department of Computer Science - University of Helsinki - P.O. Box 26 (Teollisuuskatu 23) - FIN-00014 HELSINKI - Finland - - Phone: +358-9-1914-4179 - EMail: kojo@cs.helsinki.fi - - - - - - - - - -Dawkins, et al. 
Best Current Practice [Page 14] - -RFC 3155 PILC - Links with Errors August 2001 - - - Vincent Magret - Alcatel Internetworking, Inc. - 26801 W. Agoura road - Calabasas, CA, 91301 - - Phone: +1 818 878 4485 - EMail: vincent.magret@alcatel.com - - - Nitin H. Vaidya - 458 Coodinated Science Laboratory, MC-228 - 1308 West Main Street - Urbana, IL 61801 - - Phone: 217-265-5414 - E-mail: nhv@crhc.uiuc.edu - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Dawkins, et al. Best Current Practice [Page 15] - -RFC 3155 PILC - Links with Errors August 2001 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2001). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Dawkins, et al. Best Current Practice [Page 16] - diff --git a/kernel/picotcp/RFC/rfc3168.txt b/kernel/picotcp/RFC/rfc3168.txt deleted file mode 100644 index 30b05f7..0000000 --- a/kernel/picotcp/RFC/rfc3168.txt +++ /dev/null @@ -1,3531 +0,0 @@ - - - - - - -Network Working Group K. Ramakrishnan -Request for Comments: 3168 TeraOptic Networks -Updates: 2474, 2401, 793 S. Floyd -Obsoletes: 2481 ACIRI -Category: Standards Track D. Black - EMC - September 2001 - - - The Addition of Explicit Congestion Notification (ECN) to IP - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2001). All Rights Reserved. - -Abstract - - This memo specifies the incorporation of ECN (Explicit Congestion - Notification) to TCP and IP, including ECN's use of two bits in the - IP header. - -Table of Contents - - 1. Introduction.................................................. 3 - 2. Conventions and Acronyms...................................... 5 - 3. Assumptions and General Principles............................ 5 - 4. Active Queue Management (AQM)................................. 
6 - 5. Explicit Congestion Notification in IP........................ 6 - 5.1. ECN as an Indication of Persistent Congestion............... 10 - 5.2. Dropped or Corrupted Packets................................ 11 - 5.3. Fragmentation............................................... 11 - 6. Support from the Transport Protocol........................... 12 - 6.1. TCP......................................................... 13 - 6.1.1 TCP Initialization......................................... 14 - 6.1.1.1. Middlebox Issues........................................ 16 - 6.1.1.2. Robust TCP Initialization with an Echoed Reserved Field. 17 - 6.1.2. The TCP Sender............................................ 18 - 6.1.3. The TCP Receiver.......................................... 19 - 6.1.4. Congestion on the ACK-path................................ 20 - 6.1.5. Retransmitted TCP packets................................. 20 - - - -Ramakrishnan, et al. Standards Track [Page 1] - -RFC 3168 The Addition of ECN to IP September 2001 - - - 6.1.6. TCP Window Probes......................................... 22 - 7. Non-compliance by the End Nodes............................... 22 - 8. Non-compliance in the Network................................. 24 - 8.1. Complications Introduced by Split Paths..................... 25 - 9. Encapsulated Packets.......................................... 25 - 9.1. IP packets encapsulated in IP............................... 25 - 9.1.1. The Limited-functionality and Full-functionality Options.. 27 - 9.1.2. Changes to the ECN Field within an IP Tunnel.............. 28 - 9.2. IPsec Tunnels............................................... 29 - 9.2.1. Negotiation between Tunnel Endpoints...................... 31 - 9.2.1.1. ECN Tunnel Security Association Database Field.......... 32 - 9.2.1.2. ECN Tunnel Security Association Attribute............... 32 - 9.2.1.3. Changes to IPsec Tunnel Header Processing............... 33 - 9.2.2. 
Changes to the ECN Field within an IPsec Tunnel........... 35 - 9.2.3. Comments for IPsec Support................................ 35 - 9.3. IP packets encapsulated in non-IP Packet Headers............ 36 - 10. Issues Raised by Monitoring and Policing Devices............. 36 - 11. Evaluations of ECN........................................... 37 - 11.1. Related Work Evaluating ECN................................ 37 - 11.2. A Discussion of the ECN nonce.............................. 37 - 11.2.1. The Incremental Deployment of ECT(1) in Routers.......... 38 - 12. Summary of changes required in IP and TCP.................... 38 - 13. Conclusions.................................................. 40 - 14. Acknowledgements............................................. 41 - 15. References................................................... 41 - 16. Security Considerations...................................... 45 - 17. IPv4 Header Checksum Recalculation........................... 45 - 18. Possible Changes to the ECN Field in the Network............. 45 - 18.1. Possible Changes to the IP Header.......................... 46 - 18.1.1. Erasing the Congestion Indication........................ 46 - 18.1.2. Falsely Reporting Congestion............................. 47 - 18.1.3. Disabling ECN-Capability................................. 47 - 18.1.4. Falsely Indicating ECN-Capability........................ 47 - 18.2. Information carried in the Transport Header................ 48 - 18.3. Split Paths................................................ 49 - 19. Implications of Subverting End-to-End Congestion Control..... 50 - 19.1. Implications for the Network and for Competing Flows....... 50 - 19.2. Implications for the Subverted Flow........................ 53 - 19.3. Non-ECN-Based Methods of Subverting End-to-end Congestion - Control.................................................... 54 - 20. The Motivation for the ECT Codepoints........................ 54 - 20.1. 
The Motivation for an ECT Codepoint........................ 54 - 20.2. The Motivation for two ECT Codepoints...................... 55 - 21. Why use Two Bits in the IP Header?........................... 57 - 22. Historical Definitions for the IPv4 TOS Octet................ 58 - 23. IANA Considerations.......................................... 60 - 23.1. IPv4 TOS Byte and IPv6 Traffic Class Octet................. 60 - 23.2. TCP Header Flags........................................... 61 - - - -Ramakrishnan, et al. Standards Track [Page 2] - -RFC 3168 The Addition of ECN to IP September 2001 - - - 23.3. IPSEC Security Association Attributes....................... 62 - 24. Authors' Addresses........................................... 62 - 25. Full Copyright Statement..................................... 63 - -1. Introduction - - We begin by describing TCP's use of packet drops as an indication of - congestion. Next we explain that with the addition of active queue - management (e.g., RED) to the Internet infrastructure, where routers - detect congestion before the queue overflows, routers are no longer - limited to packet drops as an indication of congestion. Routers can - instead set the Congestion Experienced (CE) codepoint in the IP - header of packets from ECN-capable transports. We describe when the - CE codepoint is to be set in routers, and describe modifications - needed to TCP to make it ECN-capable. Modifications to other - transport protocols (e.g., unreliable unicast or multicast, reliable - multicast, other reliable unicast transport protocols) could be - considered as those protocols are developed and advance through the - standards process. We also describe in this document the issues - involving the use of ECN within IP tunnels, and within IPsec tunnels - in particular. - - One of the guiding principles for this document is that, to the - extent possible, the mechanisms specified here be incrementally - deployable. 
One challenge to the principle of incremental deployment - has been the prior existence of some IP tunnels that were not - compatible with the use of ECN. As ECN becomes deployed, non- - compatible IP tunnels will have to be upgraded to conform to this - document. - - This document obsoletes RFC 2481, "A Proposal to add Explicit - Congestion Notification (ECN) to IP", which defined ECN as an - Experimental Protocol for the Internet Community. This document also - updates RFC 2474, "Definition of the Differentiated Services Field - (DS Field) in the IPv4 and IPv6 Headers", in defining the ECN field - in the IP header, RFC 2401, "Security Architecture for the Internet - Protocol" to change the handling of IPv4 TOS Byte and IPv6 Traffic - Class Octet in tunnel mode header construction to be compatible with - the use of ECN, and RFC 793, "Transmission Control Protocol", in - defining two new flags in the TCP header. - - TCP's congestion control and avoidance algorithms are based on the - notion that the network is a black-box [Jacobson88, Jacobson90]. The - network's state of congestion or otherwise is determined by end- - systems probing for the network state, by gradually increasing the - load on the network (by increasing the window of packets that are - outstanding in the network) until the network becomes congested and a - packet is lost. Treating the network as a "black-box" and treating - - - -Ramakrishnan, et al. Standards Track [Page 3] - -RFC 3168 The Addition of ECN to IP September 2001 - - - loss as an indication of congestion in the network is appropriate for - pure best-effort data carried by TCP, with little or no sensitivity - to delay or loss of individual packets. In addition, TCP's - congestion management algorithms have techniques built-in (such as - Fast Retransmit and Fast Recovery) to minimize the impact of losses, - from a throughput perspective. 
However, these mechanisms are not - intended to help applications that are in fact sensitive to the delay - or loss of one or more individual packets. Interactive traffic such - as telnet, web-browsing, and transfer of audio and video data can be - sensitive to packet losses (especially when using an unreliable data - delivery transport such as UDP) or to the increased latency of the - packet caused by the need to retransmit the packet after a loss (with - the reliable data delivery semantics provided by TCP). - - Since TCP determines the appropriate congestion window to use by - gradually increasing the window size until it experiences a dropped - packet, this causes the queues at the bottleneck router to build up. - With most packet drop policies at the router that are not sensitive - to the load placed by each individual flow (e.g., tail-drop on queue - overflow), this means that some of the packets of latency-sensitive - flows may be dropped. In addition, such drop policies lead to - synchronization of loss across multiple flows. - - Active queue management mechanisms detect congestion before the queue - overflows, and provide an indication of this congestion to the end - nodes. Thus, active queue management can reduce unnecessary queuing - delay for all traffic sharing that queue. The advantages of active - queue management are discussed in RFC 2309 [RFC2309]. Active queue - management avoids some of the bad properties of dropping on queue - overflow, including the undesirable synchronization of loss across - multiple flows. More importantly, active queue management means that - transport protocols with mechanisms for congestion control (e.g., - TCP) do not have to rely on buffer overflow as the only indication of - congestion. - - Active queue management mechanisms may use one of several methods for - indicating congestion to end-nodes. One is to use packet drops, as is - currently done. 
However, active queue management allows the router to - separate policies of queuing or dropping packets from the policies - for indicating congestion. Thus, active queue management allows - routers to use the Congestion Experienced (CE) codepoint in a packet - header as an indication of congestion, instead of relying solely on - packet drops. This has the potential of reducing the impact of loss - on latency-sensitive flows. - - - - - - - -Ramakrishnan, et al. Standards Track [Page 4] - -RFC 3168 The Addition of ECN to IP September 2001 - - - There exist some middleboxes (firewalls, load balancers, or intrusion - detection systems) in the Internet that either drop a TCP SYN packet - configured to negotiate ECN, or respond with a RST. This document - specifies procedures that TCP implementations may use to provide - robust connectivity even in the presence of such equipment. - -2. Conventions and Acronyms - - The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, - SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this - document, are to be interpreted as described in [RFC2119]. - -3. Assumptions and General Principles - - In this section, we describe some of the important design principles - and assumptions that guided the design choices in this proposal. - - * Because ECN is likely to be adopted gradually, accommodating - migration is essential. Some routers may still only drop packets - to indicate congestion, and some end-systems may not be ECN- - capable. The most viable strategy is one that accommodates - incremental deployment without having to resort to "islands" of - ECN-capable and non-ECN-capable environments. - - * New mechanisms for congestion control and avoidance need to co- - exist and cooperate with existing mechanisms for congestion - control. 
In particular, new mechanisms have to co-exist with - TCP's current methods of adapting to congestion and with - routers' current practice of dropping packets in periods of - congestion. - - * Congestion may persist over different time-scales. The time - scales that we are concerned with are congestion events that may - last longer than a round-trip time. - - * The number of packets in an individual flow (e.g., TCP - connection or an exchange using UDP) may range from a small - number of packets to quite a large number. We are interested in - managing the congestion caused by flows that send enough packets - so that they are still active when network feedback reaches - them. - - * Asymmetric routing is likely to be a normal occurrence in the - Internet. The path (sequence of links and routers) followed by - data packets may be different from the path followed by the - acknowledgment packets in the reverse direction. - - - - - -Ramakrishnan, et al. Standards Track [Page 5] - -RFC 3168 The Addition of ECN to IP September 2001 - - - * Many routers process the "regular" headers in IP packets more - efficiently than they process the header information in IP - options. This suggests keeping congestion experienced - information in the regular headers of an IP packet. - - * It must be recognized that not all end-systems will cooperate in - mechanisms for congestion control. However, new mechanisms - shouldn't make it easier for TCP applications to disable TCP - congestion control. The benefit of lying about participating in - new mechanisms such as ECN-capability should be small. - -4. Active Queue Management (AQM) - - Random Early Detection (RED) is one mechanism for Active Queue - Management (AQM) that has been proposed to detect incipient - congestion [FJ93], and is currently being deployed in the Internet - [RFC2309]. 
AQM is meant to be a general mechanism using one of - several alternatives for congestion indication, but in the absence of - ECN, AQM is restricted to using packet drops as a mechanism for - congestion indication. AQM drops packets based on the average queue - length exceeding a threshold, rather than only when the queue - overflows. However, because AQM may drop packets before the queue - actually overflows, AQM is not always forced by memory limitations to - discard the packet. - - AQM can set a Congestion Experienced (CE) codepoint in the packet - header instead of dropping the packet, when such a field is provided - in the IP header and understood by the transport protocol. The use - of the CE codepoint with ECN allows the receiver(s) to receive the - packet, avoiding the potential for excessive delays due to - retransmissions after packet losses. We use the term 'CE packet' to - denote a packet that has the CE codepoint set. - -5. Explicit Congestion Notification in IP - - This document specifies that the Internet provide a congestion - indication for incipient congestion (as in RED and earlier work - [RJ90]) where the notification can sometimes be through marking - packets rather than dropping them. This uses an ECN field in the IP - header with two bits, making four ECN codepoints, '00' to '11'. The - ECN-Capable Transport (ECT) codepoints '10' and '01' are set by the - data sender to indicate that the end-points of the transport protocol - are ECN-capable; we call them ECT(0) and ECT(1) respectively. The - phrase "the ECT codepoint" in this document refers to either of the - two ECT codepoints. Routers treat the ECT(0) and ECT(1) codepoints - as equivalent. Senders are free to use either the ECT(0) or the - ECT(1) codepoint to indicate ECT, on a packet-by-packet basis. - - - - -Ramakrishnan, et al.
Standards Track [Page 6] - -RFC 3168 The Addition of ECN to IP September 2001 - - - The use of both the two codepoints for ECT, ECT(0) and ECT(1), is - motivated primarily by the desire to allow mechanisms for the data - sender to verify that network elements are not erasing the CE - codepoint, and that data receivers are properly reporting to the - sender the receipt of packets with the CE codepoint set, as required - by the transport protocol. Guidelines for the senders and receivers - to differentiate between the ECT(0) and ECT(1) codepoints will be - addressed in separate documents, for each transport protocol. In - particular, this document does not address mechanisms for TCP end- - nodes to differentiate between the ECT(0) and ECT(1) codepoints. - Protocols and senders that only require a single ECT codepoint SHOULD - use ECT(0). - - The not-ECT codepoint '00' indicates a packet that is not using ECN. - The CE codepoint '11' is set by a router to indicate congestion to - the end nodes. Routers that have a packet arriving at a full queue - drop the packet, just as they do in the absence of ECN. - - +-----+-----+ - | ECN FIELD | - +-----+-----+ - ECT CE [Obsolete] RFC 2481 names for the ECN bits. - 0 0 Not-ECT - 0 1 ECT(1) - 1 0 ECT(0) - 1 1 CE - - Figure 1: The ECN Field in IP. - - The use of two ECT codepoints essentially gives a one-bit ECN nonce - in packet headers, and routers necessarily "erase" the nonce when - they set the CE codepoint [SCWA99]. For example, routers that erased - the CE codepoint would face additional difficulty in reconstructing - the original nonce, and thus repeated erasure of the CE codepoint - would be more likely to be detected by the end-nodes. The ECN nonce - also can address the problem of misbehaving transport receivers lying - to the transport sender about whether or not the CE codepoint was set - in a packet. 
The motivations for the use of two ECT codepoints are - discussed in more detail in Section 20, along with some discussion of - alternate possibilities for the fourth ECT codepoint (that is, the - codepoint '01'). Backwards compatibility with earlier ECN - implementations that do not understand the ECT(1) codepoint is - discussed in Section 11. - - In RFC 2481 [RFC2481], the ECN field was divided into the ECN-Capable - Transport (ECT) bit and the CE bit. The ECN field with only the - ECN-Capable Transport (ECT) bit set in RFC 2481 corresponds to the - ECT(0) codepoint in this document, and the ECN field with both the - - - -Ramakrishnan, et al. Standards Track [Page 7] - -RFC 3168 The Addition of ECN to IP September 2001 - - - ECT and CE bit in RFC 2481 corresponds to the CE codepoint in this - document. The '01' codepoint was left undefined in RFC 2481, and - this is the reason for recommending the use of ECT(0) when only a - single ECT codepoint is needed. - - 0 1 2 3 4 5 6 7 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | DS FIELD, DSCP | ECN FIELD | - +-----+-----+-----+-----+-----+-----+-----+-----+ - - DSCP: differentiated services codepoint - ECN: Explicit Congestion Notification - - Figure 2: The Differentiated Services and ECN Fields in IP. - - Bits 6 and 7 in the IPv4 TOS octet are designated as the ECN field. - The IPv4 TOS octet corresponds to the Traffic Class octet in IPv6, - and the ECN field is defined identically in both cases. The - definitions for the IPv4 TOS octet [RFC791] and the IPv6 Traffic - Class octet have been superseded by the six-bit DS (Differentiated - Services) Field [RFC2474, RFC2780]. Bits 6 and 7 are listed in - [RFC2474] as Currently Unused, and are specified in RFC 2780 as - approved for experimental use for ECN. Section 22 gives a brief - history of the TOS octet.
- - Because of the unstable history of the TOS octet, the use of the ECN - field as specified in this document cannot be guaranteed to be - backwards compatible with those past uses of these two bits that - pre-date ECN. The potential dangers of this lack of backwards - compatibility are discussed in Section 22. - - Upon the receipt by an ECN-Capable transport of a single CE packet, - the congestion control algorithms followed at the end-systems MUST be - essentially the same as the congestion control response to a *single* - dropped packet. For example, for ECN-Capable TCP the source TCP is - required to halve its congestion window for any window of data - containing either a packet drop or an ECN indication. - - One reason for requiring that the congestion-control response to the - CE packet be essentially the same as the response to a dropped packet - is to accommodate the incremental deployment of ECN in both end- - systems and in routers. Some routers may drop ECN-Capable packets - (e.g., using the same AQM policies for congestion detection) while - other routers set the CE codepoint, for equivalent levels of - congestion. Similarly, a router might drop a non-ECN-Capable packet - but set the CE codepoint in an ECN-Capable packet, for equivalent - - - - - -Ramakrishnan, et al. Standards Track [Page 8] - -RFC 3168 The Addition of ECN to IP September 2001 - - - levels of congestion. If there were different congestion control - responses to a CE codepoint than to a packet drop, this could result - in unfair treatment for different flows. - - An additional goal is that the end-systems should react to congestion - at most once per window of data (i.e., at most once per round-trip - time), to avoid reacting multiple times to multiple indications of - congestion within a round-trip time. 
- - For a router, the CE codepoint of an ECN-Capable packet SHOULD only - be set if the router would otherwise have dropped the packet as an - indication of congestion to the end nodes. When the router's buffer - is not yet full and the router is prepared to drop a packet to inform - end nodes of incipient congestion, the router should first check to - see if the ECT codepoint is set in that packet's IP header. If so, - then instead of dropping the packet, the router MAY instead set the - CE codepoint in the IP header. - - An environment where all end nodes were ECN-Capable could allow new - criteria to be developed for setting the CE codepoint, and new - congestion control mechanisms for end-node reaction to CE packets. - However, this is a research issue, and as such is not addressed in - this document. - - When a CE packet (i.e., a packet that has the CE codepoint set) is - received by a router, the CE codepoint is left unchanged, and the - packet is transmitted as usual. When severe congestion has occurred - and the router's queue is full, then the router has no choice but to - drop some packet when a new packet arrives. We anticipate that such - packet losses will become relatively infrequent when a majority of - end-systems become ECN-Capable and participate in TCP or other - compatible congestion control mechanisms. In an ECN-Capable - environment that is adequately-provisioned, packet losses should - occur primarily during transients or in the presence of non- - cooperating sources. - - The above discussion of when CE may be set instead of dropping a - packet applies by default to all Differentiated Services Per-Hop - Behaviors (PHBs) [RFC 2475]. Specifications for PHBs MAY provide - more specifics on how a compliant implementation is to choose between - setting CE and dropping a packet, but this is NOT REQUIRED. 
A router - MUST NOT set CE instead of dropping a packet when the drop that would - occur is caused by reasons other than congestion or the desire to - indicate incipient congestion to end nodes (e.g., a diffserv edge - node may be configured to unconditionally drop certain classes of - traffic to prevent them from entering its diffserv domain). - - - - - -Ramakrishnan, et al. Standards Track [Page 9] - -RFC 3168 The Addition of ECN to IP September 2001 - - - We expect that routers will set the CE codepoint in response to - incipient congestion as indicated by the average queue size, using - the RED algorithms suggested in [FJ93, RFC2309]. To the best of our - knowledge, this is the only proposal currently under discussion in - the IETF for routers to drop packets proactively, before the buffer - overflows. However, this document does not attempt to specify a - particular mechanism for active queue management, leaving that - endeavor, if needed, to other areas of the IETF. While ECN is - inextricably tied up with the need to have a reasonable active queue - management mechanism at the router, the reverse does not hold; active - queue management mechanisms have been developed and deployed - independent of ECN, using packet drops as indications of congestion - in the absence of ECN in the IP architecture. - -5.1. ECN as an Indication of Persistent Congestion - - We emphasize that a *single* packet with the CE codepoint set in an - IP packet causes the transport layer to respond, in terms of - congestion control, as it would to a packet drop. The instantaneous - queue size is likely to see considerable variations even when the - router does not experience persistent congestion. As such, it is - important that transient congestion at a router, reflected by the - instantaneous queue size reaching a threshold much smaller than the - capacity of the queue, not trigger a reaction at the transport layer. 
- Therefore, the CE codepoint should not be set by a router based on - the instantaneous queue size. - - For example, since the ATM and Frame Relay mechanisms for congestion - indication have typically been defined without an associated notion - of average queue size as the basis for determining that an - intermediate node is congested, we believe that they provide a very - noisy signal. The TCP-sender reaction specified in this document for - ECN is NOT the appropriate reaction for such a noisy signal of - congestion notification. However, if the routers that interface to - the ATM network have a way of maintaining the average queue at the - interface, and use it to come to a reliable determination that the - ATM subnet is congested, they may use the ECN notification that is - defined here. - - We continue to encourage experiments in techniques at layer 2 (e.g., - in ATM switches or Frame Relay switches) to take advantage of ECN. - For example, using a scheme such as RED (where packet marking is - based on the average queue length exceeding a threshold), layer 2 - devices could provide a reasonably reliable indication of congestion. - When all the layer 2 devices in a path set that layer's own - Congestion Experienced codepoint (e.g., the EFCI bit for ATM, the - FECN bit in Frame Relay) in this reliable manner, then the interface - router to the layer 2 network could copy the state of that layer 2 - - - -Ramakrishnan, et al. Standards Track [Page 10] - -RFC 3168 The Addition of ECN to IP September 2001 - - - Congestion Experienced codepoint into the CE codepoint in the IP - header. We recognize that this is not the current practice, nor is - it in current standards. However, encouraging experimentation in this - manner may provide the information needed to enable evolution of - existing layer 2 mechanisms to provide a more reliable means of - congestion indication, when they use a single bit for indicating - congestion. - -5.2. 
Dropped or Corrupted Packets - - For the proposed use for ECN in this document (that is, for a - transport protocol such as TCP for which a dropped data packet is an - indication of congestion), end nodes detect dropped data packets, and - the congestion response of the end nodes to a dropped data packet is - at least as strong as the congestion response to a received CE - packet. To ensure the reliable delivery of the congestion indication - of the CE codepoint, an ECT codepoint MUST NOT be set in a packet - unless the loss of that packet in the network would be detected by - the end nodes and interpreted as an indication of congestion. - - Transport protocols such as TCP do not necessarily detect all packet - drops, such as the drop of a "pure" ACK packet; for example, TCP does - not reduce the arrival rate of subsequent ACK packets in response to - an earlier dropped ACK packet. Any proposal for extending ECN- - Capability to such packets would have to address issues such as the - case of an ACK packet that was marked with the CE codepoint but was - later dropped in the network. We believe that this aspect is still - the subject of research, so this document specifies that at this - time, "pure" ACK packets MUST NOT indicate ECN-Capability. - - Similarly, if a CE packet is dropped later in the network due to - corruption (bit errors), the end nodes should still invoke congestion - control, just as TCP would today in response to a dropped data - packet. This issue of corrupted CE packets would have to be - considered in any proposal for the network to distinguish between - packets dropped due to corruption, and packets dropped due to - congestion or buffer overflow. In particular, the ubiquitous - deployment of ECN would not, in and of itself, be a sufficient - development to allow end-nodes to interpret packet drops as - indications of corruption rather than congestion. - -5.3. Fragmentation - - ECN-capable packets MAY have the DF (Don't Fragment) bit set. 
- Reassembly of a fragmented packet MUST NOT lose indications of - congestion. In other words, if any fragment of an IP packet to be - reassembled has the CE codepoint set, then one of two actions MUST be - taken: - - - -Ramakrishnan, et al. Standards Track [Page 11] - -RFC 3168 The Addition of ECN to IP September 2001 - - - * Set the CE codepoint on the reassembled packet. However, this - MUST NOT occur if any of the other fragments contributing to - this reassembly carries the Not-ECT codepoint. - - * The packet is dropped, instead of being reassembled, for any - other reason. - - If both actions are applicable, either MAY be chosen. Reassembly of - a fragmented packet MUST NOT change the ECN codepoint when all of the - fragments carry the same codepoint. - - We would note that because RFC 2481 did not specify reassembly - behavior, older ECN implementations conformant with that Experimental - RFC do not necessarily perform reassembly correctly, in terms of - preserving the CE codepoint in a fragment. The sender could avoid - the consequences of this behavior by setting the DF bit in ECN- - Capable packets. - - Situations may arise in which the above reassembly specification is - insufficiently precise. For example, if there is a malicious or - broken entity in the path at or after the fragmentation point, packet - fragments could carry a mixture of ECT(0), ECT(1), and/or Not-ECT - codepoints. The reassembly specification above does not place - requirements on reassembly of fragments in this case. In situations - where more precise reassembly behavior would be required, protocol - specifications SHOULD instead specify that DF MUST be set in all - ECN-capable packets sent by the protocol. - -6. Support from the Transport Protocol - - ECN requires support from the transport protocol, in addition to the - functionality given by the ECN field in the IP packet header. 
The - transport protocol might require negotiation between the endpoints - during setup to determine that all of the endpoints are ECN-capable, - so that the sender can set the ECT codepoint in transmitted packets. - Second, the transport protocol must be capable of reacting - appropriately to the receipt of CE packets. This reaction could be - in the form of the data receiver informing the data sender of the - received CE packet (e.g., TCP), of the data receiver unsubscribing to - a layered multicast group (e.g., RLM [MJV96]), or of some other - action that ultimately reduces the arrival rate of that flow on that - congested link. CE packets indicate persistent rather than transient - congestion (see Section 5.1), and hence reactions to the receipt of - CE packets should be those appropriate for persistent congestion. - - This document only addresses the addition of ECN Capability to TCP, - leaving issues of ECN in other transport protocols to further - research. For TCP, ECN requires three new pieces of functionality: - - - -Ramakrishnan, et al. Standards Track [Page 12] - -RFC 3168 The Addition of ECN to IP September 2001 - - - negotiation between the endpoints during connection setup to - determine if they are both ECN-capable; an ECN-Echo (ECE) flag in the - TCP header so that the data receiver can inform the data sender when - a CE packet has been received; and a Congestion Window Reduced (CWR) - flag in the TCP header so that the data sender can inform the data - receiver that the congestion window has been reduced. The support - required from other transport protocols is likely to be different, - particularly for unreliable or reliable multicast transport - protocols, and will have to be determined as other transport - protocols are brought to the IETF for standardization. - - In a mild abuse of terminology, in this document we refer to `TCP - packets' instead of `TCP segments'. - -6.1. 
TCP - - The following sections describe in detail the proposed use of ECN in - TCP. This proposal is described in essentially the same form in - [Floyd94]. We assume that the source TCP uses the standard congestion - control algorithms of Slow-start, Fast Retransmit and Fast Recovery - [RFC2581]. - - This proposal specifies two new flags in the Reserved field of the - TCP header. The TCP mechanism for negotiating ECN-Capability uses - the ECN-Echo (ECE) flag in the TCP header. Bit 9 in the Reserved - field of the TCP header is designated as the ECN-Echo flag. The - location of the 6-bit Reserved field in the TCP header is shown in - Figure 4 of RFC 793 [RFC793] (and is reproduced below for - completeness). This specification of the ECN Field leaves the - Reserved field as a 4-bit field using bits 4-7. - - To enable the TCP receiver to determine when to stop setting the - ECN-Echo flag, we introduce a second new flag in the TCP header, the - CWR flag. The CWR flag is assigned to Bit 8 in the Reserved field of - the TCP header. - - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | U | A | P | R | S | F | - | Header Length | Reserved | R | C | S | S | Y | I | - | | | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - Figure 3: The old definition of bytes 13 and 14 of the TCP - header. - - - - - - -Ramakrishnan, et al. Standards Track [Page 13] - -RFC 3168 The Addition of ECN to IP September 2001 - - - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | C | E | U | A | P | R | S | F | - | Header Length | Reserved | W | C | R | C | S | S | Y | I | - | | | R | E | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - Figure 4: The new definition of bytes 13 and 14 of the TCP - Header. 
- - Thus, ECN uses the ECT and CE flags in the IP header (as shown in - Figure 1) for signaling between routers and connection endpoints, and - uses the ECN-Echo and CWR flags in the TCP header (as shown in Figure - 4) for TCP-endpoint to TCP-endpoint signaling. For a TCP connection, - a typical sequence of events in an ECN-based reaction to congestion - is as follows: - - * An ECT codepoint is set in packets transmitted by the sender to - indicate that ECN is supported by the transport entities for - these packets. - - * An ECN-capable router detects impending congestion and detects - that an ECT codepoint is set in the packet it is about to drop. - Instead of dropping the packet, the router chooses to set the CE - codepoint in the IP header and forwards the packet. - - * The receiver receives the packet with the CE codepoint set, and - sets the ECN-Echo flag in its next TCP ACK sent to the sender. - - * The sender receives the TCP ACK with ECN-Echo set, and reacts to - the congestion as if a packet had been dropped. - - * The sender sets the CWR flag in the TCP header of the next - packet sent to the receiver to acknowledge its receipt of and - reaction to the ECN-Echo flag. - - The negotiation for using ECN by the TCP transport entities and the - use of the ECN-Echo and CWR flags is described in more detail in the - sections below. - -6.1.1 TCP Initialization - - In the TCP connection setup phase, the source and destination TCPs - exchange information about their willingness to use ECN. Subsequent - to the completion of this negotiation, the TCP sender sets an ECT - codepoint in the IP header of data packets to indicate to the network - that the transport is capable and willing to participate in ECN for - this packet. This indicates to the routers that they may mark this - - - -Ramakrishnan, et al. 
Standards Track [Page 14] - -RFC 3168 The Addition of ECN to IP September 2001 - - - packet with the CE codepoint, if they would like to use that as a - method of congestion notification. If the TCP connection does not - wish to use ECN notification for a particular packet, the sending TCP - sets the ECN codepoint to not-ECT, and the TCP receiver ignores the - CE codepoint in the received packet. - - For this discussion, we designate the initiating host as Host A and - the responding host as Host B. We call a SYN packet with the ECE and - CWR flags set an "ECN-setup SYN packet", and we call a SYN packet - with at least one of the ECE and CWR flags not set a "non-ECN-setup - SYN packet". Similarly, we call a SYN-ACK packet with only the ECE - flag set but the CWR flag not set an "ECN-setup SYN-ACK packet", and - we call a SYN-ACK packet with any other configuration of the ECE and - CWR flags a "non-ECN-setup SYN-ACK packet". - - Before a TCP connection can use ECN, Host A sends an ECN-setup SYN - packet, and Host B sends an ECN-setup SYN-ACK packet. For a SYN - packet, the setting of both ECE and CWR in the ECN-setup SYN packet - is defined as an indication that the sending TCP is ECN-Capable, - rather than as an indication of congestion or of response to - congestion. More precisely, an ECN-setup SYN packet indicates that - the TCP implementation transmitting the SYN packet will participate - in ECN as both a sender and receiver. Specifically, as a receiver, - it will respond to incoming data packets that have the CE codepoint - set in the IP header by setting ECE in outgoing TCP Acknowledgement - (ACK) packets. As a sender, it will respond to incoming packets that - have ECE set by reducing the congestion window and setting CWR when - appropriate. An ECN-setup SYN packet does not commit the TCP sender - to setting the ECT codepoint in any or all of the packets it may - transmit. 
However, the commitment to respond appropriately to - incoming packets with the CE codepoint set remains even if the TCP - sender in a later transmission, within this TCP connection, sends a - SYN packet without ECE and CWR set. - - When Host B sends an ECN-setup SYN-ACK packet, it sets the ECE flag - but not the CWR flag. An ECN-setup SYN-ACK packet is defined as an - indication that the TCP transmitting the SYN-ACK packet is ECN- - Capable. As with the SYN packet, an ECN-setup SYN-ACK packet does - not commit the TCP host to setting the ECT codepoint in transmitted - packets. - - The following rules apply to the sending of ECN-setup packets within - a TCP connection, where a TCP connection is defined by the standard - rules for TCP connection establishment and termination. - - * If a host has received an ECN-setup SYN packet, then it MAY send - an ECN-setup SYN-ACK packet. Otherwise, it MUST NOT send an - ECN-setup SYN-ACK packet. - - - -Ramakrishnan, et al. Standards Track [Page 15] - -RFC 3168 The Addition of ECN to IP September 2001 - - - * A host MUST NOT set ECT on data packets unless it has sent at - least one ECN-setup SYN or ECN-setup SYN-ACK packet, and has - received at least one ECN-setup SYN or ECN-setup SYN-ACK packet, - and has sent no non-ECN-setup SYN or non-ECN-setup SYN-ACK - packet. If a host has received at least one non-ECN-setup SYN - or non-ECN-setup SYN-ACK packet, then it SHOULD NOT set ECT on - data packets. - - * If a host ever sets the ECT codepoint on a data packet, then - that host MUST correctly set/clear the CWR TCP bit on all - subsequent packets in the connection. - - * If a host has sent at least one ECN-setup SYN or ECN-setup SYN- - ACK packet, and has received no non-ECN-setup SYN or non-ECN- - setup SYN-ACK packet, then if that host receives TCP data - packets with ECT and CE codepoints set in the IP header, then - that host MUST process these packets as specified for an ECN- - capable connection. 
- - * A host that is not willing to use ECN on a TCP connection SHOULD - clear both the ECE and CWR flags in all non-ECN-setup SYN and/or - SYN-ACK packets that it sends to indicate this unwillingness. - Receivers MUST correctly handle all forms of the non-ECN-setup - SYN and SYN-ACK packets. - - * A host MUST NOT set ECT on SYN or SYN-ACK packets. - - A TCP client enters TIME-WAIT state after receiving a FIN-ACK, and - transitions to CLOSED state after a timeout. Many TCP - implementations create a new TCP connection if they receive an in- - window SYN packet during TIME-WAIT state. When a TCP host enters - TIME-WAIT or CLOSED state, it should ignore any previous state about - the negotiation of ECN for that connection. - -6.1.1.1. Middlebox Issues - - ECN introduces the use of the ECN-Echo and CWR flags in the TCP - header (as shown in Figure 4) for initialization. There exist some - faulty firewalls, load balancers, and intrusion detection systems in - the Internet that either drop an ECN-setup SYN packet or respond with - a RST, in the belief that such a packet (with these bits set) is a - signature for a port-scanning tool that could be used in a denial- - of-service attack. Some of the offending equipment has been - identified, and a web page [FIXES] contains a list of non-compliant - products and the fixes posted by the vendors, where these are - available. The TBIT web page [TBIT] lists some of the web servers - affected by this faulty equipment. We mention this in this document - as a warning to the community of this problem. - - - -Ramakrishnan, et al. Standards Track [Page 16] - -RFC 3168 The Addition of ECN to IP September 2001 - - - To provide robust connectivity even in the presence of such faulty - equipment, a host that receives a RST in response to the transmission - of an ECN-setup SYN packet MAY resend a SYN with CWR and ECE cleared. - This could result in a TCP connection being established without using - ECN.
- - A host that receives no reply to an ECN-setup SYN within the normal - SYN retransmission timeout interval MAY resend the SYN and any - subsequent SYN retransmissions with CWR and ECE cleared. To overcome - normal packet loss that results in the original SYN being lost, the - originating host may retransmit one or more ECN-setup SYN packets - before giving up and retransmitting the SYN with the CWR and ECE bits - cleared. - - We note that in this case, the following example scenario is - possible: - - (1) Host A: Sends an ECN-setup SYN. - (2) Host B: Sends an ECN-setup SYN/ACK, packet is dropped or delayed. - (3) Host A: Sends a non-ECN-setup SYN. - (4) Host B: Sends a non-ECN-setup SYN/ACK. - - We note that in this case, following the procedures above, neither - Host A nor Host B may set the ECT bit on data packets. Further, an - important consequence of the rules for ECN setup and usage in Section - 6.1.1 is that a host is forbidden from using the reception of ECT - data packets as an implicit signal that the other host is ECN- - capable. - -6.1.1.2. Robust TCP Initialization with an Echoed Reserved Field - - There is the question of why we chose to have the TCP sending the SYN - set two ECN-related flags in the Reserved field of the TCP header for - the SYN packet, while the responding TCP sending the SYN-ACK sets - only one ECN-related flag in the SYN-ACK packet. This asymmetry is - necessary for the robust negotiation of ECN-capability with some - deployed TCP implementations. There exists at least one faulty TCP - implementation in which TCP receivers set the Reserved field of the - TCP header in ACK packets (and hence the SYN-ACK) simply to reflect - the Reserved field of the TCP header in the received data packet. 
- Because the TCP SYN packet sets the ECN-Echo and CWR flags to - indicate ECN-capability, while the SYN-ACK packet sets only the ECN- - Echo flag, the sending TCP correctly interprets a receiver's - reflection of its own flags in the Reserved field as an indication - that the receiver is not ECN-capable. The sending TCP is not misled - by a faulty TCP implementation sending a SYN-ACK packet that simply - reflects the Reserved field of the incoming SYN packet. - - - - -Ramakrishnan, et al. Standards Track [Page 17] - -RFC 3168 The Addition of ECN to IP September 2001 - - -6.1.2. The TCP Sender - - For a TCP connection using ECN, new data packets are transmitted with - an ECT codepoint set in the IP header. When only one ECT codepoint - is needed by a sender for all packets sent on a TCP connection, - ECT(0) SHOULD be used. If the sender receives an ECN-Echo (ECE) ACK - packet (that is, an ACK packet with the ECN-Echo flag set in the TCP - header), then the sender knows that congestion was encountered in the - network on the path from the sender to the receiver. The indication - of congestion should be treated just as a congestion loss in non- - ECN-Capable TCP. That is, the TCP source halves the congestion window - "cwnd" and reduces the slow start threshold "ssthresh". The sending - TCP SHOULD NOT increase the congestion window in response to the - receipt of an ECN-Echo ACK packet. - - TCP should not react to congestion indications more than once every - window of data (or more loosely, more than once every round-trip - time). That is, the TCP sender's congestion window should be reduced - only once in response to a series of dropped and/or CE packets from a - single window of data. In addition, the TCP source should not - decrease the slow-start threshold, ssthresh, if it has been decreased - within the last round trip time. However, if any retransmitted - packets are dropped, then this is interpreted by the source TCP as a - new instance of congestion.
- - After the source TCP reduces its congestion window in response to a - CE packet, incoming acknowledgments that continue to arrive can - "clock out" outgoing packets as allowed by the reduced congestion - window. If the congestion window consists of only one MSS (maximum - segment size), and the sending TCP receives an ECN-Echo ACK packet, - then the sending TCP should in principle still reduce its congestion - window in half. However, the value of the congestion window is - bounded below by a value of one MSS. If the sending TCP were to - continue to send, using a congestion window of 1 MSS, this results in - the transmission of one packet per round-trip time. It is necessary - to still reduce the sending rate of the TCP sender even further, on - receipt of an ECN-Echo packet when the congestion window is one. We - use the retransmit timer as a means of reducing the rate further in - this circumstance. Therefore, the sending TCP MUST reset the - retransmit timer on receiving the ECN-Echo packet when the congestion - window is one. The sending TCP will then be able to send a new - packet only when the retransmit timer expires. - - When an ECN-Capable TCP sender reduces its congestion window for any - reason (because of a retransmit timeout, a Fast Retransmit, or in - response to an ECN Notification), the TCP sender sets the CWR flag in - the TCP header of the first new data packet sent after the window - reduction. If that data packet is dropped in the network, then the - - - -Ramakrishnan, et al. Standards Track [Page 18] - -RFC 3168 The Addition of ECN to IP September 2001 - - - sending TCP will have to reduce the congestion window again and - retransmit the dropped packet. - - We ensure that the "Congestion Window Reduced" information is - reliably delivered to the TCP receiver. 
This comes about from the - fact that if the new data packet carrying the CWR flag is dropped, - then the TCP sender will have to again reduce its congestion window, - and send another new data packet with the CWR flag set. Thus, the - CWR bit in the TCP header SHOULD NOT be set on retransmitted packets. - - When the TCP data sender is ready to set the CWR bit after reducing - the congestion window, it SHOULD set the CWR bit only on the first - new data packet that it transmits. - - [Floyd94] discusses TCP's response to ECN in more detail. [Floyd98] - discusses the validation test in the ns simulator, which illustrates - a wide range of ECN scenarios. These scenarios include the following: - an ECN followed by another ECN, a Fast Retransmit, or a Retransmit - Timeout; a Retransmit Timeout or a Fast Retransmit followed by an - ECN; and a congestion window of one packet followed by an ECN. - - TCP follows existing algorithms for sending data packets in response - to incoming ACKs, multiple duplicate acknowledgments, or retransmit - timeouts [RFC2581]. TCP also follows the normal procedures for - increasing the congestion window when it receives ACK packets without - the ECN-Echo bit set [RFC2581]. - -6.1.3. The TCP Receiver - - When TCP receives a CE data packet at the destination end-system, the - TCP data receiver sets the ECN-Echo flag in the TCP header of the - subsequent ACK packet. If there is any ACK withholding implemented, - as in current "delayed-ACK" TCP implementations where the TCP - receiver can send an ACK for two arriving data packets, then the - ECN-Echo flag in the ACK packet will be set to '1' if the CE - codepoint is set in any of the data packets being acknowledged. That - is, if any of the received data packets are CE packets, then the - returning ACK has the ECN-Echo flag set. 
- - To provide robustness against the possibility of a dropped ACK packet - carrying an ECN-Echo flag, the TCP receiver sets the ECN-Echo flag in - a series of ACK packets sent subsequently. The TCP receiver uses the - CWR flag received from the TCP sender to determine when to stop - setting the ECN-Echo flag. - - After a TCP receiver sends an ACK packet with the ECN-Echo bit set, - that TCP receiver continues to set the ECN-Echo flag in all the ACK - packets it sends (whether they acknowledge CE data packets or non-CE - - - -Ramakrishnan, et al. Standards Track [Page 19] - -RFC 3168 The Addition of ECN to IP September 2001 - - - data packets) until it receives a CWR packet (a packet with the CWR - flag set). After the receipt of the CWR packet, acknowledgments for - subsequent non-CE data packets do not have the ECN-Echo flag set. If - another CE packet is received by the data receiver, the receiver - would once again send ACK packets with the ECN-Echo flag set. While - the receipt of a CWR packet does not guarantee that the data sender - received the ECN-Echo message, this does suggest that the data sender - reduced its congestion window at some point *after* it sent the data - packet for which the CE codepoint was set. - - We have already specified that a TCP sender is not required to reduce - its congestion window more than once per window of data. Some care - is required if the TCP sender is to avoid unnecessary reductions of - the congestion window when a window of data includes both dropped - packets and (marked) CE packets. This is illustrated in [Floyd98]. - -6.1.4. Congestion on the ACK-path - - For the current generation of TCP congestion control algorithms, pure - acknowledgement packets (e.g., packets that do not contain any - accompanying data) MUST be sent with the not-ECT codepoint. Current - TCP receivers have no mechanisms for reducing traffic on the ACK-path - in response to congestion notification. 
Mechanisms for responding to - congestion on the ACK-path are areas for current and future research. - (One simple possibility would be for the sender to reduce its - congestion window when it receives a pure ACK packet with the CE - codepoint set). For current TCP implementations, a single dropped ACK - generally has only a very small effect on the TCP's sending rate. - -6.1.5. Retransmitted TCP packets - - This document specifies ECN-capable TCP implementations MUST NOT set - either ECT codepoint (ECT(0) or ECT(1)) in the IP header for - retransmitted data packets, and that the TCP data receiver SHOULD - ignore the ECN field on arriving data packets that are outside of the - receiver's current window. This is for greater security against - denial-of-service attacks, as well as for robustness of the ECN - congestion indication with packets that are dropped later in the - network. - - First, we note that if the TCP sender were to set an ECT codepoint on - a retransmitted packet, then if an unnecessarily-retransmitted packet - was later dropped in the network, the end nodes would never receive - the indication of congestion from the router setting the CE - codepoint. Thus, setting an ECT codepoint on retransmitted data - packets is not consistent with the robust delivery of the congestion - indication even for packets that are later dropped in the network. - - - - -Ramakrishnan, et al. Standards Track [Page 20] - -RFC 3168 The Addition of ECN to IP September 2001 - - - In addition, an attacker capable of spoofing the IP source address of - the TCP sender could send data packets with arbitrary sequence - numbers, with the CE codepoint set in the IP header. On receiving - this spoofed data packet, the TCP data receiver would determine that - the data does not lie in the current receive window, and return a - duplicate acknowledgement. We define an out-of-window packet at the - TCP data receiver as a data packet that lies outside the receiver's - current window. 
On receiving an out-of-window packet, the TCP data - receiver has to decide whether or not to treat the CE codepoint in - the packet header as a valid indication of congestion, and therefore - whether to return ECN-Echo indications to the TCP data sender. If - the TCP data receiver ignored the CE codepoint in an out-of-window - packet, then the TCP data sender would not receive this possibly- - legitimate indication of congestion from the network, resulting in a - violation of end-to-end congestion control. On the other hand, if - the TCP data receiver honors the CE indication in the out-of-window - packet, and reports the indication of congestion to the TCP data - sender, then the malicious node that created the spoofed, out-of- - window packet has successfully "attacked" the TCP connection by - forcing the data sender to unnecessarily reduce (halve) its - congestion window. To prevent such a denial-of-service attack, we - specify that a legitimate TCP data sender MUST NOT set an ECT - codepoint on retransmitted data packets, and that the TCP data - receiver SHOULD ignore the CE codepoint on out-of-window packets. - - One drawback of not setting ECT(0) or ECT(1) on retransmitted packets - is that it denies ECN protection for retransmitted packets. However, - for an ECN-capable TCP connection in a fully-ECN-capable environment - with mild congestion, packets should rarely be dropped due to - congestion in the first place, and so instances of retransmitted - packets should rarely arise. If packets are being retransmitted, - then there are already packet losses (from corruption or from - congestion) that ECN has been unable to prevent. 
- - We note that if the router sets the CE codepoint for an ECN-capable - data packet within a TCP connection, then the TCP connection is - guaranteed to receive that indication of congestion, or to receive - some other indication of congestion within the same window of data, - even if this packet is dropped or reordered in the network. We - consider two cases, when the packet is later retransmitted, and when - the packet is not later retransmitted. - - In the first case, if the packet is either dropped or delayed, and at - some point retransmitted by the data sender, then the retransmission - is a result of a Fast Retransmit or a Retransmit Timeout for either - that packet or for some prior packet in the same window of data. In - this case, because the data sender already has retransmitted this - packet, we know that the data sender has already responded to an - - - -Ramakrishnan, et al. Standards Track [Page 21] - -RFC 3168 The Addition of ECN to IP September 2001 - - - indication of congestion for some packet within the same window of - data as the original packet. Thus, even if the first transmission of - the packet is dropped in the network, or is delayed, if it had the CE - codepoint set, and is later ignored by the data receiver as an out- - of-window packet, this is not a problem, because the sender has - already responded to an indication of congestion for that window of - data. - - In the second case, if the packet is never retransmitted by the data - sender, then this data packet is the only copy of this data received - by the data receiver, and therefore arrives at the data receiver as - an in-window packet, regardless of how much the packet might be - delayed or reordered. In this case, if the CE codepoint is set on - the packet within the network, this will be treated by the data - receiver as a valid indication of congestion. - -6.1.6. TCP Window Probes. 
- - When the TCP data receiver advertises a zero window, the TCP data - sender sends window probes to determine if the receiver's window has - increased. Window probe packets do not contain any user data except - for the sequence number, which is a byte. If a window probe packet - is dropped in the network, this loss is not detected by the receiver. - Therefore, the TCP data sender MUST NOT set either an ECT codepoint - or the CWR bit on window probe packets. - - However, because window probes use exact sequence numbers, they - cannot be easily spoofed in denial-of-service attacks. Therefore, if - a window probe arrives with the CE codepoint set, then the receiver - SHOULD respond to the ECN indications. - -7. Non-compliance by the End Nodes - - This section discusses concerns about the vulnerability of ECN to - non-compliant end-nodes (i.e., end nodes that set the ECT codepoint - in transmitted packets but do not respond to received CE packets). - We argue that the addition of ECN to the IP architecture will not - significantly increase the current vulnerability of the architecture - to unresponsive flows. - - Even for non-ECN environments, there are serious concerns about the - damage that can be done by non-compliant or unresponsive flows (that - is, flows that do not respond to congestion control indications by - reducing their arrival rate at the congested link). For example, an - end-node could "turn off congestion control" by not reducing its - congestion window in response to packet drops. This is a concern for - the current Internet. It has been argued that routers will have to - deploy mechanisms to detect and differentially treat packets from - - - -Ramakrishnan, et al. Standards Track [Page 22] - -RFC 3168 The Addition of ECN to IP September 2001 - - - non-compliant flows [RFC2309,FF99]. 
It has also been suggested that - techniques such as end-to-end per-flow scheduling and isolation of - one flow from another, differentiated services, or end-to-end - reservations could remove some of the more damaging effects of - unresponsive flows. - - It might seem that dropping packets in itself is an adequate - deterrent for non-compliance, and that the use of ECN removes this - deterrent. We would argue in response that (1) ECN-capable routers - preserve packet-dropping behavior in times of high congestion; and - (2) even in times of high congestion, dropping packets in itself is - not an adequate deterrent for non-compliance. - - First, ECN-Capable routers will only mark packets (as opposed to - dropping them) when the packet marking rate is reasonably low. During - periods where the average queue size exceeds an upper threshold, and - therefore the potential packet marking rate would be high, our - recommendation is that routers drop packets rather than set the CE - codepoint in packet headers. - - During the periods of low or moderate packet marking rates when ECN - would be deployed, there would be little deterrent effect on - unresponsive flows of dropping rather than marking those packets. For - example, delay-insensitive flows using reliable delivery might have - an incentive to increase rather than to decrease their sending rate - in the presence of dropped packets. Similarly, delay-sensitive flows - using unreliable delivery might increase their use of FEC in response - to an increased packet drop rate, increasing rather than decreasing - their sending rate. For the same reasons, we do not believe that - packet dropping itself is an effective deterrent for non-compliance - even in an environment of high packet drop rates, when all flows are - sharing the same packet drop rate. - - Several methods have been proposed to identify and restrict non- - compliant or unresponsive flows.
The addition of ECN to the network - environment would not in any way increase the difficulty of designing - and deploying such mechanisms. If anything, the addition of ECN to - the architecture would make the job of identifying unresponsive flows - slightly easier. For example, in an ECN-Capable environment routers - are not limited to information about packets that are dropped or have - the CE codepoint set at that router itself; in such an environment, - routers could also take note of arriving CE packets that indicate - congestion encountered by that packet earlier in the path. - - - - - - - - -Ramakrishnan, et al. Standards Track [Page 23] - -RFC 3168 The Addition of ECN to IP September 2001 - - -8. Non-compliance in the Network - - This section considers the issues when a router is operating, - possibly maliciously, to modify either of the bits in the ECN field. - We note that in IPv4, the IP header is protected from bit errors by a - header checksum; this is not the case in IPv6. Thus for IPv6 the - ECN field can be accidentally modified by bit errors on links or in - routers without being detected by an IP header checksum. - - By tampering with the bits in the ECN field, an adversary (or a - broken router) could do one or more of the following: falsely report - congestion, disable ECN-Capability for an individual packet, erase - the ECN congestion indication, or falsely indicate ECN-Capability. - Section 18 systematically examines the various cases by which the ECN - field could be modified. The important criterion considered in - determining the consequences of such modifications is whether it is - likely to lead to poorer behavior in any dimension (throughput, - delay, fairness or functionality) than if a router were to drop a - packet. - - The first two possible changes, falsely reporting congestion or - disabling ECN-Capability for an individual packet, are no worse than - if the router were to simply drop the packet. 
From a congestion - control point of view, setting the CE codepoint in the absence of - congestion by a non-compliant router would be no worse than a router - dropping a packet unnecessarily. By "erasing" an ECT codepoint of a - packet that is later dropped in the network, a router's actions could - result in an unnecessary packet drop for that packet later in the - network. - - However, as discussed in Section 18, a router that erases the ECN - congestion indication or falsely indicates ECN-Capability could - potentially do more damage to the flow than if it had simply dropped - the packet. A rogue or broken router that "erased" the CE codepoint - in arriving CE packets would prevent that indication of congestion - from reaching downstream receivers. This could result in the failure - of congestion control for that flow and a resulting increase in - congestion in the network, ultimately resulting in subsequent packets - dropped for this flow as the average queue size increased at the - congested gateway. - - Section 19 considers the potential repercussions of subverting end- - to-end congestion control by either falsely indicating ECN- - Capability, or by erasing the congestion indication in ECN (the CE- - codepoint). We observe in Section 19 that the consequence of - subverting ECN-based congestion control may lead to potential - unfairness, but this is likely to be no worse than the subversion of - either ECN-based or packet-based congestion control by the end nodes. - - - -Ramakrishnan, et al. Standards Track [Page 24] - -RFC 3168 The Addition of ECN to IP September 2001 - - -8.1. Complications Introduced by Split Paths - - If a router or other network element has access to all of the packets - of a flow, then that router could do no more damage to a flow by - altering the ECN field than it could by simply dropping all of the - packets from that flow.
However, in some cases, a malicious or - broken router might have access to only a subset of the packets from - a flow. The question is as follows: can this router, by altering - the ECN field in this subset of the packets, do more damage to that - flow than if it has simply dropped that set of the packets? - - This is also discussed in detail in Section 18, which concludes as - follows: It is true that the adversary that has access only to a - subset of packets in an aggregate might, by subverting ECN-based - congestion control, be able to deny the benefits of ECN to the other - packets in the aggregate. While this is undesirable, this is not a - sufficient concern to result in disabling ECN. - -9. Encapsulated Packets - -9.1. IP packets encapsulated in IP - - The encapsulation of IP packet headers in tunnels is used in many - places, including IPsec and IP in IP [RFC2003]. This section - considers issues related to interactions between ECN and IP tunnels, - and specifies two alternative solutions. This discussion is - complemented by RFC 2983's discussion of interactions between - Differentiated Services and IP tunnels of various forms [RFC 2983], - as Differentiated Services uses the remaining six bits of the IP - header octet that is used by ECN (see Figure 2 in Section 5). - - - Some IP tunnel modes are based on adding a new "outer" IP header that - encapsulates the original, or "inner" IP header and its associated - packet. In many cases, the new "outer" IP header may be added and - removed at intermediate points along a connection, enabling the - network to establish a tunnel without requiring endpoint - participation. We denote tunnels that specify that the outer header - be discarded at tunnel egress as "simple tunnels". - - ECN uses the ECN field in the IP header for signaling between routers - and connection endpoints. ECN interacts with IP tunnels based on the - treatment of the ECN field in the IP header. 
In simple IP tunnels - the octet containing the ECN field is copied or mapped from the inner - IP header to the outer IP header at IP tunnel ingress, and the outer - header's copy of this field is discarded at IP tunnel egress. If the - outer header were to be simply discarded without taking care to deal - with the ECN field, and an ECN-capable router were to set the CE - - - -Ramakrishnan, et al. Standards Track [Page 25] - -RFC 3168 The Addition of ECN to IP September 2001 - - - (Congestion Experienced) codepoint within a packet in a simple IP - tunnel, this indication would be discarded at tunnel egress, losing - the indication of congestion. - - Thus, the use of ECN over simple IP tunnels would result in routers - attempting to use the outer IP header to signal congestion to - endpoints, but those congestion warnings never arriving because the - outer header is discarded at the tunnel egress point. This problem - was encountered with ECN and IPsec in tunnel mode, and RFC 2481 - recommended that ECN not be used with the older simple IPsec tunnels - in order to avoid this behavior and its consequences. When ECN - becomes widely deployed, then simple tunnels likely to carry ECN- - capable traffic will have to be changed. If ECN-capable traffic is - carried by a simple tunnel through a congested, ECN-capable router, - this could result in subsequent packets being dropped for this flow - as the average queue size increases at the congested router, as - discussed in Section 8 above. - - From a security point of view, the use of ECN in the outer header of - an IP tunnel might raise security concerns because an adversary could - tamper with the ECN information that propagates beyond the tunnel - endpoint. 
Based on an analysis in Sections 18 and 19 of these - concerns and the resultant risks, our overall approach is to make - support for ECN an option for IP tunnels, so that an IP tunnel can be - specified or configured either to use ECN or not to use ECN in the - outer header of the tunnel. Thus, in environments or tunneling - protocols where the risks of using ECN are judged to outweigh its - benefits, the tunnel can simply not use ECN in the outer header. - Then the only indication of congestion experienced at routers within - the tunnel would be through packet loss. - - The result is that there are two viable options for the behavior of - ECN-capable connections over an IP tunnel, including IPsec tunnels: - - * A limited-functionality option in which ECN is preserved in the - inner header, but disabled in the outer header. The only - mechanism available for signaling congestion occurring within - the tunnel in this case is dropped packets. - - * A full-functionality option that supports ECN in both the inner - and outer headers, and propagates congestion warnings from nodes - within the tunnel to endpoints. - - Support for these options requires varying amounts of changes to IP - header processing at tunnel ingress and egress. A small subset of - these changes sufficient to support only the limited-functionality - option would be sufficient to eliminate any incompatibility between - ECN and IP tunnels. - - - -Ramakrishnan, et al. Standards Track [Page 26] - -RFC 3168 The Addition of ECN to IP September 2001 - - - One goal of this document is to give guidance about the tradeoffs - between the limited-functionality and full-functionality options. A - full discussion of the potential effects of an adversary's - modifications of the ECN field is given in Sections 18 and 19. - -9.1.1. 
The Limited-functionality and Full-functionality Options - - The limited-functionality option for ECN encapsulation in IP tunnels - is for the not-ECT codepoint to be set in the outside (encapsulating) - header regardless of the value of the ECN field in the inside - (encapsulated) header. With this option, the ECN field in the inner - header is not altered upon de-capsulation. The disadvantage of this - approach is that the flow does not have ECN support for that part of - the path that is using IP tunneling, even if the encapsulated packet - (from the original TCP sender) is ECN-Capable. That is, if the - encapsulated packet arrives at a congested router that is ECN- - capable, and the router can decide to drop or mark the packet as an - indication of congestion to the end nodes, the router will not be - permitted to set the CE codepoint in the packet header, but instead - will have to drop the packet. - - The full-functionality option for ECN encapsulation is to copy the - ECN codepoint of the inside header to the outside header on - encapsulation if the inside header is not-ECT or ECT, and to set the - ECN codepoint of the outside header to ECT(0) if the ECN codepoint of - the inside header is CE. On decapsulation, if the CE codepoint is - set on the outside header, then the CE codepoint is also set in the - inner header. Otherwise, the ECN codepoint on the inner header is - left unchanged. That is, for full ECN support the encapsulation and - decapsulation processing involves the following: At tunnel ingress, - the full-functionality option sets the ECN codepoint in the outer - header. If the ECN codepoint in the inner header is not-ECT or ECT, - then it is copied to the ECN codepoint in the outer header. If the - ECN codepoint in the inner header is CE, then the ECN codepoint in - the outer header is set to ECT(0). 
Upon decapsulation at the tunnel - egress, the full-functionality option sets the CE codepoint in the - inner header if the CE codepoint is set in the outer header. - Otherwise, no change is made to this field of the inner header. - - With the full-functionality option, a flow can take advantage of ECN - in those parts of the path that might use IP tunneling. The - disadvantage of the full-functionality option from a security - perspective is that the IP tunnel cannot protect the flow from - certain modifications to the ECN bits in the IP header within the - tunnel. The potential dangers from modifications to the ECN bits in - the IP header are described in detail in Sections 18 and 19. - - - - - -Ramakrishnan, et al. Standards Track [Page 27] - -RFC 3168 The Addition of ECN to IP September 2001 - - - (1) An IP tunnel MUST modify the handling of the DS field octet at - IP tunnel endpoints by implementing either the limited- - functionality or the full-functionality option. - - (2) Optionally, an IP tunnel MAY enable the endpoints of an IP - tunnel to negotiate the choice between the limited-functionality - and the full-functionality option for ECN in the tunnel. - - The minimum required to make ECN usable with IP tunnels is the - limited-functionality option, which prevents ECN from being enabled - in the outer header of the tunnel. Full support for ECN requires the - use of the full-functionality option. If there are no optional - mechanisms for the tunnel endpoints to negotiate a choice between the - limited-functionality or full-functionality option, there can be a - pre-existing agreement between the tunnel endpoints about whether to - support the limited-functionality or the full-functionality ECN - option. - - All IP tunnels MUST implement the limited-functionality option, and - SHOULD support the full-functionality option. 
- - In addition, it is RECOMMENDED that packets with the CE codepoint in - the outer header be dropped if they arrive at the tunnel egress point - for a tunnel that uses the limited-functionality option, or for a - tunnel that uses the full-functionality option but for which the - not-ECT codepoint is set in the inner header. This is motivated by - backwards compatibility and to ensure that no unauthorized - modifications of the ECN field take place, and is discussed further - in the next Section (9.1.2). - -9.1.2. Changes to the ECN Field within an IP Tunnel. - - The presence of a copy of the ECN field in the inner header of an IP - tunnel mode packet provides an opportunity for detection of - unauthorized modifications to the ECN field in the outer header. - Comparison of the ECT fields in the inner and outer headers falls - into two categories for implementations that conform to this - document: - - * If the IP tunnel uses the full-functionality option, then the - not-ECT codepoint should be set in the outer header if and only - if it is also set in the inner header. - - * If the tunnel uses the limited-functionality option, then the - not-ECT codepoint should be set in the outer header. - - Receipt of a packet not satisfying the appropriate condition could be - a cause of concern. - - - -Ramakrishnan, et al. Standards Track [Page 28] - -RFC 3168 The Addition of ECN to IP September 2001 - - - Consider the case of an IP tunnel where the tunnel ingress point has - not been updated to this document's requirements, while the tunnel - egress point has been updated to support ECN. In this case, the IP - tunnel is not explicitly configured to support the full-functionality - ECN option. However, the tunnel ingress point is behaving identically - to a tunnel ingress point that supports the full-functionality - option. If packets from an ECN-capable connection use this tunnel, - the ECT codepoint will be set in the outer header at the tunnel - ingress point. 
Congestion within the tunnel may then result in ECN- - capable routers setting CE in the outer header. Because the tunnel - has not been explicitly configured to support the full-functionality - option, the tunnel egress point expects the not-ECT codepoint to be - set in the outer header. When an ECN-capable tunnel egress point - receives a packet with the ECT or CE codepoint in the outer header, - in a tunnel that has not been configured to support the full- - functionality option, that packet should be processed, according to - whether the CE codepoint was set, as follows. It is RECOMMENDED that - on a tunnel that has not been configured to support the full- - functionality option, packets should be dropped at the egress point - if the CE codepoint is set in the outer header but not in the inner - header, and should be forwarded otherwise. - - An IP tunnel cannot provide protection against erasure of congestion - indications based on changing the ECN codepoint from CE to ECT. The - erasure of congestion indications may impact the network and other - flows in ways that would not be possible in the absence of ECN. It - is important to note that erasure of congestion indications can only - be performed to congestion indications placed by nodes within the - tunnel; the copy of the ECN field in the inner header preserves - congestion notifications from nodes upstream of the tunnel ingress - (unless the inner header is also erased). If erasure of congestion - notifications is judged to be a security risk that exceeds the - congestion management benefits of ECN, then tunnels could be - specified or configured to use the limited-functionality option. - -9.2. IPsec Tunnels - - IPsec supports secure communication over potentially insecure network - components such as intermediate routers. IPsec protocols support two - operating modes, transport mode and tunnel mode, that span a wide - range of security requirements and operating environments. 
Transport - mode security protocol header(s) are inserted between the IP (IPv4 or - IPv6) header and higher layer protocol headers (e.g., TCP), and hence - transport mode can only be used for end-to-end security on a - connection. IPsec tunnel mode is based on adding a new "outer" IP - header that encapsulates the original, or "inner" IP header and its - associated packet. Tunnel mode security headers are inserted between - these two IP headers. In contrast to transport mode, the new "outer" - - - -Ramakrishnan, et al. Standards Track [Page 29] - -RFC 3168 The Addition of ECN to IP September 2001 - - - IP header and tunnel mode security headers can be added and removed - at intermediate points along a connection, enabling security gateways - to secure vulnerable portions of a connection without requiring - endpoint participation in the security protocols. An important - aspect of tunnel mode security is that in the original specification, - the outer header is discarded at tunnel egress, ensuring that - security threats based on modifying the IP header do not propagate - beyond that tunnel endpoint. Further discussion of IPsec can be - found in [RFC2401]. - - The IPsec protocol as originally defined in [ESP, AH] required that - the inner header's ECN field not be changed by IPsec decapsulation - processing at a tunnel egress node; this would have ruled out the - possibility of full-functionality mode for ECN. At the same time, - this would ensure that an adversary's modifications to the ECN field - cannot be used to launch theft- or denial-of-service attacks across - an IPsec tunnel endpoint, as any such modifications will be discarded - at the tunnel endpoint. - - In principle, permitting the use of ECN functionality in the outer - header of an IPsec tunnel raises security concerns because an - adversary could tamper with the information that propagates beyond - the tunnel endpoint. 
Based on an analysis (included in Sections 18 - and 19) of these concerns and the associated risks, our overall - approach has been to provide configuration support for IPsec changes - to remove the conflict with ECN. - - In particular, in tunnel mode the IPsec tunnel MUST support the - limited-functionality option outlined in Section 9.1.1, and SHOULD - support the full-functionality option outlined in Section 9.1.1. - - This makes permission to use ECN functionality in the outer header of - an IPsec tunnel a configurable part of the corresponding IPsec - Security Association (SA), so that it can be disabled in situations - where the risks are judged to outweigh the benefits. The result is - that an IPsec security administrator is presented with two - alternatives for the behavior of ECN-capable connections within an - IPsec tunnel, the limited-functionality alternative and full- - functionality alternative described earlier. - - In addition, this document specifies how the endpoints of an IPsec - tunnel could negotiate enabling ECN functionality in the outer - headers of that tunnel based on security policy. The ability to - negotiate ECN usage between tunnel endpoints would enable a security - administrator to disable ECN in situations where she believes the - risks (e.g., of lost congestion notifications) outweigh the benefits - of ECN. - - - - -Ramakrishnan, et al. Standards Track [Page 30] - -RFC 3168 The Addition of ECN to IP September 2001 - - - The IPsec protocol, as defined in [ESP, AH], does not include the IP - header's ECN field in any of its cryptographic calculations (in the - case of tunnel mode, the outer IP header's ECN field is not - included). Hence modification of the ECN field by a network node has - no effect on IPsec's end-to-end security, because it cannot cause any - IPsec integrity check to fail. 
As a consequence, IPsec does not - provide any defense against an adversary's modification of the ECN - field (i.e., a man-in-the-middle attack), as the adversary's - modification will also have no effect on IPsec's end-to-end security. - In some environments, the ability to modify the ECN field without - affecting IPsec integrity checks may constitute a covert channel; if - it is necessary to eliminate such a channel or reduce its bandwidth, - then the IPsec tunnel should be run in limited-functionality mode. - -9.2.1. Negotiation between Tunnel Endpoints - - This section describes the detailed changes to enable usage of ECN - over IPsec tunnels, including the negotiation of ECN support between - tunnel endpoints. This is supported by three changes to IPsec: - - * An optional Security Association Database (SAD) field indicating - whether tunnel encapsulation and decapsulation processing allows - or forbids ECN usage in the outer IP header. - - * An optional Security Association Attribute that enables - negotiation of this SAD field between the two endpoints of an SA - that supports tunnel mode. - - * Changes to tunnel mode encapsulation and decapsulation - processing to allow or forbid ECN usage in the outer IP header - based on the value of the SAD field. When ECN usage is allowed - in the outer IP header, the ECT codepoint is set in the outer - header for ECN-capable connections and congestion notifications - (indicated by the CE codepoint) from such connections are - propagated to the inner header at tunnel egress. - - If negotiation of ECN usage is implemented, then the SAD field SHOULD - also be implemented. On the other hand, negotiation of ECN usage is - OPTIONAL in all cases, even for implementations that support the SAD - field. The encapsulation and decapsulation processing changes are - REQUIRED, but MAY be implemented without the other two changes by - assuming that ECN usage is always forbidden. 
The full-functionality - alternative for ECN usage over IPsec tunnels consists of the SAD - field and the full version of encapsulation and decapsulation - processing changes, with or without the OPTIONAL negotiation support. - The limited-functionality alternative consists of a subset of the - encapsulation and decapsulation changes that always forbids ECN - usage. - - - -Ramakrishnan, et al. Standards Track [Page 31] - -RFC 3168 The Addition of ECN to IP September 2001 - - - These changes are covered further in the following three subsections. - -9.2.1.1. ECN Tunnel Security Association Database Field - - Full ECN functionality adds a new field to the SAD (see [RFC2401]): - - ECN Tunnel: allowed or forbidden. - - Indicates whether ECN-capable connections using this SA in tunnel - mode are permitted to receive ECN congestion notifications for - congestion occurring within the tunnel. The allowed value enables - ECN congestion notifications. The forbidden value disables such - notifications, causing all congestion to be indicated via dropped - packets. - - [OPTIONAL. The value of this field SHOULD be assumed to be - "forbidden" in implementations that do not support it.] - - If this attribute is implemented, then the SA specification in a - Security Policy Database (SPD) entry MUST support a corresponding - attribute, and this SPD attribute MUST be covered by the SPD - administrative interface (currently described in Section 4.4.1 of - [RFC2401]). - -9.2.1.2. ECN Tunnel Security Association Attribute - - A new IPsec Security Association Attribute is defined to enable the - support for ECN congestion notifications based on the outer IP header - to be negotiated for IPsec tunnels (see [RFC2407]). This attribute - is OPTIONAL, although implementations that support it SHOULD also - support the SAD field defined in Section 9.2.1.1. 
- - Attribute Type - - class value type - ------------------------------------------------- - ECN Tunnel 10 Basic - - The IPsec SA Attribute value 10 has been allocated by IANA to - indicate that the ECN Tunnel SA Attribute is being negotiated; the - type of this attribute is Basic (see Section 4.5 of [RFC2407]). The - Class Values are used to conduct the negotiation. See [RFC2407, - RFC2408, RFC2409] for further information including encoding formats - and requirements for negotiating this SA attribute. - - - - - - - -Ramakrishnan, et al. Standards Track [Page 32] - -RFC 3168 The Addition of ECN to IP September 2001 - - - Class Values - - ECN Tunnel - - Specifies whether ECN functionality is allowed to be used with Tunnel - Encapsulation Mode. This affects tunnel encapsulation and - decapsulation processing - see Section 9.2.1.3. - - RESERVED 0 - Allowed 1 - Forbidden 2 - - Values 3-61439 are reserved to IANA. Values 61440-65535 are for - private use. - - If unspecified, the default shall be assumed to be Forbidden. - - ECN Tunnel is a new SA attribute, and hence initiators that use it - can expect to encounter responders that do not understand it, and - therefore reject proposals containing it. For backwards - compatibility with such implementations initiators SHOULD always also - include a proposal without the ECN Tunnel attribute to enable such a - responder to select a transform or proposal that does not contain the - ECN Tunnel attribute. RFC 2407 currently requires responders to - reject all proposals if any proposal contains an unknown attribute; - this requirement is expected to be changed to require a responder not - to select proposals or transforms containing unknown attributes. - -9.2.1.3. 
Changes to IPsec Tunnel Header Processing - - For full ECN support, the encapsulation and decapsulation processing - for the IPv4 TOS field and the IPv6 Traffic Class field are changed - from that specified in [RFC2401] to the following: - - <-- How Outer Hdr Relates to Inner Hdr --> - Outer Hdr at Inner Hdr at - IPv4 Encapsulator Decapsulator - Header fields: -------------------- ------------ - DS Field copied from inner hdr (5) no change - ECN Field constructed (7) constructed (8) - - IPv6 - Header fields: - DS Field copied from inner hdr (6) no change - ECN Field constructed (7) constructed (8) - - - - - - -Ramakrishnan, et al. Standards Track [Page 33] - -RFC 3168 The Addition of ECN to IP September 2001 - - - (5)(6) If the packet will immediately enter a domain for which the - DSCP value in the outer header is not appropriate, that value MUST - be mapped to an appropriate value for the domain [RFC 2474]. Also - see [RFC 2475] for further information. - - (7) If the value of the ECN Tunnel field in the SAD entry for this - SA is "allowed" and the ECN field in the inner header is set to - any value other than CE, copy this ECN field to the outer header. - If the ECN field in the inner header is set to CE, then set the - ECN field in the outer header to ECT(0). - - (8) If the value of the ECN tunnel field in the SAD entry for this - SA is "allowed" and the ECN field in the inner header is set to - ECT(0) or ECT(1) and the ECN field in the outer header is set to - CE, then copy the ECN field from the outer header to the inner - header. Otherwise, make no change to the ECN field in the inner - header. - - (5) and (6) are identical to match usage in [RFC2401], although - they are different in [RFC2401]. - - The above description applies to implementations that support the ECN - Tunnel field in the SAD; such implementations MUST implement this - processing instead of the processing of the IPv4 TOS octet and IPv6 - Traffic Class octet defined in [RFC2401]. 
This constitutes the - full-functionality alternative for ECN usage with IPsec tunnels. - - An implementation that does not support the ECN Tunnel field in the - SAD MUST implement this processing by assuming that the value of the - ECN Tunnel field of the SAD is "forbidden" for every SA. In this - case, the processing of the ECN field reduces to: - - (7) Set the ECN field to not-ECT in the outer header. - (8) Make no change to the ECN field in the inner header. - - This constitutes the limited functionality alternative for ECN usage - with IPsec tunnels. - - For backwards compatibility, packets with the CE codepoint set in the - outer header SHOULD be dropped if they arrive on an SA that is using - the limited-functionality option, or that is using the full- - functionality option with the not-ECT codepoint set in the inner - header. - - - - - - - - -Ramakrishnan, et al. Standards Track [Page 34] - -RFC 3168 The Addition of ECN to IP September 2001 - - -9.2.2. Changes to the ECN Field within an IPsec Tunnel. - - If the ECN Field is changed inappropriately within an IPsec tunnel, - and this change is detected at the tunnel egress, then the receipt of - a packet not satisfying the appropriate condition for its SA is an - auditable event. An implementation MAY create audit records with - per-SA counts of incorrect packets over some time period rather than - creating an audit record for each erroneous packet. Any such audit - record SHOULD contain the headers from at least one erroneous packet, - but need not contain the headers from every packet represented by the - entry. - -9.2.3. Comments for IPsec Support - - Substantial comments were received on two areas of this document - during review by the IPsec working group. This section describes - these comments and explains why the proposed changes were not - incorporated. - - The first comment indicated that per-node configuration is easier to - implement than per-SA configuration.
After serious thought and - despite some initial encouragement of per-node configuration, it no - longer seems to be a good idea. The concern is that as ECN-awareness - is progressively deployed in IPsec, many ECN-aware IPsec - implementations will find themselves communicating with a mixture of - ECN-aware and ECN-unaware IPsec tunnel endpoints. In such an - environment with per-node configuration, the only reasonable thing to - do is forbid ECN usage for all IPsec tunnels, which is not the - desired outcome. - - In the second area, several reviewers noted that SA negotiation is - complex, and adding to it is non-trivial. One reviewer suggested - using ICMP after tunnel setup as a possible alternative. The - addition to SA negotiation in this document is OPTIONAL and will - remain so; implementers are free to ignore it. The authors believe - that the assurance it provides can be useful in a number of - situations. In practice, if this is not implemented, it can be - deleted at a subsequent stage in the standards process. Extending - ICMP to negotiate ECN after tunnel setup is more complex than - extending SA attribute negotiation. Some tunnels do not permit - traffic to be addressed to the tunnel egress endpoint, hence the ICMP - packet would have to be addressed to somewhere else, scanned for by - the egress endpoint, and discarded there or at its actual - destination. In addition, ICMP delivery is unreliable, and hence - there is a possibility of an ICMP packet being dropped, entailing the - invention of yet another ack/retransmit mechanism. It seems better - simply to specify an OPTIONAL extension to the existing SA - negotiation mechanism. - - - -Ramakrishnan, et al. Standards Track [Page 35] - -RFC 3168 The Addition of ECN to IP September 2001 - - -9.3. IP packets encapsulated in non-IP Packet Headers. - - A different set of issues are raised, relative to ECN, when IP - packets are encapsulated in tunnels with non-IP packet headers. 
This - occurs with MPLS [MPLS], GRE [GRE], L2TP [L2TP], and PPTP [PPTP]. - For these protocols, there is no conflict with ECN; it is just that - ECN cannot be used within the tunnel unless an ECN codepoint can be - specified for the header of the encapsulating protocol. Earlier work - considered a preliminary proposal for incorporating ECN into MPLS, - and proposals for incorporating ECN into GRE, L2TP, or PPTP will be - considered as the need arises. - -10. Issues Raised by Monitoring and Policing Devices - - One possibility is that monitoring and policing devices (or more - informally, "penalty boxes") will be installed in the network to - monitor whether best-effort flows are appropriately responding to - congestion, and to preferentially drop packets from flows determined - not to be using adequate end-to-end congestion control procedures. - - We recommend that any "penalty box" that detects a flow or an - aggregate of flows that is not responding to end-to-end congestion - control first change from marking to dropping packets from that flow, - before taking any additional action to restrict the bandwidth - available to that flow. Thus, initially, the router may drop packets - in which the router would otherwise have set the CE codepoint. - This could include dropping those arriving packets for that flow that - are ECN-Capable and that already have the CE codepoint set. In this - way, any congestion indications seen by that router for that flow - will be guaranteed to also be seen by the end nodes, even in the - presence of malicious or broken routers elsewhere in the path. If we - assume that the first action taken at any "penalty box" for an ECN- - capable flow will be to drop packets instead of marking them, then - there is no way that an adversary that subverts ECN-based end-to-end - congestion control can cause a flow to be characterized as being - non-cooperative and placed into a more severe action within the - "penalty box".
- - The monitoring and policing devices that are actually deployed could - fall short of the `ideal' monitoring device described above, in that - the monitoring is applied not to a single flow, but to an aggregate - of flows (e.g., those sharing a single IPsec tunnel). In this case, - the switch from marking to dropping would apply to all of the flows - in that aggregate, denying the benefits of ECN to the other flows in - the aggregate also. At the highest level of aggregation, another - form of the disabling of ECN happens even in the absence of - - - - - -Ramakrishnan, et al. Standards Track [Page 36] - -RFC 3168 The Addition of ECN to IP September 2001 - - - monitoring and policing devices, when ECN-Capable RED queues switch - from marking to dropping packets as an indication of congestion when - the average queue size has exceeded some threshold. - -11. Evaluations of ECN - -11.1. Related Work Evaluating ECN - - This section discusses some of the related work evaluating the use of - ECN. The ECN Web Page [ECN] has pointers to other papers, as well as - to implementations of ECN. - - [Floyd94] considers the advantages and drawbacks of adding ECN to the - TCP/IP architecture. As shown in the simulation-based comparisons, - one advantage of ECN is to avoid unnecessary packet drops for short - or delay-sensitive TCP connections. A second advantage of ECN is in - avoiding some unnecessary retransmit timeouts in TCP. This paper - discusses in detail the integration of ECN into TCP's congestion - control mechanisms. The possible disadvantages of ECN discussed in - the paper are that a non-compliant TCP connection could falsely - advertise itself as ECN-capable, and that a TCP ACK packet carrying - an ECN-Echo message could itself be dropped in the network. The - first of these two issues is discussed in the appendix of this - document, and the second is addressed by the addition of the CWR flag - in the TCP header. 
- - Experimental evaluations of ECN include [RFC2884,K98]. The - conclusions of [K98] and [RFC2884] are that ECN TCP gets moderately - better throughput than non-ECN TCP; that ECN TCP flows are fair - towards non-ECN TCP flows; and that ECN TCP is robust with two-way - traffic (with congestion in both directions) and with multiple - congested gateways. Experiments with many short web transfers show - that, while most of the short connections have similar transfer times - with or without ECN, a small percentage of the short connections have - very long transfer times for the non-ECN experiments as compared to - the ECN experiments. - -11.2. A Discussion of the ECN nonce. - - The use of two ECT codepoints, ECT(0) and ECT(1), can provide a one- - bit ECN nonce in packet headers [SCWA99]. The primary motivation for - this is the desire to allow mechanisms for the data sender to verify - that network elements are not erasing the CE codepoint, and that data - receivers are properly reporting to the sender the receipt of packets - with the CE codepoint set, as required by the transport protocol. - This section discusses issues of backwards compatibility with IP ECN - implementations in routers conformant with RFC 2481, in which only - one ECT codepoint was defined. We do not believe that the - - - -Ramakrishnan, et al. Standards Track [Page 37] - -RFC 3168 The Addition of ECN to IP September 2001 - - - incremental deployment of ECN implementations that understand the - ECT(1) codepoint will cause significant operational problems. This - is particularly likely to be the case when the deployment of the - ECT(1) codepoint begins with routers, before the ECT(1) codepoint - starts to be used by end-nodes. - -11.2.1. The Incremental Deployment of ECT(1) in Routers. - - ECN has been an Experimental standard since January 1999, and there - are already implementations of ECN in routers that do not understand - the ECT(1) codepoint. 
When the use of the ECT(1) codepoint is - standardized for TCP or for other transport protocols, this could - mean that a data sender is using the ECT(1) codepoint, but that this - codepoint is not understood by a congested router on the path. - - If allowed by the transport protocol, a data sender would be free not - to make use of ECT(1) at all, and to send all ECN-capable packets - with the codepoint ECT(0). However, if an ECN-capable sender is - using ECT(1), and the congested router on the path did not understand - the ECT(1) codepoint, then the router would end up marking some of - the ECT(0) packets, and dropping some of the ECT(1) packets, as - indications of congestion. Since TCP is required to react to both - marked and dropped packets, this behavior of dropping packets that - could have been marked poses no significant threat to the network, - and is consistent with the overall approach to ECN that allows - routers to determine when and whether to mark packets as they see fit - (see Section 5). - -12. Summary of changes required in IP and TCP - - This document specified two bits in the IP header to be used for ECN. - The not-ECT codepoint indicates that the transport protocol will - ignore the CE codepoint. This is the default value for the ECN - codepoint. The ECT codepoints indicate that the transport protocol - is willing and able to participate in ECN. - - The router sets the CE codepoint to indicate congestion to the end - nodes. The CE codepoint in a packet header MUST NOT be reset by a - router. - - TCP requires three changes for ECN, a setup phase and two new flags - in the TCP header. The ECN-Echo flag is used by the data receiver to - inform the data sender of a received CE packet. The Congestion - Window Reduced (CWR) flag is used by the data sender to inform the - data receiver that the congestion window has been reduced. - - - - - - -Ramakrishnan, et al. 
Standards Track [Page 38] - -RFC 3168 The Addition of ECN to IP September 2001 - - - When ECN (Explicit Congestion Notification) is used, it is required - that congestion indications generated within an IP tunnel not be lost - at the tunnel egress. We specified a minor modification to the IP - protocol's handling of the ECN field during encapsulation and de- - capsulation to allow flows that will undergo IP tunneling to use ECN. - - Two options for ECN in tunnels were specified: - - 1) A limited-functionality option that does not use ECN inside the IP - tunnel, by setting the ECN field in the outer header to not-ECT, and - not altering the inner header at the time of decapsulation. - - 2) The full-functionality option, which sets the ECN field in the - outer header to either not-ECT or to one of the ECT codepoints, - depending on the ECN field in the inner header. At decapsulation, if - the CE codepoint is set in the outer header, and the inner header is - set to one of the ECT codepoints, then the CE codepoint is copied to - the inner header. - - For IPsec tunnels, this document also defines an optional IPsec - Security Association (SA) attribute that enables negotiation of ECN - usage within IPsec tunnels and an optional field in the Security - Association Database to indicate whether ECN is permitted in tunnel - mode on a SA. The required changes to IPsec tunnels for ECN usage - modify RFC 2401 [RFC2401], which defines the IPsec architecture and - specifies some aspects of its implementation. The new IPsec SA - attribute is in addition to those already defined in Section 4.5 of - [RFC2407]. - - This document obsoletes RFC 2481, "A Proposal to add Explicit - Congestion Notification (ECN) to IP", which defined ECN as an - Experimental Protocol for the Internet Community. The rest of this - section describes the relationship between this document and its - predecessor. 
- - RFC 2481 included a brief discussion of the use of ECN with - encapsulated packets, and noted that for the IPsec specifications at - the time (January 1999), flows could not safely use ECN if they were - to traverse IPsec tunnels. RFC 2481 also described the changes that - could be made to IPsec tunnel specifications to make them compatible - with ECN. - - This document also incorporates work that was done after RFC 2481. - First was to describe the changes to IPsec tunnels in detail, and - extensively discuss the security implications of ECN (now included as - Sections 18 and 19 of this document). Second was to extend the - discussion of IPsec tunnels to include all IP tunnels. Because older - IP tunnels are not compatible with a flow's use of ECN, the - - - -Ramakrishnan, et al. Standards Track [Page 39] - -RFC 3168 The Addition of ECN to IP September 2001 - - - deployment of ECN in the Internet will create strong pressure for - older IP tunnels to be updated to an ECN-compatible version, using - either the limited-functionality or the full-functionality option. - - This document does not address the issue of including ECN in non-IP - tunnels such as MPLS, GRE, L2TP, or PPTP. An earlier preliminary - document about adding ECN support to MPLS was not advanced. - - A third new piece of work after RFC 2481 was to describe the ECN - procedure with retransmitted data packets, that an ECT codepoint - should not be set on retransmitted data packets. The motivation for - this additional specification is to eliminate a possible avenue for - denial-of-service attacks on an existing TCP connection. Some prior - deployments of ECN-capable TCP might not conform to the (new) - requirement not to set an ECT codepoint on retransmitted packets; we - do not believe this will cause significant problems in practice. - - This document also expands slightly on the specification of the use - of SYN packets for the negotiation of ECN.
While some prior - deployments of ECN-capable TCP might not conform to the requirements - specified in this document, we do not believe that this will lead to - any performance or compatibility problems for TCP connections with a - combination of TCP implementations at the endpoints. - - This document also includes the specification of the ECT(1) - codepoint, which may be used by TCP as part of the implementation of - an ECN nonce. - -13. Conclusions - - Given the current effort to implement AQM, we believe this is the - right time to deploy congestion avoidance mechanisms that do not - depend on packet drops alone. With the increased deployment of - applications and transports sensitive to the delay and loss of a - single packet (e.g., realtime traffic, short web transfers), - depending on packet loss as a normal congestion notification - mechanism appears to be insufficient (or at the very least, non- - optimal). - - We examined the consequence of modifications of the ECN field within - the network, analyzing all the opportunities for an adversary to - change the ECN field. In many cases, the change to the ECN field is - no worse than dropping a packet. However, we noted that some changes - have the more serious consequence of subverting end-to-end congestion - control. However, we point out that even then the potential damage - is limited, and is similar to the threat posed by end-systems - intentionally failing to cooperate with end-to-end congestion - control. - - - -Ramakrishnan, et al. Standards Track [Page 40] - -RFC 3168 The Addition of ECN to IP September 2001 - - -14. Acknowledgements - - Many people have made contributions to this work and this document, - including many that we have not managed to directly acknowledge in - this document. 
In addition, we would like to thank Kenjiro Cho for - the proposal for the TCP mechanism for negotiating ECN-Capability, - Kevin Fall for the proposal of the CWR bit, Steve Blake for material - on IPv4 Header Checksum Recalculation, Jamal Hadi-Salim for - discussions of ECN issues, and Steve Bellovin, Jim Bound, Brian - Carpenter, Paul Ferguson, Stephen Kent, Greg Minshall, and Vern - Paxson for discussions of security issues. We also thank the - Internet End-to-End Research Group for ongoing discussions of these - issues. - - Email discussions with a number of people, including Dax Kelson, - Alexey Kuznetsov, Jamal Hadi-Salim, and Venkat Venkatsubra, have - addressed the issues raised by non-conformant equipment in the - Internet that does not respond to TCP SYN packets with the ECE and - CWR flags set. We thank Mark Handley, Jitendra Padhye, and others - for discussions on the TCP initialization procedures. - - The discussion of ECN and IP tunnel considerations draws heavily on - related discussions and documents from the Differentiated Services - Working Group. We thank Tabassum Bint Haque from Dhaka, Bangladesh, - for feedback on IP tunnels. We thank Derrell Piper and Kero Tivinen - for proposing modifications to RFC 2407 that improve the usability of - negotiating the ECN Tunnel SA attribute. - - We thank David Wetherall, David Ely, and Neil Spring for the proposal - for the ECN nonce. We also thank Stefan Savage for discussions on - this issue. We thank Bob Briscoe and Jon Crowcroft for raising the - issue of fragmentation in IP, on alternate semantics for the fourth - ECN codepoint, and several other topics. We thank Richard Wendland - for feedback on several issues in the document. - - We also thank the IESG, and in particular the Transport Area - Directors over the years, for their feedback and their work towards - the standardization of ECN. - -15. References - - [AH] Kent, S. and R. Atkinson, "IP Authentication Header", - RFC 2402, November 1998.
- - [ECN] "The ECN Web Page", URL - "http://www.aciri.org/floyd/ecn.html". Reference for - informational purposes only. - - - - -Ramakrishnan, et al. Standards Track [Page 41] - -RFC 3168 The Addition of ECN to IP September 2001 - - - [ESP] Kent, S. and R. Atkinson, "IP Encapsulating Security - Payload", RFC 2406, November 1998. - - [FIXES] ECN-under-Linux Unofficial Vendor Support Page, URL - "http://gtf.org/garzik/ecn/". Reference for - informational purposes only. - - [FJ93] Floyd, S., and Jacobson, V., "Random Early Detection - gateways for Congestion Avoidance", IEEE/ACM - Transactions on Networking, V.1 N.4, August 1993, p. - 397-413. - - [Floyd94] Floyd, S., "TCP and Explicit Congestion Notification", - ACM Computer Communication Review, V. 24 N. 5, October - 1994, p. 10-23. - - [Floyd98] Floyd, S., "The ECN Validation Test in the NS - Simulator", URL "http://www-mash.cs.berkeley.edu/ns/", - test tcl/test/test-all- ecn. Reference for - informational purposes only. - - [FF99] Floyd, S., and Fall, K., "Promoting the Use of End-to- - End Congestion Control in the Internet", IEEE/ACM - Transactions on Networking, August 1999. - - [FRED] Lin, D., and Morris, R., "Dynamics of Random Early - Detection", SIGCOMM '97, September 1997. - - [GRE] Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic - Routing Encapsulation (GRE)", RFC 1701, October 1994. - - [Jacobson88] V. Jacobson, "Congestion Avoidance and Control", Proc. - ACM SIGCOMM '88, pp. 314-329. - - [Jacobson90] V. Jacobson, "Modified TCP Congestion Avoidance - Algorithm", Message to end2end-interest mailing list, - April 1990. URL - "ftp://ftp.ee.lbl.gov/email/vanj.90apr30.txt". - - [K98] Krishnan, H., "Analyzing Explicit Congestion - Notification (ECN) benefits for TCP", Master's thesis, - UCLA, 1998. Citation for acknowledgement purposes only. - - [L2TP] Townsley, W., Valencia, A., Rubens, A., Pall, G., Zorn, - G. and B. Palter, "Layer Two Tunneling Protocol "L2TP"", - RFC 2661, August 1999. 
- - - - - -Ramakrishnan, et al. Standards Track [Page 42] - -RFC 3168 The Addition of ECN to IP September 2001 - - - [MJV96] S. McCanne, V. Jacobson, and M. Vetterli, "Receiver- - driven Layered Multicast", SIGCOMM '96, August 1996, pp. - 117-130. - - [MPLS] Awduche, D., Malcolm, J., Agogbua, J., O'Dell, M. and J. - McManus, Requirements for Traffic Engineering Over MPLS, - RFC 2702, September 1999. - - [PPTP] Hamzeh, K., Pall, G., Verthein, W., Taarud, J., Little, - W. and G. Zorn, "Point-to-Point Tunneling Protocol - (PPTP)", RFC 2637, July 1999. - - [RFC791] Postel, J., "Internet Protocol", STD 5, RFC 791, - September 1981. - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC1141] Mallory, T. and A. Kullberg, "Incremental Updating of - the Internet Checksum", RFC 1141, January 1990. - - [RFC1349] Almquist, P., "Type of Service in the Internet Protocol - Suite", RFC 1349, July 1992. - - [RFC1455] Eastlake, D., "Physical Link Security Type of Service", - RFC 1455, May 1993. - - [RFC1701] Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic - Routing Encapsulation (GRE)", RFC 1701, October 1994. - - [RFC1702] Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic - Routing Encapsulation over IPv4 networks", RFC 1702, - October 1994. - - [RFC2003] Perkins, C., "IP Encapsulation within IP", RFC 2003, - October 1996. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2309] Braden, B., et al., "Recommendations on Queue Management - and Congestion Avoidance in the Internet", RFC 2309, - April 1998. - - [RFC2401] Kent, S. and R. Atkinson, Security Architecture for the - Internet Protocol, RFC 2401, November 1998. - - - - - -Ramakrishnan, et al. Standards Track [Page 43] - -RFC 3168 The Addition of ECN to IP September 2001 - - - [RFC2407] Piper, D., "The Internet IP Security Domain of - Interpretation for ISAKMP", RFC 2407, November 1998. 
- - [RFC2408] Maughan, D., Schertler, M., Schneider, M. and J. Turner, - "Internet Security Association and Key Management - Protocol (ISAKMP)", RFC 2408, November 1998. - - [RFC2409] Harkins D. and D. Carrel, "The Internet Key Exchange - (IKE)", RFC 2409, November 1998. - - [RFC2474] Nichols, K., Blake, S., Baker, F. and D. Black, - "Definition of the Differentiated Services Field (DS - Field) in the IPv4 and IPv6 Headers", RFC 2474, December - 1998. - - [RFC2475] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. - and W. Weiss, "An Architecture for Differentiated - Services", RFC 2475, December 1998. - - [RFC2481] Ramakrishnan K. and S. Floyd, "A Proposal to add - Explicit Congestion Notification (ECN) to IP", RFC 2481, - January 1999. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2884] Hadi Salim, J. and U. Ahmed, "Performance Evaluation of - Explicit Congestion Notification (ECN) in IP Networks", - RFC 2884, July 2000. - - [RFC2983] Black, D., "Differentiated Services and Tunnels", - RFC 2983, October 2000. - - [RFC2780] Bradner S. and V. Paxson, "IANA Allocation Guidelines - For Values In the Internet Protocol and Related - Headers", BCP 37, RFC 2780, March 2000. - - [RJ90] K. K. Ramakrishnan and Raj Jain, "A Binary Feedback - Scheme for Congestion Avoidance in Computer Networks", - ACM Transactions on Computer Systems, Vol.8, No.2, pp. - 158-181, May 1990. - - [SCWA99] Stefan Savage, Neal Cardwell, David Wetherall, and Tom - Anderson, TCP Congestion Control with a Misbehaving - Receiver, ACM Computer Communications Review, October - 1999. - - - - - -Ramakrishnan, et al. Standards Track [Page 44] - -RFC 3168 The Addition of ECN to IP September 2001 - - - [TBIT] Jitendra Padhye and Sally Floyd, "Identifying the TCP - Behavior of Web Servers", ICSI TR-01-002, February 2001. - URL "http://www.aciri.org/tbit/". - -16.
Security Considerations - - Security considerations have been discussed in Sections 7, 8, 18, and - 19. - -17. IPv4 Header Checksum Recalculation - - IPv4 header checksum recalculation is an issue with some high-end - router architectures using an output-buffered switch, since most if - not all of the header manipulation is performed on the input side of - the switch, while the ECN decision would need to be made local to the - output buffer. This is not an issue for IPv6, since there is no IPv6 - header checksum. The IPv4 TOS octet is the last byte of a 16-bit - half-word. - - RFC 1141 [RFC1141] discusses the incremental updating of the IPv4 - checksum after the TTL field is decremented. The incremental - updating of the IPv4 checksum after the CE codepoint was set would - work as follows: Let HC be the original header checksum for an ECT(0) - packet, and let HC' be the new header checksum after the CE bit has - been set. That is, the ECN field has changed from '10' to '11'. - Then for header checksums calculated with one's complement - subtraction, HC' would be recalculated as follows: - - HC' = { HC - 1 HC > 1 - { 0x0000 HC = 1 - - For header checksums calculated on two's complement machines, HC' - would be recalculated as follows after the CE bit was set: - - HC' = { HC - 1 HC > 0 - { 0xFFFE HC = 0 - - A similar incremental updating of the IPv4 checksum can be carried - out when the ECN field is changed from ECT(1) to CE, that is, from ' - 01' to '11'. - -18. Possible Changes to the ECN Field in the Network - - This section discusses in detail possible changes to the ECN field in - the network, such as falsely reporting congestion, disabling ECN- - Capability for an individual packet, erasing the ECN congestion - indication, or falsely indicating ECN-Capability. - - - - -Ramakrishnan, et al. Standards Track [Page 45] - -RFC 3168 The Addition of ECN to IP September 2001 - - -18.1. Possible Changes to the IP Header - -18.1.1. 
Erasing the Congestion Indication - - First, we consider the changes that a router could make that would - result in effectively erasing the congestion indication after it had - been set by a router upstream. The convention followed is: ECN - codepoint of received packet -> ECN codepoint of packet transmitted. - - Replacing the CE codepoint with the ECT(0) or ECT(1) codepoint - effectively erases the congestion indication. However, with the use - of two ECT codepoints, a router erasing the CE codepoint has no way - to know whether the original ECT codepoint was ECT(0) or ECT(1). - Thus, it is possible for the transport protocol to deploy mechanisms - to detect such erasures of the CE codepoint. - - The consequence of the erasure of the CE codepoint for the upstream - router is that there is a potential for congestion to build for a - time, because the congestion indication does not reach the source. - However, the packet would be received and acknowledged. - - The potential effect of erasing the congestion indication is complex, - and is discussed in depth in Section 19 below. Note that the effect - of erasing the congestion indication is different from dropping a - packet in the network. When a data packet is dropped, the drop is - detected by the TCP sender, and interpreted as an indication of - congestion. Similarly, if a sufficient number of consecutive - acknowledgement packets are dropped, causing the cumulative - acknowledgement field not to be advanced at the sender, the sender is - limited by the congestion window from sending additional packets, and - ultimately the retransmit timer expires. - - In contrast, a systematic erasure of the CE bit by a downstream - router can have the effect of causing a queue buildup at an upstream - router, including the possible loss of packets due to buffer - overflow. 
There is a potential of unfairness in that another flow - that goes through the congested router could react to the CE bit set - while the flow that has the CE bit erased could see better - performance. The limitations on this potential unfairness are - discussed in more detail in Section 19 below. - - The last of the three changes is to replace the CE codepoint with the - not-ECT codepoint, thus erasing the congestion indication and - disabling ECN-Capability at the same time. - - The `erasure' of the congestion indication is only effective if the - packet does not end up being marked or dropped again by a downstream - router. If the CE codepoint is replaced by an ECT codepoint, the - - - -Ramakrishnan, et al. Standards Track [Page 46] - -RFC 3168 The Addition of ECN to IP September 2001 - - - packet remains ECN-Capable, and could be either marked or dropped by - a downstream router as an indication of congestion. If the CE - codepoint is replaced by the not-ECT codepoint, the packet is no - longer ECN-capable, and can therefore be dropped but not marked by a - downstream router as an indication of congestion. - -18.1.2. Falsely Reporting Congestion - - This change is to set the CE codepoint when an ECT codepoint was - already set, even though there was no congestion. This change does - not affect the treatment of that packet along the rest of the path. - In particular, a router does not examine the CE codepoint in deciding - whether to drop or mark an arriving packet. - - However, this could result in the application unnecessarily invoking - end-to-end congestion control, and reducing its arrival rate. By - itself, this is no worse (for the application or for the network) - than if the tampering router had actually dropped the packet. - -18.1.3. Disabling ECN-Capability - - This change is to turn off the ECT codepoint of a packet. 
This means - that if the packet later encounters congestion (e.g., by arriving to - a RED queue with a moderate average queue size), it will be dropped - instead of being marked. By itself, this is no worse (for the - application) than if the tampering router had actually dropped the - packet. The saving grace in this particular case is that there is no - congested router upstream expecting a reaction from setting the CE - bit. - -18.1.4. Falsely Indicating ECN-Capability - - This change would incorrectly label a packet as ECN-Capable. The - packet may have been sent either by an ECN-Capable transport or a - transport that is not ECN-Capable. - - If the packet later encounters moderate congestion at an ECN-Capable - router, the router could set the CE codepoint instead of dropping the - packet. If the transport protocol in fact is not ECN-Capable, then - the transport will never receive this indication of congestion, and - will not reduce its sending rate in response. The potential - consequences of falsely indicating ECN-capability are discussed - further in Section 19 below. - - If the packet never later encounters congestion at an ECN-Capable - router, then the first of these two changes would have no effect, - other than possibly interfering with the use of the ECN nonce by the - transport protocol. The last change, however, would have the effect - - - -Ramakrishnan, et al. Standards Track [Page 47] - -RFC 3168 The Addition of ECN to IP September 2001 - - - of giving false reports of congestion to a monitoring device along - the path. If the transport protocol is ECN-Capable, then this change - could also have an effect at the transport level, by combining - falsely indicating ECN-Capability with falsely reporting congestion. - For an ECN-capable transport, this would cause the transport to - unnecessarily react to congestion. In this particular case, the - router that is incorrectly changing the ECN field could have dropped - the packet. 
Thus for this case of an ECN-capable transport, the - consequence of this change to the ECN field is no worse than dropping - the packet. - -18.2. Information carried in the Transport Header - - For TCP, an ECN-capable TCP receiver informs its TCP peer that it is - ECN-capable at the TCP level, conveying this information in the TCP - header at the time the connection is setup. This document does not - consider potential dangers introduced by changes in the transport - header within the network. We note that when IPsec is used, the - transport header is protected both in tunnel and transport modes - [ESP, AH]. - - Another issue concerns TCP packets with a spoofed IP source address - carrying invalid ECN information in the transport header. For - completeness, we examine here some possible ways that a node spoofing - the IP source address of another node could use the two ECN flags in - the TCP header to launch a denial-of-service attack. However, these - attacks would require an ability for the attacker to use valid TCP - sequence numbers, and any attacker with this ability and with the - ability to spoof IP source addresses could damage the TCP connection - without using the ECN flags. Therefore, ECN does not add any new - vulnerabilities in this respect. - - An acknowledgement packet with a spoofed IP source address of the TCP - data receiver could include the ECE bit set. If accepted by the TCP - data sender as a valid packet, this spoofed acknowledgement packet - could result in the TCP data sender unnecessarily halving its - congestion window. However, to be accepted by the data sender, such - a spoofed acknowledgement packet would have to have the correct 32- - bit sequence number as well as a valid acknowledgement number. An - attacker that could successfully send such a spoofed acknowledgement - packet could also send a spoofed RST packet, or do other equally - damaging operations to the TCP connection. 
- - Packets with a spoofed IP source address of the TCP data sender could - include the CWR bit set. Again, to be accepted, such a packet would - have to have a valid sequence number. In addition, such a spoofed - packet would have a limited performance impact. Spoofing a data - packet with the CWR bit set could result in the TCP data receiver - - - -Ramakrishnan, et al. Standards Track [Page 48] - -RFC 3168 The Addition of ECN to IP September 2001 - - - sending fewer ECE packets than it would otherwise, if the data - receiver was sending ECE packets when it received the spoofed CWR - packet. - -18.3. Split Paths - - In some cases, a malicious or broken router might have access to only - a subset of the packets from a flow. The question is as follows: - can this router, by altering the ECN field in this subset of the - packets, do more damage to that flow than if it had simply dropped - that set of packets? - - We will classify the packets in the flow as A packets and B packets, - and assume that the adversary only has access to A packets. Assume - that the adversary is subverting end-to-end congestion control along - the path traveled by A packets only, by either falsely indicating - ECN-Capability upstream of the point where congestion occurs, or - erasing the congestion indication downstream. Consider also that - there exists a monitoring device that sees both the A and B packets, - and will "punish" both the A and B packets if the total flow is - determined not to be properly responding to indications of - congestion. Another key characteristic that we believe is likely to - be true is that the monitoring device, before `punishing' the A&B - flow, will first drop packets instead of setting the CE codepoint, - and will drop arriving packets of that flow that already have the CE - codepoint set. 
If the end nodes are in fact using end-to-end - congestion control, they will see all of the indications of - congestion seen by the monitoring device, and will begin to respond - to these indications of congestion. Thus, the monitoring device is - successful in providing the indications to the flow at an early - stage. - - It is true that the adversary that has access only to the A packets - might, by subverting ECN-based congestion control, be able to deny - the benefits of ECN to the other packets in the A&B aggregate. While - this is unfortunate, this is not a reason to disable ECN. - - A variant of falsely reporting congestion occurs when there are two - adversaries along a path, where the first adversary falsely reports - congestion, and the second adversary `erases' those reports. (Unlike - packet drops, ECN congestion reports can be `reversed' later in the - network by a malicious or broken router. However, the use of the ECN - nonce could help the transport to detect this behavior.) While this - would be transparent to the end node, it is possible that a - monitoring device between the first and second adversaries would see - the false indications of congestion. Keep in mind our recommendation - in this document, that before `punishing' a flow for not responding - appropriately to congestion, the router will first switch to dropping - - - -Ramakrishnan, et al. Standards Track [Page 49] - -RFC 3168 The Addition of ECN to IP September 2001 - - - rather than marking as an indication of congestion, for that flow. - When this includes dropping arriving packets from that flow that have - the CE codepoint set, this ensures that these indications of - congestion are being seen by the end nodes. Thus, there is no - additional harm that we are able to postulate as a result of multiple - conflicting adversaries. - -19. 
Implications of Subverting End-to-End Congestion Control - - This section focuses on the potential repercussions of subverting - end-to-end congestion control by either falsely indicating ECN- - Capability, or by erasing the congestion indication in ECN (the CE - codepoint). Subverting end-to-end congestion control by either of - these two methods can have consequences both for the application and - for the network. We discuss these separately below. - - The first method to subvert end-to-end congestion control, that of - falsely indicating ECN-Capability, effectively subverts end-to-end - congestion control only if the packet later encounters congestion - that results in the setting of the CE codepoint. In this case, the - transport protocol (which may not be ECN-capable) does not receive - the indication of congestion from these downstream congested routers. - - The second method to subvert end-to-end congestion control, `erasing' - the CE codepoint in a packet, effectively subverts end-to-end - congestion control only when the CE codepoint in the packet was set - earlier by a congested router. In this case, the transport protocol - does not receive the indication of congestion from the upstream - congested routers. - - Either of these two methods of subverting end-to-end congestion - control can potentially introduce more damage to the network (and - possibly to the flow itself) than if the adversary had simply dropped - packets from that flow. However, as we discuss later in this section - and in Section 7, this potential damage is limited. - -19.1. Implications for the Network and for Competing Flows - - The CE codepoint of the ECN field is only used by routers as an - indication of congestion during periods of *moderate* congestion. - ECN-capable routers should drop rather than mark packets during heavy - congestion even if the router's queue is not yet full. 
For example, - for routers using active queue management based on RED, the router - should drop rather than mark packets that arrive while the average - queue sizes exceed the RED queue's maximum threshold. - - - - - - -Ramakrishnan, et al. Standards Track [Page 50] - -RFC 3168 The Addition of ECN to IP September 2001 - - - One consequence for the network of subverting end-to-end congestion - control is that flows that do not receive the congestion indications - from the network might increase their sending rate until they drive - the network into heavier congestion. Then, the congested router - could begin to drop rather than mark arriving packets. For flows - that are not isolated by some form of per-flow scheduling or other - per-flow mechanisms, but are instead aggregated with other flows in a - single queue in an undifferentiated fashion, this packet-dropping at - the congested router would apply to all flows that share that queue. - Thus, the consequences would be to increase the level of congestion - in the network. - - In some cases, the increase in the level of congestion will lead to a - substantial buffer buildup at the congested queue that will be - sufficient to drive the congested queue from the packet-marking to - the packet-dropping regime. This transition could occur either - because of buffer overflow, or because of the active queue management - policy described above that drops packets when the average queue is - above RED's maximum threshold. At this point, all flows, including - the subverted flow, will begin to see packet drops instead of packet - marks, and a malicious or broken router will no longer be able to ` - erase' these indications of congestion in the network. If the end - nodes are deploying appropriate end-to-end congestion control, then - the subverted flow will reduce its arrival rate in response to - congestion. 
When the level of congestion is sufficiently reduced, - the congested queue can return from the packet-dropping regime to the - packet-marking regime. The steady-state pattern could be one of the - congested queue oscillating between these two regimes. - - In other cases, the consequences of subverting end-to-end congestion - control will not be severe enough to drive the congested link into - sufficiently-heavy congestion that packets are dropped instead of - being marked. In this case, the implications for competing flows in - the network will be a slightly-increased rate of packet marking or - dropping, and a corresponding decrease in the bandwidth available to - those flows. This can be a stable state if the arrival rate of the - subverted flow is sufficiently small, relative to the link bandwidth, - that the average queue size at the congested router remains under - control. In particular, the subverted flow could have a limited - bandwidth demand on the link at this router, while still getting more - than its "fair" share of the link. This limited demand could be due - to a limited demand from the data source; a limitation from the TCP - advertised window; a lower-bandwidth access pipe; or other factors. - Thus the subversion of ECN-based congestion control can still lead to - unfairness, which we believe is appropriate to note here. - - - - - - -Ramakrishnan, et al. Standards Track [Page 51] - -RFC 3168 The Addition of ECN to IP September 2001 - - - The threat to the network posed by the subversion of ECN-based - congestion control in the network is essentially the same as the - threat posed by an end-system that intentionally fails to cooperate - with end-to-end congestion control. The deployment of mechanisms in - routers to address this threat is an open research question, and is - discussed further in Section 10. 
- - Let us take the example described in Section 18.1.1, where the CE - codepoint that was set in a packet is erased: {'11' -> '10' or '11' - -> '01'}. The consequence for the congested upstream router that set - the CE codepoint is that this congestion indication does not reach - the end nodes for that flow. The source (even one which is completely - cooperative and not malicious) is thus allowed to continue to - increase its sending rate (if it is a TCP flow, by increasing its - congestion window). The flow potentially achieves better throughput - than the other flows that also share the congested router, especially - if there are no policing mechanisms or per-flow queuing mechanisms at - that router. Consider the behavior of the other flows, especially if - they are cooperative: that is, the flows that do not experience - subverted end-to-end congestion control. They are likely to reduce - their load (e.g., by reducing their window size) on the congested - router, thus benefiting our subverted flow. This results in - unfairness. As we discussed above, this unfairness could either be - transient (because the congested queue is driven into the packet- - dropping regime), oscillatory (because the congested queue oscillates - between the packet marking and the packet dropping regime), or more - moderate but a persistent stable state (because the congested queue - is never driven to the packet dropping regime). - - The results would be similar if the subverted flow was intentionally - avoiding end-to-end congestion control. One difference is that a - flow that is intentionally avoiding end-to-end congestion control at - the end nodes can avoid end-to-end congestion control even when the - congested queue is in packet-dropping mode, by refusing to reduce its - sending rate in response to packet drops in the network.
Thus the - problems for the network from the subversion of ECN-based congestion - control are less severe than the problems caused by the intentional - avoidance of end-to-end congestion control in the end nodes. It is - also the case that it is considerably more difficult to control the - behavior of the end nodes than it is to control the behavior of the - infrastructure itself. This is not to say that the problems for the - network posed by the network's subversion of ECN-based congestion - control are small; just that they are dwarfed by the problems for the - network posed by the subversion of either ECN-based or other - currently known packet-based congestion control mechanisms by the end - nodes. - - - - - -Ramakrishnan, et al. Standards Track [Page 52] - -RFC 3168 The Addition of ECN to IP September 2001 - - -19.2. Implications for the Subverted Flow - - When a source indicates that it is ECN-capable, there is an - expectation that the routers in the network that are capable of - participating in ECN will use the CE codepoint for indication of - congestion. There is the potential benefit of using ECN in reducing - the amount of packet loss (in addition to the reduced queuing delays - because of active queue management policies). When the packet flows - through an IPsec tunnel where the nodes that the tunneled packets - traverse are untrusted in some way, the expectation is that IPsec - will protect the flow from subversion that results in undesirable - consequences. - - In many cases, a subverted flow will benefit from the subversion of - end-to-end congestion control for that flow in the network, by - receiving more bandwidth than it would have otherwise, relative to - competing non-subverted flows. 
If the congested queue reaches the - packet-dropping stage, then the subversion of end-to-end congestion - control might or might not be of overall benefit to the subverted - flow, depending on that flow's relative tradeoffs between throughput, - loss, and delay. - - One form of subverting end-to-end congestion control is to falsely - indicate ECN-capability by setting the ECT codepoint. This has the - consequence of downstream congested routers setting the CE codepoint - in vain. However, as described in Section 9.1.2, if an ECT codepoint - is changed in an IP tunnel, this can be detected at the egress point - of the tunnel, as long as the inner header was not changed within the - tunnel. - - The second form of subverting end-to-end congestion control is to - erase the congestion indication by erasing the CE codepoint. In this - case, it is the upstream congested routers that set the CE codepoint - in vain. - - If an ECT codepoint is erased within an IP tunnel, then this can be - detected at the egress point of the tunnel, as long as the inner - header was not changed within the tunnel. If the CE codepoint is set - upstream of the IP tunnel, then any erasure of the outer header's CE - codepoint within the tunnel will have no effect because the inner - header preserves the set value of the CE codepoint. However, if the - CE codepoint is set within the tunnel, and erased either within or - downstream of the tunnel, this is not necessarily detected at the - egress point of the tunnel. - - With this subversion of end-to-end congestion control, an end-system - transport does not respond to the congestion indication. Along with - the increased unfairness for the non-subverted flows described in the - - - -Ramakrishnan, et al. 
Standards Track [Page 53] - -RFC 3168 The Addition of ECN to IP September 2001 - - - previous section, the congested router's queue could continue to - build, resulting in packet loss at the congested router - which is a - means for indicating congestion to the transport in any case. In the - interim, the flow might experience higher queuing delays, possibly - along with an increased bandwidth relative to other non-subverted - flows. But transports do not inherently make assumptions of - consistently experiencing carefully managed queuing in the path. We - believe that these forms of subverting end-to-end congestion control - are no worse for the subverted flow than if the adversary had simply - dropped the packets of that flow itself. - -19.3. Non-ECN-Based Methods of Subverting End-to-end Congestion Control - - We have shown that, in many cases, a malicious or broken router that - is able to change the bits in the ECN field can do no more damage - than if it had simply dropped the packet in question. However, this - is not true in all cases, in particular in the cases where the broken - router subverted end-to-end congestion control by either falsely - indicating ECN-Capability or by erasing the ECN congestion indication - (in the CE codepoint). While there are many ways that a router can - harm a flow by dropping packets, a router cannot subvert end-to-end - congestion control by dropping packets. As an example, a router - cannot subvert TCP congestion control by dropping data packets, - acknowledgement packets, or control packets. - - Even though packet-dropping cannot be used to subvert end-to-end - congestion control, there *are* non-ECN-based methods for subverting - end-to-end congestion control that a broken or malicious router could - use. For example, a broken router could duplicate data packets, thus - effectively negating the effects of end-to-end congestion control - along some portion of the path. 
(For a router that duplicated - packets within an IPsec tunnel, the security administrator can cause - the duplicate packets to be discarded by configuring anti-replay - protection for the tunnel.) This duplication of packets within the - network would have similar implications for the network and for the - subverted flow as those described in Sections 18.1.1 and 18.1.4 - above. - -20. The Motivation for the ECT Codepoints. - -20.1. The Motivation for an ECT Codepoint. - - The need for an ECT codepoint is motivated by the fact that ECN will - be deployed incrementally in an Internet where some transport - protocols and routers understand ECN and some do not. With an ECT - codepoint, the router can drop packets from flows that are not ECN- - capable, but can *instead* set the CE codepoint in packets that *are* - - - - -Ramakrishnan, et al. Standards Track [Page 54] - -RFC 3168 The Addition of ECN to IP September 2001 - - - ECN-capable. Because an ECT codepoint allows an end node to have the - CE codepoint set in a packet *instead* of having the packet dropped, - an end node might have some incentive to deploy ECN. - - If there was no ECT codepoint, then the router would have to set the - CE codepoint for packets from both ECN-capable and non-ECN-capable - flows. In this case, there would be no incentive for end-nodes to - deploy ECN, and no viable path of incremental deployment from a non- - ECN world to an ECN-capable world. Consider the first stages of such - an incremental deployment, where a subset of the flows are ECN- - capable. At the onset of congestion, when the packet - dropping/marking rate would be low, routers would only set CE - codepoints, rather than dropping packets. However, only those flows - that are ECN-capable would understand and respond to CE packets. The - result is that the ECN-capable flows would back off, and the non- - ECN-capable flows would be unaware of the ECN signals and would - continue to open their congestion windows. 
- - In this case, there are two possible outcomes: (1) the ECN-capable - flows back off, the non-ECN-capable flows get all of the bandwidth, - and congestion remains mild, or (2) the ECN-capable flows back off, - the non-ECN-capable flows don't, and congestion increases until the - router transitions from setting the CE codepoint to dropping packets. - While this second outcome evens out the fairness, the ECN-capable - flows would still receive little benefit from being ECN-capable, - because the increased congestion would drive the router to packet- - dropping behavior. - - A flow that advertised itself as ECN-Capable but does not respond to - CE codepoints is functionally equivalent to a flow that turns off - congestion control, as discussed earlier in this document. - - Thus, in a world when a subset of the flows are ECN-capable, but - where ECN-capable flows have no mechanism for indicating that fact to - the routers, there would be less effective and less fair congestion - control in the Internet, resulting in a strong incentive for end - nodes not to deploy ECN. - -20.2. The Motivation for two ECT Codepoints. - - The primary motivation for the two ECT codepoints is to provide a - one-bit ECN nonce. The ECN nonce allows the development of - mechanisms for the sender to probabilistically verify that network - elements are not erasing the CE codepoint, and that data receivers - are properly reporting to the sender the receipt of packets with the - CE codepoint set. - - - - - -Ramakrishnan, et al. Standards Track [Page 55] - -RFC 3168 The Addition of ECN to IP September 2001 - - - Another possibility for senders to detect misbehaving network - elements or receivers would be for the data sender to occasionally - send a data packet with the CE codepoint set, to see if the receiver - reports receiving the CE codepoint. 
Of course, if these packets - encountered congestion in the network, the router might make no - change in the packets, because the CE codepoint would already be set. - Thus, for packets sent with the CE codepoint set, the TCP end-nodes - could not determine if some router intended to set the CE codepoint - in these packets. For this reason, sending packets with the CE - codepoint would have to be done sparingly, and would be a less - effective check against misbehaving network elements and receivers - than would be the ECN nonce. - - The assignment of the fourth ECN codepoint to ECT(1) precludes the - use of this codepoint for some other purposes. For clarity, we - briefly list other possible purposes here. - - One possibility might have been for the data sender to use the fourth - ECN codepoint to indicate an alternate semantics for ECN. However, - this seems to us more appropriate to be signaled using a - differentiated services codepoint in the DS field. - - A second possible use for the fourth ECN codepoint would have been to - give the router two separate codepoints for the indication of - congestion, CE(0) and CE(1), for mild and severe congestion - respectively. While this could be useful in some cases, this - certainly does not seem a compelling requirement at this point. If - there was judged to be a compelling need for this, the complications - of incremental deployment would most likely necessitate more than - just one codepoint for this function. - - A third use that has been informally proposed for the ECN codepoint - is for use in some forms of multicast congestion control, based on - randomized procedures for duplicating marked packets at routers. - Some proposed multicast packet duplication procedures are based on a - new ECN codepoint that (1) conveys the fact that congestion occurred - upstream of the duplication point that marked the packet with this - codepoint and (2) can detect congestion downstream of that - duplication point.
ECT(1) can serve this purpose because it is both - distinct from ECT(0) and is replaced by CE when ECN marking occurs in - response to congestion or incipient congestion. Explanation of how - this enhanced version of ECN would be used by multicast congestion - control is beyond the scope of this document, as are ECN-aware - multicast packet duplication procedures and the processing of the ECN - field at multicast receivers in all cases (i.e., irrespective of the - multicast packet duplication procedure(s) used). - - - - - -Ramakrishnan, et al. Standards Track [Page 56] - -RFC 3168 The Addition of ECN to IP September 2001 - - - The specification of IP tunnel modifications for ECN in this document - assumes that the only change made to the outer IP header's ECN field - between tunnel endpoints is to set the CE codepoint to indicate - congestion. This is not consistent with some of the proposed uses of - ECT(1) by the multicast duplication procedures in the previous - paragraph, and such procedures SHOULD NOT be deployed unless this - inconsistency between multicast duplication procedures and IP tunnels - with full ECN functionality is resolved. Limited ECN functionality - may be used instead, although in practice many tunnel protocols - (including IPsec) will not work correctly if multicast traffic - duplication occurs within the tunnel. - -21. Why use Two Bits in the IP Header? - - Given the need for an ECT indication in the IP header, there still - remains the question of whether the ECT (ECN-Capable Transport) and - CE (Congestion Experienced) codepoints should have been overloaded on - a single bit. This overloaded-one-bit alternative, explored in - [Floyd94], would have involved a single bit with two values. One - value, "ECT and not CE", would represent an ECN-Capable Transport, - and the other value, "CE or not ECT", would represent either - Congestion Experienced or a non-ECN-Capable transport.
- - One difference between the one-bit and two-bit implementations - concerns packets that traverse multiple congested routers. Consider - a CE packet that arrives at a second congested router, and is - selected by the active queue management at that router for either - marking or dropping. In the one-bit implementation, the second - congested router has no choice but to drop the CE packet, because it - cannot distinguish between a CE packet and a non-ECT packet. In the - two-bit implementation, the second congested router has the choice of - either dropping the CE packet, or of leaving it alone with the CE - codepoint set. - - Another difference between the one-bit and two-bit implementations - comes from the fact that with the one-bit implementation, receivers - in a single flow cannot distinguish between CE and non-ECT packets. - Thus, in the one-bit implementation an ECN-capable data sender would - have to unambiguously indicate to the receiver or receivers whether - each packet had been sent as ECN-Capable or as non-ECN-Capable. One - possibility would be for the sender to indicate in the transport - header whether the packet was sent as ECN-Capable. A second - possibility that would involve a functional limitation for the one- - bit implementation would be for the sender to unambiguously indicate - that it was going to send *all* of its packets as ECN-Capable or as - non-ECN-Capable. For a multicast transport protocol, this - unambiguous indication would have to be apparent to receivers joining - an on-going multicast session. - - - -Ramakrishnan, et al. Standards Track [Page 57] - -RFC 3168 The Addition of ECN to IP September 2001 - - - Another concern that was described earlier (and recommended in this - document) is that transports (particularly TCP) should not mark pure - ACK packets or retransmitted packets as being ECN-Capable. 
A pure - ACK packet from a non-ECN-capable transport could be dropped, without - necessarily having an impact on the transport from a congestion - control perspective (because subsequent ACKs are cumulative). An - ECN-capable transport reacting to the CE codepoint in a pure ACK - packet by reducing the window would be at a disadvantage in - comparison to a non-ECN-capable transport. For this reason (and for - reasons described earlier in relation to retransmitted packets), it - is desirable to have the ECT codepoint set on a per-packet basis. - - Another advantage of the two-bit approach is that it is somewhat more - robust. The most critical issue, discussed in Section 8, is that the - default indication should be that of a non-ECN-Capable transport. In - a two-bit implementation, this requirement for the default value - simply means that the not-ECT codepoint should be the default. In - the one-bit implementation, this means that the single overloaded bit - should by default be in the "CE or not ECT" position. This is less - clear and straightforward, and possibly more open to incorrect - implementations either in the end nodes or in the routers. - - In summary, while the one-bit implementation could be a possible - implementation, it has the following significant limitations relative - to the two-bit implementation. First, the one-bit implementation has - more limited functionality for the treatment of CE packets at a - second congested router. Second, the one-bit implementation requires - either that extra information be carried in the transport header of - packets from ECN-Capable flows (to convey the functionality of the - second bit elsewhere, namely in the transport header), or that - senders in ECN-Capable flows accept the limitation that receivers - must be able to determine a priori which packets are ECN-Capable and - which are not ECN-Capable. 
Third, the one-bit implementation is - possibly more open to errors from faulty implementations that choose - the wrong default value for the ECN bit. We believe that the use of - the extra bit in the IP header for the ECT-bit is extremely valuable - to overcome these limitations. - -22. Historical Definitions for the IPv4 TOS Octet - - RFC 791 [RFC791] defined the ToS (Type of Service) octet in the IP - header. In RFC 791, bits 6 and 7 of the ToS octet are listed as - "Reserved for Future Use", and are shown set to zero. The first two - fields of the ToS octet were defined as the Precedence and Type of - Service (TOS) fields. - - - - - - -Ramakrishnan, et al. Standards Track [Page 58] - -RFC 3168 The Addition of ECN to IP September 2001 - - - 0 1 2 3 4 5 6 7 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | PRECEDENCE | TOS | 0 | 0 | RFC 791 - +-----+-----+-----+-----+-----+-----+-----+-----+ - - RFC 1122 included bits 6 and 7 in the TOS field, though it did not - discuss any specific use for those two bits: - - 0 1 2 3 4 5 6 7 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | PRECEDENCE | TOS | RFC 1122 - +-----+-----+-----+-----+-----+-----+-----+-----+ - - The IPv4 TOS octet was redefined in RFC 1349 [RFC1349] as follows: - - 0 1 2 3 4 5 6 7 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | PRECEDENCE | TOS | MBZ | RFC 1349 - +-----+-----+-----+-----+-----+-----+-----+-----+ - - Bit 6 in the TOS field was defined in RFC 1349 for "Minimize Monetary - Cost". In addition to the Precedence and Type of Service (TOS) - fields, the last field, MBZ (for "must be zero") was defined as - currently unused. RFC 1349 stated that "The originator of a datagram - sets [the MBZ] field to zero (unless participating in an Internet - protocol experiment which makes use of that bit)." - - RFC 1455 [RFC 1455] defined an experimental standard that used all - four bits in the TOS field to request a guaranteed level of link - security. 
- - RFC 1349 and RFC 1455 have been obsoleted by "Definition of the - Differentiated Services Field (DS Field) in the IPv4 and IPv6 - Headers" [RFC2474] in which bits 6 and 7 of the DS field are listed - as Currently Unused (CU). RFC 2780 [RFC2780] specified ECN as an - experimental use of the two-bit CU field. RFC 2780 updated the - definition of the DS Field to only encompass the first six bits of - this octet rather than all eight bits; these first six bits are - defined as the Differentiated Services CodePoint (DSCP): - - 0 1 2 3 4 5 6 7 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | DSCP | CU | RFCs 2474, - +-----+-----+-----+-----+-----+-----+-----+-----+ 2780 - - Because of this unstable history, the definition of the ECN field in - this document cannot be guaranteed to be backwards compatible with - all past uses of these two bits. - - - -Ramakrishnan, et al. Standards Track [Page 59] - -RFC 3168 The Addition of ECN to IP September 2001 - - - Prior to RFC 2474, routers were not permitted to modify bits in - either the DSCP or ECN field of packets forwarded through them, and - hence routers that comply only with RFCs prior to 2474 should have no - effect on ECN. For end nodes, bit 7 (the second ECN bit) must be - transmitted as zero for any implementation compliant only with RFCs - prior to 2474. Such nodes may transmit bit 6 (the first ECN bit) as - one for the "Minimize Monetary Cost" provision of RFC 1349 or the - experiment authorized by RFC 1455; neither this aspect of RFC 1349 - nor the experiment in RFC 1455 were widely implemented or used. The - damage that could be done by a broken, non-conformant router would - include "erasing" the CE codepoint for an ECN-capable packet that - arrived at the router with the CE codepoint set, or setting the CE - codepoint even in the absence of congestion. This has been discussed - in the section on "Non-compliance in the Network". 
- - The damage that could be done in an ECN-capable environment by a - non-ECN-capable end-node transmitting packets with the ECT codepoint - set has been discussed in the section on "Non-compliance by the End - Nodes". - -23. IANA Considerations - - This section contains the namespaces that have either been created in - this specification, or the values assigned in existing namespaces - managed by IANA. - -23.1. IPv4 TOS Byte and IPv6 Traffic Class Octet - - The codepoints for the ECN Field of the IP header are specified by - the Standards Action of this RFC, as is required by RFC 2780. - - When this document is published as an RFC, IANA should create a new - registry, "IPv4 TOS Byte and IPv6 Traffic Class Octet", with the - namespace as follows: - - IPv4 TOS Byte and IPv6 Traffic Class Octet - - Description: The registrations are identical for IPv4 and IPv6. - - Bits 0-5: see Differentiated Services Field Codepoints Registry - (http://www.iana.org/assignments/dscp-registry) - - - - - - - - - - -Ramakrishnan, et al. Standards Track [Page 60] - -RFC 3168 The Addition of ECN to IP September 2001 - - - Bits 6-7, ECN Field: - - Binary Keyword References - ------ ------- ---------- - 00 Not-ECT (Not ECN-Capable Transport) [RFC 3168] - 01 ECT(1) (ECN-Capable Transport(1)) [RFC 3168] - 10 ECT(0) (ECN-Capable Transport(0)) [RFC 3168] - 11 CE (Congestion Experienced) [RFC 3168] - -23.2. TCP Header Flags - - The codepoints for the CWR and ECE flags in the TCP header are - specified by the Standards Action of this RFC, as is required by RFC - 2780. - - When this document is published as an RFC, IANA should create a new - registry, "TCP Header Flags", with the namespace as follows: - - TCP Header Flags - - The Transmission Control Protocol (TCP) included a 6-bit Reserved - field defined in RFC 793, reserved for future use, in bytes 13 and 14 - of the TCP header, as illustrated below. The other six Control bits - are defined separately by RFC 793. 
- - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | U | A | P | R | S | F | - | Header Length | Reserved | R | C | S | S | Y | I | - | | | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - RFC 3168 defines two of the six bits from the Reserved field to be - used for ECN, as follows: - - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | C | E | U | A | P | R | S | F | - | Header Length | Reserved | W | C | R | C | S | S | Y | I | - | | | R | E | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - - - - - - - - - -Ramakrishnan, et al. Standards Track [Page 61] - -RFC 3168 The Addition of ECN to IP September 2001 - - - TCP Header Flags - - Bit Name Reference - --- ---- --------- - 8 CWR (Congestion Window Reduced) [RFC 3168] - 9 ECE (ECN-Echo) [RFC 3168] - -23.3. IPSEC Security Association Attributes - - IANA allocated the IPSEC Security Association Attribute value 10 for - the ECN Tunnel use described in Section 9.2.1.2 above at the request - of David Black in November 1999. The IANA has changed the Reference - for this allocation from David Black's request to this RFC. - -24. Authors' Addresses - - K. K. Ramakrishnan - TeraOptic Networks, Inc. - - Phone: +1 (408) 666-8650 - EMail: kk@teraoptic.com - - - Sally Floyd - ACIRI - - Phone: +1 (510) 666-2989 - EMail: floyd@aciri.org - URL: http://www.aciri.org/floyd/ - - - David L. Black - EMC Corporation - 42 South St. - Hopkinton, MA 01748 - - Phone: +1 (508) 435-1000 x75140 - EMail: black_david@emc.com - - - - - - - - - - - - - -Ramakrishnan, et al. Standards Track [Page 62] - -RFC 3168 The Addition of ECN to IP September 2001 - - -25. Full Copyright Statement - - Copyright (C) The Internet Society (2001). All Rights Reserved. 
- - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Ramakrishnan, et al. Standards Track [Page 63] - diff --git a/kernel/picotcp/RFC/rfc3360.txt b/kernel/picotcp/RFC/rfc3360.txt deleted file mode 100644 index af27470..0000000 --- a/kernel/picotcp/RFC/rfc3360.txt +++ /dev/null @@ -1,1067 +0,0 @@ - - - - - - -Network Working Group S. 
Floyd -Request for Comments: 3360 ICIR -BCP: 60 August 2002 -Category: Best Current Practice - - - Inappropriate TCP Resets Considered Harmful - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2002). All Rights Reserved. - -Abstract - - This document is being written because there are a number of - firewalls in the Internet that inappropriately reset a TCP connection - upon receiving certain TCP SYN packets, in particular, packets with - flags set in the Reserved field of the TCP header. In this document - we argue that this practice is not conformant with TCP standards, and - is an inappropriate overloading of the semantics of the TCP reset. - We also consider the longer-term consequences of this and similar - actions as obstacles to the evolution of the Internet infrastructure. - -1. Introduction - - TCP uses the RST (Reset) bit in the TCP header to reset a TCP - connection. Resets are appropriately sent in response to a - connection request to a nonexistent connection, for example. The TCP - receiver of the reset aborts the TCP connection, and notifies the - application [RFC793, RFC1122, Ste94]. - - Unfortunately, a number of firewalls and load-balancers in the - current Internet send a reset in response to a TCP SYN packet that - uses flags from the Reserved field in the TCP header. Section 3 below - discusses the specific example of firewalls that send resets in - response to TCP SYN packets from ECN-capable hosts. - - This document is being written to inform administrators of web - servers and firewalls of this problem, in an effort to encourage the - deployment of bug-fixes [FIXES].
A second purpose of this document - is to consider the longer-term consequences of such middlebox - behavior on the more general evolution of protocols in the Internet. - - - -Floyd Best Current Practice [Page 1] - -RFC 3360 Inappropriate TCP Resets August 2002 - - -2. The history of TCP resets. - - This section gives a brief history of the use of the TCP reset in the - TCP standards, and argues that sending a reset in response to a SYN - packet that uses bits from the Reserved field of the TCP header is - non-compliant behavior. - - RFC 793 contained the original specification of TCP in September, - 1981 [RFC793]. This document defined the RST bit in the TCP header, - and explained that reset was devised to prevent old duplicate - connection initiations from causing confusion in TCP's three-way - handshake. The reset is also used when a host receives data for a - TCP connection that no longer exists. - - RFC 793 states the following, in Section 5: - - "As a general rule, reset (RST) must be sent whenever a segment - arrives which apparently is not intended for the current connection. - A reset must not be sent if it is not clear that this is the case." - - RFC 1122 "amends, corrects, and supplements" RFC 793. RFC 1122 says - nothing specific about sending resets, or not sending resets, in - response to flags in the TCP Reserved field. - - Thus, there is nothing in RFC 793 or RFC 1122 that suggests that it - is acceptable to send a reset simply because a SYN packet uses - Reserved flags in the TCP header, and RFC 793 explicitly forbids - sending a reset for this reason. - - RFC 793 and RFC 1122 both include Jon Postel's famous robustness - principle, also from RFC 791: "Be liberal in what you accept, and - conservative in what you send." RFC 1122 reiterates that this - robustness principle "is particularly important in the Internet - layer, where one misbehaving host can deny Internet service to many - other hosts." 
The discussion of the robustness principle in RFC 1122 - also states that "adaptability to change must be designed into all - levels of Internet host software". The principle "be liberal in what - you accept" doesn't carry over in a clear way (if at all) to the - world of firewalls, but the issue of "adaptability to change" is - crucial nevertheless. The challenge is to protect legitimate - security interests without completely blocking the ability of the - Internet to evolve to support new applications, protocols, and - functionality. - - - - - - - - -Floyd Best Current Practice [Page 2] - -RFC 3360 Inappropriate TCP Resets August 2002 - - -2.1. The TCP Reserved Field - - RFC 793 says that the Reserved field in the TCP header is reserved - for future use, and must be zero. A rephrasing more consistent with - the rest of the document would have been to say that the Reserved - field should be zero when sent and ignored when received, unless - specified otherwise by future standards actions. However, the - phrasing in RFC 793 does not permit sending resets in response to TCP - packets with a non-zero Reserved field, as is explained in the - section above. - -2.2. Behavior of and Requirements for Internet Firewalls - - RFC 2979 on the Behavior of and Requirements for Internet Firewalls - [RFC2979], an Informational RFC, contains the following: - - "Applications have to continue to work properly in the presence of - firewalls. This translates into the following transparency rule: The - introduction of a firewall and any associated tunneling or access - negotiation facilities MUST NOT cause unintended failures of - legitimate and standards-compliant usage that would work were the - firewall not present." 
- - "A necessary corollary to this requirement is that when such failures - do occur it is incumbent on the firewall and associated software to - address the problem: Changes to either implementations of existing - standard protocols or the protocols themselves MUST NOT be - necessary." - - "Note that this requirement only applies to legitimate protocol usage - and gratuitous failures -- a firewall is entitled to block any sort - of access that a site deems illegitimate, regardless of whether or - not the attempted access is standards-compliant." - - We would note that RFC 2979 is an Informational RFC. RFC 2026 on - Internet Standards Process says the following in Section 4.2.2: "An - `Informational' specification is published for the general - information of the Internet community, and does not represent an - Internet community consensus or recommendation" [RFC2026]. - -2.3. Sending Resets as a Congestion Control Mechanism - - Some firewalls and hosts send resets in response to SYN packets as a - congestion control mechanism, for example, when their listen queues - are full. These resets are sent without regard to the contents of - the TCP Reserved field. Possibly in response to the use of resets as - - - - - -Floyd Best Current Practice [Page 3] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - a congestion control mechanism, several popular TCP implementations - immediately resend a SYN packet in response to a reset, up to four - times. - - We would recommend that the TCP reset not be used as a congestion - control mechanism, because this overloads the semantics of the reset - message, and inevitably leads to more aggressive behavior from TCP - implementations in response to a reset. We would suggest that simply - dropping the SYN packet is the most effective response to congestion. - The TCP sender will retransmit the SYN packet, using the default - value for the Retransmission Timeout (RTO), backing-off the - retransmit timer after each retransmit. 
- -2.4. Resets in Response to Changes in the Precedence Field - - RFC 793 includes the following in Section 5: - - "If an incoming segment has a security level, or compartment, or - precedence which does not exactly match the level, and compartment, - and precedence requested for the connection, a reset is sent and - connection goes to the CLOSED state." - - The "precedence" refers to the (old) Precedence field in the (old) - ToS field in the IP header. The "security" and "compartment" refer - to the obsolete IP Security option. When it was written, this was - consistent with the guideline elsewhere in RFC 793 that resets should - only be sent when a segment arrives which apparently is not intended - for the current connection. - - RFC 2873 on "TCP Processing of the IPv4 Precedence Field" discusses - specific problems raised by the sending of resets when the precedence - field has changed [RFC2873]. RFC 2873, currently a Proposed - Standard, specifies that TCP must ignore the precedence of all - received segments, and must not send a reset in response to changes - in the precedence field. We discuss this here to clarify that this - issue never permitted the sending of a reset in response to a segment - with a non-zero TCP Reserved field. - -2.5. Resets in Response to Illegal Option Lengths - - RFC 1122 says the following in Section 4.2.2.5 about TCP options - [RFC1122]: - - "A TCP MUST be able to receive a TCP option in any segment. A TCP - MUST ignore without error any TCP option it does not implement, - assuming that the option has a length field (all TCP options defined - - - - - -Floyd Best Current Practice [Page 4] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - in the future will have length fields). TCP MUST be prepared to - handle an illegal option length (e.g., zero) without crashing; a - suggested procedure is to reset the connection and log the reason." 
- - This makes sense, as a TCP receiver is unable to interpret the rest - of the data on a segment that has a TCP option with an illegal option - length. Again, we discuss this here to clarify that this issue never - permitted the sending of a reset in response to a segment with a - non-zero TCP Reserved field. - -3. The Specific Example of ECN - - This section has a brief explanation of ECN (Explicit Congestion - Notification) in general, and the ECN-setup SYN packet in particular. - - The Internet is based on end-to-end congestion control, and - historically the Internet has used packet drops as the only method - for routers to indicate congestion to the end nodes. ECN is a recent - addition to the IP architecture to allow routers to set a bit in the - IP packet header to inform end-nodes of congestion, instead of - dropping the packet. ECN requires the cooperation of the transport - end-nodes. - - The ECN specification, RFC 2481, was an Experimental RFC from January - 1999 until June 2001, when a revised document [RFC3168] was approved - as Proposed Standard. More information about ECN is available from - the ECN Web Page [ECN]. - - The use of ECN with TCP requires that both TCP end-nodes have been - upgraded to support the use of ECN, and that both end-nodes agree to - use ECN with this particular TCP connection. This negotiation of ECN - support between the two TCP end-nodes uses two flags that have been - allocated from the Reserved field in the TCP header [RFC2481]. - - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | U | A | P | R | S | F | - | Header Length | Reserved | R | C | S | S | Y | I | - | | | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - Figure 1: The previous definition of bytes 13 and 14 of the TCP - header. 
- - - - - - - - -Floyd Best Current Practice [Page 5] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | C | E | U | A | P | R | S | F | - | Header Length | Reserved | W | C | R | C | S | S | Y | I | - | | | R | E | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - Figure 2: The current definition of bytes 13 and 14 of the TCP - Header, from RFC 3168. - - The two ECN flags in the TCP header are defined from the last two - bits in the Reserved field of the TCP header. Bit 9 in the Reserved - field of the TCP header is designated as the ECN-Echo flag (ECE), and - Bit 8 is designated as the Congestion Window Reduced (CWR) flag. To - negotiate ECN usage, the TCP sender sends an "ECN-setup SYN packet", - a TCP SYN packet with the ECE and CWR flags set. If the TCP host at - the other end wishes to use ECN for this connection, then it sends an - "ECN-setup SYN-ACK packet", a TCP SYN-ACK packet with the ECE flag set - and the CWR flag not set. Otherwise, the TCP host at the other end - returns a SYN-ACK packet with neither the ECE nor the CWR flag set. - - So now back to TCP resets. When a TCP host negotiating ECN sends an - ECN-setup SYN packet, an old TCP implementation is expected to ignore - those flags in the Reserved field, and to send a plain SYN-ACK packet - in response. However, there are some broken firewalls and load- - balancers in the Internet that instead respond to an ECN-setup SYN - packet with a reset. Following the deployment of ECN-enabled end - nodes, there were widespread complaints that ECN-capable hosts could - not access a number of websites [Kelson00]. This has been - investigated by the Linux community, and by the TBIT project [TBIT] - in data taken from September, 2000, up to March, 2002, and has been - discussed in an article in Enterprise Linux Today [Cou01]. 
Some of - the offending equipment has been identified, and a web page [FIXES] - contains a list of non-compliant products and the fixes posted by the - vendors. In March 2002, six months after ECN was approved as - Proposed Standard, ECN-setup SYN packets were answered by a reset - from 203 of the 12,364 web sites tested, and ECN-setup SYN packets - were dropped for 420 of the web sites. Installing software that - blocks packets using flags in TCP's Reserved field is considerably - easier than uninstalling that software later on. - -3.1. ECN: The Work-Around. - - A work-around for maintaining connectivity in the face of the broken - equipment was described in [Floyd00], and has been specified in RFC - 3168 as a procedure that may be included in TCP implementations. We - describe this work-around briefly below. - - - - -Floyd Best Current Practice [Page 6] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - To provide robust connectivity even in the presence of faulty - equipment, a TCP host that receives a reset in response to the - transmission of an ECN-setup SYN packet may resend the SYN with CWR - and ECE cleared. This would result in a TCP connection being - established without using ECN. This also has the unfortunate result - of the ECN-capable TCP host not responding properly to the first - valid reset. If a second reset is sent in response to the second - SYN, which had CWR and ECE cleared, then the TCP host should respond - properly by aborting the connection. - - Similarly, a host that receives no reply to an ECN-setup SYN within - the normal SYN retransmission timeout interval may resend the SYN and - any subsequent SYN retransmissions with CWR and ECE cleared. To - overcome normal packet loss that results in the original SYN being - lost, the originating host may retransmit one or more ECN-setup SYN - packets before giving up and retransmitting the SYN with the CWR and - ECE bits cleared. 
- - Some TCP implementors have so far decided not to deploy these - workarounds, for the following reasons: - - * The work-arounds would result in ECN-capable hosts not responding - properly to the first valid reset received in response to a SYN - packet. - - * The work-arounds would limit ECN functionality in environments - without broken equipment, by disabling ECN where the first SYN or - SYN-ACK packet was dropped in the network. - - * The work-arounds in many cases would involve a delay of six seconds - or more before connectivity is established with the remote server, - in the case of broken equipment that drops ECN-setup SYN packets. - By accommodating this broken equipment, the work-arounds have been - judged as implicitly accepting both this delay and the broken - equipment that would be causing this delay. - - One possibility would be for such work-arounds to be configurable by - the user. - - One unavoidable consequence of the work-around of resending a - modified SYN packet in response to a reset is to further erode the - semantics of the TCP reset. Thus, when a box sends a reset, the TCP - host receiving that reset does not know if the reset was sent simply - because of the ECN-related flags in the TCP header, or because of - some more fundamental problem. Therefore, the TCP host resends the - TCP SYN packet without the ECN-related flags in the TCP header. The - ultimate consequence of this absence of clear communications from the - middlebox to the end-nodes could be an extended spiral of - - - -Floyd Best Current Practice [Page 7] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - communications specified for transport protocols, as end nodes - attempt to sacrifice as little functionality as possible in the - process of determining which packets will and will not be forwarded - to the other end. This is discussed in more detail in Section 6.1 - below. - -4. 
On Combating Obstacles to the Proper Evolution of the Internet - Infrastructure - - One of the reasons that this issue of inappropriate resets is - important (to me) is that it has complicated the deployment of ECN in - the Internet (though it has fortunately not blocked the deployment - completely). It has also added an unnecessary obstacle to the future - effectiveness of ECN. - - However, a second, more general reason why this issue is important is - that the presence of equipment in the Internet that rejects valid TCP - packets limits the future evolution of TCP, completely aside from the - issue of ECN. That is, the widespread deployment of equipment that - rejects TCP packets that use Reserved flags in the TCP header could - effectively prevent the deployment of new mechanisms that use any of - these Reserved flags. It doesn't matter if these new mechanisms have - the protection of Experimental or Proposed Standard status from the - IETF, because the broken equipment in the Internet does not stop to - look up the current status of the protocols before rejecting the - packets. TCP is good, and useful, but it would be a pity for the - deployment of broken equipment in the Internet to result in the - "freezing" of TCP in its current state, without the ability to use - the Reserved flags in the future evolution of TCP. - - In the specific case of middleboxes that block TCP SYN packets - attempting to negotiate ECN, the work-around described in Section 3.1 - is sufficient to ensure that end-nodes could still establish - connectivity. However, there are likely to be additional uses of the - TCP Reserved Field standardized in the next year or two, and these - additional uses might not coexist quite as successfully with - middleboxes that send resets. 
Consider the difficulties that could - result if a path changes in the middle of a connection's lifetime, - and the middleboxes on the old and new paths have different policies - about exactly which flags in the TCP Reserved field they would and - would not block. - - Taking the wider view, the existence of web servers or firewalls that - send inappropriate resets is only one example of functionality in the - Internet that restricts the future evolution of the Internet. The - impact of all of these small restrictions taken together presents a - considerable obstacle to the development of the Internet - architecture. - - - -Floyd Best Current Practice [Page 8] - -RFC 3360 Inappropriate TCP Resets August 2002 - - -5. Issues for Transport Protocols - - One lesson for designers of transport protocols is that transport - protocols will have to protect themselves from the unknown and - seemingly arbitrary actions of firewalls, normalizers, and other - middleboxes in the network. For the moment, for TCP, this means - sending a non-ECN-setup SYN when a reset is received in response to - an ECN-setup SYN packet. Defensive actions on the side of transport - protocols could include using Reserved flags in the SYN packet before - using them in data traffic, to protect against middleboxes that block - packets using those flags. It is possible that transport protocols - will also have to add additional checks during the course of the - connection lifetime to check for interference from middleboxes along - the path. - - The ECN standards document, RFC 3168, contains an extensive - discussion in Section 18 on "Possible Changes to the ECN Field in the - Network", but includes the following about possible changes to the - TCP header: - - "This document does not consider potential dangers introduced by - changes in the transport header within the network. We note that - when IPsec is used, the transport header is protected both in tunnel - and transport modes [ESP, AH]." 
- - With the current modification of transport-level headers in the - network by firewalls (as discussed below in Section 6.2), future - protocol designers might no longer have the luxury of ignoring the - possible impact of changes to the transport header within the - network. - - Transport protocols will also have to respond in some fashion to an - ICMP code of "Communication Administratively Prohibited" if - middleboxes start to use this form of the ICMP Destination - Unreachable message to indicate that the packet is using - functionality not allowed [RFC1812]. - -6. Issues for Middleboxes - - Given that some middleboxes are going to drop some packets because - they use functionality not allowed by the middlebox, the larger issue - remains of how middleboxes should communicate the reason for this - action to the end-nodes, if at all. One suggestion, for - consideration in more depth in a separate document, would be that - firewalls send an ICMP Destination Unreachable message with the code - "Communication Administratively Prohibited" [B01]. - - - - - -Floyd Best Current Practice [Page 9] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - We acknowledge that this is not an ideal solution, for several - reasons. First, middleboxes along the reverse path might block these - ICMP messages. Second, some firewall operators object to explicit - communication because it reveals too much information about security - policies. And third, the response of transport protocols to such an - ICMP message is not yet specified. - - However, an ICMP "Administratively Prohibited" message could be a - reasonable addition, for firewalls willing to use explicit - communication. One possibility, again to be explored in a separate - document, would be for the ICMP "Administratively Prohibited" message - to be modified to convey additional information to the end host. - - We would note that this document does not consider middleboxes that - block complete transport protocols. 
We also note that this document - is not addressing firewalls that send resets in response to a TCP SYN - packet to a firewalled-off TCP port. Such a use of resets seems - consistent with the semantics of TCP reset. This document is only - considering the problems caused by middleboxes that block specific - packets within a transport protocol when other packets from that - transport protocol are forwarded by the middlebox unaltered. - - One complication is that once a mechanism is installed in a firewall - to block a particular functionality, it can take considerable effort - for network administrators to "un-install" that block. It has been - suggested that tweakable settings on firewalls could make recovery - from future incidents less painful all around. Again, because this - document does not address more general issues about firewalls, the - issue of greater firewall flexibility, and the attendant possible - security risks, belongs in a separate document. - -6.1. Current Choices for Firewalls - - Given a firewall that has decided to drop TCP packets that use - reserved bits in the TCP header, one question is whether the firewall - should also send a Reset, in order to prevent the TCP connection from - consuming unnecessary resources at the TCP sender waiting for the - retransmit timeout. We would argue that whether or not the firewall - feels compelled to drop the TCP packet, it is not appropriate to send - a TCP reset. Sending a TCP reset in response to prohibited - functionality would continue the current overloading of the semantics - of the TCP reset in a way that could be counterproductive all around. - - As an example, Section 2.3 has already observed that some firewalls - send resets in response to TCP SYN packets as a congestion control - mechanism. Possibly in response to this (or perhaps in response to - something else), some popular TCP implementations immediately resend - a SYN packet in response to a reset, up to four times. 
Other TCP - - - -Floyd Best Current Practice [Page 10] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - implementations, in conformance to the standards, don't resend SYN - packets after receiving a reset. The more aggressive TCP - implementations increase congestion for others, but also increase - their own chances of eventually getting through. Given these fluid - semantics for the TCP reset, one might expect more TCP - implementations to start resending SYN packets in response to a - reset, completely apart from any issues having to do with ECN. - Obviously, this weakens the effectiveness of the reset when used for - its original purpose, of responding to TCP packets that apparently - are not intended for the current connection. - - If we add to this mix the use of the TCP reset by firewalls in - response to TCP packets using reserved bits in the TCP header, this - muddies the waters further. Because TCP resets could be sent due to - congestion, or to prohibited functionality, or because a packet was - received from a previous TCP connection, TCP implementations (or, - more properly, TCP implementors) would now have an incentive to be - even more persistent in resending SYN packets in response to TCP - resets. In addition to the incentive mentioned above of resending - TCP SYN packets to increase one's odds of eventually getting through - in a time of congestion, the TCP reset might have been due to - prohibited functionality instead of congestion, so the TCP - implementation might resend SYN packets in different forms to - determine exactly which functionality is being prohibited. Such a - continual changing of the semantics of the TCP reset could be - expected to lead to a continued escalation of measures and - countermeasures between firewalls and end-hosts, with little - productive benefit to either side. 
- - It could be argued that *dropping* the TCP SYN packet due to the use - of prohibited functionality leads to overloading of the semantics of - a packet drop, in the same way that the reset leads to overloading - the semantics of a reset. This is true; from the viewpoint of end- - system response to messages with overloaded semantics, it would be - preferable to have an explicit indication about prohibited - functionality (for those firewalls for some reason willing to use - explicit indications). But given a firewall's choice between sending - a reset or just dropping the packet, we would argue that just - dropping the packet does less damage, in terms of giving an incentive - to end-hosts to adopt counter-measures. It is true that just - dropping the packet, without sending a reset, results in delay for - the TCP connection in resending the SYN packet without the prohibited - functionality. However, sending a reset has the undesirable longer- - term effect of giving an incentive to future TCP implementations to - add more baroque combinations of resending SYN packets in response to - a reset, because the TCP sender can't tell if the reset is for a - standard reason, for congestion, or for the prohibited functionality - of option X or reserved bit Y in the TCP header. - - - -Floyd Best Current Practice [Page 11] - -RFC 3360 Inappropriate TCP Resets August 2002 - - -6.2. The Complications of Modifying Packet Headers in the Network - - In addition to firewalls that send resets in response to ECN-setup - SYN packets and firewalls that drop ECN-setup SYN packets, there also - exist firewalls that by default zero the flags in the TCP Reserved - field, including the two flags used for ECN. We note that in some - cases this could have unintended and undesirable consequences. 
- - If a firewall zeros the ECN-related flags in the TCP header in the - initial SYN packet, then the TCP connection will be set up without - using ECN, and the ECN-related flags in the TCP header will be sent - zeroed-out in all of the subsequent packets in this connection. This - will accomplish the firewall's purpose of blocking ECN, while - allowing the TCP connection to proceed efficiently and smoothly - without using ECN. - - If for some reason the ECN-related flags in the TCP header aren't - zeroed in the initial SYN packet from host A to host B, but the - firewall does zero those flags in the responding SYN/ACK packet from - host B to host A, the consequence could be to subvert end-to-end - congestion control for this connection. The ECN specifications were - not written to ensure robust operation in the presence of the - arbitrary zeroing of TCP header fields within the network, because it - didn't occur to the authors of the protocol at the time that this was - a requirement in protocol design. - - Similarly, if the ECN-related flags in the TCP header are not zeroed - in either the SYN or the SYN/ACK packet, but the firewall does zero - these flags in later packets in that TCP connection, this could also - have the unintended consequence of subverting end-to-end congestion - control for this connection. The details of these possible - interactions are not crucial for this document, and are described in - the appendix. However, our conclusion, both for the ECN-related - flags in the TCP header and for future uses of the four other bits in - the TCP Reserved field, would be that if it is required for firewalls - to be able to block the use of a new function being added to a - protocol, this is best addressed in the initial design phase by joint - cooperation between the firewall community and the protocol - designers. - -7. 
Conclusions - - Our conclusion is that it is not conformant with current standards - for a firewall, load-balancer, or web-server to respond with a reset - to a TCP SYN packet simply because the packet uses flags in the TCP - Reserved field. More specifically, it is not conformant to respond - with a reset to a TCP SYN packet simply because the ECE and CWR flags - are set in the TCP header. We would urge vendors to make available - - - -Floyd Best Current Practice [Page 12] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - fixes for any nonconformant code, and we would urge ISPs and system - administrators to deploy these fixes in their web servers and - firewalls. - - We don't claim that it violates any standard for middleboxes to - arbitrarily drop packets that use flags in the TCP Reserved field, - but we would argue that behavior of this kind, without a clear method - for informing the end-nodes of the reasons for these actions, could - present a significant obstacle to the development of TCP. More work - is clearly needed to reconcile the conflicting interests of providing - security while at the same time allowing the careful evolution of - Internet protocols. - -8. Acknowledgements - - This document results from discussions and activity by many people, - so I will refrain from trying to acknowledge all of them here. My - specific thanks go to Ran Atkinson, Steve Bellovin, Alex Cannara, - Dennis Ferguson, Ned Freed, Mark Handley, John Klensin, Allison - Mankin, Jitendra Padhye, Vern Paxson, K. K. Ramakrishnan, Jamal Hadi - Salim, Pekka Savola, Alex Snoeren, and Dan Wing for feedback on this - document, and to the End-to-End Research Group, the IAB, and the IESG - for discussion of these issues. I thank Mikael Olsson for numerous - rounds of feedback. I also thank the members of the Firewall Wizards - mailing list for feedback (generally of disagreement) on an earlier - draft of this document. 
- - Email discussions with a number of people, including Dax Kelson, - Alexey Kuznetsov, Kacheong Poon, David Reed, Jamal Hadi-Salim, and - Venkat Venkatsubra, have addressed the issues raised by non- - conformant equipment in the Internet that does not respond to TCP SYN - packets with the ECE and CWR flags set. We thank Mark Handley, - Jitendra Padhye, and others for discussions on the TCP initialization - procedures. - -9. Normative References - - [RFC793] Postel, J., "Transmission Control Protocol - DARPA - Internet Program Protocol Specification", STD 7, RFC 793, - September 1981. - - [RFC1122] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC1812] Baker, F., "Requirements for IP Version 4 Routers", RFC - 1812, June 1995. - - - - - -Floyd Best Current Practice [Page 13] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - [RFC2026] Bradner, S., "The Internet Standards Process -- Revision - 3", BCP 9, RFC 2026, October 1996. - - [RFC2481] Ramakrishnan, K. and S. Floyd, "A Proposal to add Explicit - Congestion Notification (ECN) to IP", RFC 2481, January - 1999. - - [RFC2873] Xiao, X., Hannan, A., Paxson, V., and E. Crabbe, "TCP - Processing of the IPv4 Precedence Field", RFC 2873, June - 2000. - - [RFC2979] Freed, N., "Behavior of and Requirements for Internet - Firewalls", RFC 2979, October 2000. - - [RFC3168] Ramakrishnan, K., Floyd, S. and D. Black, "The Addition - of Explicit Congestion Notification (ECN) to IP", RFC - 3168, September 2001. - -10. Informative References - - [B01] Bellovin, S., "A "Reason" Field for ICMP "Administratively - Prohibited" Messages", Work in Progress. - - [Cou01] Scott Courtney, Why Can't My 2.4 Kernel See Some Web - Sites?, Enterprise Linux Today, Apr 17, 2001. URL - "http://eltoday.com/article.php3?ltsn=2001-04-17-001-14- - PS". - - [ECN] "The ECN Web Page", URL - "http://www.icir.org/floyd/ecn.html". 
- - [FIXES] ECN-under-Linux Unofficial Vendor Support Page, URL - "http://gtf.org/garzik/ecn/". - - [Floyd00] Sally Floyd, Negotiating ECN-Capability in a TCP - connection, October 2, 2000, email to the end2end-interest - mailing list. URL - "http://www.icir.org/floyd/papers/ECN.Oct2000.txt". - - [Kelson00] Dax Kelson, note sent to the Linux kernel mailing list, - September 10, 2000. - - [QUESO] Toby Miller, Intrusion Detection Level Analysis of Nmap - and Queso, August 30, 2000. URL - "http://www.securityfocus.com/infocus/1225". - - - - - - -Floyd Best Current Practice [Page 14] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - [Ste94] Stevens, W., "TCP/IP Illustrated, Volume 1: The - Protocols", Addison-Wesley, 1994. - - [SFO01] FreeBSD ipfw Filtering Evasion Vulnerability, Security - Focus Online, January 23, 2001. URL - "http://www.securityfocus.com/bid/2293". - - [TBIT] Jitendra Padhye and Sally Floyd, Identifying the TCP - Behavior of Web Servers, SIGCOMM, August 2001. URL - "http://www.icir.org/tbit/". - -11. Security Considerations - - One general risk of using Reserved flags in TCP is the risk of - providing additional information about the configuration of the host - in question. However, TCP is sufficiently loosely specified as it - is, with sufficiently many variants and options, that port-scanning - tools such as Nmap and Queso do rather well in identifying the - configuration of hosts even without the use of Reserved flags. - - The security considerations and all other considerations of a - possible ICMP Destination Unreachable message with the code - "Communication Administratively Prohibited" will be discussed in a - separate document. - - The traditional concern of firewalls is to prevent unauthorized - access to systems, to prevent DoS attacks and other attacks from - subverting the end-user terminal, and to protect end systems from - buggy code. 
We are aware of one security vulnerability reported from - the use of the Reserved flags in the TCP header [SFO01]. A packet - filter intended only to let through packets in established - connections can let pass a packet not in an established connection if - the packet has the ECE flag set in the reserved field. "Exploitation - of this vulnerability may allow for unauthorized remote access to - otherwise protected services." It is also possible that an - implementation of TCP could appear that has buggy code associated - with the use of Reserved flags in the TCP header, but we are not - aware of any such implementation at the moment. - - Unfortunately, misconceived security concerns are one of the reasons - for the problems described in this document in the first place. An - August, 2000, article on "Intrusion Detection Level Analysis of Nmap - and Queso" described the port-scanning tool Queso as sending SYN - packets with the last two Reserved bits in the TCP header set, and - said the following: "[QUESO] is easy to identify, if you see [these - two Reserved bits and the SYN bit] set in the 13th byte of the TCP - header, you know that someone has malicious intentions for your - network." As is documented on the TBIT Web Page, the middleboxes - - - -Floyd Best Current Practice [Page 15] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - that block SYNs using the two ECN-related Reserved flags in the TCP - header do not block SYNs using other Reserved flags in the TCP - header. - - One lesson appears to be that anyone can effectively "attack" a new - TCP function simply by using that function in their publicly- - available port-scanning tool, thus causing middleboxes of all kinds - to block the use of that function. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Floyd Best Current Practice [Page 16] - -RFC 3360 Inappropriate TCP Resets August 2002 - - -12. 
Appendix: The Complications of Modifying Packet Headers - - In this section we first show that if the ECN-related flags in the - TCP header aren't zeroed in the initial SYN packet from Host A to - Host B, but are zeroed in the responding SYN/ACK packet from Host B - to Host A, the consequence could be to subvert end-to-end congestion - control for this connection. - - Assume that the ECN-setup SYN packet from Host A is received by Host - B, but the ECN-setup SYN/ACK from Host B is modified by a firewall in - the network to a non-ECN-setup SYN/ACK, as in Figure 3 below. RFC - 3168 does not specify that the ACK packet in any way should echo the - TCP flags received in the SYN/ACK packet, because it had not occurred - to the designers that these flags would be modified within the - network. - - Host A Firewall or router Host B - ----------------------------------------------------------------- - Sends ECN-setup SYN ----------------> Receives ECN-setup SYN - <- Sends ECN-setup SYN/ACK - <- Firewall zeros flags - Receives non-ECN-setup SYN/ACK - Sends ACK and data ----------------> Receives ACK and data - <- Sends data packet with ECT - <- Router sets CE - Receives data packet with ECT and CE - - Figure 3: ECN-related flags in SYN/ACK packet cleared in network. - - Following RFC 3168, Host A has received a non-ECN-setup SYN/ACK - packet, and must not set ECT on data packets. Host B, however, does - not know that Host A has received a non-ECN-setup SYN/ACK packet, and - Host B may set ECT on data packets. RFC 3168 does not require Host A - to respond properly to data packets received from Host B with the ECT - and CE codepoints set in the IP header. Thus, the data sender, Host - B, might never be informed about the congestion encountered in the - network, thus violating end-to-end congestion control. 
- - Next we show that if the ECN-related flags in the TCP header are not - zeroed in either the SYN or the SYN/ACK packet, but the firewall does - zero these flags in later packets in that TCP connection, this could - also have the unintended consequence of subverting end-to-end - congestion control for this connection. Figure 4 shows this - scenario. - - - - - - - -Floyd Best Current Practice [Page 17] - -RFC 3360 Inappropriate TCP Resets August 2002 - - - Host A Firewall or router Host B - ----------------------------------------------------------------- - Sends ECN-setup SYN ----------------> Receives ECN-setup SYN - Receives ECN-setup SYN/ACK <------------ Sends ECN-setup SYN/ACK - Sends ACK and data ----------------> Receives ACK and data - <- Sends data packet with ECT - <- Router sets CE - Receives data packet with ECT and CE - Sends ACK with ECE -> - Firewall resets ECE -> - Receives plain ACK - - Figure 4: ECN-related flags in ACK packet cleared in network. - - The ECN-related flags are not changed by the network in the ECN-setup - SYN and SYN/ACK packets for the scenario in Figure 4, and both end - nodes are free to use ECN, and to set the ECT flag in the ECN field - in the IP header. However, if the firewall clears the ECE flag in - the TCP header in ACK packets from Node A to Node B, then Node B will - never hear about the congestion that its earlier data packets - encountered in the network, thus subverting end-to-end congestion - control for this connection. - - Additional complications will arise when/if the use of the ECN nonce - in TCP becomes standardized in the IETF [RFC3168], as this could - involve the specification of an additional flag from the TCP Reserved - field for feedback from the TCP data receiver to the TCP data sender. 
- The primary motivation for the ECN nonce is to allow mechanisms for - the data sender to verify that network elements are not erasing the - CE codepoint, and that data receivers are properly reporting to the - sender the receipt of packets with the CE codepoint set. - -13. IANA Considerations - - There are no IANA considerations in this document. - -14. Author's Address - - Sally Floyd - ICIR (ICSI Center for Internet Research) - - Phone: +1 (510) 666-2989 - EMail: floyd@icir.org - URL: http://www.icir.org/floyd/ - - - - - - - -Floyd Best Current Practice [Page 18] - -RFC 3360 Inappropriate TCP Resets August 2002 - - -15. Full Copyright Statement - - Copyright (C) The Internet Society (2002). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Floyd Best Current Practice [Page 19] - diff --git a/kernel/picotcp/RFC/rfc3366.txt b/kernel/picotcp/RFC/rfc3366.txt deleted file mode 100644 index 03753a3..0000000 --- a/kernel/picotcp/RFC/rfc3366.txt +++ /dev/null @@ -1,1515 +0,0 @@ - - - - - - -Network Working Group G. Fairhurst -Request for Comments: 3366 University of Aberdeen -BCP: 62 L. Wood -Category: Best Current Practice Cisco Systems Ltd - August 2002 - - - Advice to link designers on link Automatic Repeat reQuest (ARQ) - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2002). All Rights Reserved. - -Abstract - - This document provides advice to the designers of digital - communication equipment and link-layer protocols employing link-layer - Automatic Repeat reQuest (ARQ) techniques. This document presumes - that the designers wish to support Internet protocols, but may be - unfamiliar with the architecture of the Internet and with the - implications of their design choices for the performance and - efficiency of Internet traffic carried over their links. - - ARQ is described in a general way that includes its use over a wide - range of underlying physical media, including cellular wireless, - wireless LANs, RF links, and other types of channel. 
This document - also describes issues relevant to supporting IP traffic over - physical-layer channels where performance varies, and where link ARQ - is likely to be used. - - - - - - - - - - - - - - - - -Fairhurst & Wood Best Current Practice [Page 1] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - -Table of Contents - - 1. Introduction. . . . . . . . . . . . . . . . . . . . . . . . .2 - 1.1 Link ARQ. . . . . . . . . . . . . . . . . . . . . . . . . . .4 - 1.2 Causes of Packet Loss on a Link . . . . . . . . . . . . . . .5 - 1.3 Why Use ARQ?. . . . . . . . . . . . . . . . . . . . . . . . .6 - 1.4 Commonly-used ARQ Techniques. . . . . . . . . . . . . . . . .7 - 1.4.1 Stop-and-wait ARQ . . . . . . . . . . . . . . . . . . . . . .7 - 1.4.2 Sliding-Window ARQ. . . . . . . . . . . . . . . . . . . . . .7 - 1.5 Causes of Delay Across a Link . . . . . . . . . . . . . . . .8 - 2. ARQ Persistence . . . . . . . . . . . . . . . . . . . . . . 10 - 2.1 Perfectly-Persistent (Reliable) ARQ Protocols . . . . . . . 10 - 2.2 High-Persistence (Highly-Reliable) ARQ Protocols. . . . . . 12 - 2.3 Low-Persistence (Partially-Reliable) ARQ Protocols. . . . . 13 - 2.4 Choosing Your Persistency . . . . . . . . . . . . . . . . . 13 - 2.5 Impact of Link Outages. . . . . . . . . . . . . . . . . . . 14 - 3. Treatment of Packets and Flows. . . . . . . . . . . . . . . 15 - 3.1 Packet Ordering . . . . . . . . . . . . . . . . . . . . . . 15 - 3.2 Using Link ARQ to Support Multiple Flows. . . . . . . . . . 16 - 3.3 Differentiation of Link Service Classes . . . . . . . . . . 17 - 4. Conclusions . . . . . . . . . . . . . . . . . . . . . . . . 19 - 5. Security Considerations . . . . . . . . . . . . . . . . . . 21 - 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . 21 - 7. Acknowledgements. . . . . . . . . . . . . . . . . . . . . . 22 - 8. References. . . . . . . . . . . . . . . . . . . . . . . . . 22 - 8.1 Normative References. . . . . . . . . . . . . . . . . . . . 
22 - 8.2 Informative References. . . . . . . . . . . . . . . . . . . 23 - 9. Authors' Addresses. . . . . . . . . . . . . . . . . . . . . 26 - 10. Full Copyright Statement. . . . . . . . . . . . . . . . . . 27 - -1. Introduction - - IP, the Internet Protocol [RFC791], forms the core protocol of the - global Internet and defines a simple "connectionless" packet-switched - network. Over the years, Internet traffic using IP has been carried - over a wide variety of links, of vastly different capacities, delays - and loss characteristics. In the future, IP traffic can be expected - to continue to be carried over a very wide variety of new and - existing link designs, again of varied characteristics. - - A companion document [DRAFTKARN02] describes the general issues - associated with link design. This document should be read in - conjunction with that and with other documents produced by the - Performance Implications of Link Characteristics (PILC) IETF - workgroup [RFC3135, RFC3155]. - - - - - - -Fairhurst & Wood Best Current Practice [Page 2] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - This document is intended for three distinct groups of readers: - - a. Link designers wishing to configure (or tune) a link for the IP - traffic that it will carry, using standard link-layer mechanisms - such as the ISO High-level Data Link Control (HDLC) [ISO4335a] or - its derivatives. - - b. Link implementers who may wish to design new link mechanisms that - perform well for IP traffic. - - c. The community of people using or developing TCP, UDP and related - protocols, who may wish to be aware of the ways in which links - can operate. - - The primary audiences are intended to be groups (a) and (b). Group - (c) should not need to be aware of the exact details of an ARQ scheme - across a single link, and should not have to consider such details - for protocol implementations; much of the Internet runs across links - that do not use any form of ARQ. 
However, the TCP/IP community does - need to be aware that the IP protocol operates over a diverse range - of underlying subnetworks. This document may help to raise that - awareness. - - Perfect reliability is not a requirement for IP networks, nor is it a - requirement for links [DRAFTKARN02]. IP networks may discard packets - due to a variety of reasons entirely unrelated to channel errors, - including lack of queuing space, congestion management, faults, and - route changes. It has long been widely understood that perfect - end-to-end reliability can be ensured only at, or above, the - transport layer [SALT81]. - - Some familiarity with TCP, the Transmission Control Protocol [RFC793, - STE94], is presumed here. TCP provides a reliable byte-stream - transport service, building upon the best-effort datagram delivery - service provided by the Internet Protocol. TCP achieves this by - dividing data into TCP segments, and transporting these segments in - IP packets. TCP guarantees that a TCP session will retransmit the - TCP segments contained in any data packets that are lost along the - Internet path between endhosts. TCP normally performs retransmission - using its Fast Retransmit procedure, but if the loss fails to be - detected (or retransmission is unsuccessful), TCP falls back to a - Retransmission Time Out (RTO) retransmission using a timer [RFC2581, - RFC2988]. (Link protocols also implement timers to verify integrity - of the link, and to assist link ARQ.) TCP also copes with any - duplication or reordering introduced by the IP network. There are a - number of variants of TCP, with differing levels of sophistication in - - - - - -Fairhurst & Wood Best Current Practice [Page 3] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - their procedures for handling loss recovery and congestion avoidance. - Far from being static, the TCP protocol is itself subject to ongoing - gradual refinement and evolution, e.g., [RFC2488, RFC2760]. 
- - Internet networks may reasonably be expected to carry traffic from a - wide and evolving range of applications. Not all applications - require or benefit from using the reliable service provided by TCP. - In the Internet, these applications are carried by alternate - transport protocols, such as the User Datagram Protocol (UDP) - [RFC768]. - -1.1 Link ARQ - - At the link layer, ARQ operates on blocks of data, known as frames, - and attempts to deliver frames from the link sender to the link - receiver over a channel. The channel provides the physical-layer - connection over which the link protocol operates. In its simplest - form, a channel may be a direct physical-layer connection between the - two link nodes (e.g., across a length of cable or over a wireless - medium). ARQ may also be used edge-to-edge across a subnetwork, - where the path includes more than one physical-layer medium. Frames - often have a small fixed or maximum size for convenience of - processing by Medium-Access Control (MAC) and link protocols. This - contrasts with the variable lengths of IP datagrams, or 'packets'. A - link-layer frame may contain all, or part of, one or more IP packets. - A link ARQ mechanism relies on an integrity check for each frame - (e.g., strong link-layer CRC [DRAFTKARN02]) to detect channel errors, - and uses a retransmission process to retransmit lost (i.e., missing - or corrupted) frames. - - Links may be full-duplex (allowing two-way communication over - separate forward and reverse channels), half-duplex (where two-way - communication uses a shared forward and reverse channel, e.g., IrDA, - IEEE 802.11) or simplex (where a single channel permits communication - in only one direction). - - ARQ requires both a forward and return path, and therefore link ARQ - may be used over links that employ full- or half-duplex links. 
When - a channel is shared between two or more link nodes, a link MAC - protocol is required to ensure all nodes requiring transmission can - gain access to the shared channel. Such schemes may add to the delay - (jitter) associated with transmission of packet data and ARQ control - frames. - - When using ARQ over a link where the probability of frame loss is - related to the frame size, there is an optimal frame size for any - specific target channel error rate. To allow for efficient use of - the channel, this maximum link frame size may be considerably lower - - - -Fairhurst & Wood Best Current Practice [Page 4] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - than the maximum IP datagram size specified by the IP Maximum - Transmission Unit (MTU). Each frame will then contain only a - fraction of an IP packet, and transparent implicit fragmentation of - the IP datagram is used [DRAFTKARN02]. A smaller frame size - introduces more frame header overhead per payload byte transported. - - Explicit network-layer IP fragmentation is undesirable for a variety - of reasons, and should be avoided [KEN87, DRAFTKARN02]. Its use can - be minimized with use of Path MTU discovery [RFC1191, RFC1435, - RFC1981]. - - Another way to reduce the frame loss rate (or reduce transmit signal - power for the same rate of frame loss) is to use coding, e.g., - Forward Error Correction (FEC) [LIN93]. - - FEC is commonly included in the physical-layer design of wireless - links and may be used simultaneously with link ARQ. FEC schemes - which combine modulation and coding also exist, and may also be - adaptive. Hybrid ARQ [LIN93] combines adaptive FEC with link ARQ - procedures to reduce the probability of loss of retransmitted frames. - Interleaving may also be used to reduce the probability of frame loss - by dispersing the occurrence of errors more widely in the channel to - improve error recovery; this adds further delay to the channel's - existing propagation delay. 
- - The document does not consider the use of link ARQ to support a - broadcast or multicast service within a subnetwork, where a link may - send a single packet to more than one recipient using a single link - transmit operation. Although such schemes are supported in some - subnetworks, they raise a number of additional issues not examined - here. - - Links supporting stateful reservation-based quality of service (QoS) - according to the Integrated Services (intserv) model are also not - explicitly discussed. - -1.2 Causes of Packet Loss on a Link - - Not all packets sent to a link are necessarily received successfully - by the receiver at the other end of the link. There are a number of - possible causes of packet loss. These may occur as frames travel - across a link, and include: - - a. Loss due to channel noise, often characterised by random frame - loss. Channel noise may also result from other traffic degrading - channel conditions. - - - - - -Fairhurst & Wood Best Current Practice [Page 5] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - b. Frame loss due to channel interference. This interference can - be random, structured, and in some cases even periodic. - - c. A link outage, a period during which the link loses all or - virtually all frames, until the link is restored. This is a - common characteristic of some types of link, e.g., mobile cellular - radio. - - Other forms of packet loss are not related to channel conditions, - but include: - - i. Loss of a frame transmitted in a shared channel where a - contention-aware MAC protocol is used (e.g., due to collision). - Here, many protocols require that retransmission is deferred to - promote stability of the shared channel (i.e., prevent excessive - channel contention). This is discussed further in section 1.5. - - ii. Packet discards due to congestion. 
Queues will eventually - overflow as the arrival rate of new packets to send continues to - exceed the outgoing packet transmission rate over the link. - - iii. Loss due to implementation errors, including hardware faults - and software errors. This is recognised as a common cause of - packet corruption detected in the endhosts [STONE00]. - - The rate of loss and patterns of loss experienced are functions of - the design of the physical and link layers. These vary significantly - across different link configurations. The performance of a specific - implementation may also vary considerably across the same link - configuration when operated over different types of channel. - -1.3 Why Use ARQ? - - Reasons that encourage considering the use of ARQ include: - - a. ARQ across a single link has a faster control loop than TCP's - acknowledgement control loop, which takes place over the longer - end-to-end path over which TCP must operate. It is therefore - possible for ARQ to provide more rapid retransmission of TCP - segments lost on the link, at least for a reasonable number of - retries [RFC3155, SALT81]. - - b. Link ARQ can operate on individual frames, using implicit - transparent link fragmentation [DRAFTKARN02]. Frames may be - much smaller than IP packets, and repetition of smaller frames - containing lost or errored parts of an IP packet may improve the - efficiency of the ARQ process and the efficiency of the link. - - - - -Fairhurst & Wood Best Current Practice [Page 6] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - A link ARQ procedure may be able to use local knowledge that is not - available to endhosts, to optimise delivery performance for the - current link conditions. This information can include information - about the state of the link and channel, e.g., knowledge of the - current available transmission rate, the prevailing error - environment, or available transmit power in wireless links. 
- -1.4 Commonly-used ARQ Techniques - - A link ARQ protocol uses a link protocol mechanism to allow the - sender to detect lost or corrupted frames and to schedule - retransmission. Detection of frame loss may be via a link protocol - timer, by detecting missing positive link acknowledgement frames, by - receiving explicit negative acknowledgement frames and/or by polling - the link receiver status. - - Whatever mechanisms are chosen, there are two easily-described - categories of ARQ retransmission process that are widely used: - -1.4.1 Stop-And-Wait ARQ - - A sender using stop-and-wait ARQ (sometimes known as 'Idle ARQ' - [LIN93]) transmits a single frame and then waits for an - acknowledgement from the receiver for that frame. The sender then - either continues transmission with the next frame, or repeats - transmission of the same frame if the acknowledgement indicates that - the original frame was lost or corrupted. - - Stop-and-wait ARQ is simple, if inefficient, for protocol designers - to implement, and therefore popular, e.g., tftp [RFC1350] at the - transport layer. However, when stop-and-wait ARQ is used in the link - layer, it is well-suited only to links with low bandwidth-delay - products. This technique is not discussed further in this document. - -1.4.2 Sliding-Window ARQ - - A protocol using sliding-window link ARQ [LIN93] numbers every frame - with a unique sequence number, according to a modulus. The modulus - defines the numbering base for frame sequence numbers, and the size - of the sequence space. The largest sequence number value is viewed - by the link protocol as contiguous with the first (0), since the - numbering space wraps around. 
- - - - - - - - - -Fairhurst & Wood Best Current Practice [Page 7] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - TCP is itself a sliding-window protocol at the transport layer - [STE94], so similarities between a link-interface-to-link-interface - protocol and end-to-end TCP may be recognisable. A sliding-window - link protocol is much more complex in implementation than the simpler - stop-and-wait protocol described in the previous section, - particularly if per-flow ordering is preserved. - - At any time the link sender may have a number of frames outstanding - and awaiting acknowledgement, up to the space available in its - transmission window. A sufficiently-large link sender window - (equivalent to or greater than the number of frames sent, or larger - than the bandwidth*delay product capacity of the link) permits - continuous transmission of new frames. A smaller link sender window - causes the sender to pause transmission of new frames until a timeout - or a control frame, such as an acknowledgement, is received. When - frames are lost, a larger window, i.e., more than the link's - bandwidth*delay product, is needed to allow continuous operation - while frame retransmission takes place. - - The modulus numbering space determines the size of the frame header - sequence number field. This sequence space needs to be larger than - the link window size and, if using selective repeat ARQ, larger than - twice the link window size. For continuous operation, the sequence - space should be larger than the product of the link capacity and the - link ARQ persistence (discussed in section 2), so that in-flight - frames can be identified uniquely. - - As with TCP, which provides sliding-window delivery across an entire - end-to-end path rather than across a single link, there are a large - number of variations on the basic sliding-window implementation, with - increased complexity and sophistication to make them suitable for - various conditions. 
Selective Repeat (SR), also known as Selective - Reject (SREJ), and Go-Back-N, also known as Reject (REJ), are - examples of ARQ techniques using protocols implementing sliding - window ARQ. - -1.5 Causes of Delay Across a Link - - Links and link protocols contribute to the total path delay - experienced between communicating applications on endhosts. Delay - has a number of causes, including: - - a. Input packet queuing and frame buffering at the link head before - transmission over the channel. - - b. Retransmission back-off, an additional delay introduced for - retransmissions by some MAC schemes when operating over a shared - channel to prevent excessive contention. A high level of - - - -Fairhurst & Wood Best Current Practice [Page 8] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - contention may otherwise arise, if, for example, a set of link - receivers all retransmitted immediately after a collision on a - busy shared channel. Link ARQ protocols designed for shared - channels may select a backoff delay, which increases with the - number of attempts taken to retransmit a frame; analogies can be - drawn with end-to-end TCP congestion avoidance at the transport - layer [RFC2581]. In contrast, a link over a dedicated channel - (which has capacity pre-allocated to the link) may send a - retransmission at the earliest possible time. - - c. Waiting for access to the allocated channel when the channel is - shared. There may be processing or protocol-induced delay - before transmission takes place [FER99, PAR00]. - - d. Frame serialisation and transmission processing. These are - functions of frame size and the transmission speed of the link. - - e. Physical-layer propagation time, limited by the speed of - transmission of the signal in the physical medium of the - channel. - - f. Per-frame processing, including the cost of QoS scheduling, - encryption, FEC and interleaving. 
FEC and interleaving also add - substantial delay and, in some cases, additional jitter. Hybrid - link ARQ schemes [LIN93], in particular, may incur significant - receiver processing delay. - - g. Packet processing, including buffering frame contents at the - link receiver for packet reassembly, before onward transmission - of the packet. - - When link ARQ is used, steps (b), (c), (d), (e), and (f) may be - repeated a number of times, every time that retransmission of a frame - occurs, increasing overall delay for the packet carried in part by - the frame. Adaptive ARQ schemes (e.g., hybrid ARQ using adaptive FEC - codes) may also incur extra per-frame processing for retransmitted - frames. - - It is important to understand that applications and transport - protocols at the endhosts are unaware of the individual delays - contributed by each link in the path, and only see the overall path - delay. Application performance is therefore determined by the - cumulative delay of the entire end-to-end Internet path. This path - may include an arbitrary or even a widely-fluctuating number of - links, where any link may or may not use ARQ. As a result, it is not - possible to state fixed limits on the acceptable delay that a link - can add to a path; other links in the path will add an unknown delay. - - - - -Fairhurst & Wood Best Current Practice [Page 9] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - -2. ARQ Persistence - - ARQ protocols may be characterised by their persistency. Persistence - is the willingness of the protocol to retransmit lost frames to - ensure reliable delivery of traffic across the link. - - A link's retransmission persistency defines how long the link is - allowed to delay a packet, in an attempt to transmit all the frames - carrying the packet and its content over the link, before giving up - and discarding the packet. 
This persistency can normally be measured - in milliseconds, but may, if the link propagation delay is specified, - be expressed in terms of the maximum number of link retransmission - attempts permitted. The latter does not always map onto an exact - time limit, for the reasons discussed in section 1.5. - - An example of a reliable link protocol that is perfectly persistent - is the ISO HDLC protocol in the Asynchronous Balanced Mode (ABM) - [ISO4335a]. - - A protocol that only retransmits a number of times before giving up - is less persistent, e.g., Ethernet [FER99], IEEE 802.11, or GSM RLP - [RFC2757]. Here, lower persistence also ensures stability and fair - sharing of a shared channel, even when many senders are attempting - retransmissions. - - TCP, SCTP [RFC2960] and a number of applications using UDP (e.g., - tftp) implement their own end-to-end reliable delivery mechanisms. - Many TCP and UDP applications, e.g., streaming multimedia, benefit - from timely delivery from lower layers with sufficient reliability, - rather than perfect reliability with increased link delays. - -2.1 Perfectly-Persistent (Reliable) ARQ Protocols - - A perfectly-persistent ARQ protocol is one that attempts to provide a - reliable service, i.e., in-order delivery of packets to the other end - of the link, with no missing packets and no duplicate packets. The - perfectly-persistent ARQ protocol will repeat a lost or corrupted - frame an indefinite (and potentially infinite) number of times until - the frame is successfully received. - - If traffic is going no further than across one link, and losses do - not occur within the endhosts, perfect persistence ensures - reliability between the two link ends without requiring any - higher-layer protocols. This reliability can become - counterproductive for traffic traversing multiple links, as it - duplicates and interacts with functionality in protocol mechanisms at - higher layers [SALT81].
- - - - -Fairhurst & Wood Best Current Practice [Page 10] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - Arguments against the use of perfect persistence for IP traffic - include: - - a. Variable link delay; the impact of ARQ introduces a degree of - jitter, a function of the physical-layer delay and frame - serialisation and transmission times (discussed in section 1.5), - to all flows sharing a link performing frame retransmission. - - b. Perfect persistence does not provide a clear upper bound on the - maximum retransmission delay for the link. Significant changes - in path delay caused by excessive link retransmissions may lead - to timeouts of TCP retransmission timers, although a high - variance in link delay and the resulting overall path delay may - also cause a large TCP RTO value to be selected [LUD99b, PAR00]. - This will alter TCP throughput, decreasing overall performance, - but, in mitigation, it can also decrease the occurrence of - timeouts due to continued packet loss. - - c. Applications needing perfectly-reliable delivery can implement a - form of perfectly-persistent ARQ themselves, or use a reliable - transport protocol within the endhosts. Implementing perfect - persistence at each link along the path between the endhosts is - redundant, but cannot ensure the same reliability as end-to-end - transport [SALT81]. - - d. Link ARQ should not adversely delay the flow of end-to-end - control information. As an example, the ARQ retransmission of - data for one or more flows should not excessively extend the - protocol control loops. Excessive delay of duplicate TCP - acknowledgements (dupacks [STE94, BAL97]), SACK, or Explicit - Congestion Notification (ECN) indicators will reduce the - responsiveness of TCP flows to congestion events. Similar - issues exist for TCP-Friendly Rate Control (TFRC), where - equation-based congestion control is used with UDP [DRAFTHAN01]. 
- - Perfectly-persistent link protocols that perform unlimited ARQ, i.e., - that continue to retransmit frames indefinitely until the frames are - successfully received, are seldom found in real implementations. - - Most practical link protocols give up retransmission at some point, - but do not necessarily do so with the intention of bounding the ARQ - retransmission persistence. A protocol may, for instance, terminate - retransmission after a link connection failure, e.g., after no frames - have been successfully received within a pre-configured timer period. - The number of times a protocol retransmits a specific frame (or the - maximum number of retransmissions) therefore becomes a function of - many different parameters (ARQ procedure, link timer values, and - procedure for link monitoring), rather than being pre-configured. - - - -Fairhurst & Wood Best Current Practice [Page 11] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - Another common feature of this type of behaviour is that some - protocol implementers presume that, after a link failure, packets - queued to be sent over the link are no longer significant and can be - discarded when giving up ARQ retransmission. - - Examples of ARQ protocols that are perfectly persistent include - ISO/ITU-T LAP-B [ISO7776] and ISO HDLC in the Asynchronous Balanced - Mode (ABM) [ISO4335a], e.g., using Multiple Selective Reject (MSREJ - [ISO4335b]). These protocols will retransmit a frame an unlimited - number of times until receipt of the frame is acknowledged. - -2.2 High-Persistence (Highly-Reliable) ARQ Protocols - - High-persistence ARQ protocols limit the number of times (or number - of attempts) that ARQ may retransmit a particular frame before the - sender gives up on retransmission of the missing frame and moves on - to forwarding subsequent buffered in-sequence frames.
Ceasing - retransmission of a frame does not imply a lack of link connectivity - and does not cause a link protocol state change. - - It has been recommended that a single IP packet should never be - delayed by the network for more than the Maximum Segment Lifetime - (MSL) of 120 seconds defined for TCP [RFC1122]. It is, however, - difficult in practice to bound the maximum path delay of an Internet - path. One case where segment (packet) lifetime may be significant is - where alternate paths of different delays exist between endhosts and - route flapping or flow-unaware traffic engineering is used. Some TCP - packets may follow a short path, while others follow a much longer - path, e.g., using persistent ARQ over a link outage. - - Failure to limit the maximum packet lifetime can result in TCP - sequence numbers wrapping at high transmission rates, where old data - segments may be confused with newer segments if the sequence number - space has been exhausted and reused in the interim. Some TCP - implementations use the Round Trip Time Measurement (RTTM) - option in TCP packets to remove this ambiguity, using the Protect - Against Wrapped Sequence Numbers (PAWS) algorithm [RFC1323]. - - In practice, the MSL is usually very large compared to the typical - TCP RTO. The calculation of TCP RTO is based on estimated round-trip - path delay [RFC2988]. If the number of link retransmissions causes a - path delay larger than the value of RTO, the TCP retransmission timer - can expire, leading to a timeout and retransmission of a segment - (packet) by the TCP sender. - - - - - - - -Fairhurst & Wood Best Current Practice [Page 12] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - Although high persistency may benefit bulk flows, the additional - delay (and variations in delay) that it introduces may be highly - undesirable for other types of flows.
Being able to treat flows - separately, with different classes of link service, is useful, and is - discussed in section 3. - - Examples of high-persistence ARQ protocols include [BHA97, ECK98, - LUD99a, MEY99]. - -2.3 Low-Persistence (Partially-Reliable) ARQ Protocols - - The characteristics of a link using a low-persistence ARQ protocol - may be summarised as: - - a. The link is not perfectly reliable and does not provide an - absolute guarantee of delivery, i.e., the transmitter will - discard some frames as it 'gives up' before receiving an - acknowledgement of successful transmission across the link. - - b. There is a lowered limit on the maximum added delay that IP - packets will experience when travelling across the link - (typically lower than the TCP path RTO). This reduces - interaction with TCP timers or with UDP-based error-control - schemes. - - c. The link offers a low bound for the time that retransmission for - any one frame can block completed transmission and assembly of - other correctly and completely-received IP packets whose - transmission was begun before the missing frame was sent. - Limiting delay avoids aggravating contention or interaction - between different packet flows (see also section 3.2). - - Examples of low-persistence ARQ protocols include [SAM96, WARD95, - CHE00]. - -2.4 Choosing Your Persistency - - The TCP Maximum RTO is an upper limit on the maximum time that TCP - will wait until it performs a retransmission. Most TCP - implementations will generally have a TCP RTO of at least several - times the path delay. - - Setting a lower link persistency (e.g., of the order 2-5 - retransmission attempts) reduces potential interaction with the TCP - RTO timer, and may therefore reduce the probability of duplicate - copies of the same packet being present in the link transmit buffer - under some patterns of loss. 
- - - - -Fairhurst & Wood Best Current Practice [Page 13] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - A link using a physical layer with a low propagation delay may allow - tens of retransmission attempts to deliver a single frame, and still - satisfy a bound for (b) in section 2.3. In this case, a low delay is - defined as being where the total packet transmission time across the - link is much less than 100 ms (a common value for the granularity of - the internal TCP system timer). - - A packet may traverse a number of successive links on its total end- - to-end path. This is therefore an argument for much lower - persistency on any individual link, as delay due to persistency is - accumulated along the path taken by each packet. - - Some implementers have chosen a lower persistence, falling back on - the judgement of TCP or of a UDP application to retransmit any - packets that are not recovered by the link ARQ protocol. - -2.5 Impact of Link Outages - - Links experiencing persistent loss, where many consecutive frames are - corrupted over an extended time, may also need to be considered. - Examples of channel behaviour leading to link outages include fading, - roaming, and some forms of interference. During the loss event, - there is an increased probability that a retransmission request may - be corrupted, and/or an increased probability that a retransmitted - frame will also be lost. This type of loss event is often known as a - "transient outage". - - If the transient outage extends for longer than the TCP RTO, the TCP - sender will also perform transport-layer retransmission. At the same - time, the TCP sender will reduce its congestion window (cwnd) to 1 - segment (packet), recalculate its RTO, and wait for an ACK packet. - If no acknowledgement is received, TCP will retransmit again, up to a - retry limit. TCP only determines that the outage is over (i.e., that - path capacity is restored) by receipt of an ACK. 
If link ARQ - protocol persistency causes a link in the path to discard the ACK, - the TCP sender must wait for the next RTO retransmission and its ACK - to learn that the link is restored. This can be many seconds after - the end of the transient outage. - - When a link layer is able to differentiate a set of link service - classes (see section 3.3), a link ARQ persistency longer than the - largest link loss event may benefit a TCP session. This would allow - TCP to rapidly restore transmission without the need to wait for a - retransmission time out, generally improving TCP performance in the - face of transient outages. Implementation of such schemes remains a - research issue. - - - - - -Fairhurst & Wood Best Current Practice [Page 14] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - When an outage occurs for a sender sharing a common channel with - other nodes, uncontrolled high persistence can continue to consume - transmission resources for the duration of the outage. This may be - undesirable, since it reduces the capacity available for other nodes - sharing the channel, which do not necessarily experience the same - outage. These nodes could otherwise use the channel for more - productive transfers. The persistence is often limited by another - controlling mechanism in such cases. To counter such contention - effects, ARQ protocols may delay retransmission requests, or defer - the retransmission of requested frames until the outage ends for the - sender. - - An alternate suggested approach for a link layer that is able to - identify separate flows is to use low link persistency (section 2.3) - along with a higher-layer mechanism, for example one that attempts to - deliver one packet (or whole TCP segment) per TCP flow after a loss - event [DRAFTKARN02]. This is intended to ensure that TCP - transmission is restored rapidly. Algorithms to implement this - remain an area of research. - -3. 
Treatment of Packets and Flows - -3.1 Packet Ordering - - A common debate is whether a link should be allowed to forward - packets in an order different from that in which they were originally - received at its transmit interface. - - IP networks are not required to deliver all IP packets in order, - although in most cases networks do deliver IP packets in their - original transmission order. Routers supporting class-based queuing - do reorder received packets, by reordering packets in different - flows, but these usually retain per-flow ordering. - - Policy-based queuing, allowing fairer access to the link, may also - reorder packets. There is still much debate on optimal algorithms, - and on optimal queue sizes for particular link speeds. This, - however, is not related to the use of link ARQ and applies to any - (potential) bottleneck router. - - Although small amounts of reordering are common in IP networks - [BEN00], significant reordering within a flow is undesirable as it - can have a number of effects: - - a. Reordering will increase packet jitter for real-time - applications. This may lead to application data loss if a small - play-out buffer is used by the receiving application. - - - - -Fairhurst & Wood Best Current Practice [Page 15] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - b. Reordering will interleave arrival of TCP segments, leading to - generation of duplicate ACKs (dupacks), leading to assumptions - of loss. Reception of an ACK followed by a sequence of three - identical dupacks causes the TCP sender to trigger fast - retransmission and recovery, a form of congestion avoidance, - since TCP always presumes that packet loss is due to congestion - [RFC2581, STE94]. This reduces TCP throughput efficiency as far - as the application is concerned, although it should not impact - data integrity. 
- - In addition, reordering may negatively impact processing by some - existing poorly-implemented TCP/IP stacks, by leading to unwanted - side-effects in poorly-implemented IP fragment reassembly code, - poorly-implemented IP demultiplexing (filter) code, or in - poorly-implemented UDP applications. - - Ordering effects must also be considered when breaking the end-to-end - paradigm and evaluating transport-layer relays such as split-TCP - implementations or Performance Enhancing Proxies [RFC3135]. - - As with total path delay, TCP and UDP flows are impacted by the - cumulative effect of reordering along the entire path. Link protocol - designers must not assume that their link is the only link - undertaking packet reordering, as some level of reordering may be - introduced by other links along the same path, or by router - processing within the network [BEN00]. Ideally, the link protocol - should not contribute to reordering within a flow, or at least ensure - that it does not significantly increase the level of reordering - within the flow. To achieve this, buffering is required at the link - receiver. The total amount of buffering required is a function of - the link's bandwidth*delay product and the level of ARQ persistency, - and is bounded by the link window size. - - A number of experimental ARQ protocols have allowed out-of-order - delivery [BAL95, SAM96, WARD95]. - -3.2 Using Link ARQ to Support Multiple Flows - - Most links can be expected to carry more than one IP flow at a time. - Some high-capacity links are expected to carry a very large number of - simultaneous flows, often from and to a large number of different - endhosts. With use of multiple applications at an endhost, multiple - flows can be considered the norm rather than the exception, even for - last-hop links.
- - - - - - - -Fairhurst & Wood Best Current Practice [Page 16] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - When packets from several flows are simultaneously in transit within - a link ARQ protocol, ARQ may cause a number of additional effects: - - a. ARQ introduces variable delay (jitter) to a TCP flow sharing a - link with another flow experiencing loss. This additional - delay, introduced by the need for a link to provide in-sequence - delivery of packets, may adversely impact other applications - sharing the link, and can increase the duration of the initial - slow-start period for TCP flows for these applications. This is - significant for short-lived TCP flows (e.g., those used by - HTTP/1.0 and earlier), which spend most of their lives in - slow-start. - - b. ARQ introduces jitter to UDP flows that share a link with - another flow experiencing loss. An end-to-end protocol may not - require reliable delivery for its flows, particularly if it is - supporting a delay-sensitive application. - - c. High-persistence ARQ may delay packets long enough to cause the - premature timeout of another TCP flow's RTO timer, although - modern TCP implementations should not experience this since - their computed RTO values should leave a sufficient margin over - path RTTs to cope with reasonable amounts of jitter. - - Reordering of packets belonging to different flows may be desirable - [LUD99b, CHE00] to achieve fair sharing of the link between - established bulk-data transfer sessions and sessions that transmit - less data, but would benefit from lower link transit delay. - Preserving ordering within each individual flow, to avoid the effects - of reordering described earlier in section 3.1, is worthwhile. 
- -3.3 Differentiation of Link Service Classes - - High ARQ persistency is generally considered unsuitable for many - applications using UDP, where reliable delivery is not always - required and where it may introduce unacceptable jitter, but may - benefit bulk data transfers under certain link conditions. A scheme - that differentiates packet flows into two or more classes, to provide - a different service to each class, is therefore desirable. - - Observation of flow behaviour can tell you which flows are controlled - and congestion-sensitive, or uncontrolled and not, so that you can - treat them differently and ensure fairness. However, this cannot - tell you whether a flow is intended as reliable or unreliable by its - application, or what the application requires for best operation. - - - - - - -Fairhurst & Wood Best Current Practice [Page 17] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - Supporting different link services for different classes of flows - therefore requires that the link is able to distinguish the different - flows from each other. This generally needs an explicit indication - of the class associated with each flow. - - Some potential schemes for indicating the class of a packet include: - - a. Using the Type of Service (ToS) bits in the IP header [RFC791]. - The IETF has replaced these globally-defined bits, which were - not widely used, with the differentiated services model - (diffserv [RFC2475, RFC3260]). In diffserv, each packet carries a - Differentiated Service Code Point (DSCP), which indicates which - one of a set of diffserv classes the flow belongs to. Each - router maps the DSCP value of a received IP packet to one of a - set of Per Hop Behaviours (PHBs) as the packet is processed - within the network. Diffserv uses include policy-based routing, - class-based queuing, and support for other QoS metrics, - including IP packet priority, delay, reliability, and cost. - - b. 
Inspecting the network packet header and viewing the IP protocol - type [RFC791] to gain an idea of the transport protocol used and - thus guess its needs. This is not possible when carrying an - encrypted payload, e.g., using the IP security extensions (IPSec) - with Encapsulating Security Payload (ESP) [RFC2406] payload - encryption. - - c. By inspecting the transport packet header information to view - the TCP or UDP headers and port numbers (e.g., [PAR00, BAL95]). - This is not possible when using payload encryption, e.g., IPSec - with ESP payload encryption [RFC2406], and incurs processing - overhead for each packet sent over the link. - - There are, however, some drawbacks to these schemes: - - i. The ToS/Differentiated Services Code Point (DSCP) values - [RFC2475] may not be set reliably, and may be overwritten by - intermediate routers along the packet's path. These values may - be set by an ISP, and do not necessarily indicate the level of - reliability required by the end application. The link must be - configured with knowledge of the local meaning of the values. - - ii. Tunnelling of traffic (e.g., GRE, MPLS, L2TP, IP-in-IP - encapsulation) can aggregate flows of different transport - classes, complicating individual flow classification with - schemes (b) and (c) and incurring further header processing if - tunnel contents are inspected. - - - - - -Fairhurst & Wood Best Current Practice [Page 18] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - iii. Use of the TCP/UDP port number makes assumptions about - application behaviour and requirements. New applications or - protocols can invalidate these assumptions, as can the use of - e.g., Network Address Port Translation, where port numbers are - remapped [RFC3022]. - - iv. In IPv6, the entire IPv6 header must be parsed to locate the - transport layer protocol, adding complexity to header - inspection. Again, this assumes that IPSec payload encryption - is not used.
- - Despite the difficulties in providing a framework for accurate flow - identification, approach (a) may be beneficial, and is preferable to - adding optimisations that are triggered by inspecting the contents of - specific IP packets. Some such optimisations are discussed in detail - in [LUD99b]. - - Flow management is desirable; clear flow identification increases the - number of tools available for the link designer, and permits more - complex ARQ strategies that may otherwise make misassumptions about - traffic requirements and behaviour when flow identification is not - done. - - Links that are unable to distinguish clearly and safely between - delay-sensitive flows, e.g., real-time multimedia, DNS queries or - telnet, and delay-insensitive flows, e.g., bulk ftp transfers or - reliable multicast file transfer, cannot separate link service - classes safely. All flows should therefore experience the same link - behaviour. - - In general, if separation of flows according to class is not - practicable, a low persistency is best for link ARQ. - -4. Conclusions - - A number of techniques may be used by link protocol designers to - counter the effects of channel errors or loss. One of these - techniques is Automatic Repeat ReQuest, ARQ, which has been and - continues to be used on links that carry IP traffic. An ARQ protocol - retransmits link frames that have been corrupted during transmission - across a channel. Link ARQ may significantly improve the probability - of successful transmission of IP packets over links prone to - occasional frame loss. - - A lower rate of packet loss generally benefits transport protocols - and endhost applications. Applications using TCP generally benefit - from Internet paths with little or no loss and low round trip path - delay. 
This reduces impact on applications, allows more rapid growth - - - -Fairhurst & Wood Best Current Practice [Page 19] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - of TCP's congestion window during slow start, and ensures prompt - reaction to end-to-end protocol exchanges (e.g., retransmission, - congestion indications). Applications using other transport - protocols, e.g., UDP or SCTP, also benefit from low loss and timely - delivery. - - A side-effect of link ARQ is that link transit delay is increased - when frames are retransmitted. At low error rates, many of the - details of ARQ, such as degree of persistence or any resulting - out-of-order delivery, become unimportant. Most frame losses will be - resolved in one or two retransmission attempts, and this is generally - unlikely to cause significant impact to e.g., TCP. However, on - shared high-delay links, the impact of ARQ on other UDP or TCP flows - may lead to unwanted jitter. - - Where error rates are highly variable, high link ARQ persistence may - provide good performance for a single TCP flow. However, - interactions between flows can arise when many flows share capacity - on the same link. A link ARQ procedure that provides flow management - will be beneficial. Lower ARQ persistence may also have merit, and - is preferable for applications using UDP. The reasoning here is that - the link can perform useful work forwarding some complete packets, - and that blocking all flows by retransmitting the frames of a single - packet with high persistence is undesirable. - - During a link outage, interactions between ARQ and multiple flows are - less significant; the ARQ protocol is likely to be equally - unsuccessful in retransmitting frames for all flows. High - persistence may benefit TCP flows, by enabling prompt recovery once - the channel is restored. 
- - Low ARQ persistence is particularly useful where it is difficult or - impossible to classify traffic flows, and hence treat each flow - independently, and where the link capacity can accommodate a large - number of simultaneous flows. - - Link ARQ designers should consider the implications of their design - on the wider Internet. Effects such as increased transit delay, - jitter, and re-ordering are cumulative when performed on multiple - links along an Internet path. It is therefore very hard to say how - many ARQ links may exist in series along an arbitrary Internet path - between endhosts, especially as the path taken and its links may - change over time. - - In summary, when links cannot classify traffic flows and treat them - separately, low persistence is generally desirable; preserving packet - ordering is generally desirable. Extremely high persistence and - perfect persistence are generally undesirable; highly-persistent ARQ - - - -Fairhurst & Wood Best Current Practice [Page 20] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - is a bad idea unless flow classification and detailed and accurate - knowledge of flow requirements make it possible to deploy high - persistency where it will be beneficial. - - There is currently insufficient experience to recommend a specific - ARQ scheme for any class of link. It is also important to realize - that link ARQ is just one method of error recovery, and that other - complementary physical-layer techniques may be used instead of, or - together with, ARQ to improve overall link throughput for IP traffic. - - The choice of potential schemes includes adapting the data rate, - adapting the signal bandwidth, adapting the transmission power, - adaptive modulation, adaptive information redundancy / forward error - control, and interleaving. 
All of these schemes can be used to - improve the received signal energy per bit, and hence reduce error, - frame loss and resulting packet loss rates given specific channel - conditions. - - There is a need for more research to more clearly identify the - importance of and trade-offs between the above issues over various - types of link and over various types of channels. It would be useful - if researchers and implementers clearly indicated the loss model, - link capacity and characteristics, link and end-to-end path delays, - details of TCP, and the number (and details) of flows sharing a link - when describing their experiences. In each case, it is recommended - that specific details of the link characteristics and mechanisms also - be considered; solutions vary with conditions. - -5. Security Considerations - - No security implications have been identified as directly impacting - IP traffic. However, an unreliable link service may adversely impact - some existing link-layer key management distribution protocols if - link encryption is also used over the link. - - Denial-of-service attacks exploiting the behaviour of the link - protocol, e.g., using knowledge of its retransmission behaviour and - propagation delay to cause a particular form of jamming, may be - specific to an individual link scenario. - -6. IANA Considerations - - No assignments from the IANA are required. - - - - - - - - -Fairhurst & Wood Best Current Practice [Page 21] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - -7. Acknowledgements - - Much of what is described here has been developed from a summary of a - subset of the discussions on the archived IETF PILC mailing list. We - thank the contributors to PILC for vigorous debate. - - In particular, the authors would like to thank Spencer Dawkins, Aaron - Falk, Dan Grossman, Merkourios Karaliopoulos, Gary Kenward, Reiner - Ludwig and Jean Tourrilhes for their detailed comments. - -8. 
References - - References of the form RFCnnnn are Internet Request for Comments - (RFC) documents available online at http://www.rfc-editor.org/. - -8.1 Normative References - - [RFC768] Postel, J., "User Datagram Protocol", STD 6, RFC 768, - August 1980. - - [RFC791] Postel, J., "Internet Protocol", STD 5, RFC 791, - September 1981. - - [RFC793] Postel, J., "Transmission Control Protocol", RFC 793, - September 1981. - - [RFC1122] Braden, R., Ed., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC2406] Kent, S. and R. Atkinson, "IP Encapsulating Security - Payload (ESP)", RFC 2406, November 1998. - - [RFC2475] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. - and W. Weiss, "An Architecture for Differentiated - Services", RFC 2475, December 1998. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's - Retransmission Timer", RFC 2988, November 2000. - - [RFC3135] Border, J., Kojo, M., Griner, J., Montenegro, G. and Z. - Shelby, "Performance Enhancing Proxies Intended to - Mitigate Link-Related Degradations", RFC 3135, June - 2001. - - - - - -Fairhurst & Wood Best Current Practice [Page 22] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - [RFC3260] Grossman, D., "New Terminology and Clarifications for - Diffserv", RFC 3260, April 2002. - -8.2 Informative References - - [BAL95] Balakrishnan, H., Seshan, S. and R. H. Katz, - "Improving Reliable Transport and Handoff Performance - in Cellular Wireless Networks", ACM MOBICOM, Berkeley, - 1995. - - [BAL97] Balakrishnan, H., Padmanabhan, V. N., Seshan, S. and - R. H. Katz, "A Comparison of Mechanisms for Improving - TCP Performance over Wireless Links", IEEE/ACM - Transactions on Networking, 5(6), pp. 756-759, 1997. - - [BEN00] Bennett, J. C., Partridge, C. and N. 
Schectman, "Packet - Reordering is Not Pathological Network Behaviour", - IEEE/ACM Transactions on Networking, 7(6), pp. 789-798, - 2000. - - [BHA97] Bhagwat, P., Bhattacharya, P., Krishna A. and S. K. - Tripathi, "Using channel state dependent packet - scheduling to improve TCP throughput over wireless - LANs", ACM/Baltzer Wireless Networks Journal, (3)1, - 1997. - - [CHE00] Cheng, H. S., G. Fairhurst et al., "An Efficient - Partial Retransmission ARQ Strategy with Error Codes - by Feedback Channel", IEE Proceedings - Communications, - (147)5, pp. 263-268, 2000. - - [DRAFTKARN02] Karn, P., Ed., "Advice for Internet Subnetwork - Designers", Work in Progress. - - [DRAFTHAN01] Handley, M., Floyd, S. and J. Widmer, "TCP Friendly - Rate Control (TFRC): Protocol Specification", Work in - Progress. - - [ECK98] Eckhardt, D. A. and P. Steenkiste, "Improving Wireless - LAN Performance via Adaptive Local Error Control", - IEEE ICNP, 1998. - - [FER99] Ferrero, A., "The Eternal Ethernet", Addison-Wesley, - 1999. - - [ISO4335a] HDLC Procedures: Specification for Consolidation of - Elements of Procedures, ISO 4335 and AD/1, - International Standardization Organization, 1985. - - - -Fairhurst & Wood Best Current Practice [Page 23] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - [ISO4335b] HDLC Procedures: Elements of Procedures, Amendment 4: - Multi-Selective Reject Option, ISO 4335/4, - International Standards Organization, 1991. - - [ISO7776] Specification for X.25 LAPB-Compatible DTE Data Link - Procedures, ISO 4335/4, International Standards - Organization, 1985. - - [KEN87] Kent, C. A. and J. C. Mogul, "Fragmentation - Considered Harmful", Proceedings of ACM SIGCOMM 1987, - ACM Computer Communications Review, 17(5), pp. 390-401, - 1987. - - [LIN93] Lin, S. and D. Costello, "Error Control Coding: - Fundamentals and Applications", Prentice Hall, 1993. - - [LUD99a] Ludwig, R., Rathonyi, B., Konrad, A., Oden, K., and A. 
- Joseph, "Multi-Layer Tracing of TCP over a Reliable - Wireless Link", ACM SIGMETRICS, pp. 144-154, 1999. - - [LUD99b] Ludwig, R., Konrad, A., Joseph, A. and R. H. Katz, - "Optimizing the End-to-End Performance of Reliable - Flows over Wireless Links", ACM MobiCOM, 1999. - - [MEY99] Meyer, M., "TCP Performance over GPRS", IEEE Wireless - Communications and Networking Conference, 1999. - - [PAR00] Parsa, C. and J. J. Garcia-Luna-Aceves, "Improving TCP - Performance over Wireless Networks at the Link Layer", - ACM Mobile Networks and Applications Journal, (5)1, - pp. 57-71, 2000. - - [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC - 1191, November 1990. - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [RFC1350] Sollins, K., "The TFTP Protocol (Revision 2)", STD 33, - RFC 1350, July 1992. - - [RFC1435] Knowles, S., "IESG Advice from Experience with Path MTU - Discovery", RFC 1435, March 1993. - - [RFC1981] McCann, J., Deering, S. and J. Mogul, "Path MTU - Discovery for IP version 6", RFC 1981, August 1996. - - - - - -Fairhurst & Wood Best Current Practice [Page 24] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - [RFC2488] Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP - Over Satellite Channels using Standard Mechanisms", - BCP 28, RFC 2488, January 1999. - - [RFC2757] Montenegro, G., Dawkins, S., Kojo, M., Magret V. and - N. Vaidya, "Long Thin Networks", RFC 2757, January - 2000. - - [RFC2760] Allman, M., Dawkins, S., Glover, D., Griner, J., - Tran, D., Henderson, T., Heidemann, J., Touch, J., - Kruse, H., Ostermann, S., Scott K. and J. Semke - "Ongoing TCP Research Related to Satellites", - RFC 2760, February 2000. - - [RFC2960] Stewart, R., Xie, Q., Morneault, K., Sharp, C., - Schwarzbauer, H., Taylor, T., Rytina, I., Kalla, M., - Zhang, L. and V. Paxson, "Stream Control Transmission - Protocol", RFC 2960, October 2000. - - [RFC3022] Srisuresh, P. and K. 
Egevang, "Traditional IP Network - Address Translator (Traditional NAT)", RFC 3022, - January 2001. - - [RFC3155] Dawkins, S., Montenegro, G., Kojo, M., Magret, V. and - N. Vaidya, "End-to-end Performance Implications of - Links with Errors", BCP 50, RFC 3155, August 2001. - - [SALT81] Saltzer, J. H., Reed, D. P. and D. Clark, "End-to-End - Arguments in System Design", Second International - Conference on Distributed Computing Systems, pp. - 509-512, 1981. Published with minor changes in ACM - Transactions in Computer Systems (2)4, pp. 277-288, - 1984. - - [SAM96] Samaraweera, N. and G. Fairhurst, "Robust Data Link - Protocols for Connection-less Service over Satellite - Links", International Journal of Satellite - Communications, 14(5), pp. 427-437, 1996. - - [SAM98] Samaraweera, N. and G. Fairhurst, "Reinforcement of - TCP/IP Error Recovery for Wireless Communications", - ACM Computer Communications Review, 28(2), pp. 30-38, - 1998. - - [STE94] Stevens, W. R., "TCP/IP Illustrated, Volume 1", - Addison-Wesley, 1994. - - - - - -Fairhurst & Wood Best Current Practice [Page 25] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - - [STONE00] Stone, J. and C. Partridge, "When the CRC and TCP - Checksum Disagree", Proceedings of SIGCOMM 2000, ACM - Computer Communications Review 30(4), pp. 309-321, - September 2000. - - [WARD95] Ward, C., et al., "A Data Link Control Protocol for LEO - Satellite Networks Providing a Reliable Datagram - Service", IEEE/ACM Transactions on Networking, 3(1), - 1995. 
- -Authors' Addresses - - Godred Fairhurst - Department of Engineering - University of Aberdeen - Aberdeen AB24 3UE - United Kingdom - - EMail: gorry@erg.abdn.ac.uk - http://www.erg.abdn.ac.uk/users/gorry/ - - - Lloyd Wood - Cisco Systems Ltd - 4 The Square - Stockley Park - Uxbridge UB11 1BY - United Kingdom - - EMail: lwood@cisco.com - http://www.ee.surrey.ac.uk/Personal/L.Wood/ - - - - - - - - - - - - - - - - - - - - -Fairhurst & Wood Best Current Practice [Page 26] - -RFC 3366 Advice to Link Designers on Link ARQ August 2002 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2002). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Fairhurst & Wood Best Current Practice [Page 27] - diff --git a/kernel/picotcp/RFC/rfc3390.txt b/kernel/picotcp/RFC/rfc3390.txt deleted file mode 100644 index 968a340..0000000 --- a/kernel/picotcp/RFC/rfc3390.txt +++ /dev/null @@ -1,843 +0,0 @@ - - - - - - -Network Working Group M. Allman -Request for Comments: 3390 BBN/NASA GRC -Obsoletes: 2414 S. Floyd -Updates: 2581 ICIR -Category: Standards Track C. Partridge - BBN Technologies - October 2002 - - - Increasing TCP's Initial Window - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2002). All Rights Reserved. - -Abstract - - This document specifies an optional standard for TCP to increase the - permitted initial window from one or two segment(s) to roughly 4K - bytes, replacing RFC 2414. It discusses the advantages and - disadvantages of the higher initial window, and includes discussion - of experiments and simulations showing that the higher initial window - does not lead to congestion collapse. Finally, this document - provides guidance on implementation issues. - -Terminology - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in RFC 2119 [RFC2119]. - -1. 
TCP Modification - - This document obsoletes [RFC2414] and updates [RFC2581] and specifies - an increase in the permitted upper bound for TCP's initial window - from one or two segment(s) to between two and four segments. In most - cases, this change results in an upper bound on the initial window of - roughly 4K bytes (although given a large segment size, the permitted - initial window of two segments may be significantly larger than 4K - bytes). - - - -Allman, et. al. Standards Track [Page 1] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - The upper bound for the initial window is given more precisely in - (1): - - min (4*MSS, max (2*MSS, 4380 bytes)) (1) - - Note: Sending a 1500 byte packet indicates a maximum segment size - (MSS) of 1460 bytes (assuming no IP or TCP options). Therefore, - limiting the initial window's MSS to 4380 bytes allows the sender to - transmit three segments initially in the common case when using 1500 - byte packets. - - Equivalently, the upper bound for the initial window size is based on - the MSS, as follows: - - If (MSS <= 1095 bytes) - then win <= 4 * MSS; - If (1095 bytes < MSS < 2190 bytes) - then win <= 4380; - If (2190 bytes <= MSS) - then win <= 2 * MSS; - - This increased initial window is optional: a TCP MAY start with a - larger initial window. However, we expect that most general-purpose - TCP implementations would choose to use the larger initial congestion - window given in equation (1) above. - - This upper bound for the initial window size represents a change from - RFC 2581 [RFC2581], which specified that the congestion window be - initialized to one or two segments. - - This change applies to the initial window of the connection in the - first round trip time (RTT) of data transmission following the TCP - three-way handshake. Neither the SYN/ACK nor its acknowledgment - (ACK) in the three-way handshake should increase the initial window - size above that outlined in equation (1). 
If the SYN or SYN/ACK is - lost, the initial window used by a sender after a correctly - transmitted SYN MUST be one segment consisting of MSS bytes. - - TCP implementations use slow start in as many as three different - ways: (1) to start a new connection (the initial window); (2) to - restart transmission after a long idle period (the restart window); - and (3) to restart transmission after a retransmit timeout (the loss - window). The change specified in this document affects the value of - the initial window. Optionally, a TCP MAY set the restart window to - the minimum of the value used for the initial window and the current - value of cwnd (in other words, using a larger value for the restart - window should never increase the size of cwnd). These changes do NOT - change the loss window, which must remain 1 segment of MSS bytes (to - - - -Allman, et. al. Standards Track [Page 2] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - permit the lowest possible window size in the case of severe - congestion). - -2. Implementation Issues - - When larger initial windows are implemented along with Path MTU - Discovery [RFC1191], and the MSS being used is found to be too large, - the congestion window `cwnd' SHOULD be reduced to prevent large - bursts of smaller segments. Specifically, `cwnd' SHOULD be reduced - by the ratio of the old segment size to the new segment size. - - When larger initial windows are implemented along with Path MTU - Discovery [RFC1191], alternatives are to set the "Don't Fragment" - (DF) bit in all segments in the initial window, or to set the "Don't - Fragment" (DF) bit in one of the segments. It is an open question as - to which of these two alternatives is best; we would hope that - implementation experiences will shed light on this question. In the - first case of setting the DF bit in all segments, if the initial - packets are too large, then all of the initial packets will be - dropped in the network. 
In the second case of setting the DF bit in - only one segment, if the initial packets are too large, then all but - one of the initial packets will be fragmented in the network. When - the second case is followed, setting the DF bit in the last segment - in the initial window provides the least chance for needless - retransmissions when the initial segment size is found to be too - large, because it minimizes the chances of duplicate ACKs triggering - a Fast Retransmit. However, more attention needs to be paid to the - interaction between larger initial windows and Path MTU Discovery. - - The larger initial window specified in this document is not intended - as encouragement for web browsers to open multiple simultaneous TCP - connections, all with large initial windows. When web browsers open - simultaneous TCP connections to the same destination, they are - working against TCP's congestion control mechanisms [FF99], - regardless of the size of the initial window. Combining this - behavior with larger initial windows further increases the unfairness - to other traffic in the network. We suggest the use of HTTP/1.1 - [RFC2068] (persistent TCP connections and pipelining) as a way to - achieve better performance of web transfers. - -3. Advantages of Larger Initial Windows - - 1. When the initial window is one segment, a receiver employing - delayed ACKs [RFC1122] is forced to wait for a timeout before - generating an ACK. With an initial window of at least two - segments, the receiver will generate an ACK after the second data - segment arrives. This eliminates the wait on the timeout (often - up to 200 msec, and possibly up to 500 msec [RFC1122]). - - - -Allman, et. al. Standards Track [Page 3] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - 2. For connections transmitting only a small amount of data, a - larger initial window reduces the transmission time (assuming at - most moderate segment drop rates). 
For many email (SMTP [Pos82]) - and web page (HTTP [RFC1945, RFC2068]) transfers that are less - than 4K bytes, the larger initial window would reduce the data - transfer time to a single RTT. - - 3. For connections that will be able to use large congestion - windows, this modification eliminates up to three RTTs and a - delayed ACK timeout during the initial slow-start phase. This - will be of particular benefit for high-bandwidth large- - propagation-delay TCP connections, such as those over satellite - links. - -4. Disadvantages of Larger Initial Windows for the Individual - Connection - - In high-congestion environments, particularly for routers that have a - bias against bursty traffic (as in the typical Drop Tail router - queues), a TCP connection can sometimes be better off starting with - an initial window of one segment. There are scenarios where a TCP - connection slow-starting from an initial window of one segment might - not have segments dropped, while a TCP connection starting with an - initial window of four segments might experience unnecessary - retransmits due to the inability of the router to handle small - bursts. This could result in an unnecessary retransmit timeout. For - a large-window connection that is able to recover without a - retransmit timeout, this could result in an unnecessarily-early - transition from the slow-start to the congestion-avoidance phase of - the window increase algorithm. These premature segment drops are - unlikely to occur in uncongested networks with sufficient buffering - or in moderately-congested networks where the congested router uses - active queue management (such as Random Early Detection [FJ93, - RFC2309]). - - Some TCP connections will receive better performance with the larger - initial window even if the burstiness of the initial window results - in premature segment drops. 
This will be true if (1) the TCP - connection recovers from the segment drop without a retransmit - timeout, and (2) the TCP connection is ultimately limited to a small - congestion window by either network congestion or by the receiver's - advertised window. - -5. Disadvantages of Larger Initial Windows for the Network - - In terms of the potential for congestion collapse, we consider two - separate potential dangers for the network. The first danger would - be a scenario where a large number of segments on congested links - - - -Allman, et. al. Standards Track [Page 4] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - were duplicate segments that had already been received at the - receiver. The second danger would be a scenario where a large number - of segments on congested links were segments that would be dropped - later in the network before reaching their final destination. - - In terms of the negative effect on other traffic in the network, a - potential disadvantage of larger initial windows would be that they - increase the general packet drop rate in the network. We discuss - these three issues below. - - Duplicate segments: - - As described in the previous section, the larger initial window - could occasionally result in a segment dropped from the initial - window, when that segment might not have been dropped if the - sender had slow-started from an initial window of one segment. - However, Appendix A shows that even in this case, the larger - initial window would not result in the transmission of a large - number of duplicate segments. - - Segments dropped later in the network: - - How much would the larger initial window for TCP increase the - number of segments on congested links that would be dropped - before reaching their final destination? 
This is a problem that - can only occur for connections with multiple congested links, - where some segments might use scarce bandwidth on the first - congested link along the path, only to be dropped later along the - path. - - First, many of the TCP connections will have only one congested - link along the path. Segments dropped from these connections do - not "waste" scarce bandwidth, and do not contribute to congestion - collapse. - - However, some network paths will have multiple congested links, - and segments dropped from the initial window could use scarce - bandwidth along the earlier congested links before ultimately - being dropped on subsequent congested links. To the extent that - the drop rate is independent of the initial window used by TCP - segments, the problem of congested links carrying segments that - will be dropped before reaching their destination will be similar - for TCP connections that start by sending four segments or one - segment. - - - - - - - -Allman, et. al. Standards Track [Page 5] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - An increased packet drop rate: - - For a network with a high segment drop rate, increasing the TCP - initial window could increase the segment drop rate even further. - This is in part because routers with Drop Tail queue management - have difficulties with bursty traffic in times of congestion. - However, given uncorrelated arrivals for TCP connections, the - larger TCP initial window should not significantly increase the - segment drop rate. Simulation-based explorations of these issues - are discussed in Section 7.2. - - These potential dangers for the network are explored in simulations - and experiments described in the section below. 
Our judgment is that - while there are dangers of congestion collapse in the current - Internet (see [FF99] for a discussion of the dangers of congestion - collapse from an increased deployment of UDP connections without - end-to-end congestion control), there is no such danger to the - network from increasing the TCP initial window to 4K bytes. - -6. Interactions with the Retransmission Timer - - Using a larger initial burst of data can exacerbate existing problems - with spurious retransmit timeouts on low-bandwidth paths, assuming - the standard algorithm for determining the TCP retransmission timeout - (RTO) [RFC2988]. The problem is that across low-bandwidth network - paths on which the transmission time of a packet is a large portion - of the round-trip time, the small packets used to establish a TCP - connection do not seed the RTO estimator appropriately. When the - first window of data packets is transmitted, the sender's retransmit - timer could expire before the acknowledgments for those packets are - received. As each acknowledgment arrives, the retransmit timer is - generally reset. Thus, the retransmit timer will not expire as long - as an acknowledgment arrives at least once a second, given the one- - second minimum on the RTO recommended in RFC 2988. - - For instance, consider a 9.6 Kbps link. The initial RTT measurement - will be on the order of 67 msec, if we simply consider the - transmission time of 2 packets (the SYN and SYN-ACK), each consisting - of 40 bytes. Using the RTO estimator given in [RFC2988], this yields - an initial RTO of 201 msec (67 + 4*(67/2)). However, we round the - RTO to 1 second as specified in RFC 2988. Then assume we send an - initial window of one or more 1500-byte packets (1460 data bytes plus - overhead). Each packet will take on the order of 1.25 seconds to - transmit. Therefore, the RTO will fire before the ACK for the first - packet returns, causing a spurious timeout. 
In this case, a larger - initial window of three or four packets exacerbates the problems - caused by this spurious timeout. - - - - -Allman, et. al. Standards Track [Page 6] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - One way to deal with this problem is to make the RTO algorithm more - conservative. During the initial window of data, for instance, the - RTO could be updated for each acknowledgment received. In addition, - if the retransmit timer expires for some packet lost in the first - window of data, we could leave the exponential-backoff of the - retransmit timer engaged until at least one valid RTT measurement, - that involves a data packet, is received. - - Another method would be to refrain from taking an RTT sample during - connection establishment, leaving the default RTO in place until TCP - takes a sample from a data segment and the corresponding ACK. While - this method likely helps prevent spurious retransmits, it also may - slow the data transfer down if loss occurs before the RTO is seeded. - The use of limited transmit [RFC3042] to aid a TCP connection in - recovering from loss using fast retransmit rather than the RTO timer - mitigates the performance degradation caused by using the high - default RTO during the initial window of data transmission. - - This specification leaves the decision about what to do (if anything) - with regards to the RTO, when using a larger initial window, to the - implementer. However, the RECOMMENDED approach is to refrain from - sampling the RTT during the three-way handshake, keeping the default - RTO in place until an RTT sample involving a data packet is taken. - In addition, it is RECOMMENDED that TCPs use limited transmit - [RFC3042]. - -7. Typical Levels of Burstiness for TCP Traffic. - - Larger TCP initial windows would not dramatically increase the - burstiness of TCP traffic in the Internet today, because such traffic - is already fairly bursty. 
Bursts of two and three segments are - already typical of TCP [Flo97]; a delayed ACK (covering two - previously unacknowledged segments) received during congestion - avoidance causes the congestion window to slide and two segments to - be sent. The same delayed ACK received during slow start causes the - window to slide by two segments and then be incremented by one - segment, resulting in a three-segment burst. While not necessarily - typical, bursts of four and five segments for TCP are not rare. - Assuming delayed ACKs, a single dropped ACK causes the subsequent ACK - to cover four previously unacknowledged segments. During congestion - avoidance this leads to a four-segment burst, and during slow start a - five-segment burst is generated. - - There are also changes in progress that reduce the performance - problems posed by moderate traffic bursts. One such change is the - deployment of higher-speed links in some parts of the network, where - a burst of 4K bytes can represent a small quantity of data. A second - change, for routers with sufficient buffering, is the deployment of - - - -Allman, et. al. Standards Track [Page 7] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - queue management mechanisms such as RED, which is designed to be - tolerant of transient traffic bursts. - -8. Simulations and Experimental Results - -8.1 Studies of TCP Connections using that Larger Initial Window - - This section surveys simulations and experiments that explore the - effect of larger initial windows on TCP connections. The first set - of experiments explores performance over satellite links. Larger - initial windows have been shown to improve the performance of TCP - connections over satellite channels [All97b]. In this study, an - initial window of four segments (512 byte MSS) resulted in throughput - improvements of up to 30% (depending upon transfer size). 
[KAGT98] - shows that the use of larger initial windows results in a decrease in - transfer time in HTTP tests over the ACTS satellite system. A study - involving simulations of a large number of HTTP transactions over - hybrid fiber coax (HFC) indicates that the use of larger initial - windows decreases the time required to load WWW pages [Nic98]. - - A second set of experiments explored TCP performance over dialup - modem links. In experiments over a 28.8 Kbps dialup channel [All97a, - AHO98], a four-segment initial window decreased the transfer time of - a 16KB file by roughly 10%, with no accompanying increase in the drop - rate. A simulation study [RFC2416] investigated the effects of using - a larger initial window on a host connected by a slow modem link and - a router with a 3 packet buffer. The study concluded that for the - scenario investigated, the use of larger initial windows was not - harmful to TCP performance. - - Finally, [All00] illustrates that the percentage of connections at a - particular web server that experience loss in the initial window of - data transmission increases with the size of the initial congestion - window. However, the increase is in line with what would be expected - from sending a larger burst into the network. - -8.2 Studies of Networks using Larger Initial Windows - - This section surveys simulations and experiments investigating the - impact of the larger window on other TCP connections sharing the - path. Experiments in [All97a, AHO98] show that for 16 KB transfers - to 100 Internet hosts, four-segment initial windows resulted in a - small increase in the drop rate of 0.04 segments/transfer. While the - drop rate increased slightly, the transfer time was reduced by - roughly 25% for transfers using the four-segment (512 byte MSS) - initial window when compared to an initial window of one segment. - - - - - -Allman, et. al.
Standards Track [Page 8] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - A simulation study in [RFC2415] explores the impact of a larger - initial window on competing network traffic. In this investigation, - HTTP and FTP flows share a single congested gateway (where the number - of HTTP and FTP flows varies from one simulation set to another). - For each simulation set, the paper examines aggregate link - utilization and packet drop rates, median web page delay, and network - power for the FTP transfers. The larger initial window generally - resulted in increased throughput, slightly-increased packet drop - rates, and an increase in overall network power. With the exception - of one scenario, the larger initial window resulted in an increase in - the drop rate of less than 1% above the loss rate experienced when - using a one-segment initial window; in this scenario, the drop rate - increased from 3.5% with one-segment initial windows, to 4.5% with - four-segment initial windows. The overall conclusions were that - increasing the TCP initial window to three packets (or 4380 bytes) - helps to improve perceived performance. - - Morris [Mor97] investigated larger initial windows in a highly - congested network with transfers of 20K in size. The loss rate in - networks where all TCP connections use an initial window of four - segments is shown to be 1-2% greater than in a network where all - connections use an initial window of one segment. This relationship - held in scenarios where the loss rates with one-segment initial - windows ranged from 1% to 11%. In addition, in networks where - connections used an initial window of four segments, TCP connections - spent more time waiting for the retransmit timer (RTO) to expire to - resend a segment than was spent using an initial window of one - segment. The time spent waiting for the RTO timer to expire - represents idle time when no useful work was being accomplished for - that connection. 
These results show that in a very congested - environment, where each connection's share of the bottleneck - bandwidth is close to one segment, using a larger initial window can - cause a perceptible increase in both loss rates and retransmit - timeouts. - -9. Security Considerations - - This document discusses the initial congestion window permitted for - TCP connections. Changing this value does not raise any known new - security issues with TCP. - -10. Conclusion - - This document specifies a small change to TCP that will likely be - beneficial to short-lived TCP connections and those over links with - long RTTs (saving several RTTs during the initial slow-start phase). - - - - - -Allman, et. al. Standards Track [Page 9] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - -11. Acknowledgments - - We would like to acknowledge Vern Paxson, Tim Shepard, members of the - End-to-End-Interest Mailing List, and members of the IETF TCP - Implementation Working Group for continuing discussions of these - issues and for feedback on this document. - -12. References - - [AHO98] Mark Allman, Chris Hayes, and Shawn Ostermann, An - Evaluation of TCP with Larger Initial Windows, March 1998. - ACM Computer Communication Review, 28(3), July 1998. URL - "http://roland.lerc.nasa.gov/~mallman/papers/initwin.ps". - - [All97a] Mark Allman. An Evaluation of TCP with Larger Initial - Windows. 40th IETF Meeting -- TCP Implementations WG. - December, 1997. Washington, DC. - - [All97b] Mark Allman. Improving TCP Performance Over Satellite - Channels. Master's thesis, Ohio University, June 1997. - - [All00] Mark Allman. A Web Server's View of the Transport Layer. - ACM Computer Communication Review, 30(5), October 2000. - - [FF96] Fall, K., and Floyd, S., Simulation-based Comparisons of - Tahoe, Reno, and SACK TCP. Computer Communication Review, - 26(3), July 1996. - - [FF99] Sally Floyd, Kevin Fall. Promoting the Use of End-to-End - Congestion Control in the Internet. 
IEEE/ACM Transactions - on Networking, August 1999. URL - "http://www.icir.org/floyd/end2end-paper.html". - - [FJ93] Floyd, S., and Jacobson, V., Random Early Detection - gateways for Congestion Avoidance. IEEE/ACM Transactions on - Networking, V.1 N.4, August 1993, p. 397-413. - - [Flo94] Floyd, S., TCP and Explicit Congestion Notification. - Computer Communication Review, 24(5):10-23, October 1994. - - [Flo96] Floyd, S., Issues of TCP with SACK. Technical report, - January 1996. Available from http://www- - nrg.ee.lbl.gov/floyd/. - - [Flo97] Floyd, S., Increasing TCP's Initial Window. Viewgraphs, - 40th IETF Meeting - TCP Implementations WG. December, 1997. - URL "ftp://ftp.ee.lbl.gov/talks/sf-tcp-ietf97.ps". - - - - -Allman, et. al. Standards Track [Page 10] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - [KAGT98] Hans Kruse, Mark Allman, Jim Griner, Diepchi Tran. HTTP - Page Transfer Rates Over Geo-Stationary Satellite Links. - March 1998. Proceedings of the Sixth International - Conference on Telecommunication Systems. URL - "http://roland.lerc.nasa.gov/~mallman/papers/nash98.ps". - - [Mor97] Robert Morris. Private communication, 1997. Cited for - acknowledgement purposes only. - - [Nic98] Kathleen Nichols. Improving Network Simulation With - Feedback, Proceedings of LCN 98, October 1998. URL - "http://www.computer.org/proceedings/lcn/8810/8810toc.htm". - - [Pos82] Postel, J., "Simple Mail Transfer Protocol", STD 10, RFC - 821, August 1982. - - [RFC1122] Braden, R., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - [RFC1945] Berners-Lee, T., Fielding, R. and H. Nielsen, "Hypertext - Transfer Protocol -- HTTP/1.0", RFC 1945, May 1996. - - [RFC2068] Fielding, R., Mogul, J., Gettys, J., Frystyk, H. and T. - Berners-Lee, "Hypertext Transfer Protocol -- HTTP/1.1", RFC - 2068, January 1997.
- - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2309] Braden, B., Clark, D., Crowcroft, J., Davie, B., Deering, - S., Estrin, D., Floyd, S., Jacobson, V., Minshall, G., - Partridge, C., Peterson, L., Ramakrishnan, K., Shenker, S., - Wroclawski, J. and L. Zhang, "Recommendations on Queue - Management and Congestion Avoidance in the Internet", RFC - 2309, April 1998. - - [RFC2414] Allman, M., Floyd, S. and C. Partridge, "Increasing TCP's - Initial Window", RFC 2414, September 1998. - - [RFC2415] Poduri, K. and K. Nichols, "Simulation Studies of Increased - Initial TCP Window Size", RFC 2415, September 1998. - - [RFC2416] Shepard, T. and C. Partridge, "When TCP Starts Up With Four - Packets Into Only Three Buffers", RFC 2416, September 1998. - - - - -Allman, et. al. Standards Track [Page 11] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2821] Klensin, J., "Simple Mail Transfer Protocol", RFC 2821, - April 2001. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [RFC3042] Allman, M., Balakrishnan, H. and S. Floyd, "Enhancing TCP's - Loss Recovery Using Limited Transmit", RFC 3042, January - 2001. - - [RFC3168] Ramakrishnan, K.K., Floyd, S. and D. Black, "The Addition - of Explicit Congestion Notification (ECN) to IP", RFC 3168, - September 2001. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Allman, et. al. Standards Track [Page 12] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - -Appendix A - Duplicate Segments - - In the current environment (without Explicit Congestion Notification - [Flo94] [RFC2481]), all TCPs use segment drops as indications from - the network about the limits of available bandwidth. 
We argue here - that the change to a larger initial window should not result in the - sender retransmitting a large number of duplicate segments that have - already arrived at the receiver. - - If one segment is dropped from the initial window, there are three - different ways for TCP to recover: (1) Slow-starting from a window of - one segment, as is done after a retransmit timeout, or after Fast - Retransmit in Tahoe TCP; (2) Fast Recovery without selective - acknowledgments (SACK), as is done after three duplicate ACKs in Reno - TCP; and (3) Fast Recovery with SACK, for TCP where both the sender - and the receiver support the SACK option [MMFR96]. In all three - cases, if a single segment is dropped from the initial window, no - duplicate segments (i.e., segments that have already been received at - the receiver) are transmitted. Note that for a TCP sending four - 512-byte segments in the initial window, a single segment drop will - not require a retransmit timeout, but can be recovered by using the - Fast Retransmit algorithm (unless the retransmit timer expires - prematurely). In addition, a single segment dropped from an initial - window of three segments might be repaired using the fast retransmit - algorithm, depending on which segment is dropped and whether or not - delayed ACKs are used. For example, dropping the first segment of a - three segment initial window will always require waiting for a - timeout, in the absence of Limited Transmit [RFC3042]. However, - dropping the third segment will always allow recovery via the fast - retransmit algorithm, as long as no ACKs are lost. - - Next we consider scenarios where the initial window contains two to - four segments, and at least two of those segments are dropped. If - all segments in the initial window are dropped, then clearly no - duplicate segments are retransmitted, as the receiver has not yet - received any segments. 
(It is still a possibility that these dropped - segments used scarce bandwidth on the way to their drop point; this - issue was discussed in Section 5.) - - When two segments are dropped from an initial window of three - segments, the sender will only send a duplicate segment if the first - two of the three segments were dropped, and the sender does not - receive a packet with the SACK option acknowledging the third - segment. - - When two segments are dropped from an initial window of four - segments, an examination of the six possible scenarios (which we - don't go through here) shows that, depending on the position of the - - - -Allman, et. al. Standards Track [Page 13] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - - dropped packets, in the absence of SACK the sender might send one - duplicate segment. There are no scenarios in which the sender sends - two duplicate segments. - - When three segments are dropped from an initial window of four - segments, then, in the absence of SACK, it is possible that one - duplicate segment will be sent, depending on the position of the - dropped segments. - - The summary is that in the absence of SACK, there are some scenarios - with multiple segment drops from the initial window where one - duplicate segment will be transmitted. There are no scenarios in - which more than one duplicate segment will be transmitted. Our - conclusion is that the number of duplicate segments transmitted as a - result of a larger initial window should be small.
- -Author's Addresses - - Mark Allman - BBN Technologies/NASA Glenn Research Center - 21000 Brookpark Rd - MS 54-5 - Cleveland, OH 44135 - EMail: mallman@bbn.com - http://roland.lerc.nasa.gov/~mallman/ - - Sally Floyd - ICSI Center for Internet Research - 1947 Center St, Suite 600 - Berkeley, CA 94704 - Phone: +1 (510) 666-2989 - EMail: floyd@icir.org - http://www.icir.org/floyd/ - - Craig Partridge - BBN Technologies - 10 Moulton St - Cambridge, MA 02138 - EMail: craig@bbn.com - - - - - - - - - - - - -Allman, et. al. Standards Track [Page 14] - -RFC 3390 Increasing TCP's Initial Window October 2002 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2002). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Allman, et. al. Standards Track [Page 15] - diff --git a/kernel/picotcp/RFC/rfc3449.txt b/kernel/picotcp/RFC/rfc3449.txt deleted file mode 100644 index 46936b0..0000000 --- a/kernel/picotcp/RFC/rfc3449.txt +++ /dev/null @@ -1,2299 +0,0 @@ - - - - - - -Network Working Group H. Balakrishnan -Request for Comments: 3449 MIT LCS -BCP: 69 V. N. Padmanabhan -Category: Best Current Practice Microsoft Research - G. Fairhurst - M. Sooriyabandara - University of Aberdeen, U.K. - December 2002 - - - TCP Performance Implications - of Network Path Asymmetry - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2002). All Rights Reserved. - -Abstract - - This document describes TCP performance problems that arise because - of asymmetric effects. These problems arise in several access - networks, including bandwidth-asymmetric networks and packet radio - subnetworks, for different underlying reasons. However, the end - result on TCP performance is the same in both cases: performance - often degrades significantly because of imperfection and variability - in the ACK feedback from the receiver to the sender. 
- - The document details several mitigations to these effects, which have - either been proposed or evaluated in the literature, or are currently - deployed in networks. These solutions use a combination of local - link-layer techniques, subnetwork, and end-to-end mechanisms, - consisting of: (i) techniques to manage the channel used for the - upstream bottleneck link carrying the ACKs, typically using header - compression or reducing the frequency of TCP ACKs, (ii) techniques to - handle this reduced ACK frequency to retain the TCP sender's - acknowledgment-triggered self-clocking and (iii) techniques to - schedule the data and ACK packets in the reverse direction to improve - performance in the presence of two-way traffic. Each technique is - described, together with known issues, and recommendations for use. - A summary of the recommendations is provided at the end of the - document. - - - - -Balakrishnan et. al. Best Current Practice [Page 1] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -Table of Contents - - 1. Conventions used in this Document ...............................3 - 2. Motivation ....................................................4 - 2.1 Asymmetry due to Differences in Transmit - and Receive Capacity .........................................4 - 2.2 Asymmetry due to Shared Media in the Reverse Direction .......5 - 2.3 The General Problem ..........................................5 - 3. How does Asymmetry Degrade TCP Performance? .....................5 - 3.1 Asymmetric Capacity ..........................................5 - 3.2 MAC Protocol Interactions ....................................7 - 3.3 Bidirectional Traffic ........................................8 - 3.4 Loss in Asymmetric Network Paths ............................10 - 4. 
Improving TCP Performance using Host Mitigations ...............10 - 4.1 Modified Delayed ACKs .......................................11 - 4.2 Use of Large MSS ............................................12 - 4.3 ACK Congestion Control ......................................13 - 4.4 Window Prediction Mechanism .................................14 - 4.5 Acknowledgement based on Cwnd Estimation. ...................14 - 4.6 TCP Sender Pacing ...........................................14 - 4.7 TCP Byte Counting ...........................................15 - 4.8 Backpressure ................................................16 - 5. Improving TCP performance using Transparent Modifications ......17 - 5.1 TYPE 0: Header Compression ..................................18 - 5.1.1 TCP Header Compression ..................................18 - 5.1.2 Alternate Robust Header Compression Algorithms ..........19 - 5.2 TYPE 1: Reverse Link Bandwidth Management ...................19 - 5.2.1 ACK Filtering ...........................................20 - 5.2.2 ACK Decimation ..........................................21 - 5.3 TYPE 2: Handling Infrequent ACKs ............................22 - 5.3.1 ACK Reconstruction ......................................23 - 5.3.2 ACK Compaction and Companding ...........................25 - 5.3.3 Mitigating TCP packet bursts generated by - Infrequent ACKs .........................................26 - 5.4 TYPE 3: Upstream Link Scheduling ............................27 - 5.4.1 Per-Flow queuing at the Upstream Bottleneck Link ........27 - 5.4.2 ACKs-first Scheduling ...................................28 - 6. Security Considerations ........................................29 - 7. Summary ........................................................30 - 8. Acknowledgments ................................................32 - 9. References .....................................................32 - 10. 
IANA Considerations ...........................................37 - Appendix: Examples of Subnetworks Exhibiting Network Path - Asymmetry ...............................................38 - Authors' Addresses ................................................40 - Full Copyright Statement ..........................................41 - - - - - -Balakrishnan et. al. Best Current Practice [Page 2] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -1. Conventions used in this Document - - FORWARD DIRECTION: The dominant direction of data transfer over an - asymmetric network path. It corresponds to the direction with better - characteristics in terms of capacity, latency, error rate, etc. Data - transfer in the forward direction is called "forward transfer". - Packets travelling in the forward direction follow the forward path - through the IP network. - - REVERSE DIRECTION: The direction in which acknowledgments of a - forward TCP transfer flow. Data transfer could also happen in this - direction (and is termed "reverse transfer"), but it is typically - less voluminous than that in the forward direction. The reverse - direction typically exhibits worse characteristics than the forward - direction. Packets travelling in the reverse direction follow the - reverse path through the IP network. - - UPSTREAM LINK: The specific bottleneck link that normally has much - less capability than the corresponding downstream link. Congestion - is not confined to this link alone, and may also occur at any point - along the forward and reverse directions (e.g., due to sharing with - other traffic flows). - - DOWNSTREAM LINK: A link on the forward path, corresponding to the - upstream link. - - ACK: A cumulative TCP acknowledgment [RFC791]. In this document, - this term refers to a TCP segment that carries a cumulative - acknowledgement (ACK), but no data. - - DELAYED ACK FACTOR, d: The number of TCP data segments acknowledged - by a TCP ACK. 
The minimum value of d is 1, since at most one ACK - should be sent for each data packet [RFC1122, RFC2581]. - - STRETCH ACK: Stretch ACKs are acknowledgements that cover more than 2 - segments of previously unacknowledged data (d>2) [RFC2581]. Stretch - ACKs can occur by design (although this is not standard), due to - implementation bugs [All97b, RFC2525], or due to ACK loss [RFC2760]. - - NORMALIZED BANDWIDTH RATIO, k: The ratio of the raw bandwidth - (capacity) of the forward direction to the return direction, divided - by the ratio of the packet sizes used in the two directions [LMS97]. - - SOFTSTATE: Per-flow state established in a network device that is - used by the protocol [Cla88]. The state expires after a period of - time (i.e., is not required to be explicitly deleted when a session - - - - - -Balakrishnan et. al. Best Current Practice [Page 3] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - expires), and is continuously refreshed while a flow continues (i.e., - lost state may be reconstructed without needing to exchange - additional control messages). - -2. Motivation - - Asymmetric characteristics are exhibited by several network - technologies, including cable data networks, (e.g., DOCSIS cable TV - networks [DS00, DS01]), direct broadcast satellite (e.g., an IP - service using Digital Video Broadcast, DVB, [EN97] with an - interactive return channel), Very Small Aperture satellite Terminals - (VSAT), Asymmetric Digital Subscriber Line (ADSL) [ITU02, ANS01], and - several packet radio networks. These networks are increasingly being - deployed as high-speed Internet access networks, and it is therefore - highly desirable to achieve good TCP performance. However, the - asymmetry of the network paths often makes this challenging. - Examples of some networks that exhibit asymmetry are provided in the - Appendix. 
- - Asymmetry may manifest itself as a difference in transmit and receive - capacity, an imbalance in the packet loss rate, or differences - between the transmit and receive paths [RFC3077]. For example, when - capacity is asymmetric, such that there is reduced capacity on - reverse path used by TCP ACKs, slow or infrequent ACK feedback - degrades TCP performance in the forward direction. Similarly, - asymmetry in the underlying Medium Access Control (MAC) and Physical - (PHY) protocols could make it expensive to transmit TCP ACKs - (disproportionately to their size), even when capacity is symmetric. - -2.1 Asymmetry due to Differences in Transmit and Receive Capacity - - Network paths may be asymmetric because the upstream and downstream - links operate at different rates and/or are implemented using - different technologies. - - The asymmetry in capacity may be substantially increased when best - effort IP flows carrying TCP ACKs share the available upstream - capacity with other traffic flows, e.g., telephony, especially flows - that have reserved upstream capacity. This includes service - guarantees at the IP layer (e.g., the Guaranteed Service [RFC2212]) - or at the subnet layer (e.g., support of Voice over IP [ITU01] using - the Unsolicited Grant service in DOCSIS [DS01], or CBR virtual - connections in ATM over ADSL [ITU02, ANS01]). - - When multiple upstream links exist the asymmetry may be reduced by - dividing upstream traffic between a number of available upstream - links. - - - - -Balakrishnan et. al. Best Current Practice [Page 4] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -2.2 Asymmetry due to Shared Media in the Reverse Direction - - In networks employing centralized multiple access control, asymmetry - may be a fundamental consequence of the hub-and-spokes architecture - of the network (i.e., a single base node communicating with multiple - downstream nodes). 
The central node often incurs less transmission - overhead and does not incur latency in scheduling its own downstream - transmissions. In contrast, upstream transmission is subject to - additional overhead and latency (e.g., due to guard times between - transmission bursts, and contention intervals). This can produce - significant network path asymmetry. - - Upstream capacity may be further limited by the requirement that each - node must first request per-packet bandwidth using a contention MAC - protocol (e.g., DOCSIS 1.0 MAC restricts each node to sending at most - a single packet in each upstream time-division interval [DS00]). A - satellite network employing dynamic Bandwidth on Demand (BoD), also - consumes MAC resources for each packet sent (e.g., [EN00]). In these - schemes, the available uplink capacity is a function of the MAC - algorithm. The MAC and PHY schemes also introduce overhead per - upstream transmission which could be so significant that transmitting - short packets (including TCP ACKs) becomes as costly as transmitting - MTU-sized data packets. - -2.3 The General Problem - - Despite the technological differences between capacity-dependent and - MAC-dependent asymmetries, both kinds of network path suffer reduced - TCP performance for the same fundamental reason: the imperfection and - variability of ACK feedback. This document discusses the problem in - detail and describes several techniques that may reduce or eliminate - the constraints. - -3. How does Asymmetry Degrade TCP Performance? - - This section describes the implications of network path asymmetry on - TCP performance. The reader is referred to [BPK99, Bal98, Pad98, - FSS01, Sam99] for more details and experimental results. - -3.1 Asymmetric Capacity - - The problems that degrade unidirectional transfer performance when - the forward and return paths have very different capacities depend on - the characteristics of the upstream link. 
Two types of situations - arise for unidirectional traffic over such network paths: when the - upstream bottleneck link has sufficient queuing to prevent packet - (ACK) losses, and when the upstream bottleneck link has a small - buffer. Each is considered in turn. - - - -Balakrishnan et. al. Best Current Practice [Page 5] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - If the upstream bottleneck link has deep queues, so that this does - not drop ACKs in the reverse direction, then performance is a strong - function of the normalized bandwidth ratio, k. For example, for a 10 - Mbps downstream link and a 50 Kbps upstream link, the raw capacity - ratio is 200. With 1000-byte data packets and 40-byte ACKs, the - ratio of the packet sizes is 25. This implies that k is 200/25 = 8. - Thus, if the receiver acknowledges more frequently than one ACK every - 8 (k) data packets, the upstream link will become saturated before - the downstream link, limiting the throughput in the forward - direction. Note that the achieved TCP throughput is determined by - the minimum of the receiver advertised window and the TCP congestion - window, cwnd [RFC2581]. - - If ACKs are not dropped (at the upstream bottleneck link) and k > 1 - or k > 0.5 when delayed ACKs are used [RFC1122], TCP ACK-clocking - breaks down. Consider two data packets transmitted by the sender in - quick succession. En route to the receiver, these packets get spaced - apart according to the capacity of the smallest bottleneck link in - the forward direction. The principle of ACK clocking is that the - ACKs generated in response to receiving these data packets reflect - this temporal spacing all the way back to the sender, enabling it to - transmit new data packets that maintain the same spacing [Jac88]. ACK - clocking with delayed ACKs reflects the spacing between data packets - that actually trigger ACKs.
However, the limited upstream capacity - and queuing at the upstream bottleneck router alter the inter-ACK - spacing of the reverse path, and hence that observed at the sender. - When ACKs arrive at the upstream bottleneck link at a faster rate - than the link can support, they get queued behind one another. The - spacing between them when they emerge from the link is dilated with - respect to their original spacing, and is a function of the upstream - bottleneck capacity. Thus the TCP sender clocks out new data packets - at a slower rate than if there had been no queuing of ACKs. The - performance of the connection is no longer dependent on the - downstream bottleneck link alone; instead, it is throttled by the - rate of arriving ACKs. As a side effect, the sender's rate of cwnd - growth also slows down. - - A second side effect arises when the upstream bottleneck link on the - reverse path is saturated. The saturated link causes persistent - queuing of packets, leading to an increasing path Round Trip Time - (RTT) [RFC2988] observed by all end hosts using the bottleneck link. - This can impact the protocol control loops, and may also trigger - false timeouts (underestimation of the path RTT by the sending host). - - A different situation arises when the upstream bottleneck link has a - relatively small amount of buffer space to accommodate ACKs. As the - transmission window grows, this queue fills, and ACKs are dropped. If - the receiver were to acknowledge every packet, only one of every k - - - -Balakrishnan et. al. Best Current Practice [Page 6] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - ACKs would get through to the sender, and the remaining (k-1) are - dropped due to buffer overflow at the upstream link buffer (here k is - the normalized bandwidth ratio as before). In this case, the reverse - bottleneck link capacity and slow ACK arrival rate are not directly - responsible for any degraded performance.
However, the infrequency - of ACKs leads to three reasons for degraded performance: - - 1. The sender transmits data in large bursts of packets, limited only - by the available cwnd. If the sender receives only one ACK in k, - it transmits data in bursts of k (or more) packets because each - ACK shifts the sliding window by at least k (acknowledged) data - packets (TCP data segments). This increases the likelihood of - data packet loss along the forward path especially when k is - large, because routers do not handle large bursts of packets well. - - 2. Current TCP sender implementations increase their cwnd by counting - the number of ACKs they receive and not by how much data is - actually acknowledged by each ACK. The latter approach, also known - as byte counting (section 4.7), is a standard implementation - option for cwnd increase during the congestion avoidance period - [RFC2581]. Thus fewer ACKs imply a slower rate of growth of the - cwnd, which degrades performance over long-delay connections. - - 3. The sender TCP's Fast Retransmission and Fast Recovery algorithms - [RFC2581] are less effective when ACKs are lost. The sender may - possibly not receive the threshold number of duplicate ACKs even - if the receiver transmits more than the DupACK threshold (> 3 - DupACKs) [RFC2581]. Furthermore, the sender may possibly not - receive enough duplicate ACKs to adequately inflate its cwnd - during Fast Recovery. - -3.2 MAC Protocol Interactions - - The interaction of TCP with MAC protocols may degrade end-to-end - performance. Variable round-trip delays and ACK queuing are the main - symptoms of this problem. - - One example is the impact on terrestrial wireless networks [Bal98]. A - high per-packet overhead may arise from the need for communicating - link nodes to first synchronise (e.g., via a Ready To Send / Clear to - Send (RTS/CTS) protocol) before communication and the significant - turn-around time for the wireless channel.
This overhead is - variable, since the RTS/CTS exchange may need to back-off - exponentially when the remote node is busy (e.g., engaged in a - conversation with a different node). This leads to large and - variable communication latencies in packet-radio networks. - - - - - -Balakrishnan et. al. Best Current Practice [Page 7] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - An asymmetric workload (more downstream than upstream traffic) may - cause ACKs to be queued in some wireless nodes (especially in the end - host modems), exacerbating the variable latency. Queuing may also - occur in other shared media, e.g., cable modem uplinks, BoD access - systems often employed on shared satellite channels. - - Variable latency and ACK queuing reduces the smoothness of the TCP - data flow. In particular, ACK traffic can interfere with the flow of - data packets, increasing the traffic load of the system. - - TCP measures the path RTT, and from this calculates a smoothed RTT - estimate (srtt) and a linear deviation, rttvar. These are used to - estimate a path retransmission timeout (RTO) [RFC2988], set to srtt + - 4*rttvar. For most wired TCP connections, the srtt remains constant - or has a low linear deviation. The RTO therefore tracks the path - RTT, and the TCP sender will respond promptly when multiple losses - occur in a window. In contrast, some wireless networks exhibit a - high variability in RTT, causing the RTO to significantly increase - (e.g., on the order of 10 seconds). Paths traversing multiple - wireless hops are especially vulnerable to this effect, because this - increases the probability that the intermediate nodes may already be - engaged in conversation with other nodes. The overhead in most MAC - schemes is a function of both the number and size of packets. - However, the MAC contention problem is a significant function of the - number of packets (e.g., ACKs) transmitted rather than their size. 
- In other words, there is a significant cost to transmitting a packet - regardless of packet size. - - Experiments conducted on the Ricochet packet radio network in 1996 - and 1997 demonstrated the impact of radio turnarounds and the - corresponding increased RTT variability, resulting in degraded TCP - performance. It was not uncommon for TCP connections to experience - timeouts of 9 - 12 seconds, with the result that many connections - were idle for a significant fraction of their lifetime (e.g., - sometimes 35% of the total transfer time). This leads to under- - utilization of the available capacity. These effects may also occur - in other wireless subnetworks. - -3.3 Bidirectional Traffic - - Bidirectional traffic arises when there are simultaneous TCP - transfers in the forward and reverse directions over an asymmetric - network path, e.g., a user who sends an e-mail message in the reverse - direction while simultaneously receiving a web page in the forward - direction. To simplify the discussion, only one TCP connection in - each direction is considered. In many practical cases, several - simultaneous connections need to share the available capacity, - increasing the level of congestion. - - - -Balakrishnan et. al. Best Current Practice [Page 8] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - Bidirectional traffic makes the effects discussed in section 3.1 more - pronounced, because part of the upstream link bandwidth is consumed - by the reverse transfer. This effectively increases the degree of - bandwidth asymmetry. Other effects also arise due to the interaction - between data packets of the reverse transfer and ACKs of the forward - transfer. Suppose at the time the forward TCP connection is - initiated, the reverse TCP connection has already saturated the - bottleneck upstream link with data packets. 
There is then a high - probability that many ACKs of the new forward TCP connection will - encounter a full upstream link buffer and hence get dropped. Even - after these initial problems, ACKs of the forward connection could - get queued behind large data packets of the reverse connection. The - larger data packets may have correspondingly long transmission times - (e.g., it takes about 280 ms to transmit a 1 Kbyte data packet over a - 28.8 kbps line). This causes the forward transfer to stall for long - periods of time. It is only at times when the reverse connection - loses packets (due to a buffer overflow at an intermediate router) - and slows down, that the forward connection gets the opportunity to - make rapid progress and build up its cwnd. - - When ACKs are queued behind other traffic for appreciable periods of - time, the burst nature of TCP traffic and self-synchronizing effects - can result in an effect known as ACK Compression [ZSC91], which - reduces the throughput of TCP. It occurs when a series of ACKs, in - one direction are queued behind a burst of other packets (e.g., data - packets traveling in the same direction) and become compressed in - time. This results in an intense burst of data packets in the other - direction, in response to the burst of compressed ACKs arriving at - the server. This phenomenon has been investigated in detail for - bidirectional traffic, and recent analytical work [LMS97] has - predicted ACK Compression may also result from bi-directional - transmission with asymmetry, and was observed in practical asymmetric - satellite subnetworks [FSS01]. In the case of extreme asymmetry - (k>>1), the inter-ACK spacing can increase due to queuing (section - 3.1), resulting in ACK dilation. - - In summary, sharing of the upstream bottleneck link by multiple flows - (e.g., IP flows to the same end host, or flows to a number of end - hosts sharing a common upstream link) increases the level of ACK - Congestion. 
The presence of bidirectional traffic exacerbates the - constraints introduced by bandwidth asymmetry because of the adverse - interaction between (large) data packets of a reverse direction - connection and the ACKs of a forward direction connection. - - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 9] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -3.4 Loss in Asymmetric Network Paths - - Loss may occur in either the forward or reverse direction. For data - transfer in the forward direction this results respectively in loss - of data packets and ACK packets. Loss of ACKs is less significant - than loss of data packets, because it generally results in stretch - ACKs [CR98, FSS01]. - - In the case of long delay paths, a slow upstream link [RFC3150] can - lead to another complication when the end host uses TCP large windows - [RFC1323] to maximize throughput in the forward direction. Loss of - data packets on the forward path, due to congestion, or link loss, - common for some wireless links, will generate a large number of - back-to-back duplicate ACKs (or TCP SACK packets [RFC2018]), for each - correctly received data packet following a loss. The TCP sender - employs Fast Retransmission and Recovery [RFC2581] to recover from - the loss, but even if this is successful, the ACK to the - retransmitted data segment may be significantly delayed by other - duplicate ACKs still queued at the upstream link buffer. This can - ultimately lead to a timeout [RFC2988] and a premature end to the TCP - Slow Start [RFC2581]. This results in poor forward path throughput. - Section 5.3 describes some mitigations to counter this. - -4. Improving TCP Performance using Host Mitigations - - There are two key issues that need to be addressed to improve TCP - performance over asymmetric networks. The first is to manage the - capacity of the upstream bottleneck link, used by ACKs and possibly - other traffic. 
A number of techniques exist which work by reducing - the number of ACKs that flow in the reverse direction. This has the - side effect of potentially destroying the desirable self-clocking - property of the TCP sender where transmission of new data packets is - triggered by incoming ACKs. Thus, the second issue is to avoid any - adverse impact of infrequent ACKs. - - Each of these issues can be handled by local link-layer solutions - and/or by end-to-end techniques. This section discusses end-to-end - modifications. Some techniques require TCP receiver changes - (sections 4.1, 4.4, 4.5), some require TCP sender changes (sections - 4.6, 4.7), and a pair requires changes to both the TCP sender and - receiver (sections 4.2, 4.3). One technique requires a sender - modification at the receiving host (section 4.8). The techniques may - be used independently, however some sets of techniques are - complementary, e.g., pacing (section 4.6) and byte counting (section - 4.7) which have been bundled into a single TCP Sender Adaptation - scheme [BPK99]. - - - - - -Balakrishnan et. al. Best Current Practice [Page 10] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - It is normally envisaged that these changes would occur in the end - hosts using the asymmetric path, however they could, and have, been - used in a middle-box or Protocol Enhancing Proxy (PEP) [RFC3135] - employing split TCP. This document does not discuss the issues - concerning PEPs. Section 5 describes several techniques, which do - not require end-to-end changes. - -4.1 Modified Delayed ACKs - - There are two standard methods that can be used by TCP receivers to - generate acknowledgments. The method outlined in [RFC793] generates - an ACK for each incoming data segment (i.e., d=1). [RFC1122] states - that hosts should use "delayed acknowledgments".
Using this - algorithm, an ACK is generated for at least every second full-sized - segment (d=2), or if a second full-sized segment does not arrive - within a given timeout (which must not exceed 500 ms [RFC1122], and - is typically less than 200 ms). Relaxing the latter constraint - (i.e., allowing d>2) may generate Stretch ACKs [RFC2760]. This - provides a possible mitigation, which reduces the rate at which ACKs - are returned by the receiver. An implementer should only deviate - from this requirement after careful consideration of the implications - [RFC2581]. - - Reducing the number of ACKs per received data segment has a number of - undesirable effects including: - - (i) Increased path RTT - (ii) Increased time for TCP to open the cwnd - (iii) Increased TCP sender burst size, since cwnd opens in larger - steps - - In addition, a TCP receiver is often unable to determine an optimum - setting for a large d, since it will normally be unaware of the - details of the properties of the links that form the path in the - reverse direction. - - RECOMMENDATION: A TCP receiver must use the standard TCP algorithm - for sending ACKs as specified in [RFC2581]. That is, it may delay - sending an ACK after it receives a data segment [RFC1122]. When ACKs - are delayed, the receiver must generate an ACK within 500 ms and the - ACK should be generated for at least every second full sized segment - (MSS) of received data [RFC2581]. This will result in an ACK delay - factor (d) that does not exceed a value of 2. Changing the algorithm - would require a host modification to the TCP receiver and awareness - by the receiving host that it is using a connection with an - asymmetric path. Such a change has many drawbacks in the general - case and is currently not recommended for use within the Internet. - - - - -Balakrishnan et. al. 
Best Current Practice [Page 11] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -4.2 Use of Large MSS - - A TCP sender that uses a large Maximum Segment Size (MSS) reduces the - number of ACKs generated per transmitted byte of data. - - Although individual subnetworks may support a large MTU, the majority - of current Internet links employ an MTU of approx 1500 bytes (that of - Ethernet). By setting the Don't Fragment (DF) bit in the IP header, - Path MTU (PMTU) discovery [RFC1191] may be used to determine the - maximum packet size (and hence MSS) a sender can use on a given - network path without being subjected to IP fragmentation, and - provides a way to automatically select a suitable MSS for a specific - path. This also guarantees that routers will not perform IP - fragmentation of normal data packets. - - By electing not to use PMTU Discovery, an end host may choose to use - IP fragmentation by routers along the path in the forward direction - [RFC793]. This allows an MSS larger than smallest MTU along the - path. However, this increases the unit of error recovery (TCP - segment) above the unit of transmission (IP packet). This is not - recommended, since it can increase the number of retransmitted - packets following loss of a single IP packet, leading to reduced - efficiency, and potentially aggravating network congestion [Ken87]. - Choosing an MSS larger than the forward path minimum MTU also permits - the sender to transmit more initial packets (a burst of IP fragments - for each TCP segment) when a session starts or following RTO expiry, - increasing the aggressiveness of the sender compared to standard TCP - [RFC2581]. This can adversely impact other standard TCP sessions - that share a network path. - - RECOMMENDATION: - - A larger forward path MTU is desirable for paths with bandwidth - asymmetry. Network providers may use a large MTU on links in the - forward direction. 
TCP end hosts using Path MTU discovery may be - able to take advantage of a large MTU by automatically selecting an - appropriate larger MSS, without requiring modification. The use of - Path MTU discovery [RFC1191] is therefore recommended. - - Increasing the unit of error recovery and congestion control (MSS) - above the unit of transmission and congestion loss (the IP packet) by - using a larger end host MSS and IP fragmentation in routers is not - recommended. - - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 12] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -4.3 ACK Congestion Control - - ACK Congestion Control (ACC) is an experimental technique that - operates end to end. ACC extends congestion control to ACKs, since - they may make non-negligible demands on resources (e.g., packet - buffers, and MAC transmission overhead) at an upstream bottleneck - link. It has two parts: (a) a network mechanism indicating to the - receiver that the ACK path is congested, and (b) the receiver's - response to such an indication. - - A router feeding an upstream bottleneck link may detect incipient - congestion, e.g., using an algorithm based on RED (Random Early - Detection) [FJ93]. This may track the average queue size over a time - window in the recent past. If the average exceeds a threshold, the - router may select a packet at random. If the packet IP header has - the Explicit Congestion Notification Capable Transport (ECT) bit set, - the router may mark the packet, i.e., sets an Explicit Congestion - Notification (ECN) [RFC3168] bit(s) in the IP header, otherwise the - packet is normally dropped. The ECN notification received by the end - host is reflected back to the sending TCP end host, to trigger - congestion avoidance [RFC3168]. Note that routers implementing RED - with ECN, do not eliminate packet loss, and may drop a packet (even - when the ECT bit is set). 
It is also possible to use an algorithm - other than RED to decide when to set the ECN bit. - - ACC extends ECN so that both TCP data packets and ACKs set the ECT - bit and are thus candidates for being marked with an ECN bit. - Therefore, upon receiving an ACK with the ECN bit set [RFC3168], a - TCP receiver reduces the rate at which it sends ACKs. It maintains a - dynamically varying delayed-ACK factor, d, and sends one ACK for - every d data packets received. When it receives a packet with the - ECN bit set, it increases d multiplicatively, thereby - multiplicatively decreasing the frequency of ACKs. For each - subsequent RTT (e.g., determined using the TCP RTTM option [RFC1323]) - during which it does not receive an ECN, it linearly decreases the - factor d, increasing the frequency of ACKs. Thus, the receiver - mimics the standard congestion control behavior of TCP senders in the - manner in which it sends ACKs. - - The maximum value of d is determined by the TCP sender window size, - which could be conveyed to the receiver in a new (experimental) TCP - option. The receiver should send at least one ACK (preferably more) - for each window of data from the sender (i.e., d < (cwnd/mss)) to - prevent the sender from stalling until the receiver's delayed ACK - timer triggers an ACK to be sent. - - - - - - -Balakrishnan et. al. Best Current Practice [Page 13] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - RECOMMENDATION: ACK Congestion Control (ACC) is an experimental - technique that requires TCP sender and receiver modifications. There - is currently little experience of using such techniques in the - Internet. Future versions of TCP may evolve to include this or - similar techniques. These are the subject of ongoing research. ACC - is not recommended for use within the Internet in its current form. 
- -4.4 Window Prediction Mechanism - - The Window Prediction Mechanism (WPM) is a TCP receiver side - mechanism [CLP98] that uses a dynamic ACK delay factor (varying d) - resembling the ACC scheme (section 4.3). The TCP receiver - reconstructs the congestion control behavior of the TCP sender by - predicting a cwnd value. This value is used along with the allowed - window to adjust the receiver's value of d. WPM accommodates for - unnecessary retransmissions resulting from losses due to link errors. - - RECOMMENDATION: Window Prediction Mechanism (WPM) is an experimental - TCP receiver side modification. There is currently little experience - of using such techniques in the Internet. Future versions of TCP may - evolve to include this or similar techniques. These are the subjects - of ongoing research. WPM is not recommended for use within the - Internet in its current form. - -4.5 Acknowledgement based on Cwnd Estimation. - - Acknowledgement based on Cwnd Estimation (ACE) [MJW00] attempts to - measure the cwnd at the TCP receiver and maintain a varying ACK delay - factor (d). The cwnd is estimated by counting the number of packets - received during a path RTT. The technique may improve accuracy of - prediction of a suitable cwnd. - - RECOMMENDATION: Acknowledgement based on Cwnd Estimation (ACE) is an - experimental TCP receiver side modification. There is currently - little experience of using such techniques in the Internet. Future - versions of TCP may evolve to include this or similar techniques. - These are the subject of ongoing research. ACE is not recommended - for use within the Internet in its current form. - -4.6 TCP Sender Pacing - - Reducing the frequency of ACKs may alleviate congestion of the - upstream bottleneck link, but can lead to increased size of TCP - sender bursts (section 4.1). 
This may slow the growth of cwnd, and - is undesirable when used over shared network paths since it may - significantly increase the maximum number of packets in the - bottleneck link buffer, potentially resulting in an increase in - network congestion. This may also lead to ACK Compression [ZSC91]. - - - -Balakrishnan et. al. Best Current Practice [Page 14] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - TCP Pacing [AST00], generally referred to as TCP Sender pacing, - employs an adapted TCP sender to alleviate transmission burstiness. - A bound is placed on the maximum number of packets the TCP sender can - transmit back-to-back (at local line rate), even if the window(s) - allow the transmission of more data. If necessary, more bursts of - data packets are scheduled for later points in time computed based on - the transmission rate of the TCP connection. The transmission rate - may be estimated from the ratio cwnd/srtt. Thus, large bursts of - data packets get broken up into smaller bursts spread over time. - - A subnetwork may also provide pacing (e.g., Generic Traffic Shaping - (GTS)), but implies a significant increase in the per-packet - processing overhead and buffer requirement at the router where - shaping is performed (section 5.3.3). - - RECOMMENDATIONS: TCP Sender Pacing requires a change to - implementation of the TCP sender. It may be beneficial in the - Internet and will significantly reduce the burst size of packets - transmitted by a host. This successfully mitigates the impact of - receiving Stretch ACKs. TCP Sender Pacing implies increased - processing cost per packet, and requires a prediction algorithm to - suggest a suitable transmission rate. There are hence performance - trade-offs between end host cost and network performance. - Specification of efficient algorithms remains an area of ongoing - research. Use of TCP Sender Pacing is not expected to introduce new - problems.
It is an experimental mitigation for TCP hosts that may - control the burstiness of transmission (e.g., resulting from Type 1 - techniques, section 5.2), however it is not currently widely - deployed. It is not recommended for use within the Internet in its - current form. - -4.7 TCP Byte Counting - - The TCP sender can avoid slowing growth of cwnd by taking into - account the volume of data acknowledged by each ACK, rather than - opening the cwnd based on the number of received ACKs. So, if an ACK - acknowledges d data packets (or TCP data segments), the cwnd would - grow as if d separate ACKs had been received. This is called TCP - Byte Counting [RFC2581, RFC2760]. (One could treat the single ACK as - being equivalent to d/2, instead of d ACKs, to mimic the effect of - the TCP delayed ACK algorithm.) This policy works because cwnd - growth is only tied to the available capacity in the forward - direction, so the number of ACKs is immaterial. - - This may mitigate the impact of asymmetry when used in combination - with other techniques (e.g., a combination of TCP Pacing - (section 4.6), and ACC (section 4.3) associated with a duplicate ACK - threshold at the receiver.) - - - -Balakrishnan et. al. Best Current Practice [Page 15] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - The main issue is that TCP byte counting may generate undesirable - long bursts of TCP packets at the sender host line rate. An - implementation must also consider that data packets in the forward - direction and ACKs in the reverse direction may both travel over - network paths that perform some amount of packet reordering. - Reordering of IP packets is currently common, and may arise from - various causes [BPS00]. - - RECOMMENDATION: TCP Byte Counting requires a small TCP sender - modification. In its simplest form, it can generate large bursts of - TCP data packets, particularly when Stretch ACKs are received.
- Unlimited byte counting is therefore not allowed [RFC2581] for use - within the Internet. - - It is therefore strongly recommended [RFC2581, RFC2760] that any byte - counting scheme should include a method to mitigate the potentially - large bursts of TCP data packets the algorithm can cause (e.g., TCP - Sender Pacing (section 4.6), ABC [abc-ID]). If the burst size or - sending rate of the TCP sender can be controlled then the scheme may - be beneficial when Stretch ACKs are received. Determining safe - algorithms remains an area of ongoing research. Further - experimentation will then be required to assess the success of these - safeguards, before they can be recommended for use in the Internet. - -4.8 Backpressure - - Backpressure is a technique to enhance the performance of - bidirectional traffic for end hosts directly connected to the - upstream bottleneck link [KVR98]. A limit is set on how many data - packets of upstream transfers can be enqueued at the upstream - bottleneck link. In other words, the bottleneck link queue exerts - 'backpressure' on the TCP (sender) layer. This requires a modified - implementation, compared to that currently deployed in many TCP - stacks. Backpressure ensures that ACKs of downstream connections do - not get starved at the upstream bottleneck, thereby improving - performance of the downstream connections. Similar generic schemes - that may be implemented in hosts/routers are discussed in section - 5.4. - - Backpressure can be unfair to a reverse direction connection and make - its throughput highly sensitive to the dynamics of the forward - connection(s). - - RECOMMENDATION: Backpressure requires an experimental modification to - the sender protocol stack of a host directly connected to an upstream - bottleneck link. Use of backpressure is an implementation issue, - rather than a network protocol issue. Where backpressure is - implemented, the optimizations described in this section could be - - - -Balakrishnan et. al.
Best Current Practice [Page 16] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - desirable and can benefit bidirectional traffic for hosts. - Specification of safe algorithms for providing backpressure is still - a subject of ongoing research. The technique is not recommended for - use within the Internet in its current form. - -5. Improving TCP performance using Transparent Modifications - - Various link and network layer techniques have been suggested to - mitigate the effect of an upstream bottleneck link. These techniques - may provide benefit without modification to either the TCP sender or - receiver, or may alternately be used in conjunction with one or more - of the schemes identified in section 4. In this document, these - techniques are known as "transparent" [RFC3135], because at the - transport layer, the TCP sender and receiver are not necessarily - aware of their existence. This does not imply that they do not - modify the pattern and timing of packets as observed at the network - layer. The techniques are classified here into three types based on - the point at which they are introduced. - - Most techniques require the individual TCP connections passing over - the bottleneck link(s) to be separately identified and imply that - some per-flow state is maintained for active TCP connections. A link - scheduler may also be employed (section 5.4). The techniques (with - one exception, ACK Decimation (section 5.2.2)) require: - - (i) Visibility of an unencrypted IP and TCP packet header (e.g., no - use of IPSec with payload encryption [RFC2406]). - (ii) Knowledge of IP/TCP options and ability to inspect packets with - tunnel encapsulations (e.g., [RFC2784]) or to suspend - processing of packets with unknown formats. - (iii) Ability to demultiplex flows (by using address/protocol/port - number, or an explicit flow-id).
- - [RFC3135] describes a class of network device that provides more than - forwarding of packets, and which is known as a Protocol Enhancing - Proxy (PEP). A large spectrum of PEP devices exists, ranging from - simple devices (e.g., ACK filtering) to more sophisticated devices - (e.g., stateful devices that split a TCP connection into two separate - parts). The techniques described in section 5 of this document - belong to the simpler type, and do not inspect or modify any TCP or - UDP payload data. They also do not modify port numbers or link - addresses. Many of the risks associated with more complex PEPs do - not exist for these schemes. Further information about the operation - and the risks associated with using PEPs are described in [RFC3135]. - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 17] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -5.1 TYPE 0: Header Compression - - A client may reduce the volume of bits used to send a single ACK by - using compression [RFC3150, RFC3135]. Most modern dial-up modems - support ITU-T V.42 bulk compression. In contrast to bulk - compression, header compression is known to be very effective at - reducing the number of bits sent on the upstream link [RFC1144]. This - relies on the observation that most TCP packet headers vary only in a - few bit positions between successive packets in a flow, and that the - variations can often be predicted. - -5.1.1 TCP Header Compression - - TCP header compression [RFC1144] (sometimes known as V-J compression) - is a Proposed Standard describing use over low capacity links running - SLIP or PPP [RFC3150]. It greatly reduces the size of ACKs on the - reverse link when losses are infrequent (a situation that ensures - that the state of the compressor and decompressor are synchronized). 
- However, this alone does not address all of the asymmetry issues: - - (i) In some (e.g., wireless) subnetworks there is a significant - per-packet MAC overhead that is independent of packet size - (section 3.2). - (ii) A reduction in the size of ACKs does not prevent adverse - interaction with large upstream data packets in the presence - of bidirectional traffic (section 3.3). - (iii) TCP header compression cannot be used with packets that have - IP or TCP options (including IPSec [RFC2402, RFC2406], TCP - RTTM [RFC1323], TCP SACK [RFC2018], etc.). - (iv) The performance of header compression described by RFC1144 is - significantly degraded when compressed packets are lost. An - improvement, which can still incur significant penalty on - long network paths, is described in [RFC2507]. This suggests - it should only be used on links (or paths) that experience a - low level of packet loss [RFC3150]. - (v) The normal implementation of Header Compression inhibits - compression when IP is used to support tunneling (e.g., L2TP, - GRE [RFC2784], IP-in-IP). The tunnel encapsulation - complicates locating the appropriate packet headers. Although - GRE allows Header Compression on the inner (tunneled) IP - header [RFC2784], this is not recommended, since loss of a - packet (e.g., due to router congestion along the tunnel path) - will result in discard of all packets for one RTT [RFC1144]. - - RECOMMENDATION: TCP Header Compression is a transparent modification - performed at both ends of the upstream bottleneck link. It offers no - benefit for flows employing IPSec [RFC2402, RFC2406], or when - additional protocol headers are present (e.g., IP or TCP options, - - - -Balakrishnan et. al. Best Current Practice [Page 18] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - and/or tunnel encapsulation headers). The scheme is widely - implemented and deployed and used over Internet links.
It is - recommended to improve TCP performance for paths that have a low-to- - medium bandwidth asymmetry (e.g., k<10). - - In the form described in [RFC1144], TCP performance is degraded when - used over links (or paths) that may exhibit appreciable rates of - packet loss [RFC3150]. It may also not provide significant - improvement for upstream links with bidirectional traffic. It is - therefore not desirable for paths that have a high bandwidth - asymmetry (e.g., k>10). - -5.1.2 Alternate Robust Header Compression Algorithms - - TCP header compression [RFC1144] and IP header compression [RFC2507] - do not perform well when subject to packet loss. Further, they do - not compress packets with TCP option fields (e.g., SACK [RFC2018] and - Timestamp (RTTM) [RFC1323]). However, recent work on more robust - schemes suggests that a new generation of compression algorithms may - be developed which are much more robust. The IETF ROHC working group - has specified compression techniques for UDP-based traffic [RFC3095] - and is examining a number of schemes that may provide improved TCP - header compression. These could be beneficial for asymmetric network - paths. - - RECOMMENDATION: Robust header compression is a transparent - modification that may be performed at both ends of an upstream - bottleneck link. This class of techniques may also be suited to - Internet paths that suffer low levels of re-ordering. The techniques - benefit paths with a low-to-medium bandwidth asymmetry (e.g., k<10) - and may be robust to packet loss. - - Selection of suitable compression algorithms remains an area of - ongoing research. It is possible that schemes may be derived which - support IPSec authentication, but not IPSec payload encryption. Such - schemes do not alone provide significant improvement in asymmetric - networks with a high asymmetry and/or bidirectional traffic.
- -5.2 TYPE 1: Reverse Link Bandwidth Management - - Techniques beyond Type 0 header compression are required to address - the performance problems caused by appreciable asymmetry (k>>1). One - set of techniques is implemented only at one point on the reverse - direction path, within the router/host connected to the upstream - bottleneck link. These use flow class or per-flow queues at the - upstream link interface to manage the queue of packets waiting for - transmission on the bottleneck upstream link. - - - - -Balakrishnan et. al. Best Current Practice [Page 19] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - This type of technique bounds the upstream link buffer queue size, - and employs an algorithm to remove (discard) excess ACKs from each - queue. This relies on the cumulative nature of ACKs (section 4.1). - Two approaches are described which employ this type of mitigation. - -5.2.1 ACK Filtering - - ACK Filtering (AF) [DMT96, BPK99] (also known as ACK Suppression - [SF98, Sam99, FSS01]) is a TCP-aware link-layer technique that - reduces the number of ACKs sent on the upstream link. This technique - has been deployed in specific production networks (e.g., asymmetric - satellite networks [ASB96]). The challenge is to ensure that the - sender does not stall waiting for ACKs, which may happen if ACKs are - indiscriminately removed. - - When an ACK from the receiver is about to be enqueued at an upstream - bottleneck link interface, the router or the end host link layer (if - the host is directly connected to the upstream bottleneck link) - checks the transmit queue(s) for older ACKs belonging to the same TCP - connection. If ACKs are found, some (or all of them) are removed - from the queue, reducing the number of ACKs. - - Some ACKs also have other functions in TCP [RFC1144], and should not - be deleted to ensure normal operation. AF should therefore not - delete an ACK that has any data or TCP flags set (SYN, RST, URG, and - FIN).
In addition, it should avoid deleting a series of 3 duplicate - ACKs that indicate the need for Fast Retransmission [RFC2581] or ACKs - with the Selective ACK option (SACK)[RFC2018] from the queue to avoid - causing problems to TCP's data-driven loss recovery mechanisms. - Appropriate treatment is also needed to preserve correct operation of - ECN feedback (carried in the TCP header) [RFC3168]. - - A range of policies to filter ACKs may be used. These may be either - deterministic or random (similar to a random-drop gateway, but should - take into consideration the semantics of the items in the queue). - Algorithms have also been suggested to ensure a minimum ACK rate to - guarantee the TCP sender window is updated [Sam99, FSS01], and to - limit the number of data packets (TCP segments) acknowledged by a - Stretch ACK. Per-flow state needs to be maintained only for - connections with at least one packet in the queue (similar to FRED - [LM97]). This state is soft [Cla88], and if necessary, can easily be - reconstructed from the contents of the queue. - - The undesirable effect of delayed DupACKs (section 3.4) can be - reduced by deleting duplicate ACKs above a threshold value [MJW00, - CLP98] allowing Fast Retransmission, but avoiding early TCP timeouts, - which may otherwise result from excessive queuing of DupACKs. - - - - -Balakrishnan et. al. Best Current Practice [Page 20] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - Future schemes may include more advanced rules allowing removal of - selected SACKs [RFC2018]. Such a scheme could prevent the upstream - link queue from becoming filled by back-to-back ACKs with SACK - blocks. Since a SACK packet is much larger than an ACK, it would - otherwise add significantly to the path delay in the reverse - direction. Selection of suitable algorithms remains an ongoing area - of research. - - RECOMMENDATION: ACK Filtering requires a modification to the upstream - link interface. 
The scheme has been deployed in some networks where - the extra processing overhead (per ACK) may be compensated for by - avoiding the need to modify TCP. ACK Filtering can generate Stretch - ACKs resulting in large bursts of TCP data packets. Therefore on its - own, it is not recommended for use in the general Internet. - - ACK Filtering when used in combination with a scheme to mitigate the - effect of Stretch ACKs (i.e., control TCP sender burst size) is - recommended for paths with appreciable asymmetry (k>1) and/or with - bidirectional traffic. Suitable algorithms to support IPSec - authentication, SACK, and ECN remain areas of ongoing research. - -5.2.2 ACK Decimation - - ACK Decimation is based on standard router mechanisms. By using an - appropriate configuration of (small) per-flow queues and a chosen - dropping policy (e.g., Weighted Fair Queuing, WFQ) at the upstream - bottleneck link, a similar effect to AF (section 5.2.1) may be - obtained, but with less control of the actual packets which are - dropped. - - In this scheme, the router/host at the bottleneck upstream link - maintains per-flow queues and services them fairly (or with - priorities) by queuing and scheduling of ACKs and data packets in the - reverse direction. A small queue threshold is maintained to drop - excessive ACKs from the tail of each queue, in order to reduce ACK - Congestion. The inability to identify special ACK packets (c.f., AF) - introduces some major drawbacks to this approach, such as the - possibility of losing DupACKs, FIN/ACK, RST packets, or packets - carrying ECN information [RFC3168]. Loss of these packets does not - significantly impact network congestion, but does adversely impact - the performance of the TCP session observing the loss. - - A WFQ scheduler may assign a higher priority to interactive traffic - (providing it has a mechanism to identify such traffic) and provide a - fair share of the remaining capacity to the bulk traffic. 
In the - presence of bidirectional traffic, and with a suitable scheduling - policy, this may ensure fairer sharing for ACK and data packets. An - increased forward transmission rate is achieved over asymmetric links - - - -Balakrishnan et. al. Best Current Practice [Page 21] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - by an increased ACK Decimation rate, leading to generation of Stretch - ACKs. As in AF, TCP sender burst size increases when Stretch ACKs - are received unless other techniques are used in combination with - this technique. - - This technique has been deployed in specific networks (e.g., a - network with high bandwidth asymmetry supporting high-speed data - services to in-transit mobile hosts [Seg00]). Although not optimal, - it offered a potential mitigation applicable when the TCP header is - difficult to identify or not visible to the link layer (e.g., due to - IPSec encryption). - - RECOMMENDATION: ACK Decimation uses standard router mechanisms at the - upstream link interface to constrain the rate at which ACKs are fed - to the upstream link. The technique is beneficial with paths having - appreciable asymmetry (k>1). It is however suboptimal, in that it - may lead to inefficient TCP error recovery (and hence in some cases - degraded TCP performance), and provides only crude control of link - behavior. It is therefore recommended that where possible, ACK - Filtering should be used in preference to ACK Decimation. - - When ACK Decimation is used on paths with an appreciable asymmetry - (k>1) (or with bidirectional traffic) it increases the burst size of - the TCP sender, use of a scheme to mitigate the effect of Stretch - ACKs or control burstiness is therefore strongly recommended. - -5.3 TYPE 2: Handling Infrequent ACKs - - TYPE 2 mitigations perform TYPE 1 upstream link bandwidth management, - but also employ a second active element which mitigates the effect of - the reduced ACK rate and burstiness of ACK transmission. 
This is - desirable when end hosts use standard TCP sender implementations - (e.g., those not implementing the techniques in sections 4.6, 4.7). - - Consider a path where a TYPE 1 scheme forwards a Stretch ACK covering - d TCP packets (i.e., where the acknowledgement number is d*MSS larger - than the last ACK received by the TCP sender). When the TCP sender - receives this ACK, it can send a burst of d (or d+1) TCP data - packets. The sender is also constrained by the current cwnd. - Received ACKs also serve to increase cwnd (by at most one MSS). - - A TYPE 2 scheme mitigates the impact of the reduced ACK frequency - resulting when a TYPE 1 scheme is used. This is achieved by - interspersing additional ACKs before each received Stretch ACK. The - additional ACKs, together with the original ACK, provide the TCP - sender with sufficient ACKs to allow the TCP cwnd to open in the same - way as if each of the original ACKs sent by the TCP receiver had been - forwarded by the reverse path. In addition, by attempting to restore - - - -Balakrishnan et. al. Best Current Practice [Page 22] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - the spacing between ACKs, such a scheme can also restore the TCP - self-clocking behavior, and reduce the TCP sender burst size. Such - schemes need to ensure conservative behavior (i.e., should not - introduce more ACKs than were originally sent) and reduce the - probability of ACK Compression [ZSC91]. - - The action is performed at two points on the return path: the - upstream link interface (where excess ACKs are removed), and a point - further along the reverse path (after the bottleneck upstream - link(s)), where replacement ACKs are inserted. This attempts to - reconstruct the ACK stream sent by the TCP receiver when used in - combination with AF (section 5.2.1), or ACK Decimation (section - 5.2.2). 
- - TYPE 2 mitigations may be performed locally at the receive interface - directly following the upstream bottleneck link, or may alternatively - be applied at any point further along the reverse path (this is not - necessarily on the forward path, since asymmetric routing may employ - different forward and reverse internet paths). Since the techniques - may generate multiple ACKs upon reception of each individual Stretch - ACK, it is strongly recommended that the expander implements a scheme - to prevent exploitation as a "packet amplifier" in a Denial-of- - Service (DoS) attack (e.g., to verify the originator of the ACK). - Identification of the sender could be accomplished by appropriately - configured packet filters and/or by tunnel authentication procedures - (e.g., [RFC2402, RFC2406]). A limit on the number of reconstructed - ACKs that may be generated from a single packet may also be - desirable. - -5.3.1 ACK Reconstruction - - ACK Reconstruction (AR) [BPK99] is used in conjunction with AF - (section 5.2.1). AR deploys a soft-state [Cla88] agent called an ACK - Reconstructor on the reverse path following the upstream bottleneck - link. The soft-state can be regenerated if lost, based on received - ACKs. When a Stretch ACK is received, AR introduces additional ACKs - by filling gaps in the ACK sequence. Some potential Denial-of- - Service vulnerabilities may arise (section 6) and need to be - addressed by appropriate security techniques. - - The Reconstructor determines the number of additional ACKs, by - estimating the number of filtered ACKs. This uses implicit - information present in the received ACK stream by observing the ACK - sequence number of each received ACK. An example implementation - could set an ACK threshold, ackthresh, to twice the MSS (this assumes - the chosen MSS is known by the link). The factor of two corresponds - - - - - -Balakrishnan et. al. 
Best Current Practice [Page 23] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - to standard TCP delayed-ACK policy (d=2). Thus, if successive ACKs - arrive separated by delta, the Reconstructor regenerates a maximum of - ((delta/ackthresh) - 2) ACKs. - - To reduce the TCP sender burst size and allow the cwnd to increase at - a rate governed by the downstream link, the reconstructed ACKs must - be sent at a consistent rate (i.e., temporal spacing between - reconstructed ACKs). One method is for the Reconstructor to measure - the arrival rate of ACKs using an exponentially weighted moving - average estimator. This rate depends on the output rate from the - upstream link and on the presence of other traffic sharing the link. - The output of the estimator indicates the average temporal spacing - for the ACKs (and the average rate at which ACKs would reach the TCP - sender if there were no further losses or delays). This may be used - by the Reconstructor to set the temporal spacing of reconstructed - ACKs. The scheme may also be used in combination with TCP sender - adaptation (e.g., a combination of the techniques in sections 4.6 and - 4.7). - - The trade-off in AR is between obtaining less TCP sender burstiness, - and a better rate of cwnd increase, with a reduction in RTT - variation, versus a modest increase in the path RTT. The technique - cannot perform reconstruction on connections using IPSec (AH - [RFC2402] or ESP [RFC2406]), since it is unable to generate - appropriate security information. It also cannot regenerate other - packet header information (e.g., the exact pattern of bits carried in - the IP packet ECN field [RFC3168] or the TCP RTTM option [RFC1323]). 
- - An ACK Reconstructor operates correctly (i.e., generates no spurious - ACKs and preserves the end-to-end semantics of TCP), providing: - - (i) the TCP receiver uses ACK Delay (d=2) [RFC2581] - (ii) the Reconstructor receives only in-order ACKs - (iii) all ACKs are routed via the Reconstructor - (iv) the Reconstructor correctly determines the TCP MSS used by - the session - (v) the packets do not carry additional header information (e.g., - TCP RTTM option [RFC1323], IPSec using AH [RFC2402] or ESP - [RFC2406]). - - RECOMMENDATION: ACK Reconstruction is an experimental transparent - modification performed on the reverse path following the upstream - bottleneck link. It is designed to be used in conjunction with a - TYPE 1 mitigation. It reduces the burst size of TCP transmission in - the forward direction, which may otherwise increase when TYPE 1 - schemes are used alone. It requires modification of equipment after - the upstream link (including maintaining per-flow soft state). The - scheme introduces implicit assumptions about the network path and has - - - -Balakrishnan et. al. Best Current Practice [Page 24] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - potential Denial-of-Service vulnerabilities (i.e., acting as a packet - amplifier); these need to be better understood and addressed by - appropriate security techniques. - - Selection of appropriate algorithms to pace the ACK traffic remains - an open research issue. There is also currently little experience of - the implications of using such techniques in the Internet, and - therefore it is recommended that this technique should not be used - within the Internet in its current form. - -5.3.2 ACK Compaction and Companding - - ACK Compaction and ACK Companding [SAM99, FSS01] are techniques that - operate at a point on the reverse path following the constrained ACK - bottleneck.
Like AR (section 5.3.1), ACK Compaction and ACK - Companding are both used in conjunction with an AF technique (section - 5.2.1) and regenerate filtered ACKs, restoring the ACK stream. - However, they differ from AR in that they use a modified AF (known as - a compactor or compressor), in which explicit information is added to - all Stretch ACKs generated by the AF. This is used to explicitly - synchronize the reconstruction operation (referred to here as - expansion). - - The modified AF combines two modifications: First, when the - compressor deletes an ACK from the upstream bottleneck link queue, it - appends explicit information (a prefix) to the remaining ACK (this - ACK is marked to ensure it is not subsequently deleted). The - additional information contains details of the conditions under which - ACKs were previously filtered. A variety of information may be - encoded in the prefix. This includes the number of ACKs deleted by - the AF and the average number of bytes acknowledged. This may - subsequently be used by an expander at the remote end of the tunnel. - Further timing information may also be added to control the pacing of - the regenerated ACKs [FSS01]. The temporal spacing of the filtered - ACKs may also be encoded. - - To encode the prefix requires the subsequent expander to recognize a - modified ACK header. This would normally limit the expander to - link-local operation (at the receive interface of the upstream - bottleneck link). If remote expansion is needed further along the - reverse path, a tunnel may be used to pass the modified ACKs to the - remote expander. The tunnel introduces extra overhead, however - networks with asymmetric capacity and symmetric routing frequently - already employ such tunnels (e.g., in a UDLR network [RFC3077], the - expander may be co-located with the feed router). - - - - - - -Balakrishnan et. al.
Best Current Practice [Page 25] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - ACK expansion uses a stateless algorithm to expand the ACK (i.e., - each received packet is processed independently of previously - received packets). It uses the prefix information together with the - acknowledgment field in the received ACK, to produce an equivalent - number of ACKs to those previously deleted by the compactor. These - ACKs are forwarded to the original destination (i.e., the TCP - sender), preserving normal TCP ACK clocking. In this way, ACK - Compaction, unlike AR, is not reliant on specific ACK policies, nor - must it see all ACKs associated with the reverse path (e.g., it may - be compatible with schemes such as DAASS [RFC2760]). - - Some potential Denial-of-Service vulnerabilities may arise (section - 6) and need to be addressed by appropriate security techniques. The - technique cannot perform reconstruction on connections using IPSec, - since they are unable to regenerate appropriate security information. - It is possible to explicitly encode IPSec security information from - suppressed packets, allowing operation with IPSec AH, however this - remains an open research issue, and implies an additional overhead - per ACK. - - RECOMMENDATION: ACK Compaction and Companding are experimental - transparent modifications performed on the reverse path following the - upstream bottleneck link. They are designed to be used in - conjunction with a modified TYPE 1 mitigation and reduce the burst - size of TCP transmission in the forward direction, which may - otherwise increase when TYPE 1 schemes are used alone. - - The technique is desirable, but requires modification of equipment - after the upstream bottleneck link (including processing of a - modified ACK header). Selection of appropriate algorithms to pace - the ACK traffic also remains an open research issue. 
Some potential - Denial-of-Service vulnerabilities may arise with any device that may - act as a packet amplifier. These need to be addressed by appropriate - security techniques. There is little experience of using the scheme - over Internet paths. This scheme is a subject of ongoing research - and is not recommended for use within the Internet in its current - form. - -5.3.3 Mitigating TCP packet bursts generated by Infrequent ACKs - - The bursts of data packets generated when a Type 1 scheme is used on - the reverse direction path may be mitigated by introducing a router - supporting Generic Traffic Shaping (GTS) on the forward path [Seg00]. - GTS is a standard router mechanism implemented in many deployed - routers. This technique does not eliminate the bursts of data - generated by the TCP sender, but attempts to smooth out the bursts by - employing scheduling and queuing techniques, producing traffic which - resembles that when TCP Pacing is used (section 4.6). These - - - -Balakrishnan et. al. Best Current Practice [Page 26] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - techniques require maintaining per-flow soft-state in the router, and - increase per-packet processing overhead. Some additional buffer - capacity is needed to queue packets being shaped. - - To perform GTS, the router needs to select appropriate traffic - shaping parameters, which require knowledge of the network policy, - connection behavior and/or downstream bottleneck characteristics. GTS - may also be used to enforce other network policies and promote - fairness between competing TCP connections (and also UDP and - multicast flows). It also reduces the probability of ACK Compression - [ZSC91]. - - The smoothing of packet bursts reduces the impact of the TCP - transmission bursts on routers and hosts following the point at which - GTS is performed. 
It is therefore desirable to perform GTS near to - the sending host, or at least at a point before the first forward - path bottleneck router. - - RECOMMENDATIONS: Generic Traffic Shaping (GTS) is a transparent - technique employed at a router on the forward path. The algorithms - to implement GTS are available in widely deployed routers and may be - used on an Internet link, but do imply significant additional per- - packet processing cost. - - Configuration of a GTS is a policy decision of a network service - provider. When appropriately configured the technique will reduce - the size of TCP data packet bursts, mitigating the effects of Type 1 - techniques. GTS is recommended for use in the Internet in - conjunction with Type 1 techniques such as ACK Filtering (section - 5.2.1) and ACK Decimation (section 5.2.2). - -5.4 TYPE 3: Upstream Link Scheduling - - Many of the above schemes imply using per flow queues (or per - connection queues in the case of TCP) at the upstream bottleneck - link. Per-flow queuing (e.g., FQ, CBQ) offers benefit when used on - any slow link (where the time to transmit a packet forms an - appreciable part of the path RTT) [RFC3150]. Type 3 schemes offer - additional benefit when used with one of the above techniques. - -5.4.1 Per-Flow queuing at the Upstream Bottleneck Link - - When bidirectional traffic exists in a bandwidth asymmetric network - competing ACK and packet data flows along the return path may degrade - the performance of both upstream and downstream flows [KVR98]. - Therefore, it is highly desirable to use a queuing strategy combined - with a scheduling mechanism at the upstream link. This has also been - called priority-based multiplexing [RFC3135]. - - - -Balakrishnan et. al. Best Current Practice [Page 27] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - On a slow upstream link, appreciable jitter may be introduced by - sending large data packets ahead of ACKs [RFC3150].
A simple scheme - may be implemented using per-flow queuing with a fair scheduler - (e.g., round robin service to all flows, or priority scheduling). A - modified scheduler [KVR98] could place a limit on the number of ACKs - a host is allowed to transmit upstream before transmitting a data - packet (assuming at least one data packet is waiting in the upstream - link queue). This guarantees at least a certain minimum share of the - capacity to flows in the reverse direction, while enabling flows in - the forward direction to improve TCP throughput. - - Bulk (payload) compression, a small MTU, link level transparent - fragmentation [RFC1991, RFC2686] or link level suspend/resume - capability (where higher priority frames may pre-empt transmission of - lower priority frames) may be used to mitigate the impact (jitter) of - bidirectional traffic on low speed links [RFC3150]. More advanced - schemes (e.g., WFQ) may also be used to improve the performance of - transfers with multiple ACK streams such as http [Seg00]. - - RECOMMENDATION: Per-flow queuing is a transparent modification - performed at the upstream bottleneck link. Per-flow (or per-class) - scheduling does not impact the congestion behavior of the Internet, - and may be used on any Internet link. The scheme has particular - benefits for slow links. It is widely implemented and widely - deployed on links operating at less than 2 Mbps. This is recommended - as a mitigation on its own or in combination with one of the other - described techniques. - -5.4.2 ACKs-first Scheduling - - ACKs-first Scheduling is an experimental technique to improve - performance of bidirectional transfers. In this case data packets - and ACKs compete for resources at the upstream bottleneck link - [RFC3150]. A single First-In First-Out, FIFO, queue for both data - packets and ACKs could impact the performance of forward transfers. 
- For example, if the upstream bottleneck link is a 28.8 kbps dialup - line, the transmission of a 1 Kbyte sized data packet would take - about 280 ms. So even if just two such data packets get queued ahead - of ACKs (not an uncommon occurrence since data packets are sent out - in pairs during slow start), they would shut out ACKs for well over - half a second. If more than two data packets are queued up ahead of - an ACK, the ACKs would be delayed by even more [RFC3150]. - - A possible approach to alleviating this is to schedule data and ACKs - differently from FIFO. One algorithm, in particular, is ACKs-first - scheduling, which accords a higher priority to ACKs over data - packets. The motivation for such scheduling is that it minimizes the - idle time for the forward connection by minimizing the time that ACKs - - - -Balakrishnan et. al. Best Current Practice [Page 28] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - spend queued behind data packets at the upstream link. At the same - time, with Type 0 techniques such as header compression [RFC1144], - the transmission time of ACKs becomes small enough that the impact on - subsequent data packets is minimal. (Subnetworks in which the per- - packet overhead of the upstream link is large, e.g., packet radio - subnetworks, are an exception, section 3.2.) This scheduling scheme - does not require the upstream bottleneck router/host to explicitly - identify or maintain state for individual TCP connections. - - ACKs-first scheduling does not help avoid a delay due to a data - packet in transmission. Link fragmentation or suspend/resume may be - beneficial in this case. - - RECOMMENDATION: ACKs-first scheduling is an experimental transparent - modification performed at the upstream bottleneck link. If it is - used without a mechanism (such as ACK Congestion Control (ACC), - section 4.3) to regulate the volume of ACKs, it could lead to - starvation of data packets. 
This is a performance penalty - experienced by end hosts using the link and does not modify Internet - congestion behavior. Experiments indicate that ACKs-first scheduling - in combination with ACC is promising. However, there is little - experience of using the technique in the wider Internet. Further - development of the technique remains an open research issue, and - therefore the scheme is not currently recommended for use within the - Internet. - -6. Security Considerations - - The recommendations contained in this document do not impact the - integrity of TCP, introduce new security implications to the TCP - protocol, or applications using TCP. - - Some security considerations in the context of this document arise - from the implications of using IPSec by the end hosts or routers - operating along the return path. Use of IPSec prevents, or - complicates, some of the mitigations. For example: - - (i) When IPSec ESP [RFC2406] is used to encrypt the IP payload, the - TCP header can neither be read nor modified by intermediate - entities. This rules out header compression, ACK Filtering, ACK - Reconstruction, and the ACK Compaction. - - (ii) The TCP header information may be visible, when some forms of - network layer security are used. For example, using IPSec AH - [RFC2402], the TCP header may be read, but not modified, by - intermediaries. This may in future allow extensions to support - ACK Filtering, but rules out the generation of new - - - - -Balakrishnan et. al. Best Current Practice [Page 29] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - packets by intermediaries (e.g., ACK Reconstruction). The - enhanced header compression scheme discussed in [RFC2507] would - also work with IPSec AH. - - There are potential Denial-of-Service (DoS) implications when using - Type 2 schemes. Unless additional security mechanisms are used, a - Reconstructor/expander could be exploited as a packet amplifier. 
A - third party may inject unauthorized Stretch ACKs into the reverse - path, triggering the generation of additional ACKs. These ACKs would - consume capacity on the return path and processing resources at the - systems along the path, including the destination host. This - provides a potential platform for a DoS attack. The usual - precautions must be taken to verify the correct tunnel end point, and - to ensure that applications cannot falsely inject packets that expand - to generate unwanted traffic. Imposing a rate limit and bound on the - delayed ACK factor (d) would also lessen the impact of any undetected - exploitation. - -7. Summary - - This document considers several TCP performance constraints that - arise from asymmetry in the properties of the forward and reverse - paths across an IP network. Such performance constraints arise, - e.g., as a result of both bandwidth (capacity) asymmetry, asymmetric - shared media in the reverse direction, and interactions with Media - Access Control (MAC) protocols. Asymmetric capacity may cause TCP - Acknowledgments (ACKs) to be lost or become inordinately delayed - (e.g., when a bottleneck link is shared between many flows, or when - there is bidirectional traffic). This effect may be exacerbated with - media-access delays (e.g., in certain multi-hop radio subnetworks, - satellite Bandwidth on Demand access). Asymmetry, and in particular - high asymmetry, raises a set of TCP performance issues. - - A set of techniques providing performance improvement is surveyed. - These include techniques to alleviate ACK Congestion and techniques - that enable a TCP sender to cope with infrequent ACKs without - destroying TCP self-clocking. These techniques include both end-to- - end, local link-layer, and subnetwork schemes. Many of these - techniques have been evaluated in detail via analysis, simulation, - and/or implementation on asymmetric subnetworks forming part of the - Internet.
There is however as yet insufficient operational - experience for some techniques, and these therefore currently remain - items of on-going research and experimentation. - - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 30] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - The following table summarizes the current recommendations. - Mechanisms are classified as recommended (REC), not recommended (NOT - REC) or experimental (EXP). Experimental techniques may not be well - specified. These techniques will require further operational - experience before they can be recommended for use in the public - Internet. - - The recommendations for end-to-end host modifications are summarized - in table 1. This lists each technique, the section in which each - technique is discussed, and where it is applied (S denotes the host - sending TCP data packets in the forward direction, R denotes the host - which receives these data packets). - - +------------------------+-------------+------------+--------+ - | Technique | Use | Section | Where | - +------------------------+-------------+------------+--------+ - | Modified Delayed ACKs | NOT REC | 4.1 | TCP R | - | Large MSS & NO FRAG | REC | 4.2 | TCP SR | - | Large MSS & IP FRAG | NOT REC | 4.2 | TCP SR | - | ACK Congestion Control | EXP | 4.3 | TCP SR | - | Window Pred. Mech (WPM)| NOT REC | 4.4 | TCP R | - | Window Cwnd. Est. (ACE)| NOT REC | 4.5 | TCP R | - | TCP Sender Pacing | EXP *1 | 4.6 | TCP S | - | Byte Counting | NOT REC *2 | 4.7 | TCP S | - | Backpressure | EXP *1 | 4.8 | TCP R | - +------------------------+-------------+------------+--------+ - - Table 1: Recommendations concerning host modifications. - - *1 Implementation of the technique may require changes to the - internal design of the protocol stack in end hosts. - *2 Dependent on a scheme for preventing excessive TCP transmission - burst. 
- - The recommendations for techniques that do not require the TCP sender - and receiver to be aware of their existence (i.e., transparent - techniques) are summarized in table 2. Each technique is listed - along with the section in which each mechanism is discussed, and - where the technique is applied (S denotes the sending interface prior - to the upstream bottleneck link, R denotes receiving interface - following the upstream bottleneck link). - - - - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 31] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - +------------------------+-------------+------------+--------+ - | Mechanism | Use | Section | Type | - +------------------------+-------------+------------+--------+ - | Header Compr. (V-J) | REC *1 | 5.1.1 | 0 SR | - | Header Compr. (ROHC) | REC *1 *2 | 5.1.2 | 0 SR | - +------------------------+-------------+------------+--------+ - | ACK Filtering (AF) | EXP *3 | 5.2.1 | 1 S | - | ACK Decimation | EXP *3 | 5.2.2 | 1 S | - +------------------------+-------------+------------+--------+ - | ACK Reconstruction (AR)| NOT REC | 5.3.1 | 2 *4 | - | ACK Compaction/Compand.| EXP | 5.3.2 | 2 S *4 | - | Gen. Traff. Shap. (GTS)| REC | 5.3.3 | 2 *5 | - +------------------------+-------------+------------+--------+ - | Fair Queueing (FQ) | REC | 5.4.1 | 3 S | - | ACKs-First Scheduling | NOT REC | 5.4.2 | 3 S | - +------------------------+-------------+------------+--------+ - - Table 2: Recommendations concerning transparent modifications. - - *1 At high asymmetry these schemes may degrade TCP performance, but - are not considered harmful to the Internet. - *2 Standardisation of new TCP compression protocols is the subject of - ongoing work within the ROHC WG, refer to other IETF RFCs on the - use of these techniques. - *3 Use in the Internet is dependent on a scheme for preventing - excessive TCP transmission burst. - *4 Performed at a point along the reverse path after the upstream - bottleneck link. 
- *5 Performed at a point along the forward path. - -8. Acknowledgments - - This document has benefited from comments from the members of the - Performance Implications of Links (PILC) Working Group. In - particular, the authors would like to thank John Border, Spencer - Dawkins, Aaron Falk, Dan Grossman, Randy Katz, Jeff Mandin, Rod - Ragland, Ramon Segura, Joe Touch, and Lloyd Wood for their useful - comments. They also acknowledge the data provided by Metricom Inc., - concerning operation of their packet data network. - -9. References - - References of the form RFCnnnn are Internet Request for Comments - (RFC) documents available online at http://www.rfc-editor.org/. - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 32] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -9.1 Normative References - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC1122] Braden, R., Ed., "Requirements for Internet Hosts - - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC1144] Jacobson, V., "Compressing TCP/IP Headers for Low-Speed - Serial Links", RFC 1144, February 1990. - - [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2784] Farinacci, D., Li, T., Hanks, S., Meyer, D. and P. Traina, - "Generic Routing Encapsulation (GRE)", RFC 2784, March - 2000. - - [RFC3135] Border, J., Kojo, M., Griner, J., Montenegro, G. and Z. - Shelby, "Performance Enhancing Proxies Intended to Mitigate - Link-Related Degradations", RFC 3135, June 2001. - -9.2 Informative References - - [abc-ID] Allman, M., "TCP Congestion Control with Appropriate Byte - Counting", Work in Progress. - - [All97b] Allman, M., "Fixing Two BSD TCP Bugs", Technical Report - CR-204151, NASA Lewis Research Center, October 1997. 
- - [ANS01] ANSI Standard T1.413, "Network to Customer Installation - Interfaces - Asymmetric Digital Subscriber Lines (ADSL) - Metallic Interface", November 1998. - - [ASB96] Arora, V., Suphasindhu, N., Baras, J.S. and D. Dillon, - "Asymmetric Internet Access over Satellite-Terrestrial - Networks", Proc. AIAA: 16th International Communications - Satellite Systems Conference and Exhibit, Part 1, - Washington, D.C., February 25-29, 1996, pp.476-482. - - [AST00] Aggarwal, A., Savage, S., and T. Anderson, "Understanding - the Performance of TCP Pacing", Proc. IEEE INFOCOM, Tel- - Aviv, Israel, V.3, March 2000, pp. 1157-1165. - - - - - -Balakrishnan et. al. Best Current Practice [Page 33] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - [Bal98] Balakrishnan, H., "Challenges to Reliable Data Transport - over Heterogeneous Wireless Networks", Ph.D. Thesis, - University of California at Berkeley, USA, August 1998. - http://nms.lcs.mit.edu/papers/hari-phd/ - - [BPK99] Balakrishnan, H., Padmanabhan, V. N., and R. H. Katz, "The - Effects of Asymmetry on TCP Performance", ACM Mobile - Networks and Applications (MONET), Vol.4, No.3, 1999, pp. - 219-241. An expanded version of a paper published at Proc. - ACM/IEEE Mobile Communications Conference (MOBICOM), 1997. - - [BPS00] Bennett, J. C., Partridge, C., and N. Schectman, "Packet - Reordering is Not Pathological Network Behaviour", IEEE/ACM - Transactions on Networking, Vol. 7, Issue. 6, 2000, - pp.789-798. - - [Cla88] Clark, D.D, "The Design Philosophy of the DARPA Internet - Protocols", ACM Computer Communications Review (CCR), Vol. - 18, Issue 4, 1988, pp.106-114. - - [CLC99] Clausen, H., Linder, H., and B. Collini-Nocker, "Internet - over Broadcast Satellites", IEEE Communications Magazine, - Vol. 37, Issue. 6, 1999, pp.146-151. - - [CLP98] Calveras, A., Linares, J., and J. Paradells, "Window - Prediction Mechanism for Improving TCP in Wireless - Asymmetric Links". Proc. 
IEEE Global Communications - Conference (GLOBECOM), Sydney Australia, November 1998, - pp.533-538. - - [CR98] Cohen, R., and Ramanathan, S., "Tuning TCP for High - Performance in Hybrid Fiber Coaxial Broad-Band Access - Networks", IEEE/ACM Transactions on Networking, Vol.6, - No.1, 1998, pp.15-29. - - [DS00] Cable Television Laboratories, Inc., Data-Over-Cable - Service Interface Specifications---Radio Frequency - Interface Specification SP-RFIv1.1-I04-00407, 2000 - - [DS01] Data-Over-Cable Service Interface Specifications, Radio - Frequency Interface Specification 1.0, SP-RFI-I05-991105, - Cable Television Laboratories, Inc., November 1999. - - [DMT96] Durst, R., Miller, G., and E. Travis, "TCP Extensions for - Space Communications", ACM/IEEE Mobile Communications - Conference (MOBICOM), New York, USA, November 1996, pp.15- - 26. - - - - -Balakrishnan et. al. Best Current Practice [Page 34] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - [EN97] "Digital Video Broadcasting (DVB); DVB Specification for - Data Broadcasting", European Standard (Telecommunications - series) EN 301 192, 1997. - - [EN00] "Digital Video Broadcasting (DVB); Interaction Channel for - Satellite Distribution Systems", Draft European Standard - (Telecommunications series) ETSI, Draft EN 301 790, v.1.2.1 - - [FJ93] Floyd, S., and V. Jacobson, "Random Early Detection - gateways for Congestion Avoidance", IEEE/ACM Transactions - on Networking, Vol.1, No.4, 1993, pp.397-413. - - [FSS01] Fairhurst, G., Samaraweera, N.K.G, Sooriyabandara, M., - Harun, H., Hodson, K., and R. Donardio, "Performance Issues - in Asymmetric Service Provision using Broadband Satellite", - IEE Proceedings on Communication, Vol.148, No.2, 2001, - pp.95-99. - - [ITU01] ITU-T Recommendation E.681, "Traffic Engineering Methods - For IP Access Networks Based on Hybrid Fiber/Coax System", - September 2001. - - [ITU02] ITU-T Recommendation G.992.1, "Asymmetrical Digital - Subscriber Line (ADSL) Transceivers", July 1999. 
- - [Jac88] Jacobson, V., "Congestion Avoidance and Control", Proc. ACM - SIGCOMM, Stanford, CA, ACM Computer Communications Review - (CCR), Vol.18, No.4, 1988, pp.314-329. - - [Ken87] Kent C.A., and J. C. Mogul, "Fragmentation Considered - Harmful", Proc. ACM SIGCOMM, USA, ACM Computer - Communications Review (CCR), Vol.17, No.5, 1988, pp.390- - 401. - - [KSG98] Krout, T., Solsman, M., and J. Goldstein, "The Effects of - Asymmetric Satellite Networks on Protocols", Proc. IEEE - Military Communications Conference (MILCOM), Bradford, MA, - USA, Vol.3, 1998, pp.1072-1076. - - [KVR98] Kalampoukas, L., Varma, A., and Ramakrishnan, K.K., - "Improving TCP Throughput over Two-Way Asymmetric Links: - Analysis and Solutions", Proc. ACM SIGMETRICS, Medison, - USA, 1998, pp.78-89. - - [LM97] Lin, D., and R. Morris, "Dynamics of Random Early - Detection", Proc. ACM SIGCOMM, Cannes, France, ACM Computer - Communications Review (CCR), Vol.27, No.4, 1997, pp.78-89. - - - - -Balakrishnan et. al. Best Current Practice [Page 35] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - [LMS97] Lakshman, T.V., Madhow, U., and B. Suter, "Window-based - Error Recovery and Flow Control with a Slow Acknowledgement - Channel: A Study of TCP/IP Performance", Proc. IEEE - INFOCOM, Vol.3, Kobe, Japan, 1997, pp.1199-1209. - - [MJW00] Ming-Chit, I.T., Jinsong, D., and W. Wang,"Improving TCP - Performance Over Asymmetric Networks", ACM SIGCOMM, ACM - Computer Communications Review (CCR), Vol.30, No.3, 2000. - - [Pad98] Padmanabhan, V.N., "Addressing the Challenges of Web Data - Transport", Ph.D. Thesis, University of California at - Berkeley, USA, September 1998 (also Tech Report UCB/CSD- - 98-1016). http://www.cs.berkeley.edu/~padmanab/phd- - thesis.html - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for - High Performance", RFC 1323, May 1992. - - [RFC2018] Mathis, B., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. 
- - [RFC2402] Kent, S. and R. Atkinson, "IP Authentication Header", RFC - 2402, November 1998. - - [RFC2406] Kent, S. and R. Atkinson, "IP Encapsulating Security - Payload (ESP)", RFC 2406, November 1998. - - [RFC2507] Degermark, M., Nordgren, B. and S. Pink, "IP Header - Compression", RFC 2507, February 1999. - - [RFC2525] Paxson, V., Allman, M., Dawson, S., Heavens, I. and B. - Volz, "Known TCP Implementation Problems", RFC 2525, March - 1999. - - [RFC2686] Bormann, C., "The Multi-Class Extension to Multi-Link PPP", - RFC 2686, September 1999. - - [RFC2760] Allman, M., Dawkins, S., Glover, D., Griner, J., Henderson, - T., Heidemann, J., Kruse, H., Ostermann, S., Scott, K., - Semke, J., Touch, J. and D. Tran, "Ongoing TCP Research - Related to Satellites", RFC 2760, February 2000. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [RFC3077] Duros, E., Dabbous, W., Izumiyama, H., Fujii, N. and Y. - Zhang, "A link Layer tunneling mechanism for unidirectional - links", RFC 3077, March 2001. - - - -Balakrishnan et. al. Best Current Practice [Page 36] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - [RFC3095] Bormann, C., Burmeister, C., Degermark, M., Fukushima, H., - Hannu, H., Jonsson, E., Hakenberg, R., Koren, T., Le, K., - Liu, Z., Martensson, A., Miyazaki, A., Svanbro, K., Wiebke, - T., Yoshimura, T. and H. Zheng, "RObust Header Compression - (ROHC): Framework and four profiles: RTP, UDP ESP and - uncompressed", RFC 3095, July 2001. - - [RFC3150] Dawkins, S., Montenegro, G., Kojo, M. and V. Magret, "End- - to-end Performance Implications of Slow Links", BCP 48, RFC - 3150, July 2001. - - [RFC3168] Ramakrishnan K., Floyd, S. and D. Black, "A Proposal to add - Explicit Congestion Notification (ECN) to IP", RFC 3168, - September 2001. 
- - [Sam99] Samaraweera, N.K.G, "Return Link Optimization for Internet - Service Provision Using DVB-S Networks", ACM Computer - Communications Review (CCR), Vol.29, No.3, 1999, pp.4-19. - - [Seg00] Segura R., "Asymmetric Networking Techniques For Hybrid - Satellite Communications", NC3A, The Hague, Netherlands, - NATO Technical Note 810, August 2000, pp.32-37. - - [SF98] Samaraweera, N.K.G., and G. Fairhurst. "High Speed Internet - Access using Satellite-based DVB Networks", Proc. IEEE - International Networks Conference (INC98), Plymouth, UK, - 1998, pp.23-28. - - [ZSC91] Zhang, L., Shenker, S., and D. D. Clark, "Observations and - Dynamics of a Congestion Control Algorithm: The Effects of - Two-Way Traffic", Proc. ACM SIGCOMM, ACM Computer - Communications Review (CCR), Vol 21, No 4, 1991, pp.133- - 147. - -10. IANA Considerations - - There are no IANA considerations associated with this document. - - - - - - - - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 37] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -Appendix - Examples of Subnetworks Exhibiting Network Path Asymmetry - - This appendix provides a list of some subnetworks which are known to - experience network path asymmetry. The asymmetry in capacity of - these network paths can require mitigations to provide acceptable - overall performance. Examples include the following: - - - IP service over some wide area and local area wireless networks. - In such networks, the predominant network path asymmetry arises - from the hub-and-spokes architecture of the network (e.g., a - single base station that communicates with multiple mobile - stations), this requires a Ready To Send / Clear To Send (RTS/CTS) - protocol and a Medium Access Control (MAC) protocol which needs to - accommodate the significant turn-around time for the radios. A - high per-packet transmission overhead may lead to significant - network path asymmetry. 
- - - IP service over a forward satellite link utilizing Digital Video - Broadcast (DVB) transmission [EN97] (e.g., 38-45 Mbps), and a - slower upstream link using terrestrial network technology (e.g., - dial-up modem, line of sight microwave, cellular radio) [CLC99]. - Network path asymmetry arises from a difference in the upstream - and downstream link capacities. - - - Certain military networks [KSG98] providing Internet access to - in-transit or isolated hosts [Seg00] using a high capacity - downstream satellite link (e.g., 2-3 Mbps) with a narrowband - upstream link (e.g., 2.4-9.6 kbps) using either Demand Assigned - Multiple Access (DAMA) or fixed rate satellite links. The main - factor contributing to network path asymmetry is the difference in - the upstream and downstream link capacities. Some differences - between forward and reverse paths may arise from the way in which - upstream link capacity is allocated. - - - Most data over cable TV networks (e.g., DOCSIS [ITU01, DS00]), - where the analogue channels assigned for upstream communication - (i.e., in the reverse direction) are narrower and may be more - noisy than those assigned for the downstream link. As a - consequence, the upstream and downstream links differ in their - transmission rate. For example, in DOCSIS 1.0 [DS00], the - downstream transmission rate is either 27 or 52 Mbps. Upstream - transmission rates may be dynamically selected to be one of a - series of rates which range between 166 kbps to 9 Mbps. Operators - may assign multiple upstream channels per downstream channel. - Physical layer (PHY) overhead (which accompanies upstream - transmissions, but is not present in the downstream link) can also - increase the network path asymmetry. The Best Effort service, - which is typically used to carry TCP, uses a - - - -Balakrishnan et. al. Best Current Practice [Page 38] - -RFC 3449 PILC - Asymmetric Links December 2002 - - - contention/reservation MAC protocol. 
A cable modem (CM) sending - an isolated packet (such as a TCP ACK) on the upstream link must - contend with other CMs to request capacity from the central cable - modem termination system (CMTS). The CMTS then grants timeslots - to a CM for the upstream transmission. The CM may "piggyback" - subsequent requests onto upstream packets, avoiding contention - cycles; as a result, spacing of TCP ACKs can be dramatically - altered due to minor variations in load of the cable data network - and inter-arrival times of TCP DATA packets. Numerous other - complexities may add to, or mitigate, the asymmetry in rate and - access latency experienced by packets sent on the upstream link - relative to downstream packets in DOCSIS. The asymmetry - experienced by end hosts may also change dynamically (e.g., with - network load), and when best effort services share capacity with - services that have symmetric reserved capacity (e.g., IP telephony - over the Unsolicited Grant service) [ITU01]. - - - Asymmetric Digital Subscriber Line (ADSL), by definition, offers a - downstream link transmission rate that is higher than that of the - upstream link. The available rates depend upon channel quality - and system configuration. For example, one widely deployed ADSL - technology [ITU02, ANS01] operates at rates that are multiples of - 32 kbps (up to 6.144 Mbps) in the downstream link, and up to 640 - kbps for the upstream link. The network path asymmetry - experienced by end hosts may be further increased when best effort - services, e.g., Internet access over ADSL, share the available - upstream capacity with reserved services (e.g., constant bit rate - voice telephony). - - - - - - - - - - - - - - - - - - - - - - - -Balakrishnan et. al. 
Best Current Practice [Page 39] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -Authors' Addresses - - Hari Balakrishnan - Laboratory for Computer Science - 200 Technology Square - Massachusetts Institute of Technology - Cambridge, MA 02139 - USA - - Phone: +1-617-253-8713 - EMail: hari@lcs.mit.edu - Web: http://nms.lcs.mit.edu/~hari/ - - - Venkata N. Padmanabhan - Microsoft Research - One Microsoft Way - Redmond, WA 98052 - USA - - Phone: +1-425-705-2790 - EMail: padmanab@microsoft.com - Web: http://www.research.microsoft.com/~padmanab/ - - - Godred Fairhurst - Department of Engineering - Fraser Noble Building - University of Aberdeen - Aberdeen AB24 3UE - UK - - EMail: gorry@erg.abdn.ac.uk - Web: http://www.erg.abdn.ac.uk/users/gorry - - - Mahesh Sooriyabandara - Department of Engineering - Fraser Noble Building - University of Aberdeen - Aberdeen AB24 3UE - UK - - EMail: mahesh@erg.abdn.ac.uk - Web: http://www.erg.abdn.ac.uk/users/mahesh - - - - - - -Balakrishnan et. al. Best Current Practice [Page 40] - -RFC 3449 PILC - Asymmetric Links December 2002 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2002). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Balakrishnan et. al. Best Current Practice [Page 41] - diff --git a/kernel/picotcp/RFC/rfc3465.txt b/kernel/picotcp/RFC/rfc3465.txt deleted file mode 100644 index 32b38c3..0000000 --- a/kernel/picotcp/RFC/rfc3465.txt +++ /dev/null @@ -1,563 +0,0 @@ - - - - - - -Network Working Group M. Allman -Request for Comments: 3465 BBN/NASA GRC -Category: Experimental February 2003 - - - TCP Congestion Control with Appropriate Byte Counting (ABC) - -Status of this Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - This document proposes a small modification to the way TCP increases - its congestion window. 
Rather than the traditional method of - increasing the congestion window by a constant amount for each - arriving acknowledgment, the document suggests basing the increase on - the number of previously unacknowledged bytes each ACK covers. This - change improves the performance of TCP, as well as closes a security - hole TCP receivers can use to induce the sender into increasing the - sending rate too rapidly. - -Terminology - - Much of the language in this document is taken from [RFC2581]. - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in [RFC2119]. - -1 Introduction - - This document proposes a modification to the algorithm for increasing - TCP's congestion window (cwnd) that improves both performance and - security. Rather than increasing a TCP's congestion window based on - the number of acknowledgments (ACKs) that arrive at the data sender - (per the current specification [RFC2581]), the congestion window is - increased based on the number of bytes acknowledged by the arriving - ACKs. The algorithm improves performance by mitigating the impact of - delayed ACKs on the growth of cwnd. At the same time, the algorithm - provides cwnd growth in direct relation to the probed capacity of a - - - -Allman Experimental [Page 1] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - - network path, therefore providing a more measured response to ACKs - that cover only small amounts of data (less than a full segment size) - than ACK counting. This more appropriate cwnd growth can improve - both performance and can prevent inappropriate cwnd growth in - response to a misbehaving receiver. On the other hand, in some cases - the modified cwnd growth algorithm causes larger bursts of segments - to be sent into the network. 
In some cases this can lead to a non- - negligible increase in the drop rate and reduced performance (see - section 4 for a larger discussion of the issues). - - This document is organized as follows. Section 2 outlines the - modified algorithm for increasing TCP's congestion window. Section 3 - discusses the advantages of using the modified algorithm. Section 4 - discusses the disadvantages of the approach outlined in this - document. Section 5 outlines some of the fairness issues that must - be considered for the modified algorithm. Section 6 discusses - security considerations. - - Statement of Intent - - This specification contains an algorithm improving the performance - of TCP which is understood to be effective and safe, but which has - not been widely deployed. One goal of publication as an - Experimental RFC is to be prudent, and encourage use and - deployment prior to publication in the standards track. It is the - intent of the Transport Area to re-submit this specification as an - IETF Proposed Standard in the future, after more experience has - been gained. - -2 A Modified Algorithm for Increasing the Congestion Window - - As originally outlined in [Jac88] and specified in [RFC2581], TCP - uses two algorithms for increasing the congestion window. During - steady-state, TCP uses the Congestion Avoidance algorithm to linearly - increase the value of cwnd. At the beginning of a transfer, after a - retransmission timeout or after a long idle period (in some - implementations), TCP uses the Slow Start algorithm to increase cwnd - exponentially. According to RFC 2581, slow start bases the cwnd - increase on the number of incoming acknowledgments. During - congestion avoidance RFC 2581 allows more latitude in increasing - cwnd, but traditionally implementations have based the increase on - the number of arriving ACKs. 
In the following two subsections, we - detail modifications to these algorithms to increase cwnd based on - the number of bytes being acknowledged by each arriving ACK, rather - than by the number of ACKs that arrive. We call these changes - "Appropriate Byte Counting" (ABC) [All99]. - - - - - -Allman Experimental [Page 2] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - -2.1 Congestion Avoidance - - RFC 2581 specifies that cwnd should be increased by 1 segment per - round-trip time (RTT) during the congestion avoidance phase of a - transfer. Traditionally, TCPs have approximated this increase by - increasing cwnd by 1/cwnd for each arriving ACK. This algorithm - opens cwnd by roughly 1 segment per RTT if the receiver ACKs each - incoming segment and no ACK loss occurs. However, if the receiver - implements delayed ACKs [Bra89], the receiver returns roughly half as - many ACKs, which causes the sender to open cwnd more conservatively - (by approximately 1 segment every second RTT). The approach that - this document suggests is to store the number of bytes that have been - ACKed in a "bytes_acked" variable in the TCP control block. When - bytes_acked becomes greater than or equal to the value of the - congestion window, bytes_acked is reduced by the value of cwnd. - Next, cwnd is incremented by a full-sized segment (SMSS). The - algorithm suggested above is specifically allowed by RFC 2581 during - congestion avoidance because it opens the window by at most 1 segment - per RTT. - -2.2 Slow Start - - RFC 2581 states that the sender increments the congestion window by - at most, 1*SMSS bytes for each arriving acknowledgment during slow - start. This document proposes that a TCP sender SHOULD increase cwnd - by the number of previously unacknowledged bytes ACKed by each - incoming acknowledgment, provided the increase is not more than L - bytes. Choosing the limit on the increase, L, is discussed in the - next subsection. 
When the number of previously unacknowledged bytes - ACKed is less than or equal to 1*SMSS bytes, or L is less than or - equal to 1*SMSS bytes, this proposal is no more aggressive (and - possibly less aggressive) than allowed by RFC 2581. However, - increasing cwnd by more than 1*SMSS bytes in response to a single ACK - is more aggressive than allowed by RFC 2581. The more aggressive - version of the slow start algorithm still falls within the spirit of - the principles outlined in [Jac88] (i.e., of no more than doubling - the cwnd per RTT), and this document proposes ABC for experimentation - in shared networks, provided an appropriate limit is applied (see - next section). - -2.3 Choosing the Limit - - The limit, L, chosen for the cwnd increase during slow start, - controls the aggressiveness of the algorithm. Choosing L=1*SMSS - bytes provides behavior that is no more aggressive than allowed by - RFC 2581. However, ABC with L=1*SMSS bytes is more conservative in a - - - - - -Allman Experimental [Page 3] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - - number of key ways (as discussed in the next section) and therefore, - this document suggests that even though with L=1*SMSS bytes TCP - stacks will see little performance change, ABC SHOULD be used. - - A very large L could potentially lead to large line-rate bursts of - traffic in the face of a large amount of ACK loss or in the case when - the receiver sends "stretch ACKs" (ACKs for more than the two full- - sized segments allowed by the delayed ACK algorithm) [Pax97]. - - This document specifies that TCP implementations MAY use L=2*SMSS - bytes and MUST NOT use L > 2*SMSS bytes. This choice balances - between being conservative (L=1*SMSS bytes) and being potentially - very aggressive. In addition, L=2*SMSS bytes exactly balances the - negative impact of the delayed ACK algorithm (as discussed in more - detail in section 3.2). 
Note that when L=2*SMSS bytes cwnd growth is - roughly the same as the case when the standard algorithms are used in - conjunction with a receiver that transmits an ACK for each incoming - segment [All98] (assuming no or small amounts of ACK loss in both - cases). - - The exception to the above suggestion is during a slow start phase - that follows a retransmission timeout (RTO). In this situation, a - TCP MUST use L=1*SMSS as specified in RFC 2581 since ACKs for large - amounts of previously unacknowledged data are common during this - phase of a transfer. These ACKs do not necessarily indicate how much - data has left the network in the last RTT, and therefore ABC cannot - accurately determine how much to increase cwnd. As an example, say - segment N is dropped by the network, and segments N+1 and N+2 arrive - successfully at the receiver. The sender will receive only two - duplicate ACKs and therefore must rely on the retransmission timer - (RTO) to detect the loss. When the RTO expires, segment N is - retransmitted. The ACK sent in response to the retransmission will - be for segment N+2. However, this ACK does not indicate that three - segments have left the network in the last RTT, but rather only a - single segment left the network. Therefore, the appropriate cwnd - increment is at most 1*SMSS bytes. - -2.4 RTO Implications - - [Jac88] shows that increases in cwnd of more than a factor of two in - succeeding RTTs can cause spurious retransmissions on slow links - where the bandwidth dominates the RTT, assuming the RTO estimator - given in [Jac88] and [RFC2988]. ABC stays within this limit of no - more than doubling cwnd in successive RTTs by capping the increase - (no matter what L is employed) by the number of previously - unacknowledged bytes covered by each incoming ACK. 
- - - - - -Allman Experimental [Page 4] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - -3 Advantages - - This section outlines several advantages of using the ABC algorithm - to increase cwnd, rather than the standard ACK counting algorithm - given in [RFC2581]. - -3.1 More Appropriate Congestion Window Increase - - The ABC algorithm outlined in section 2 increases TCP's cwnd in - proportion to the amount of data actually sent into the network. ACK - counting, on the other hand, increments cwnd by a constant upon the - arrival of each ACK. For instance, consider an interactive telnet - connection (e.g., ssh or telnet) in which ACKs generally cover only a - few bytes of data, but cwnd is increased by 1*SMSS bytes for each ACK - received. When a large amount of data needs to be transmitted (e.g., - displaying a large file) the data is sent in one large burst because - the cwnd grows by 1*SMSS bytes per ACK rather than based on the - actual amount of capacity used. Such a line-rate burst of data can - potentially cause a large amount of segment loss. - - Congestion Window Validation (CWV) [RFC2861] addresses the above - problem as well. CWV limits the amount of unused cwnd a TCP - connection can accumulate. ABC can be used in conjunction with CWV - to obtain an accurate measure of the network path. - -3.2 Mitigate the Impact of Delayed ACKs and Lost ACKs - - Delayed ACKs [RFC1122,RFC2581] allow a TCP receiver to refrain from - sending an ACK for each incoming segment. However, a receiver SHOULD - send an ACK for every second full-sized segment that arrives. - Furthermore, a receiver MUST NOT withhold an ACK for more than 500 - ms. By reducing the number of ACKs sent to the data originator the - receiver is slowing the growth of the congestion window under an ACK - counting system. Using ABC with L=2*SMSS bytes can roughly negate - the negative impact imposed by delayed ACKs by allowing cwnd to be - increased for ACKs that are withheld by the receiver. 
This allows - the congestion window to grow in a manner similar to the case when - the receiver ACKs each incoming segment, but without adding extra - traffic to the network. Simulation studies have shown increased - throughput when a TCP sender uses ABC when compared to the standard - ACK counting algorithm [All99], especially for short transfers that - never leave the initial slow start period. - - Note that delayed ACKs should not be an issue during slow start-based - loss recovery, as RFC 2581 recommends that receivers should not delay - ACKs that cover out-of-order segments. Therefore, as discussed - above, ABC with L > 1*SMSS bytes is inappropriate for such slow start - based loss recovery and MUST NOT be used. - - - -Allman Experimental [Page 5] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - - Note: In the case when an entire window of data is lost, a TCP - receiver will likely generate delayed ACKs and an L > 1*SMSS bytes - would be safe. However, detecting this scenario is difficult. - Therefore to keep ABC conservative, this document mandates that L - MUST NOT be > 1*SMSS bytes in any slow start-based loss recovery. - - ACK loss can also retard the growth of a congestion window that - increases based on the number of ACKs that arrive. When counting - ACKs, dropped ACKs represent forever-missed opportunities to increase - cwnd. Using ABC with L > 1*SMSS bytes allows the sender to mitigate - the effect of lost ACKs. - -3.3 Prevents Attacks from Misbehaving Receivers - - [SCWA99] outlines several methods for a receiver to induce a TCP - sender into violating congestion control and transmitting data at a - potentially inappropriate rate. One of the outlined attacks is "ACK - Division". This scheme involves the receiver sending multiple ACKs - for each incoming data segment, each ACKing only a small portion of - the original TCP data segment. 
Since TCP senders have traditionally - used ACK counting to increase cwnd, ACK division causes - inappropriately rapid cwnd growth and, in turn, a potentially - inappropriate sending rate. A TCP sender that uses ABC can prevent - this attack from being used to undermine standard congestion control - because the cwnd increase is based on the number of bytes ACKed, - rather than the number of ACKs received. - - To prevent misbehaving receivers from inducing inappropriate sender - behavior, this document suggests TCP implementations use ABC, even if - L=1*SMSS bytes (i.e., not allowing ABC to provide more aggressive - cwnd growth than allowed by RFC 2581). - -4 Disadvantages - - The main disadvantages of using ABC with L=2*SMSS bytes are an - increase in the burstiness of TCP and a small increase in the overall - loss rate. [All98] discusses the two ways that ABC increases the - burstiness of the TCP sender. First, the "micro burstiness" of the - connection is increased. In other words, the number of segments sent - in response to each incoming ACK is increased by at most 1 segment - when using ABC with L=2*SMSS bytes in conjunction with a receiver - that is sending delayed ACKs. During slow start this translates into - an increase from sending 2 back-to-back segments to sending 3 back- - to-back packets in response to an ACK for a single packet. Or, an - increase from 3 packets to 4 packets when receiving a delayed ACK for - two outstanding packets. Note that ACK loss can cause larger bursts. - However, ABC only increases the burst size by at most 1*SMSS bytes - per ACK received when compared to the standard behavior. This slight - - - -Allman Experimental [Page 6] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - - increase in the burstiness should only cause problems for devices - that have very small buffers. In addition, ABC increases the "macro - burstiness" of the TCP sender in response to delayed ACKs in slow - start. 
Rather than increasing cwnd by roughly 1.5 times per RTT, ABC - roughly doubles the congestion window every RTT. However, doubling - cwnd every RTT fits within the spirit of slow start, as originally - outlined [Jac88]. - - With the increased burstiness comes a modest increase in the loss - rate for a TCP connection employing ABC (see the next section for a - short discussion on the fairness of ABC to non-ABC flows). The - additional loss can be directly attributable to the increased - aggressiveness of ABC. During slow start cwnd is increased more - rapidly. Therefore when loss occurs cwnd is larger and more drops - are likely. Similarly, a congestion avoidance cycle takes roughly - half, as long when using ABC and delayed ACKs when compared to an ACK - counting implementation. In other words, a TCP sender reaches the - capacity of the network path, drops a packet and reduces the - congestion window by half roughly twice as often when using ABC. - However, as discussed above, in spite of the additional loss an ABC - TCP sender generally obtains better overall performance than a non- - ABC TCP [All99]. - - Due to the increase in the packet drop rate we suggest ABC be - implemented in conjunction with selective acknowledgments [RFC2018]. - -5 Fairness Considerations - - [All99] presents several simple simulations conducted to measure the - impact of ABC on competing traffic (both ABC and non-ABC). The - experiments show that while ABC increases the drop rate for the - connection using ABC, competing traffic is not greatly effected. The - experiments show that standard TCP and ABC both obtain roughly the - same throughput, regardless of the variant of the competing traffic. - The simulations also reaffirm that ABC outperforms non-ABC TCP in an - environment with varying types of TCP connections. On the other - hand, the simulations presented in [All99] are not necessarily - realistic. Therefore we are encouraging more experimentation in the - Internet. 
- -6 Security Considerations - - As discussed in section 3.3, ABC protects a TCP sender from a - misbehaving receiver that induces the sender into transmitting at an - inappropriate rate with an "ACK division" attack. This, in turn, - protects the network from an overly aggressive sender. - - - - - -Allman Experimental [Page 7] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - -7 Conclusions - - This document RECOMMENDS that all TCP stacks be modified to use ABC - with L=1*SMSS bytes. This change does not increase the - aggressiveness of TCP. Furthermore, simulations of ABC with L=2*SMSS - bytes show a promising performance improvement that we encourage - researchers to experiment with in the Internet. - -Acknowledgments - - This document has benefited from discussions with and encouragement - from Sally Floyd. Van Jacobson and Reiner Ludwig provided valuable - input on the implications of byte counting on the RTO. Reiner Ludwig - and Kostas Pentikousis provided valuable feedback on a draft of this - document. - -Normative References - - [RFC1122] Braden, R., Ed., "Requirements for Internet Hosts -- - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - -Informative References - - [All98] Mark Allman. On the Generation and Use of TCP - Acknowledgments. ACM Computer Communication Review, 29(3), - July 1998. - - [All99] Mark Allman. TCP Byte Counting Refinements. ACM Computer - Communication Review, 29(3), July 1999. - - [Jac88] Van Jacobson. Congestion Avoidance and Control. ACM - SIGCOMM 1988. - - [Pax97] Vern Paxson. Automated Packet Trace Analysis of TCP - Implementations. ACM SIGCOMM, September 1997. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. 
- - [RFC2861] Handley, M., Padhye, J. and S. Floyd, "TCP Congestion - Window Validation", RFC 2861, June 2000. - - - - -Allman Experimental [Page 8] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - - [SCWA99] Stefan Savage, Neal Cardwell, David Wetherall, Tom - Anderson. TCP Congestion Control with a Misbehaving - Receiver. ACM Computer Communication Review, 29(5), - October 1999. - -Author's Address - - Mark Allman - BBN Technologies/NASA Glenn Research Center - Lewis Field - 21000 Brookpark Rd. MS 54-5 - Cleveland, OH 44135 - - Fax: 216-433-8705 - Phone: 216-433-6586 - EMail: mallman@bbn.com - http://roland.grc.nasa.gov/~mallman - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Allman Experimental [Page 9] - -RFC 3465 TCP Congestion Control with ABC February 2003 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Allman Experimental [Page 10] - diff --git a/kernel/picotcp/RFC/rfc3481.txt b/kernel/picotcp/RFC/rfc3481.txt deleted file mode 100644 index 6c45efe..0000000 --- a/kernel/picotcp/RFC/rfc3481.txt +++ /dev/null @@ -1,1459 +0,0 @@ - - - - - - -Network Working Group H. Inamura, Ed. -Request for Comments: 3481 NTT DoCoMo, Inc. -BCP: 71 G. Montenegro, Ed. -Category: Best Current Practice Sun Microsystems Laboratories - Europe - R. Ludwig - Ericsson Research - A. Gurtov - Sonera - F. Khafizov - Nortel Networks - February 2003 - - - TCP over Second (2.5G) and Third (3G) Generation Wireless Networks - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - This document describes a profile for optimizing TCP to adapt so that - it handles paths including second (2.5G) and third (3G) generation - wireless networks. It describes the relevant characteristics of 2.5G - and 3G networks, and specific features of example deployments of such - networks. It then recommends TCP algorithm choices for nodes known - to be starting or ending on such paths, and it also discusses open - issues. 
The configuration options recommended in this document are - commonly found in modern TCP stacks, and are widely available - standards-track mechanisms that the community considers safe for use - on the general Internet. - - - - - - - - - - - - - -Inamura, et al. Best Current Practice [Page 1] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -Table of Contents - - 1. Introduction. . . . . . . . . . . . . . . . . . . . . . . . . 3 - 2. 2.5G and 3G Link Characteristics. . . . . . . . . . . . . . . 4 - 2.1 Latency. . . . . . . . . . . . . . . . . . . . . . . . . 4 - 2.2 Data Rates . . . . . . . . . . . . . . . . . . . . . . . 5 - 2.3 Asymmetry . . . . . . . . . . . . . . . . . . . . . . . 6 - 2.4 Delay Spikes . . . . . . . . . . . . . . . . . . . . . . 6 - 2.5 Packet Loss Due to Corruption. . . . . . . . . . . . . . 7 - 2.6 Intersystem Handovers. . . . . . . . . . . . . . . . . . 7 - 2.7 Bandwidth Oscillation. . . . . . . . . . . . . . . . . . 7 - 3. Example 2.5G and 3G Deployments . . . . . . . . . . . . . . . 8 - 3.1 2.5G Technologies: GPRS, HSCSD and CDMA2000 1XRTT. . . . 8 - 3.2 A 3G Technology: W-CDMA. . . . . . . . . . . . . . . . . 8 - 3.3 A 3G Technology: CDMA2000 1X-EV. . . . . . . . . . . . . 10 - 4. TCP over 2.5G and 3G. . . . . . . . . . . . . . . . . . . . . 10 - 4.1 Appropriate Window Size (Sender & Receiver). . . . . . . 11 - 4.2 Increased Initial Window (Sender). . . . . . . . . . . . 11 - 4.3 Limited Transmit (Sender). . . . . . . . . . . . . . . . 12 - 4.4 IP MTU Larger than Default . . . . . . . . . . . . . . . 12 - 4.5 Path MTU Discovery (Sender & Intermediate Routers) . . . 13 - 4.6 Selective Acknowledgments (Sender & Receiver). . . . . . 13 - 4.7 Explicit Congestion Notification (Sender, Receiver & - Intermediate Routers). . . . . . . . . . . . . . . . . . 13 - 4.8 TCP Timestamps Option (Sender & Receiver). . . . . . . . 13 - 4.9 Disabling RFC 1144 TCP/IP Header Compression (Wireless - Host) . . . . . . . . . . . . . . . . . . . . . . . . . 
15 - 4.10 Summary . . . . . . . . . . . . . . . . . . . . . . . . . 16 - 5. Open Issues . . . . . . . . . . . . . . . . . . . . . . . . . 16 - 6. Security Considerations . . . . . . . . . . . . . . . . . . . 18 - 7. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 18 - 8. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 19 - 9. Normative References . . . . . . . . . . . . . . . . . . . . . 19 - 10. Informative References . . . . . . . . . . . . . . . . . . . . 21 - 11. Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . 25 - 12. Full Copyright Statement . . . . . . . . . . . . . . . . . . . 26 - - - - - - - - - - - - - - - -Inamura, et al. Best Current Practice [Page 2] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -1. Introduction - - The second generation cellular systems are commonly referred to as - 2G. The 2G phase began in the 1990s when digital voice encoding had - replaced analog systems (1G). 2G systems are based on various radio - technologies including frequency-, code- and time- division multiple - access. Examples of 2G systems include GSM (Europe), PDC (Japan), - and IS-95 (USA). Data links provided by 2G systems are mostly - circuit-switched and have transmission speeds of 10-20 kbps uplink - and downlink. Demand for higher data rates, instant availability and - data volume-based charging, as well as lack of radio spectrum - allocated for 2G led to the introduction of 2.5G (for example, GPRS - and PDC-P) and 3G (for example, Wideband CDMA and cdma2000) systems. - - Radio technology for both Wideband CDMA (W-CDMA) (adopted, for - example, in Europe, Japan, etc) and cdma2000 (adopted, for example, - in US, South Korea, etc) is based on code division multiple access - allowing for higher data rates and more efficient spectrum - utilization than 2G systems. 
3G systems provide both packet-switched - and circuit-switched connectivity in order to address the quality of - service requirements of conversational, interactive, streaming, and - bulk transfer applications. The transition to 3G is expected to be a - gradual process. Initially, 3G will be deployed to introduce high - capacity and high speed access in densely populated areas. Mobile - users with multimode terminals will be able to utilize existing - coverage of 2.5G systems on the rest of territory. - - Much development and deployment activity has centered around 2.5G and - 3G technologies. Along with objectives like increased capacity for - voice channels, a primary motivation for these is data communication, - and, in particular, Internet access. Accordingly, key issues are TCP - performance and the several techniques which can be applied to - optimize it over different wireless environments [19]. - - This document proposes a profile of such techniques, (particularly - effective for use with 2.5G and 3G wireless networks). The - configuration options in this document are commonly found in modern - TCP stacks, and are widely available IETF standards-track mechanisms - that the community has judged to be safe on the general Internet - (that is, even in predominantly non-wireless scenarios). - Furthermore, this document makes one set of recommendations that - covers both 2.5G and 3G networks. Since both generations of wireless - technologies exhibit similar challenges to TCP performance (see - Section 2), one common set is warranted. - - - - - - - -Inamura, et al. Best Current Practice [Page 3] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - Two example applications of the recommendations in this document are: - - o The WAP Forum [25] (part of the Open Mobile Alliance [26] as of - June 2002) is an industry association that has developed standards - for wireless information and telephony services on digital mobile - phones. 
In order to address WAP functionality for higher speed - networks such as 2.5G and 3G networks, and to aim at convergence - with Internet standards, the WAP Forum thoroughly revised its - specifications. The resultant version 2.0 [31] adopts TCP as its - transport protocol, and recommends TCP optimization mechanisms - closely aligned with those described in this document. - - o I-mode [33] is a wireless Internet service deployed on handsets in - Japan. The newer version of i-mode runs on FOMA [34], an - implementation of W-CDMA. I-mode over FOMA deploys the profile of - TCP described in this document. - - This document is structured as follows: Section 2 reviews the link - layer characteristics of 2.5G/3G networks; Section 3 gives a brief - overview of some representative 2.5G/3G technologies like W-CDMA, - cdma2000 and GPRS; Section 4 recommends mechanisms and configuration - options for TCP implementations used in 2.5G/3G networks, including a - summary in chart form at the end of the section; finally, Section 5 - discusses some open issues. - -2. 2.5G and 3G Link Characteristics - - Link layer characteristics of 2.5G/3G networks have significant - effects on TCP performance. In this section we present various - aspects of link characteristics unique to the 2.5G/3G networks. - -2.1 Latency - - The latency of 2.5G/3G links is high mostly due to the extensive - processing required at the physical layer of those networks, e.g., - for FEC and interleaving, and due to transmission delays in the radio - access network [58] (including link-level retransmissions). A - typical RTT varies between a few hundred milliseconds and one second. - The associated radio channels suffer from difficult propagation - environments. Hence, powerful but complex physical layer techniques - need to be applied to provide high capacity in a wide coverage area - in a resource efficient way. 
Hopefully, rapid improvements in all - areas of wireless networks ranging from radio layer techniques over - signal processing to system architecture will ultimately also lead to - reduced delays in 3G wireless systems. - - - - - - -Inamura, et al. Best Current Practice [Page 4] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -2.2 Data Rates - - The main incentives for transition from 2G to 2.5G to 3G are the - increase in voice capacity and in data rates for the users. 2.5G - systems have data rates of 10-20 kbps in uplink and 10-40 kbps in - downlink. Initial 3G systems are expected to have bit rates around - 64 kbps in uplink and 384 kbps in downlink. Considering the - resulting bandwidth-delay product (BDP) of around 1-5 KB for 2.5G and - 8-50 KB for 3G, 2.5G links can be considered LTNs (Long Thin Networks - [19]), and 3G links approach LFNs (Long Fat Networks [2], as - exemplified by some satellite networks [48]). Accordingly, - interested readers might find related and potentially relevant issues - discussed in RFC 2488 [49]. For good TCP performance both LFNs and - LTNs require maintaining a large enough window of outstanding data. - For LFNs, utilizing the available network bandwidth is of particular - concern. LTNs need a sufficiently large window for efficient loss - recovery. In particular, the fast retransmit algorithm cannot be - triggered if the window is less than four segments. This leads to a - lengthy recovery through retransmission timeouts. The Limited - Transmit algorithm RFC 3042 [10] helps avoid the deleterious effects - of timeouts on connections with small windows. Nevertheless, making - full use of the SACK RFC 2018 [3] information for loss recovery in - both LFNs and LTNs may require twice the window otherwise sufficient - to utilize the available bandwidth. - - This document recommends only standard mechanisms suitable both for - LTNs and LFNs, and to any network in general. 
However, experimental - mechanisms suggested in Section 5 can be targeted either for LTNs - [19] or LFNs [48]. - - Data rates are dynamic due to effects from other users and from - mobility. Arriving and departing users can reduce or increase the - available bandwidth in a cell. Increasing the distance from the base - station decreases the link bandwidth due to reduced link quality. - Finally, by simply moving into another cell the user can experience a - sudden change in available bandwidth. For example, if upon changing - cells a connection experiences a sudden increase in available - bandwidth, it can underutilize it, because during congestion - avoidance TCP increases the sending rate slowly. Changing from a - fast to a slow cell normally is handled well by TCP due to the self- - clocking property. However, a sudden increase in RTT in this case - can cause a spurious TCP timeout as described in Section 2.7. In - addition, a large TCP window used in the fast cell can create - congestion resulting in overbuffering in the slow cell. - - - - - - - -Inamura, et al. Best Current Practice [Page 5] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -2.3 Asymmetry - - 2.5G/3G systems may run asymmetric uplink and downlink data rates. - The uplink data rate is limited by battery power consumption and - complexity limitations of mobile terminals. However, the asymmetry - does not exceed 3-6 times, and can be tolerated by TCP without the - need for techniques like ACK congestion control or ACK filtering - [50]. Accordingly, this document does not include recommendations - meant for such highly asymmetric networks. - -2.4 Delay Spikes - - A delay spike is a sudden increase in the latency of the - communication path. 2.5G/3G links are likely to experience delay - spikes exceeding the typical RTT by several times due to the - following reasons. - - 1. 
A long delay spike can occur during link layer recovery from a - link outage due to temporal loss of radio coverage, for example, - while driving into a tunnel or within an elevator. - - 2. During a handover the mobile terminal and the new base station - must exchange messages and perform some other time-consuming - actions before data can be transmitted in a new cell. - - 3. Many wide area wireless networks provide seamless mobility by - internally re-routing packets from the old to the new base station - which may cause extra delay. - - 4. Blocking by high-priority traffic may occur when an arriving - circuit-switched call or higher priority data temporarily preempts - the radio channel. This happens because most current terminals - are not able to handle a voice call and a data connection - simultaneously and suspend the data connection in this case. - - 5. Additionally, a scheduler in the radio network can suspend a low- - priority data transfer to give the radio channel to higher - priority users. - - Delay spikes can cause spurious TCP timeouts, unnecessary - retransmissions and a multiplicative decrease in the congestion - window size. - - - - - - - - - -Inamura, et al. Best Current Practice [Page 6] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -2.5 Packet Loss Due to Corruption - - Even in the face of a high probability of physical layer frame - errors, 2.5G/3G systems have a low rate of packet losses thanks to - link-level retransmissions. Justification for link layer ARQ is - discussed in [23], [22], [44]. In general, link layer ARQ and FEC - can provide a packet service with a negligibly small probability of - undetected errors (failures of the link CRC), and a low level of loss - (non-delivery) for the upper layer traffic, e.g., IP. The loss rate - of IP packets is low due to the ARQ, but the recovery at the link - layer appears as delay jitter to the higher layers lengthening the - computed RTO value. 
- -2.6 Intersystem Handovers - - In the initial phase of deployment, 3G systems will be used as a 'hot - spot' technology in high population areas, while 2.5G systems will - provide lower speed data service elsewhere. This creates an - environment where a mobile user can roam between 2.5G and 3G networks - while keeping ongoing TCP connections. The inter-system handover is - likely to trigger a high delay spike (Section 2.4), and can result in - data loss. Additional problems arise because of context transfer, - which is out of scope of this document, but is being addressed - elsewhere in the IETF in activities addressing seamless mobility - [51]. - - Intersystem handovers can adversely affect ongoing TCP connections - since features may only be negotiated at connection establishment and - cannot be changed later. After an intersystem handover, the network - characteristics may be radically different, and, in fact, may be - negatively affected by the initial configuration. This point argues - against premature optimization by the TCP implementation. - -2.7 Bandwidth Oscillation - - Given the limited RF spectrum, satisfying the high data rate needs of - 2.5G/3G wireless systems requires dynamic resource sharing among - concurrent data users. Various scheduling mechanisms can be deployed - in order to maximize resource utilization. If multiple users wish to - transfer large amounts of data at the same time, the scheduler may - have to repeatedly allocate and de-allocate resources for each user. - We refer to periodic allocation and release of high-speed channels as - Bandwidth Oscillation. Bandwidth Oscillation effects such as - spurious retransmissions were identified elsewhere (e.g., [30]) as - factors that degrade throughput. There are research studies [52], - [54], which show that in some cases Bandwidth Oscillation can be the - single most important factor in reducing throughput. 
For fixed TCP - parameters the achievable throughput depends on the pattern of - - - -Inamura, et al. Best Current Practice [Page 7] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - resource allocation. When the frequency of resource allocation and - de-allocation is sufficiently high, there is no throughput - degradation. However, increasing the frequency of resource - allocation/de-allocation may come at the expense of increased - signaling, and, therefore, may not be desirable. Standards for 3G - wireless technologies provide mechanisms that can be used to combat - the adverse effects of Bandwidth Oscillation. It is the consensus of - the PILC Working Group that the best approach for avoiding adverse - effects of Bandwidth Oscillation is proper wireless sub-network - design [23]. - -3. Example 2.5G and 3G Deployments - - This section provides further details on a few example 2.5G/3G - technologies. The objective is not completeness, but merely to - discuss some representative technologies and the issues that may - arise with TCP performance. Other documents discuss the underlying - technologies in more detail. For example, ARQ and FEC are discussed - in [23], while further justification for link layer ARQ is discussed - in [22], [44]. - -3.1 2.5G Technologies: GPRS, HSCSD and CDMA2000 1XRTT - - High Speed Circuit-Switched Data (HSCSD) and General Packet Radio - Service (GPRS) are extensions of GSM providing high data rates for a - user. Both extensions were developed first by ETSI and later by - 3GPP. In GSM, a user is assigned one timeslot downlink and one - uplink. HSCSD allocates multiple timeslots to a user creating a fast - circuit-switched link. GPRS is based on packet-switched technology - that allows efficient sharing of radio resources among users and - always-on capability. Several terminals can share timeslots. 
A GPRS - network uses an updated base station subsystem of GSM as the access - network; the GPRS core network includes Serving GPRS Support Nodes - (SGSN) and Gateway GPRS Support Nodes (GGSN). The RLC protocol - operating between a base station controller and a terminal provides - ARQ capability over the radio link. The Logical Link Control (LLC) - protocol between the SGSN and the terminal also has an ARQ capability - utilized during handovers. - -3.2 A 3G Technology: W-CDMA - - The International Telecommunication Union (ITU) has selected Wideband - Code Division Multiple Access (W-CDMA) as one of the global telecom - systems for the IMT-2000 3G mobile communications standard. W-CDMA - specifications are created in the 3rd Generation Partnership Project - (3GPP). - - - - - -Inamura, et al. Best Current Practice [Page 8] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - The link layer characteristics of the 3G network which have the - largest effect on TCP performance over the link are error controlling - schemes such as layer two ARQ (L2 ARQ) and FEC (forward error - correction). - - W-CDMA uses RLC (Radio Link Control) [20], a Selective Repeat and - sliding window ARQ. RLC uses protocol data units (PDUs) with a 16 - bit RLC header. The size of the PDUs may vary. Typically, 336 bit - PDUs are implemented [34]. This is the unit for link layer - retransmission. The IP packet is fragmented into PDUs for - transmission by RLC. (For more fragmentation discussion, see Section - 4.4.) - - In W-CDMA, one to twelve PDUs (RLC frames) constitute one FEC frame, - the actual size of which depends on link conditions and bandwidth - allocation. The FEC frame is the unit of interleaving. This - accumulation of PDUs for FEC adds part of the latency mentioned in - Section 2.1. - - For reliable transfer, RLC has an acknowledged mode for PDU - retransmission. 
RLC uses checkpoint ARQ [20] with "status report" - type acknowledgments; the poll bit in the header explicitly solicits - the peer for a status report containing the sequence number that the - peer acknowledges. The use of the poll bit is controlled by timers - and by the size of available buffer space in RLC. Also, when the - peer detects a gap between sequence numbers in received frames, it - can issue a status report to invoke retransmission. RLC preserves - the order of packet delivery. - - The maximum number of retransmissions is a configurable RLC parameter - that is specified by RRC [39] (Radio Resource Controller) through RLC - connection initialization. The RRC can set the maximum number of - retransmissions (up to a maximum of 40). Therefore, RLC can be - described as an ARQ that can be configured for either HIGH- - PERSISTENCE or LOW-PERSISTENCE, not PERFECT-PERSISTENCE, according to - the terminology in [22]. - - Since the RRC manages RLC connection state, Bandwidth Oscillation - (Section 2.7) can be eliminated by the RRC's keeping RF resource on - an RLC connection with data in its queue. This avoids resource de- - allocation in the middle of transferring data. - - In summary, the link layer ARQ and FEC can provide a packet service - with a negligibly small probability of undetected error (failure of - the link CRC), and a low level of loss (non-delivery) for the upper - layer traffic, i.e., IP. Retransmission of PDUs by ARQ introduces - latency and delay jitter to the IP flow. This is why the transport - layer sees the underlying W-CDMA network as a network with a - - - -Inamura, et al. Best Current Practice [Page 9] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - relatively large BDP (Bandwidth-Delay Product) of up to 50 KB for the - 384 kbps radio bearer. 
- -3.3 A 3G Technology: CDMA2000 1X-EV - - One of the Terrestrial Radio Interface standards for 3G wireless - systems, proposed under the International Mobile Telecommunications- - 2000 umbrella, is cdma2000 [55]. It employs Multi-Carrier Code - Division Multiple Access (CDMA) technology with a single-carrier RF - bandwidth of 1.25 MHz. cdma2000 evolved from IS-95 [56], a 2G - standard based on CDMA technology. The first phase of cdma2000 - utilizes a single carrier and is designed to double the voice - capacity of existing CDMA (IS-95) networks and to support always-on - data transmission speeds of up to 316.8 kbps. As mentioned above, - these enhanced capabilities are delivered by cdma2000 1XRTT. 3G - speeds of 2 Mbps are offered by cdma2000 1X-EV. At the physical - layer, the standard allows transmission in 5,10,20,40 or 80 ms time - frames. Various orthogonal (Walsh) codes are used for channel - identification and to achieve higher data rates. - - Radio Link Protocol Type 3 (RLP) [57] is used with a cdma2000 Traffic - Channel to support CDMA data services. RLP provides an octet stream - transport service and is unaware of higher layer framing. There are - several RLP frame formats. RLP frame formats with higher payload - were designed for higher data rates. Depending on the channel speed, - one or more RLP frames can be transmitted in a single physical layer - frame. - - RLP can substantially decrease the error rate exhibited by CDMA - traffic channels [53]. When transferring data, RLP is a pure NAK- - based finite selective repeat protocol. The receiver does not - acknowledge successfully received data frames. If one or more RLP - data frames are missing, the receiving RLP makes several attempts - (called NAK rounds) to recover them by sending one or more NAK - control frames to the transmitter. Each NAK frame must be sent in a - separate physical layer frame. 
When RLP supplies the last NAK - control frame of a particular NAK round, a retransmission timer is - set. If the missing frame is not received when the timer expires, - RLP may try another NAK round. RLP may not recover all missing - frames. If after all RLP rounds, a frame is still missing, RLP - supplies data with a missing frame to the higher layer protocols. - -4. TCP over 2.5G and 3G - - What follows is a set of recommendations for configuration parameters - for protocol stacks which will be used to support TCP connections - over 2.5G and 3G wireless networks. Some of these recommendations - imply special configuration: - - - -Inamura, et al. Best Current Practice [Page 10] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - o at the data receiver (frequently a stack at or near the wireless - device), - - o at the data sender (frequently a host in the Internet or possibly - a gateway or proxy at the edge of a wireless network), or - - o at both. - - These configuration options are commonly available IETF standards- - track mechanisms considered safe on the general Internet. System - administrators are cautioned, however, that increasing the MTU size - (Section 4.4) and disabling RFC 1144 header compression (Section 4.9) - could affect host efficiency, and that changing such parameters - should be done with care. - -4.1 Appropriate Window Size (Sender & Receiver) - - TCP over 2.5G/3G should support appropriate window sizes based on the - Bandwidth Delay Product (BDP) of the end-to-end path (see Section - 2.2). The TCP specification [14] limits the receiver window size to - 64 KB. If the end-to-end BDP is expected to be larger than 64 KB, - the window scale option [2] can be used to overcome that limitation. - Many operating systems by default use small TCP receive and send - buffers around 16KB. Therefore, even for a BDP below 64 KB, the - default buffer size setting should be increased at the sender and at - the receiver to allow a large enough window. 
- -4.2 Increased Initial Window (Sender) - - TCP controls its transmit rate using the congestion window mechanism. - The traditional initial window value of one segment, coupled with the - delayed ACK mechanism [17] implies unnecessary idle times in the - initial phase of the connection, including the delayed ACK timeout - (typically 200 ms, but potentially as much as 500 ms) [4]. Senders - can avoid this by using a larger initial window of up to four - segments (not to exceed roughly 4 KB) [4]. Experiments with - increased initial windows and related measurements have shown (1) - that it is safe to deploy this mechanism (i.e., it does not lead to - congestion collapse), and (2) that it is especially effective for the - transmission of a few TCP segments' worth of data (which is the - behavior commonly seen in such applications as Internet-enabled - mobile wireless devices). For large data transfers, on the other - hand, the effect of this mechanism is negligible. - - TCP over 2.5G/3G SHOULD set the initial CWND (congestion window) - according to Equation 1 in [4]: - - min (4*MSS, max (2*MSS, 4380 bytes)) - - - -Inamura, et al. Best Current Practice [Page 11] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - This increases the permitted initial window from one to between two - and four segments (not to exceed approximately 4 KB). - -4.3 Limited Transmit (Sender) - - RFC 3042 [10], Limited Transmit, extends Fast Retransmit/Fast - Recovery for TCP connections with small congestion windows that are - not likely to generate the three duplicate acknowledgements required - to trigger Fast Retransmit [1]. If a sender has previously unsent - data queued for transmission, the limited transmit mechanism calls - for sending a new data segment in response to each of the first two - duplicate acknowledgments that arrive at the sender. This mechanism - is effective when the congestion window size is small or if a large - number of segments in a window are lost. 
This may avoid some - retransmissions due to TCP timeouts. In particular, some studies - [10] have shown that over half of a busy server's retransmissions - were due to RTO expiration (as opposed to Fast Retransmit), and that - roughly 25% of those could have been avoided using Limited Transmit. - Similar to the discussion in Section 4.2, this mechanism is useful - for small amounts of data to be transmitted. TCP over 2.5G/3G - implementations SHOULD implement Limited Transmit. - -4.4 IP MTU Larger than Default - - The maximum size of an IP datagram supported by a link layer is the - MTU (Maximum Transfer Unit). The link layer may, in turn, fragment - IP datagrams into PDUs. For example, on links with high error rates, - a smaller link PDU size increases the chance of successful - transmission. With layer two ARQ and transparent link layer - fragmentation, the network layer can enjoy a larger MTU even in a - relatively high BER (Bit Error Rate) condition. Without these - features in the link, a smaller MTU is suggested. - - TCP over 2.5G/3G should allow freedom for designers to choose MTU - values ranging from small values (such as 576 bytes) to a large value - that is supported by the type of link in use (such as 1500 bytes for - IP packets on Ethernet). Given that the window is counted in units - of segments, a larger MTU allows TCP to increase the congestion - window faster [5]. Hence, designers are generally encouraged to - choose larger values. These may exceed the default IP MTU values of - 576 bytes for IPv4 RFC 1191 [6] and 1280 bytes for IPv6 [18]. While - this recommendation is applicable to 3G networks, operation over 2.5G - networks should exercise caution as per the recommendations in RFC - 3150 [5]. - - - - - - - -Inamura, et al. 
Best Current Practice [Page 12] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -4.5 Path MTU Discovery (Sender & Intermediate Routers) - - Path MTU discovery allows a sender to determine the maximum end-to- - end transmission unit (without IP fragmentation) for a given routing - path. RFC 1191 [6] and RFC 1981 [8] describe the MTU discovery - procedure for IPv4 and IPv6, respectively. This allows TCP senders - to employ larger segment sizes (without causing IP layer - fragmentation) instead of assuming the small default MTU. TCP over - 2.5G/3G implementations should implement Path MTU Discovery. Path - MTU Discovery requires intermediate routers to support the generation - of the necessary ICMP messages. RFC 1435 [7] provides - recommendations that may be relevant for some router implementations. - -4.6 Selective Acknowledgments (Sender & Receiver) - - The selective acknowledgment option (SACK), RFC 2018 [3], is - effective when multiple TCP segments are lost in a single TCP window - [24]. In particular, if the end-to-end path has a large BDP and a - high packet loss rate, the probability of multiple segment losses in - a single window of data increases. In such cases, SACK provides - robustness beyond TCP-Tahoe and TCP-Reno [21]. TCP over 2.5G/3G - SHOULD support SACK. - - In the absence of SACK feature, the TCP should use NewReno RFC 2582 - [15]. - -4.7 Explicit Congestion Notification (Sender, Receiver & Intermediate - Routers) - - Explicit Congestion Notification, RFC 3168 [9], allows a TCP receiver - to inform the sender of congestion in the network by setting the - ECN-Echo flag upon receiving an IP packet marked with the CE bit(s). - The TCP sender will then reduce its congestion window. Thus, the use - of ECN is believed to provide performance benefits [32], [43]. RFC - 3168 [9] also places requirements on intermediate routers (e.g., - active queue management and setting of the CE bit(s) in the IP header - to indicate congestion). 
Therefore, the potential improvement in - performance can only be achieved when ECN capable routers are - deployed along the path. TCP over 2.5G/3G SHOULD support ECN. - -4.8 TCP Timestamps Option (Sender & Receiver) - - Traditionally, TCPs collect one RTT sample per window of data [14], - [17]. This can lead to an underestimation of the RTT, and spurious - timeouts on paths in which the packet transmission delay dominates - the RTT. This holds despite a conservative retransmit timer such as - the one specified in RFC 2988 [11]. TCP connections with large - windows may benefit from more frequent RTT samples provided with - - - -Inamura, et al. Best Current Practice [Page 13] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - timestamps by adapting quicker to changing network conditions [2]. - However, there is some empirical evidence that for TCPs with an RFC - 2988 timer [11], timestamps provide little or no benefits on backbone - Internet paths [59]. Using the TCP Timestamps option has the - advantage that retransmitted segments can be used for RTT - measurement, which is otherwise forbidden by Karn's algorithm [17], - [11]. Furthermore, the TCP Timestamps option is the basis for - detecting spurious retransmits using the Eifel algorithm [30]. - - A 2.5/3G link (layer) is dedicated to a single host. It therefore - only experiences a low degree of statistical multiplexing between - different flows. Also, the packet transmission and queuing delays of - a 2.5/3G link often dominate the path's RTT. This already results in - large RTT variations as packets fill the queue while a TCP sender - probes for more bandwidth, or as packets drain from the queue while a - TCP sender reduces its load in response to a packet loss. In - addition, the delay spikes across a 2.5/3G link (see Section 2.4) may - often exceed the end-to-end RTT. The thus resulting large variations - in the path's RTT may often cause spurious timeouts. 
- - When running TCP in such an environment, it is therefore advantageous - to sample the path's RTT more often than only once per RTT. This - allows the TCP sender to track changes in the RTT more closely. In - particular, a TCP sender can react more quickly to sudden increases - of the RTT by sooner updating the RTO to a more conservative value. - The TCP Timestamps option [2] provides this capability, allowing the - TCP sender to sample the RTT from every segment that is acknowledged. - Using timestamps in the mentioned scenario leads to a more - conservative TCP retransmission timer and reduces the risk of - triggering spurious timeouts [45], [52], [54], [60]. - - There are two problematic issues with using timestamps: - - o 12 bytes of overhead are introduced by carrying the TCP Timestamps - option and padding in the TCP header. For a small MTU size, it - can present a considerable overhead. For example, for an MTU of - 296 bytes the added overhead is 4%. For an MTU of 1500 bytes, the - added overhead is only 0.8%. - - o Current TCP header compression schemes are limited in their - handling of the TCP options field. For RFC 2507 [13], any change - in the options field (caused by timestamps or SACK, for example) - renders the entire field uncompressible (leaving the TCP/IP header - itself compressible, however). Even worse, for RFC 1144 [40] such - a change in the options field effectively disables TCP/IP header - compression altogether. This is the case when a connection uses - the TCP Timestamps option. That option field is used both in the - data and the ACK path, and its value typically changes from one - - - -Inamura, et al. Best Current Practice [Page 14] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - packet to the next. The IETF is currently specifying a robust - TCP/IP header compression scheme with better support for TCP - options [29]. 
- - The original definition of the timestamps option [2] specifies that - duplicate segments below cumulative ACK do not update the cached - timestamp value at the receiver. This may lead to overestimating of - RTT for retransmitted segments. A possible solution [47] allows the - receiver to use a more recent timestamp from a duplicate segment. - However, this suggestion allows for spoofing attacks against the TCP - receiver. Therefore, careful consideration is needed in - implementing this solution. - - Recommendation: TCP SHOULD use the TCP Timestamps option. It allows - for better RTT estimation and reduces the risk of spurious timeouts. - -4.9 Disabling RFC 1144 TCP/IP Header Compression (Wireless Host) - - It is well known (and has been shown with experimental data) that RFC - 1144 [40] TCP header compression does not perform well in the - presence of packet losses [43], [52]. If a wireless link error is - not recovered, it will cause TCP segment loss between the compressor - and decompressor, and then RFC 1144 header compression does not allow - TCP to take advantage of Fast Retransmit Fast Recovery mechanism. - The RFC 1144 header compression algorithm does not transmit the - entire TCP/IP headers, but only the changes in the headers of - consecutive segments. Therefore, loss of a single TCP segment on the - link causes the transmitting and receiving TCP sequence numbers to - fall out of synchronization. Hence, when a TCP segment is lost - after the compressor, the decompressor will generate false TCP - headers. Consequently, the TCP receiver will discard all remaining - packets in the current window because of a checksum error. This - continues until the compressor receives the first retransmission - which is forwarded uncompressed to synchronize the decompressor [40]. - - As previously recommended in RFC 3150 [5], RFC 1144 header - compression SHOULD NOT be enabled unless the packet loss probability - between the compressor and decompressor is very low. 
Actually, - enabling the Timestamps Option effectively accomplishes the same - thing (see Section 4.8). Other header compression schemes like RFC - 2507 [13] and Robust Header Compression [12] are meant to address - deficiencies in RFC 1144 header compression. At the time of this - writing, the IETF was working on multiple extensions to Robust Header - Compression (negotiating Robust Header Compression over PPP, - compressing TCP options, etc) [16]. - - - - - - -Inamura, et al. Best Current Practice [Page 15] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -4.10 Summary - - Items Comments - ---------------------------------------------------------------- - Appropriate Window Size (sender & receiver) - based on end-to-end BDP - - Window Scale Option (sender & receiver) - [RFC1323] Window size > 64KB - - Increased Initial Window (sender) - [RFC3390] CWND = min (4*MSS, - max (2*MSS, 4380 bytes)) - - Limited Transmit (sender) - [RFC3042] - - IP MTU larger than more applicable to 3G - Default - - Path MTU Discovery (sender & intermediate routers) - [RFC1191,RFC1981] - - Selective Acknowledgment - option (SACK) - [RFC2018] (sender & receiver) - - Explicit Congestion - Notification(ECN) - [RFC3168] (sender, receiver & - intermediate routers) - - Timestamps Option (sender & receiver) - [RFC1323, R.T.Braden's ID] - - Disabling RFC1144 - TCP/IP Header Compression - [RFC1144] (wireless host) - -5. Open Issues - - This section outlines additional mechanisms and parameter settings - that may increase end-to-end performance when running TCP across - 2.5G/3G networks. Note, that apart from the discussion of the RTO's - initial value, those mechanisms and parameter settings are not part - of any standards track RFC at the time of this writing. Therefore, - they cannot be recommended for the Internet in general. - - - - -Inamura, et al. 
Best Current Practice [Page 16] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - Other mechanisms for increasing TCP performance include enhanced TCP/ - IP header compression schemes [29], active queue management RFC 2309 - [28], link layer retransmission schemes [23], and caching packets - during transient link outages to retransmit them locally when the - link is restored to operation [23]. - - Shortcomings of existing TCP/IP header compression schemes (RFC 1144 - [40], RFC 2507 [13]) are that they do not compress headers of - handshaking packets (SYNs and FINs), and that they lack proper - handling of TCP option fields (e.g., SACK or timestamps) (see Section - 4.8). Although RFC 3095 [12] does not yet address this issue, the - IETF is developing improved TCP/IP header compression schemes, - including better handling of TCP options such as timestamps and - selective acknowledgements. Especially, if many short-lived TCP - connections run across the link, the compression of the handshaking - packets may greatly improve the overall header compression ratio. - - Implementing active queue management is attractive for a number of - reasons as outlined in RFC 2309 [28]. One important benefit for - 2.5G/ 3G networks, is that it minimizes the amount of potentially - stale data that may be queued in the network ("clicking from page to - page" before the download of the previous page is complete). - Avoiding the transmission of stale data across the 2.5G/3G radio link - saves transmission (battery) power, and increases the ratio of useful - data over total data transmitted. Another important benefit of - active queue management for 2.5G/3G networks, is that it reduces the - risk of a spurious timeout for the first data segment as outlined - below. - - Since 2.5G/3G networks are commonly characterized by high delays, - avoiding unnecessary round-trip times is particularly attractive.
- This is specially beneficial for short-lived, transactional (request/ - response-style) TCP sessions that typically result from browsing the - Web from a smart phone. However, existing solutions such as T/TCP - RFC 1644 [27], have not been adopted due to known security concerns - [38]. - - Spurious timeouts, packet re-ordering, and packet duplication may - reduce TCP's performance. Thus, making TCP more robust against those - events is desirable. Solutions to this problem have been proposed - [30], [35], [41], and standardization work within the IETF is ongoing - at the time of writing. Those solutions include reverting congestion - control state after such an event has been detected, and adapting the - retransmission timer and duplicate acknowledgement threshold. The - deployment of such solutions may be particularly beneficial when - running TCP across wireless networks because wireless access links - may often be subject to handovers and resource preemption, or the - mobile transmitter may traverse through a radio coverage hole. Such - - - -Inamura, et al. Best Current Practice [Page 17] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - disrupting events may easily trigger a spurious timeout despite a - conservative retransmission timer. Also, the mobility mechanisms of - some wireless networks may cause packet duplication. - - The algorithm for computing TCP's retransmission timer is specified - in RFC 2988 [11]. The standard specifies that the initial setting of - the retransmission timeout value (RTO) should not be less than 3 - seconds. This value might be too low when running TCP across 2.5G/3G - networks. In addition to its high latencies, those networks may be - run at bit rates of as low as about 10 kb/s which results in large - packet transmission delays. In this case, the RTT for the first data - segment may easily exceed the initial TCP retransmission timer - setting of 3 seconds. This would then cause a spurious timeout for - that segment. 
Hence, in such situations it may be advisable to set - TCP's initial RTO to a value larger than 3 seconds. Furthermore, due - to the potentially large packet transmission delays, a TCP sender - might choose to refrain from initializing its RTO from the RTT - measured for the SYN, but instead take the RTT measured for the first - data segment. - - Some of the recommendations in RFC 2988 [11] are optional, and are - not followed by all TCP implementations. Specifically, some TCP - stacks allow a minimum RTO less than the recommended value of 1 - second (section 2.4 of [11]), and some implementations do not - implement the recommended restart of the RTO timer when an ACK is - received (section 5.3 of [11]). Some experiments [52], [54], have - shown that in the face of bandwidth oscillation, using the - recommended minimum RTO value of 1 sec (along with the also - recommended initial RTO of 3 sec) reduces the number of spurious - retransmissions as compared to using small minimum RTO values of 200 - or 400 ms. Furthermore, TCP stacks that restart the retransmission - timer when an ACK is received experience far less spurious - retransmissions than implementations that do not restart the RTO - timer when an ACK is received. Therefore, at the time of this - writing, it seems preferable for TCP implementations used in 3G - wireless data transmission to comply with all recommendations of RFC - 2988. - -6. Security Considerations - - In 2.5G/3G wireless networks, data is transmitted as ciphertext over - the air and as cleartext between the Radio Access Network (RAN) and - the core network. IP security RFC 2401 [37] or TLS RFC 2246 [36] can - be deployed by user devices for end-to-end security. - -7. IANA Considerations - - This specification requires no IANA actions. - - - -Inamura, et al. Best Current Practice [Page 18] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -8. 
Acknowledgements - - The authors would like to acknowledge contributions to the text from - the following individuals: - - Max Hata, NTT DoCoMo, Inc. (hata@mml.yrp.nttdocomo.co.jp) - - Masahiro Hara, Fujitsu, Inc. (mhara@FLAB.FUJITSU.CO.JP) - - Joby James, Motorola, Inc. (joby@MIEL.MOT.COM) - - William Gilliam, Hewlett-Packard Company (wag@cup.hp.com) - - Alan Hameed, Fujitsu FNC, Inc. (Alan.Hameed@fnc.fujitsu.com) - - Rodrigo Garces, Mobility Network Systems - (rodrigo.garces@mobilitynetworks.com) - - Peter Ford, Microsoft (peterf@Exchange.Microsoft.com) - - Fergus Wills, Openwave (fergus.wills@openwave.com) - - Michael Meyer (Michael.Meyer@eed.ericsson.se) - - The authors gratefully acknowledge the valuable advice from the - following individuals: - - Gorry Fairhurst (gorry@erg.abdn.ac.uk) - - Mark Allman (mallman@grc.nasa.gov) - - Aaron Falk (falk@ISI.EDU) - -9. Normative References - - [1] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion Control", - RFC 2581, April 1999. - - [2] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for High - Performance", RFC 1323, May 1992. - - [3] Mathis, M., Mahdavi, J., Floyd, S. and R. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. - - [4] Allman, M., Floyd, S. and C. Partridge, "Increasing TCP's - Initial Window", RFC 3390, October 2002. - - - - - -Inamura, et al. Best Current Practice [Page 19] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - [5] Dawkins, S., Montenegro, G., Kojo, M. and V. Magret, "End-to-end - Performance Implications of Slow Links", BCP 48, RFC 3150, July - 2001. - - [6] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, - November 1990. - - [7] Knowles, S., "IESG Advice from Experience with Path MTU - Discovery", RFC 1435, March 1993. - - [8] McCann, J., Deering, S. and J. Mogul, "Path MTU Discovery for IP - version 6", RFC 1981, August 1996. - - [9] Ramakrishnan, K., Floyd, S. and D. 
Black, "The Addition of - Explicit Congestion Notification (ECN) to IP", RFC 3168, - September 2001. - - [10] Allman, M., Balakrishnan, H. and S. Floyd, "Enhancing TCP's Loss - Recovery Using Limited Transmit", RFC 3042, January 2001. - - [11] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [12] Bormann, C., Burmeister, C., Degermark, M., Fukushima, H., - Hannu, H., Jonsson, L-E., Hakenberg, R., Koren, T., Le, K., Liu, - Z., Martensson, A., Miyazaki, A., Svanbro, K., Wiebke, T., - Yoshimura, T. and H. Zheng, "RObust Header Compression (ROHC): - Framework and four profiles: RTP, UDP, ESP, and uncompressed", - RFC 3095, July 2001. - - [13] Degermark, M., Nordgren, B. and S. Pink, "IP Header - Compression", RFC 2507, February 1999. - - [14] Postel, J., "Transmission Control Protocol - DARPA Internet - Program Protocol Specification", STD 7, RFC 793, September 1981. - - [15] Floyd, S. and T. Henderson, "The NewReno Modification to TCP's - Fast Recovery Algorithm", RFC 2582, April 1999. - - [16] Bormann, C., "Robust Header Compression (ROHC) over PPP", RFC - 3241, April 2002. - - [17] Braden, R., "Requirements for Internet Hosts - Communication - Layers", STD 3, RFC 1122, October 1989. - - [18] Deering, S. and R. Hinden, "Internet Protocol, Version 6 (IPv6) - Specification", RFC 2460, December 1998. - - - - -Inamura, et al. Best Current Practice [Page 20] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -10. Informative References - - [19] Montenegro, G., Dawkins, S., Kojo, M., Magret, V. and N. - Vaidya, "Long Thin Networks", RFC 2757, January 2000. - - [20] Third Generation Partnership Project, "RLC Protocol - Specification (3G TS 25.322:)", 1999. - - [21] Fall, K. and S. Floyd, "Simulation-based Comparisons of Tahoe, - Reno, and SACK TCP", Computer Communication Review, 26(3) , July - 1996. - - [22] Fairhurst, G. and L. 
Wood, "Advice to link designers on link - Automatic Repeat reQuest (ARQ)", BCP 62, RFC 3366, August 2002. - - [23] Karn, P., "Advice for Internet Subnetwork Designers", Work in - Progress. - - [24] Dawkins, S., Montenegro, G., Magret, V., Vaidya, N. and M. - Kojo, "End-to-end Performance Implications of Links with - Errors", BCP 50, RFC 3135, August 2001. - - [25] Wireless Application Protocol, "WAP Specifications", 2002, - . - - [26] Open Mobile Alliance, "Open Mobile Alliance", 2002, - . - - [27] Braden, R., "T/TCP -- TCP Extensions for Transactions", RFC - 1644, July 1994. - - [28] Braden, R., Clark, D., Crowcroft, J., Davie, B., Deering, S., - Estrin, D., Floyd, S., Jacobson, V., Minshall, G., Partridge, - C., Peterson, L., Ramakrishnan, K., Shenker, S., Wroclawski, J. - and L. Zhang, "Recommendations on Queue Management and - Congestion Avoidance in the Internet", RFC 2309, April 1998. - - [29] IETF, "Robust Header Compression", 2001, - . - - [30] Ludwig, R. and R. H. Katz, "The Eifel Algorithm: Making TCP - Robust Against Spurious Retransmissions", ACM Computer - Communication Review 30(1), January 2000. - - [31] Wireless Application Protocol, "WAP Wireless Profiled TCP", - WAP-225-TCP-20010331-a, April 2001, - . - - - - -Inamura, et al. Best Current Practice [Page 21] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - [32] Hadi Salim, J. and U. Ahmed, "Performance Evaluation of Explicit - Congestion Notification (ECN) in IP Networks", RFC 2884, July - 2000. - - [33] NTT DoCoMo Technical Journal, "Special Issue on i-mode Service", - October 1999. - - [34] NTT DoCoMo Technical Journal, "Special Article on IMT-2000 - Services", September 2001. - - [35] Floyd, S., Mahdavi, J., Mathis, M. and M. Podolsky, "An - Extension to the Selective Acknowledgement (SACK) Option for - TCP", RFC 2883, July 2000. - - [36] Dierks, T. and C. Allen, "The TLS Protocol Version 1.0", RFC - 2246, January 1999. - - [37] Kent, S. and R. 
Atkinson, "Security Architecture for the - Internet Protocol", RFC 2401, November 1998. - - [38] de Vivo, M., O. de Vivo, G., Koeneke, R. and G. Isern, "Internet - Vulnerabilities Related to TCP/IP and T/TCP", ACM Computer - Communication Review 29(1), January 1999. - - [39] Third Generation Partnership Project, "RRC Protocol - Specification (3GPP TS 25.331:)", September 2001. - - [40] Jacobson, V., "Compressing TCP/IP Headers for Low-Speed Serial - Links", RFC 1144, February 1990. - - [41] Blanton, E. and M. Allman, "On Making TCP More Robust to Packet - Reordering", ACM Computer Communication Review 32(1), January - 2002, . - - [42] Karn, P. and C. Partridge, "Improving Round-Trip Time Estimates - in Reliable Transport Protocols", ACM SIGCOMM 87, 1987. - - [43] Ludwig, R., Rathonyi, B., Konrad, A. and A. Joseph, "Multi-layer - tracing of TCP over a reliable wireless link", ACM SIGMETRICS - 99, May 1999. - - [44] Ludwig, R., Konrad, A., Joseph, A. and R. Katz, "Optimizing the - End-to-End Performance of Reliable Flows over Wireless Links", - Kluwer/ACM Wireless Networks Journal Vol. 8, Nos. 2/3, pp. 289- - 299, March-May 2002. - - - - - -Inamura, et al. Best Current Practice [Page 22] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - [45] Gurtov, A., "Making TCP Robust Against Delay Spikes", University - of Helsinki, Department of Computer Science, Series of - Publications C, C-2001-53, Nov 2001, - . - - [46] Stevens, W., "TCP/IP Illustrated, Volume 1; The Protocols", - Addison Wesley, 1995. - - [47] Braden, R., "TCP Extensions for High Performance: An Update", - Work in Progress. - - [48] Allman, M., Dawkins, S., Glover, D., Griner, J., Tran, D., - Henderson, T., Heidemann, J., Touch, J., Kruse, H., Ostermann, - S., Scott, K. and J. Semke, "Ongoing TCP Research Related to - Satellites", RFC 2760, February 2000. - - [49] Allman, M., Glover, D. and L. Sanchez, "Enhancing TCP Over - Satellite Channels using Standard Mechanisms", BCP 28, RFC 2488, - January 1999. 
- - [50] Balakrishnan, H., Padmanabhan, V., Fairhurst, G. and M. - Sooriyabandara, "TCP Performance Implications of Network - Asymmetry", RFC 3449, December 2002. - - [51] Kempf, J., "Problem Description: Reasons For Performing Context - Transfers Between Nodes in an IP Access Network", RFC 3374, - September 2002. - - [52] Khafizov, F. and M. Yavuz, "Running TCP over IS-2000", Proc. of - IEEE ICC, 2002. - - [53] Khafizov, F. and M. Yavuz, "Analytical Model of RLP in IS-2000 - CDMA Networks", Proc. of IEEE Vehicular Technology Conference, - September 2002. - - [54] Yavuz, M. and F. Khafizov, "TCP over Wireless Links with - Variable Bandwidth", Proc. of IEEE Vehicular Technology - Conference, September 2002. - - [55] TIA/EIA/cdma2000, "Mobile Station - Base Station Compatibility - Standard for Dual-Mode Wideband Spread Spectrum Cellular - Systems", Washington: Telecommunication Industry Association, - 1999. - - [56] TIA/EIA/IS-95 Rev A, "Mobile Station - Base Station - Compatibility Standard for Dual-Mode Wideband Spread Spectrum - Cellular Systems", Washington: Telecommunication Industry - Association, 1995. - - - -Inamura, et al. Best Current Practice [Page 23] - -RFC 3481 TCP over 2.5G/3G February 2003 - - - [57] TIA/EIA/IS-707-A-2.10, "Data Service Options for Spread Spectrum - Systems: Radio Link Protocol Type 3", January 2000. - - [58] Dahlman, E., Beming, P., Knutsson, J., Ovesjo, F., Persson, M. - and C. Roobol, "WCDMA - The Radio Interface for Future Mobile - Multimedia Communications", IEEE Trans. on Vehicular Technology, - vol. 47, no. 4, pp. 1105-1118, November 1998. - - [59] Allman, M. and V. Paxson, "On Estimating End-to-End Network Path - Properties", ACM SIGCOMM 99, September 1999. - - [60] Gurtov, A. and R. Ludwig, "Responding to Spurious Timeouts in - TCP", IEEE INFOCOM'03, March 2003. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Inamura, et al. 
Best Current Practice [Page 24] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -11. Authors' Addresses - - Hiroshi Inamura - NTT DoCoMo, Inc. - 3-5 Hikarinooka - Yokosuka Shi, Kanagawa Ken 239-8536 - Japan - - EMail: inamura@mml.yrp.nttdocomo.co.jp - URI: http://www.nttdocomo.co.jp/ - - - Gabriel Montenegro - Sun Microsystems Laboratories, Europe - Avenue de l'Europe - ZIRST de Montbonnot - 38334 Saint Ismier CEDEX - France - - EMail: gab@sun.com - - - Reiner Ludwig - Ericsson Research - Ericsson Allee 1 - 52134 Herzogenrath - Germany - - EMail: Reiner.Ludwig@Ericsson.com - - - Andrei Gurtov - Sonera - P.O. Box 970, FIN-00051 - Helsinki, - Finland - - EMail: andrei.gurtov@sonera.com - URI: http://www.cs.helsinki.fi/u/gurtov/ - - - Farid Khafizov - Nortel Networks - 2201 Lakeside Blvd - Richardson, TX 75082, - USA - - EMail: faridk@nortelnetworks.com - - - -Inamura, et al. Best Current Practice [Page 25] - -RFC 3481 TCP over 2.5G/3G February 2003 - - -12. Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. 
- - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Inamura, et al. Best Current Practice [Page 26] - diff --git a/kernel/picotcp/RFC/rfc3493.txt b/kernel/picotcp/RFC/rfc3493.txt deleted file mode 100644 index 5fea6c1..0000000 --- a/kernel/picotcp/RFC/rfc3493.txt +++ /dev/null @@ -1,2187 +0,0 @@ - - - - - - -Network Working Group R. Gilligan -Request for Comments: 3493 Intransa, Inc. -Obsoletes: 2553 S. Thomson -Category: Informational Cisco - J. Bound - J. McCann - Hewlett-Packard - W. Stevens - February 2003 - - - Basic Socket Interface Extensions for IPv6 - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - The de facto standard Application Program Interface (API) for TCP/IP - applications is the "sockets" interface. Although this API was - developed for Unix in the early 1980s it has also been implemented on - a wide variety of non-Unix systems. TCP/IP applications written - using the sockets API have in the past enjoyed a high degree of - portability and we would like the same portability with IPv6 - applications. 
But changes are required to the sockets API to support - IPv6 and this memo describes these changes. These include a new - socket address structure to carry IPv6 addresses, new address - conversion functions, and some new socket options. These extensions - are designed to provide access to the basic IPv6 features required by - TCP and UDP applications, including multicasting, while introducing a - minimum of change into the system and providing complete - compatibility for existing IPv4 applications. Additional extensions - for advanced IPv6 features (raw sockets and access to the IPv6 - extension headers) are defined in another document. - - - - - - - - - - -Gilligan, et al. Informational [Page 1] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -Table of Contents - - 1. Introduction................................................3 - 2. Design Considerations.......................................4 - 2.1 What Needs to be Changed...............................4 - 2.2 Data Types.............................................6 - 2.3 Headers................................................6 - 2.4 Structures.............................................6 - 3. Socket Interface............................................6 - 3.1 IPv6 Address Family and Protocol Family................6 - 3.2 IPv6 Address Structure.................................7 - 3.3 Socket Address Structure for 4.3BSD-Based Systems......7 - 3.4 Socket Address Structure for 4.4BSD-Based Systems......9 - 3.5 The Socket Functions...................................9 - 3.6 Compatibility with IPv4 Applications..................10 - 3.7 Compatibility with IPv4 Nodes.........................11 - 3.8 IPv6 Wildcard Address.................................11 - 3.9 IPv6 Loopback Address.................................13 - 3.10 Portability Additions.................................14 - 4. 
Interface Identification...................................16 - 4.1 Name-to-Index.........................................17 - 4.2 Index-to-Name.........................................17 - 4.3 Return All Interface Names and Indexes................18 - 4.4 Free Memory...........................................18 - 5. Socket Options.............................................18 - 5.1 Unicast Hop Limit.....................................19 - 5.2 Sending and Receiving Multicast Packets...............19 - 5.3 IPV6_V6ONLY option for AF_INET6 Sockets...............22 - 6. Library Functions..........................................22 - 6.1 Protocol-Independent Nodename and - Service Name Translation..............................23 - 6.2 Socket Address Structure to Node Name - and Service Name......................................28 - 6.3 Address Conversion Functions..........................31 - 6.4 Address Testing Macros................................33 - 7. Summary of New Definitions.................................33 - 8. Security Considerations....................................35 - 9. Changes from RFC 2553......................................35 - 10. Acknowledgments............................................36 - 11. References.................................................37 - 12. Authors' Addresses.........................................38 - 13. Full Copyright Statement...................................39 - - - - - - - - - -Gilligan, et al. Informational [Page 2] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -1. Introduction - - While IPv4 addresses are 32 bits long, IPv6 addresses are 128 bits - long. The socket interface makes the size of an IP address quite - visible to an application; virtually all TCP/IP applications for - BSD-based systems have knowledge of the size of an IP address. Those - parts of the API that expose the addresses must be changed to - accommodate the larger IPv6 address size. 
IPv6 also introduces new - features, some of which must be made visible to applications via the - API. This memo defines a set of extensions to the socket interface - to support the larger address size and new features of IPv6. It - defines "basic" extensions that are of use to a broad range of - applications. A companion document, the "advanced" API [4], covers - extensions that are of use to more specialized applications, examples - of which include routing daemons, and the "ping" and "traceroute" - utilities. - - The development of this API was started in 1994 in the IETF IPng - working group. The API has evolved over the years, published first - in RFC 2133, then again in RFC 2553, and reaching its final form in - this document. - - As the API matured and stabilized, it was incorporated into the Open - Group's Networking Services (XNS) specification, issue 5.2, which was - subsequently incorporated into a joint Open Group/IEEE/ISO standard - [3]. - - Effort has been made to ensure that this document and [3] contain the - same information with regard to the API definitions. However, the - reader should note that this document is for informational purposes - only, and that the official standard specification of the sockets API - is [3]. - - It is expected that any future standardization work on this API would - be done by the Open Group Base Working Group [6]. - - It should also be noted that this document describes only those - portions of the API needed for IPv4 and IPv6 communications. Other - potential uses of the API, for example the use of getaddrinfo() and - getnameinfo() with the AF_UNIX address family, are beyond the scope - of this document. - - - - - - - - - - -Gilligan, et al. Informational [Page 3] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -2. 
Design Considerations - - There are a number of important considerations in designing changes - to this well-worn API: - - - The API changes should provide both source and binary - compatibility for programs written to the original API. That is, - existing program binaries should continue to operate when run on a - system supporting the new API. In addition, existing applications - that are re-compiled and run on a system supporting the new API - should continue to operate. Simply put, the API changes for IPv6 - should not break existing programs. An additional mechanism for - implementations to verify this is to verify the new symbols are - protected by Feature Test Macros as described in [3]. (Such - Feature Test Macros are not defined by this RFC.) - - - The changes to the API should be as small as possible in order to - simplify the task of converting existing IPv4 applications to - IPv6. - - - Where possible, applications should be able to use this API to - interoperate with both IPv6 and IPv4 hosts. Applications should - not need to know which type of host they are communicating with. - - - IPv6 addresses carried in data structures should be 64-bit - aligned. This is necessary in order to obtain optimum performance - on 64-bit machine architectures. - - Because of the importance of providing IPv4 compatibility in the API, - these extensions are explicitly designed to operate on machines that - provide complete support for both IPv4 and IPv6. A subset of this - API could probably be designed for operation on systems that support - only IPv6. However, this is not addressed in this memo. - -2.1 What Needs to be Changed - - The socket interface API consists of a few distinct components: - - - Core socket functions. - - - Address data structures. - - - Name-to-address translation functions. - - - Address conversion functions. - - - - - - -Gilligan, et al. 
Informational [Page 4] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - The core socket functions -- those functions that deal with such - things as setting up and tearing down TCP connections, and sending - and receiving UDP packets -- were designed to be transport - independent. Where protocol addresses are passed as function - arguments, they are carried via opaque pointers. A protocol-specific - address data structure is defined for each protocol that the socket - functions support. Applications must cast pointers to these - protocol-specific address structures into pointers to the generic - "sockaddr" address structure when using the socket functions. These - functions need not change for IPv6, but a new IPv6-specific address - data structure is needed. - - The "sockaddr_in" structure is the protocol-specific data structure - for IPv4. This data structure actually includes 8-octets of unused - space, and it is tempting to try to use this space to adapt the - sockaddr_in structure to IPv6. Unfortunately, the sockaddr_in - structure is not large enough to hold the 16-octet IPv6 address as - well as the other information (address family and port number) that - is needed. So a new address data structure must be defined for IPv6. - - IPv6 addresses are scoped [2] so they could be link-local, site, - organization, global, or other scopes at this time undefined. To - support applications that want to be able to identify a set of - interfaces for a specific scope, the IPv6 sockaddr_in structure must - support a field that can be used by an implementation to identify a - set of interfaces identifying the scope for an IPv6 address. - - The IPv4 name-to-address translation functions in the socket - interface are gethostbyname() and gethostbyaddr(). These are left as - is, and new functions are defined which support both IPv4 and IPv6. 
- - The IPv4 address conversion functions -- inet_ntoa() and inet_addr() - -- convert IPv4 addresses between binary and printable form. These - functions are quite specific to 32-bit IPv4 addresses. We have - designed two analogous functions that convert both IPv4 and IPv6 - addresses, and carry an address type parameter so that they can be - extended to other protocol families as well. - - Finally, a few miscellaneous features are needed to support IPv6. A - new interface is needed to support the IPv6 hop limit header field. - New socket options are needed to control the sending and receiving of - IPv6 multicast packets. - - The socket interface will be enhanced in the future to provide access - to other IPv6 features. Some of these extensions are described in - [4]. - - - - - -Gilligan, et al. Informational [Page 5] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -2.2 Data Types - - The data types of the structure elements given in this memo are - intended to track the relevant standards. uintN_t means an unsigned - integer of exactly N bits (e.g., uint16_t). The sa_family_t and - in_port_t types are defined in [3]. - -2.3 Headers - - When function prototypes and structures are shown we show the headers - that must be #included to cause that item to be defined. - -2.4 Structures - - When structures are described the members shown are the ones that - must appear in an implementation. Additional, nonstandard members - may also be defined by an implementation. As an additional - precaution nonstandard members could be verified by Feature Test - Macros as described in [3]. (Such Feature Test Macros are not - defined by this RFC.) - - The ordering shown for the members of a structure is the recommended - ordering, given alignment considerations of multibyte members, but an - implementation may order the members differently. - -3. Socket Interface - - This section specifies the socket interface changes for IPv6. 
- -3.1 IPv6 Address Family and Protocol Family - - A new address family name, AF_INET6, is defined in <sys/socket.h>. - The AF_INET6 definition distinguishes between the original - sockaddr_in address data structure, and the new sockaddr_in6 data - structure. - - A new protocol family name, PF_INET6, is defined in <sys/socket.h>. - Like most of the other protocol family names, this will usually be - defined to have the same value as the corresponding address family - name: - - #define PF_INET6 AF_INET6 - - The AF_INET6 is used in the first argument to the socket() function - to indicate that an IPv6 socket is being created. - - - - - - -Gilligan, et al. Informational [Page 6] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -3.2 IPv6 Address Structure - - A new in6_addr structure holds a single IPv6 address and is defined - as a result of including <netinet/in.h>: - - struct in6_addr { - uint8_t s6_addr[16]; /* IPv6 address */ - }; - - This data structure contains an array of sixteen 8-bit elements, - which make up one 128-bit IPv6 address. The IPv6 address is stored - in network byte order. - - The structure in6_addr above is usually implemented with an embedded - union with extra fields that force the desired alignment level in a - manner similar to BSD implementations of "struct in_addr". Those - additional implementation details are omitted here for simplicity. - - An example is as follows: - - struct in6_addr { - union { - uint8_t _S6_u8[16]; - uint32_t _S6_u32[4]; - uint64_t _S6_u64[2]; - } _S6_un; - }; - #define s6_addr _S6_un._S6_u8 - -3.3 Socket Address Structure for 4.3BSD-Based Systems - - In the socket interface, a different protocol-specific data structure - is defined to carry the addresses for each protocol suite. Each - protocol-specific data structure is designed so it can be cast into a - protocol-independent data structure -- the "sockaddr" structure. - Each has a "family" field that overlays the "sa_family" of the - sockaddr data structure.
This field identifies the type of the data - structure. - - The sockaddr_in structure is the protocol-specific address data - structure for IPv4. It is used to pass addresses between - applications and the system in the socket functions. The following - sockaddr_in6 structure holds IPv6 addresses and is defined as a - result of including the <netinet/in.h> header: - - - - - - - -Gilligan, et al. Informational [Page 7] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -struct sockaddr_in6 { - sa_family_t sin6_family; /* AF_INET6 */ - in_port_t sin6_port; /* transport layer port # */ - uint32_t sin6_flowinfo; /* IPv6 flow information */ - struct in6_addr sin6_addr; /* IPv6 address */ - uint32_t sin6_scope_id; /* set of interfaces for a scope */ -}; - - This structure is designed to be compatible with the sockaddr data - structure used in the 4.3BSD release. - - The sin6_family field identifies this as a sockaddr_in6 structure. - This field overlays the sa_family field when the buffer is cast to a - sockaddr data structure. The value of this field must be AF_INET6. - - The sin6_port field contains the 16-bit UDP or TCP port number. This - field is used in the same way as the sin_port field of the - sockaddr_in structure. The port number is stored in network byte - order. - - The sin6_flowinfo field is a 32-bit field intended to contain flow- - related information. The exact way this field is mapped to or from a - packet is not currently specified. Until such time as its use is - specified, applications should set this field to zero when - constructing a sockaddr_in6, and ignore this field in a sockaddr_in6 - structure constructed by the system. - - The sin6_addr field is a single in6_addr structure (defined in the - previous section). This field holds one 128-bit IPv6 address. The - address is stored in network byte order.
- - The ordering of elements in this structure is specifically designed - so that when sin6_addr field is aligned on a 64-bit boundary, the - start of the structure will also be aligned on a 64-bit boundary. - This is done for optimum performance on 64-bit architectures. - - The sin6_scope_id field is a 32-bit integer that identifies a set of - interfaces as appropriate for the scope [2] of the address carried in - the sin6_addr field. The mapping of sin6_scope_id to an interface or - set of interfaces is left to implementation and future specifications - on the subject of scoped addresses. - - Notice that the sockaddr_in6 structure will normally be larger than - the generic sockaddr structure. On many existing implementations the - sizeof(struct sockaddr_in) equals sizeof(struct sockaddr), with both - being 16 bytes. Any existing code that makes this assumption needs - to be examined carefully when converting to IPv6. - - - - -Gilligan, et al. Informational [Page 8] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -3.4 Socket Address Structure for 4.4BSD-Based Systems - - The 4.4BSD release includes a small, but incompatible change to the - socket interface. The "sa_family" field of the sockaddr data - structure was changed from a 16-bit value to an 8-bit value, and the - space saved used to hold a length field, named "sa_len". The - sockaddr_in6 data structure given in the previous section cannot be - correctly cast into the newer sockaddr data structure. For this - reason, the following alternative IPv6 address data structure is - provided to be used on systems based on 4.4BSD. It is defined as a - result of including the <netinet/in.h> header.
- -struct sockaddr_in6 { - uint8_t sin6_len; /* length of this struct */ - sa_family_t sin6_family; /* AF_INET6 */ - in_port_t sin6_port; /* transport layer port # */ - uint32_t sin6_flowinfo; /* IPv6 flow information */ - struct in6_addr sin6_addr; /* IPv6 address */ - uint32_t sin6_scope_id; /* set of interfaces for a scope */ -}; - - The only differences between this data structure and the 4.3BSD - variant are the inclusion of the length field, and the change of the - family field to a 8-bit data type. The definitions of all the other - fields are identical to the structure defined in the previous - section. - - Systems that provide this version of the sockaddr_in6 data structure - must also declare SIN6_LEN as a result of including the - <netinet/in.h> header. This macro allows applications to determine - whether they are being built on a system that supports the 4.3BSD or - 4.4BSD variants of the data structure. - -3.5 The Socket Functions - - Applications call the socket() function to create a socket descriptor - that represents a communication endpoint. The arguments to the - socket() function tell the system which protocol to use, and what - format address structure will be used in subsequent functions. For - example, to create an IPv4/TCP socket, applications make the call: - - s = socket(AF_INET, SOCK_STREAM, 0); - - To create an IPv4/UDP socket, applications make the call: - - s = socket(AF_INET, SOCK_DGRAM, 0); - - - - - -Gilligan, et al. Informational [Page 9] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - Applications may create IPv6/TCP and IPv6/UDP sockets (which may also - handle IPv4 communication as described in section 3.7) by simply - using the constant AF_INET6 instead of AF_INET in the first argument.
- For example, to create an IPv6/TCP socket, applications make the - call: - - s = socket(AF_INET6, SOCK_STREAM, 0); - - To create an IPv6/UDP socket, applications make the call: - - s = socket(AF_INET6, SOCK_DGRAM, 0); - - Once the application has created a AF_INET6 socket, it must use the - sockaddr_in6 address structure when passing addresses in to the - system. The functions that the application uses to pass addresses - into the system are: - - bind() - connect() - sendmsg() - sendto() - - The system will use the sockaddr_in6 address structure to return - addresses to applications that are using AF_INET6 sockets. The - functions that return an address from the system to an application - are: - - accept() - recvfrom() - recvmsg() - getpeername() - getsockname() - - No changes to the syntax of the socket functions are needed to - support IPv6, since all of the "address carrying" functions use an - opaque address pointer, and carry an address length as a function - argument. - -3.6 Compatibility with IPv4 Applications - - In order to support the large base of applications using the original - API, system implementations must provide complete source and binary - compatibility with the original API. This means that systems must - continue to support AF_INET sockets and the sockaddr_in address - structure. Applications must be able to create IPv4/TCP and IPv4/UDP - sockets using the AF_INET constant in the socket() function, as - - - - - -Gilligan, et al. Informational [Page 10] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - described in the previous section. Applications should be able to - hold a combination of IPv4/TCP, IPv4/UDP, IPv6/TCP and IPv6/UDP - sockets simultaneously within the same process. - - Applications using the original API should continue to operate as - they did on systems supporting only IPv4. That is, they should - continue to interoperate with IPv4 nodes. 
- -3.7 Compatibility with IPv4 Nodes - - The API also provides a different type of compatibility: the ability - for IPv6 applications to interoperate with IPv4 applications. This - feature uses the IPv4-mapped IPv6 address format defined in the IPv6 - addressing architecture specification [2]. This address format - allows the IPv4 address of an IPv4 node to be represented as an IPv6 - address. The IPv4 address is encoded into the low-order 32 bits of - the IPv6 address, and the high-order 96 bits hold the fixed prefix - 0:0:0:0:0:FFFF. IPv4-mapped addresses are written as follows: - - ::FFFF:<IPv4-address> - - These addresses can be generated automatically by the getaddrinfo() - function, as described in Section 6.1. - - Applications may use AF_INET6 sockets to open TCP connections to IPv4 - nodes, or send UDP packets to IPv4 nodes, by simply encoding the - destination's IPv4 address as an IPv4-mapped IPv6 address, and - passing that address, within a sockaddr_in6 structure, in the - connect() or sendto() call. When applications use AF_INET6 sockets - to accept TCP connections from IPv4 nodes, or receive UDP packets - from IPv4 nodes, the system returns the peer's address to the - application in the accept(), recvfrom(), or getpeername() call using - a sockaddr_in6 structure encoded this way. - - Few applications will likely need to know which type of node they are - interoperating with. However, for those applications that do need to - know, the IN6_IS_ADDR_V4MAPPED() macro, defined in Section 6.4, is - provided. - -3.8 IPv6 Wildcard Address - - While the bind() function allows applications to select the source IP - address of UDP packets and TCP connections, applications often want - the system to select the source address for them. With IPv4, one - specifies the address as the symbolic constant INADDR_ANY (called the - "wildcard" address) in the bind() call, or simply omits the bind() - entirely. - - - - -Gilligan, et al.
Informational [Page 11] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - Since the IPv6 address type is a structure (struct in6_addr), a - symbolic constant can be used to initialize an IPv6 address variable, - but cannot be used in an assignment. Therefore systems provide the - IPv6 wildcard address in two forms. - - The first version is a global variable named "in6addr_any" that is an - in6_addr structure. The extern declaration for this variable is - defined in <netinet/in.h>: - - extern const struct in6_addr in6addr_any; - - Applications use in6addr_any similarly to the way they use INADDR_ANY - in IPv4. For example, to bind a socket to port number 23, but let - the system select the source address, an application could use the - following code: - - struct sockaddr_in6 sin6; - . . . - sin6.sin6_family = AF_INET6; - sin6.sin6_flowinfo = 0; - sin6.sin6_port = htons(23); - sin6.sin6_addr = in6addr_any; /* structure assignment */ - . . . - if (bind(s, (struct sockaddr *) &sin6, sizeof(sin6)) == -1) - . . . - - The other version is a symbolic constant named IN6ADDR_ANY_INIT and - is defined in <netinet/in.h>. This constant can be used to - initialize an in6_addr structure: - - struct in6_addr anyaddr = IN6ADDR_ANY_INIT; - - Note that this constant can be used ONLY at declaration time. It can - not be used to assign a previously declared in6_addr structure. For - example, the following code will not work: - - /* This is the WRONG way to assign an unspecified address */ - struct sockaddr_in6 sin6; - . . . - sin6.sin6_addr = IN6ADDR_ANY_INIT; /* will NOT compile */ - - Be aware that the IPv4 INADDR_xxx constants are all defined in host - byte order but the IPv6 IN6ADDR_xxx constants and the IPv6 - in6addr_xxx externals are defined in network byte order. - - - - - - - -Gilligan, et al.
Informational [Page 12] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -3.9 IPv6 Loopback Address - - Applications may need to send UDP packets to, or originate TCP - connections to, services residing on the local node. In IPv4, they - can do this by using the constant IPv4 address INADDR_LOOPBACK in - their connect(), sendto(), or sendmsg() call. - - IPv6 also provides a loopback address to contact local TCP and UDP - services. Like the unspecified address, the IPv6 loopback address is - provided in two forms -- a global variable and a symbolic constant. - - The global variable is an in6_addr structure named - "in6addr_loopback." The extern declaration for this variable is - defined in <netinet/in.h>: - - extern const struct in6_addr in6addr_loopback; - - Applications use in6addr_loopback as they would use INADDR_LOOPBACK - in IPv4 applications (but beware of the byte ordering difference - mentioned at the end of the previous section). For example, to open - a TCP connection to the local telnet server, an application could use - the following code: - - struct sockaddr_in6 sin6; - . . . - sin6.sin6_family = AF_INET6; - sin6.sin6_flowinfo = 0; - sin6.sin6_port = htons(23); - sin6.sin6_addr = in6addr_loopback; /* structure assignment */ - . . . - if (connect(s, (struct sockaddr *) &sin6, sizeof(sin6)) == -1) - . . . - - The symbolic constant is named IN6ADDR_LOOPBACK_INIT and is defined - in <netinet/in.h>. It can be used at declaration time ONLY; for - example: - - struct in6_addr loopbackaddr = IN6ADDR_LOOPBACK_INIT; - - Like IN6ADDR_ANY_INIT, this constant cannot be used in an assignment - to a previously declared IPv6 address variable. - - - - - - - - - - -Gilligan, et al. Informational [Page 13] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -3.10 Portability Additions - - One simple addition to the sockets API that can help application - writers is the "struct sockaddr_storage".
This data structure can - simplify writing code that is portable across multiple address - families and platforms. This data structure is designed with the - following goals. - - - Large enough to accommodate all supported protocol-specific address - structures. - - - Aligned at an appropriate boundary so that pointers to it can be - cast as pointers to protocol specific address structures and used - to access the fields of those structures without alignment - problems. - - The sockaddr_storage structure contains field ss_family which is of - type sa_family_t. When a sockaddr_storage structure is cast to a - sockaddr structure, the ss_family field of the sockaddr_storage - structure maps onto the sa_family field of the sockaddr structure. - When a sockaddr_storage structure is cast as a protocol specific - address structure, the ss_family field maps onto a field of that - structure that is of type sa_family_t and that identifies the - protocol's address family. - - - - - - - - - - - - - - - - - - - - - - - - - - - -Gilligan, et al. Informational [Page 14] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - An example implementation design of such a data structure would be as - follows. - -/* - * Desired design of maximum size and alignment - */ -#define _SS_MAXSIZE 128 /* Implementation specific max size */ -#define _SS_ALIGNSIZE (sizeof (int64_t)) - /* Implementation specific desired alignment */ -/* - * Definitions used for sockaddr_storage structure paddings design. 
- */ -#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof (sa_family_t)) -#define _SS_PAD2SIZE (_SS_MAXSIZE - (sizeof (sa_family_t) + - _SS_PAD1SIZE + _SS_ALIGNSIZE)) -struct sockaddr_storage { - sa_family_t ss_family; /* address family */ - /* Following fields are implementation specific */ - char __ss_pad1[_SS_PAD1SIZE]; - /* 6 byte pad, this is to make implementation - /* specific pad up to alignment field that */ - /* follows explicit in the data structure */ - int64_t __ss_align; /* field to force desired structure */ - /* storage alignment */ - char __ss_pad2[_SS_PAD2SIZE]; - /* 112 byte pad to achieve desired size, */ - /* _SS_MAXSIZE value minus size of ss_family */ - /* __ss_pad1, __ss_align fields is 112 */ -}; - - The above example implementation illustrates a data structure which - will align on a 64-bit boundary. An implementation-specific field - "__ss_align" along with "__ss_pad1" is used to force a 64-bit - alignment which covers proper alignment good enough for the needs of - sockaddr_in6 (IPv6), sockaddr_in (IPv4) address data structures. The - size of padding field __ss_pad1 depends on the chosen alignment - boundary. The size of padding field __ss_pad2 depends on the value - of overall size chosen for the total size of the structure. This - size and alignment are represented in the above example by - implementation specific (not required) constants _SS_MAXSIZE (chosen - value 128) and _SS_ALIGNSIZE (with chosen value 8). Constants - _SS_PAD1SIZE (derived value 6) and _SS_PAD2SIZE (derived value 112) - are also for illustration and not required. The derived values - assume sa_family_t is 2 bytes. The implementation specific - definitions and structure field names above start with an underscore - to denote implementation private namespace. Portable code is not - expected to access or reference those fields or constants. - - - - -Gilligan, et al. 
Informational [Page 15] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - On implementations where the sockaddr data structure includes a - "sa_len" field this data structure would look like this: - -/* - * Definitions used for sockaddr_storage structure paddings design. - */ -#define _SS_PAD1SIZE (_SS_ALIGNSIZE - - (sizeof (uint8_t) + sizeof (sa_family_t)) -#define _SS_PAD2SIZE (_SS_MAXSIZE - - (sizeof (uint8_t) + sizeof (sa_family_t) + - _SS_PAD1SIZE + _SS_ALIGNSIZE)) -struct sockaddr_storage { - uint8_t ss_len; /* address length */ - sa_family_t ss_family; /* address family */ - /* Following fields are implementation specific */ - char __ss_pad1[_SS_PAD1SIZE]; - /* 6 byte pad, this is to make implementation - /* specific pad up to alignment field that */ - /* follows explicit in the data structure */ - int64_t __ss_align; /* field to force desired structure */ - /* storage alignment */ - char __ss_pad2[_SS_PAD2SIZE]; - /* 112 byte pad to achieve desired size, */ - /* _SS_MAXSIZE value minus size of ss_len, */ - /* __ss_family, __ss_pad1, __ss_align fields is 112 */ -}; - -4. Interface Identification - - This API uses an interface index (a small positive integer) to - identify the local interface on which a multicast group is joined - (Section 5.2). Additionally, the advanced API [4] uses these same - interface indexes to identify the interface on which a datagram is - received, or to specify the interface on which a datagram is to be - sent. - - Interfaces are normally known by names such as "le0", "sl1", "ppp2", - and the like. On Berkeley-derived implementations, when an interface - is made known to the system, the kernel assigns a unique positive - integer value (called the interface index) to that interface. These - are small positive integers that start at 1. (Note that 0 is never - used for an interface index.) There may be gaps so that there is no - current interface for a particular positive interface index. 
- - This API defines two functions that map between an interface name and - index, a third function that returns all the interface names and - indexes, and a fourth function to return the dynamic memory allocated - by the previous function. How these functions are implemented is - - - -Gilligan, et al. Informational [Page 16] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - left up to the implementation. 4.4BSD implementations can implement - these functions using the existing sysctl() function with the - NET_RT_IFLIST command. Other implementations may wish to use ioctl() - for this purpose. - -4.1 Name-to-Index - - The first function maps an interface name into its corresponding - index. - - #include <net/if.h> - - unsigned int if_nametoindex(const char *ifname); - - If ifname is the name of an interface, the if_nametoindex() function - shall return the interface index corresponding to name ifname; - otherwise, it shall return zero. No errors are defined. - -4.2 Index-to-Name - - The second function maps an interface index into its corresponding - name. - - #include <net/if.h> - - char *if_indextoname(unsigned int ifindex, char *ifname); - - When this function is called, the ifname argument shall point to a - buffer of at least IF_NAMESIZE bytes. The function shall place in - this buffer the name of the interface with index ifindex. - (IF_NAMESIZE is also defined in <net/if.h> and its value includes a - terminating null byte at the end of the interface name.) If ifindex - is an interface index, then the function shall return the value - supplied in ifname, which points to a buffer now containing the - interface name. Otherwise, the function shall return a NULL pointer - and set errno to indicate the error. If there is no interface - corresponding to the specified index, errno is set to ENXIO. If - there was a system error (such as running out of memory), errno would - be set to the proper value (e.g., ENOMEM). - - - - - - - - - - - - -Gilligan, et al.
Informational [Page 17] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -4.3 Return All Interface Names and Indexes - - The if_nameindex structure holds the information about a single - interface and is defined as a result of including the <net/if.h> - header. - - struct if_nameindex { - unsigned int if_index; /* 1, 2, ... */ - char *if_name; /* null terminated name: "le0", ... */ - }; - - The final function returns an array of if_nameindex structures, one - structure per interface. - - #include <net/if.h> - - struct if_nameindex *if_nameindex(void); - - The end of the array of structures is indicated by a structure with - an if_index of 0 and an if_name of NULL. The function returns a NULL - pointer upon an error, and would set errno to the appropriate value. - - The memory used for this array of structures along with the interface - names pointed to by the if_name members is obtained dynamically. - This memory is freed by the next function. - -4.4 Free Memory - - The following function frees the dynamic memory that was allocated by - if_nameindex(). - - #include <net/if.h> - - void if_freenameindex(struct if_nameindex *ptr); - - The ptr argument shall be a pointer that was returned by - if_nameindex(). After if_freenameindex() has been called, the - application shall not use the array of which ptr is the address. - -5. Socket Options - - A number of new socket options are defined for IPv6. All of these - new options are at the IPPROTO_IPV6 level. That is, the "level" - parameter in the getsockopt() and setsockopt() calls is IPPROTO_IPV6 - when using these options. The constant name prefix IPV6_ is used in - all of the new socket options. This serves to clearly identify these - options as applying to IPv6. - - - - -Gilligan, et al.
Informational [Page 18] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - The declaration for IPPROTO_IPV6, the new IPv6 socket options, and - related constants defined in this section are obtained by including - the header <netinet/in.h>. - -5.1 Unicast Hop Limit - - A new setsockopt() option controls the hop limit used in outgoing - unicast IPv6 packets. The name of this option is IPV6_UNICAST_HOPS, - and it is used at the IPPROTO_IPV6 layer. The following example - illustrates how it is used: - - int hoplimit = 10; - - if (setsockopt(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, - (char *) &hoplimit, sizeof(hoplimit)) == -1) - perror("setsockopt IPV6_UNICAST_HOPS"); - - When the IPV6_UNICAST_HOPS option is set with setsockopt(), the - option value given is used as the hop limit for all subsequent - unicast packets sent via that socket. If the option is not set, the - system selects a default value. The integer hop limit value (called - x) is interpreted as follows: - - x < -1: return an error of EINVAL - x == -1: use kernel default - 0 <= x <= 255: use x - x >= 256: return an error of EINVAL - - The IPV6_UNICAST_HOPS option may be used with getsockopt() to - determine the hop limit value that the system will use for subsequent - unicast packets sent via that socket. For example: - - int hoplimit; - socklen_t len = sizeof(hoplimit); - - if (getsockopt(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, - (char *) &hoplimit, &len) == -1) - perror("getsockopt IPV6_UNICAST_HOPS"); - else - printf("Using %d for hop limit.\n", hoplimit); - -5.2 Sending and Receiving Multicast Packets - - IPv6 applications may send multicast packets by simply specifying an - IPv6 multicast address as the destination address, for example in the - destination address argument of the sendto() function. - - - - - -Gilligan, et al. 
Informational [Page 19] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - Three socket options at the IPPROTO_IPV6 layer control some of the - parameters for sending multicast packets. Setting these options is - not required: applications may send multicast packets without using - these options. The setsockopt() options for controlling the sending - of multicast packets are summarized below. These three options can - also be used with getsockopt(). - - IPV6_MULTICAST_IF - - Set the interface to use for outgoing multicast packets. The - argument is the index of the interface to use. If the - interface index is specified as zero, the system selects the - interface (for example, by looking up the address in a routing - table and using the resulting interface). - - Argument type: unsigned int - - IPV6_MULTICAST_HOPS - - Set the hop limit to use for outgoing multicast packets. (Note - a separate option - IPV6_UNICAST_HOPS - is provided to set the - hop limit to use for outgoing unicast packets.) - - The interpretation of the argument is the same as for the - IPV6_UNICAST_HOPS option: - - x < -1: return an error of EINVAL - x == -1: use kernel default - 0 <= x <= 255: use x - x >= 256: return an error of EINVAL - - If IPV6_MULTICAST_HOPS is not set, the default is 1 - (same as IPv4 today) - - Argument type: int - - IPV6_MULTICAST_LOOP - - If a multicast datagram is sent to a group to which the sending - host itself belongs (on the outgoing interface), a copy of the - datagram is looped back by the IP layer for local delivery if - this option is set to 1. If this option is set to 0 a copy is - not looped back. Other option values return an error of - EINVAL. - - - - - - - -Gilligan, et al. Informational [Page 20] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - If IPV6_MULTICAST_LOOP is not set, the default is 1 (loopback; - same as IPv4 today). 
- - Argument type: unsigned int - - The reception of multicast packets is controlled by the two - setsockopt() options summarized below. An error of EOPNOTSUPP is - returned if these two options are used with getsockopt(). - - IPV6_JOIN_GROUP - - Join a multicast group on a specified local interface. - If the interface index is specified as 0, - the kernel chooses the local interface. - For example, some kernels look up the multicast group - in the normal IPv6 routing table and use the resulting - interface. - - Argument type: struct ipv6_mreq - - IPV6_LEAVE_GROUP - - Leave a multicast group on a specified interface. - If the interface index is specified as 0, the system - may choose a multicast group membership to drop by - matching the multicast address only. - - Argument type: struct ipv6_mreq - - The argument type of both of these options is the ipv6_mreq - structure, defined as a result of including the <netinet/in.h> - header; - - struct ipv6_mreq { - struct in6_addr ipv6mr_multiaddr; /* IPv6 multicast addr */ - unsigned int ipv6mr_interface; /* interface index */ - }; - - Note that to receive multicast datagrams a process must join the - multicast group to which datagrams will be sent. UDP applications - must also bind the UDP port to which datagrams will be sent. Some - processes also bind the multicast group address to the socket, in - addition to the port, to prevent other datagrams destined to that - same port from being delivered to the socket. - - - - - - - -Gilligan, et al. Informational [Page 21] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -5.3 IPV6_V6ONLY option for AF_INET6 Sockets - - This socket option restricts AF_INET6 sockets to IPv6 communications - only. As stated in section <3.7 Compatibility with IPv4 Nodes>, - AF_INET6 sockets may be used for both IPv4 and IPv6 communications. - Some applications may want to restrict their use of an AF_INET6 - socket to IPv6 communications only. 
For these applications the - IPV6_V6ONLY socket option is defined. When this option is turned on, - the socket can be used to send and receive IPv6 packets only. This - is an IPPROTO_IPV6 level option. This option takes an int value. - This is a boolean option. By default this option is turned off. - - Here is an example of setting this option: - - int on = 1; - - if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, - (char *)&on, sizeof(on)) == -1) - perror("setsockopt IPV6_V6ONLY"); - else - printf("IPV6_V6ONLY set\n"); - - Note - This option has no effect on the use of IPv4 Mapped addresses - which enter a node as a valid IPv6 addresses for IPv6 communications - as defined by Stateless IP/ICMP Translation Algorithm (SIIT) [5]. - - An example use of this option is to allow two versions of the same - server process to run on the same port, one providing service over - IPv6, the other providing the same service over IPv4. - -6. Library Functions - - New library functions are needed to perform a variety of operations - with IPv6 addresses. Functions are needed to lookup IPv6 addresses - in the Domain Name System (DNS). Both forward lookup (nodename-to- - address translation) and reverse lookup (address-to-nodename - translation) need to be supported. Functions are also needed to - convert IPv6 addresses between their binary and textual form. - - We note that the two existing functions, gethostbyname() and - gethostbyaddr(), are left as-is. New functions are defined to handle - both IPv4 and IPv6 addresses. - - The commonly used function gethostbyname() is inadequate for many - applications, first because it provides no way for the caller to - specify anything about the types of addresses desired (IPv4 only, - IPv6 only, IPv4-mapped IPv6 are OK, etc.), and second because many - implementations of this function are not thread safe. RFC 2133 - - - -Gilligan, et al. 
Informational [Page 22] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - defined a function named gethostbyname2() but this function was also - inadequate, first because its use required setting a global option - (RES_USE_INET6) when IPv6 addresses were required, and second because - a flag argument is needed to provide the caller with additional - control over the types of addresses required. The gethostbyname2() - function was deprecated in RFC 2553 and is no longer part of the - basic API. - -6.1 Protocol-Independent Nodename and Service Name Translation - - Nodename-to-address translation is done in a protocol-independent - fashion using the getaddrinfo() function. - -#include <sys/socket.h> -#include <netdb.h> - - -int getaddrinfo(const char *nodename, const char *servname, - const struct addrinfo *hints, struct addrinfo **res); - -void freeaddrinfo(struct addrinfo *ai); - -struct addrinfo { - int ai_flags; /* AI_PASSIVE, AI_CANONNAME, - AI_NUMERICHOST, .. */ - int ai_family; /* AF_xxx */ - int ai_socktype; /* SOCK_xxx */ - int ai_protocol; /* 0 or IPPROTO_xxx for IPv4 and IPv6 */ - socklen_t ai_addrlen; /* length of ai_addr */ - char *ai_canonname; /* canonical name for nodename */ - struct sockaddr *ai_addr; /* binary address */ - struct addrinfo *ai_next; /* next structure in linked list */ -}; - - The getaddrinfo() function translates the name of a service location - (for example, a host name) and/or a service name and returns a set of - socket addresses and associated information to be used in creating a - socket with which to address the specified service. - - The nodename and servname arguments are either null pointers or - pointers to null-terminated strings. One or both of these two - arguments must be a non-null pointer. - - The format of a valid name depends on the address family or families. 
- If a specific family is not given and the name could be interpreted - as valid within multiple supported families, the implementation will - attempt to resolve the name in all supported families and, in absence - of errors, one or more results shall be returned. - - - -Gilligan, et al. Informational [Page 23] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - If the nodename argument is not null, it can be a descriptive name or - can be an address string. If the specified address family is - AF_INET, AF_INET6, or AF_UNSPEC, valid descriptive names include host - names. If the specified address family is AF_INET or AF_UNSPEC, - address strings using Internet standard dot notation as specified in - inet_addr() are valid. If the specified address family is AF_INET6 - or AF_UNSPEC, standard IPv6 text forms described in inet_pton() are - valid. - - If nodename is not null, the requested service location is named by - nodename; otherwise, the requested service location is local to the - caller. - - If servname is null, the call shall return network-level addresses - for the specified nodename. If servname is not null, it is a null- - terminated character string identifying the requested service. This - can be either a descriptive name or a numeric representation suitable - for use with the address family or families. If the specified - address family is AF_INET, AF_INET6 or AF_UNSPEC, the service can be - specified as a string specifying a decimal port number. - - If the argument hints is not null, it refers to a structure - containing input values that may direct the operation by providing - options and by limiting the returned information to a specific socket - type, address family and/or protocol. In this hints structure every - member other than ai_flags, ai_family, ai_socktype and ai_protocol - shall be set to zero or a null pointer. A value of AF_UNSPEC for - ai_family means that the caller shall accept any address family. 
A - value of zero for ai_socktype means that the caller shall accept any - socket type. A value of zero for ai_protocol means that the caller - shall accept any protocol. If hints is a null pointer, the behavior - shall be as if it referred to a structure containing the value zero - for the ai_flags, ai_socktype and ai_protocol fields, and AF_UNSPEC - for the ai_family field. - - Note: - - 1. If the caller handles only TCP and not UDP, for example, then the - ai_protocol member of the hints structure should be set to - IPPROTO_TCP when getaddrinfo() is called. - - 2. If the caller handles only IPv4 and not IPv6, then the ai_family - member of the hints structure should be set to AF_INET when - getaddrinfo() is called. - - - - - - - -Gilligan, et al. Informational [Page 24] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - The ai_flags field to which hints parameter points shall be set to - zero or be the bitwise-inclusive OR of one or more of the values - AI_PASSIVE, AI_CANONNAME, AI_NUMERICHOST, AI_NUMERICSERV, - AI_V4MAPPED, AI_ALL, and AI_ADDRCONFIG. - - If the AI_PASSIVE flag is specified, the returned address information - shall be suitable for use in binding a socket for accepting incoming - connections for the specified service (i.e., a call to bind()). In - this case, if the nodename argument is null, then the IP address - portion of the socket address structure shall be set to INADDR_ANY - for an IPv4 address or IN6ADDR_ANY_INIT for an IPv6 address. If the - AI_PASSIVE flag is not specified, the returned address information - shall be suitable for a call to connect() (for a connection-mode - protocol) or for a call to connect(), sendto() or sendmsg() (for a - connectionless protocol). In this case, if the nodename argument is - null, then the IP address portion of the socket address structure - shall be set to the loopback address. This flag is ignored if the - nodename argument is not null. 
- - If the AI_CANONNAME flag is specified and the nodename argument is - not null, the function shall attempt to determine the canonical name - corresponding to nodename (for example, if nodename is an alias or - shorthand notation for a complete name). - - If the AI_NUMERICHOST flag is specified, then a non-null nodename - string supplied shall be a numeric host address string. Otherwise, - an [EAI_NONAME] error is returned. This flag shall prevent any type - of name resolution service (for example, the DNS) from being invoked. - - If the AI_NUMERICSERV flag is specified, then a non-null servname - string supplied shall be a numeric port string. Otherwise, an - [EAI_NONAME] error shall be returned. This flag shall prevent any - type of name resolution service (for example, NIS+) from being - invoked. - - If the AI_V4MAPPED flag is specified along with an ai_family of - AF_INET6, then getaddrinfo() shall return IPv4-mapped IPv6 addresses - on finding no matching IPv6 addresses (ai_addrlen shall be 16). - - For example, when using the DNS, if no AAAA records are found then - a query is made for A records and any found are returned as IPv4- - mapped IPv6 addresses. - - The AI_V4MAPPED flag shall be ignored unless ai_family equals - AF_INET6. - - If the AI_ALL flag is used with the AI_V4MAPPED flag, then - getaddrinfo() shall return all matching IPv6 and IPv4 addresses. - - - -Gilligan, et al. Informational [Page 25] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - For example, when using the DNS, queries are made for both AAAA - records and A records, and getaddrinfo() returns the combined - results of both queries. Any IPv4 addresses found are returned as - IPv4-mapped IPv6 addresses. - - The AI_ALL flag without the AI_V4MAPPED flag is ignored. - - Note: - - When ai_family is not specified (AF_UNSPEC), AI_V4MAPPED and - AI_ALL flags will only be used if AF_INET6 is supported. 
- - If the AI_ADDRCONFIG flag is specified, IPv4 addresses shall be - returned only if an IPv4 address is configured on the local system, - and IPv6 addresses shall be returned only if an IPv6 address is - configured on the local system. The loopback address is not - considered for this case as valid as a configured address. - - For example, when using the DNS, a query for AAAA records should - occur only if the node has at least one IPv6 address configured - (other than IPv6 loopback) and a query for A records should occur - only if the node has at least one IPv4 address configured (other - than the IPv4 loopback). - - The ai_socktype field to which argument hints points specifies the - socket type for the service, as defined for socket(). If a specific - socket type is not given (for example, a value of zero) and the - service name could be interpreted as valid with multiple supported - socket types, the implementation shall attempt to resolve the service - name for all supported socket types and, in the absence of errors, - all possible results shall be returned. A non-zero socket type value - shall limit the returned information to values with the specified - socket type. - - If the ai_family field to which hints points has the value AF_UNSPEC, - addresses shall be returned for use with any address family that can - be used with the specified nodename and/or servname. Otherwise, - addresses shall be returned for use only with the specified address - family. If ai_family is not AF_UNSPEC and ai_protocol is not zero, - then addresses are returned for use only with the specified address - family and protocol; the value of ai_protocol shall be interpreted as - in a call to the socket() function with the corresponding values of - ai_family and ai_protocol. 
- - The freeaddrinfo() function frees one or more addrinfo structures - returned by getaddrinfo(), along with any additional storage - associated with those structures (for example, storage pointed to by - the ai_canonname and ai_addr fields; an application must not - - - -Gilligan, et al. Informational [Page 26] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - reference this storage after the associated addrinfo structure has - been freed). If the ai_next field of the structure is not null, the - entire list of structures is freed. The freeaddrinfo() function must - support the freeing of arbitrary sublists of an addrinfo list - originally returned by getaddrinfo(). - - Functions getaddrinfo() and freeaddrinfo() must be thread-safe. - - A zero return value for getaddrinfo() indicates successful - completion; a non-zero return value indicates failure. The possible - values for the failures are listed below under Error Return Values. - - Upon successful return of getaddrinfo(), the location to which res - points shall refer to a linked list of addrinfo structures, each of - which shall specify a socket address and information for use in - creating a socket with which to use that socket address. The list - shall include at least one addrinfo structure. The ai_next field of - each structure contains a pointer to the next structure on the list, - or a null pointer if it is the last structure on the list. Each - structure on the list shall include values for use with a call to the - socket() function, and a socket address for use with the connect() - function or, if the AI_PASSIVE flag was specified, for use with the - bind() function. The fields ai_family, ai_socktype, and ai_protocol - shall be usable as the arguments to the socket() function to create a - socket suitable for use with the returned address. 
The fields - ai_addr and ai_addrlen are usable as the arguments to the connect() - or bind() functions with such a socket, according to the AI_PASSIVE - flag. - - If nodename is not null, and if requested by the AI_CANONNAME flag, - the ai_canonname field of the first returned addrinfo structure shall - point to a null-terminated string containing the canonical name - corresponding to the input nodename; if the canonical name is not - available, then ai_canonname shall refer to the nodename argument or - a string with the same contents. The contents of the ai_flags field - of the returned structures are undefined. - - All fields in socket address structures returned by getaddrinfo() - that are not filled in through an explicit argument (for example, - sin6_flowinfo) shall be set to zero. - - Note: This makes it easier to compare socket address structures. - - - - - - - - - -Gilligan, et al. Informational [Page 27] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - Error Return Values: - - The getaddrinfo() function shall fail and return the corresponding - value if: - - [EAI_AGAIN] The name could not be resolved at this time. Future - attempts may succeed. - - [EAI_BADFLAGS] The flags parameter had an invalid value. - - [EAI_FAIL] A non-recoverable error occurred when attempting to - resolve the name. - - [EAI_FAMILY] The address family was not recognized. - - [EAI_MEMORY] There was a memory allocation failure when trying to - allocate storage for the return value. - - [EAI_NONAME] The name does not resolve for the supplied - parameters. Neither nodename nor servname were - supplied. At least one of these must be supplied. - - [EAI_SERVICE] The service passed was not recognized for the - specified socket type. - - [EAI_SOCKTYPE] The intended socket type was not recognized. - - [EAI_SYSTEM] A system error occurred; the error code can be found - in errno. 
- - The gai_strerror() function provides a descriptive text string - corresponding to an EAI_xxx error value. - - #include <netdb.h> - - const char *gai_strerror(int ecode); - - The argument is one of the EAI_xxx values defined for the - getaddrinfo() and getnameinfo() functions. The return value points - to a string describing the error. If the argument is not one of the - EAI_xxx values, the function still returns a pointer to a string - whose contents indicate an unknown error. - -6.2 Socket Address Structure to Node Name and Service Name - - The getnameinfo() function is used to translate the contents of a - socket address structure to a node name and/or service name. - - - - -Gilligan, et al. Informational [Page 28] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - #include <sys/socket.h> - #include <netdb.h> - - int getnameinfo(const struct sockaddr *sa, socklen_t salen, - char *node, socklen_t nodelen, - char *service, socklen_t servicelen, - int flags); - - The getnameinfo() function shall translate a socket address to a node - name and service location, all of which are defined as in - getaddrinfo(). - - The sa argument points to a socket address structure to be - translated. - - The salen argument holds the size of the socket address structure - pointed to by sa. - - If the socket address structure contains an IPv4-mapped IPv6 address - or an IPv4-compatible IPv6 address, the implementation shall extract - the embedded IPv4 address and lookup the node name for that IPv4 - address. - - Note: The IPv6 unspecified address ("::") and the IPv6 loopback - address ("::1") are not IPv4-compatible addresses. If the address - is the IPv6 unspecified address ("::"), a lookup is not performed, - and the [EAI_NONAME] error is returned. - - If the node argument is non-NULL and the nodelen argument is nonzero, - then the node argument points to a buffer able to contain up to - nodelen characters that receives the node name as a null-terminated - string. 
If the node argument is NULL or the nodelen argument is - zero, the node name shall not be returned. If the node's name cannot - be located, the numeric form of the node's address is returned - instead of its name. - - If the service argument is non-NULL and the servicelen argument is - non-zero, then the service argument points to a buffer able to - contain up to servicelen bytes that receives the service name as a - null-terminated string. If the service argument is NULL or the - servicelen argument is zero, the service name shall not be returned. - If the service's name cannot be located, the numeric form of the - service address (for example, its port number) shall be returned - instead of its name. - - The arguments node and service cannot both be NULL. - - - - - -Gilligan, et al. Informational [Page 29] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - The flags argument is a flag that changes the default actions of the - function. By default the fully-qualified domain name (FQDN) for the - host shall be returned, but: - - - If the flag bit NI_NOFQDN is set, only the node name portion of - the FQDN shall be returned for local hosts. - - - If the flag bit NI_NUMERICHOST is set, the numeric form of the - host's address shall be returned instead of its name, under all - circumstances. - - - If the flag bit NI_NAMEREQD is set, an error shall be returned if - the host's name cannot be located. - - - If the flag bit NI_NUMERICSERV is set, the numeric form of the - service address shall be returned (for example, its port number) - instead of its name, under all circumstances. - - - If the flag bit NI_DGRAM is set, this indicates that the service - is a datagram service (SOCK_DGRAM). The default behavior shall - assume that the service is a stream service (SOCK_STREAM). - - Note: - - 1. The NI_NUMERICxxx flags are required to support the "-n" flags - that many commands provide. - - 2. 
The NI_DGRAM flag is required for the few AF_INET and AF_INET6 - port numbers (for example, [512,514]) that represent different - services for UDP and TCP. - - The getnameinfo() function shall be thread safe. - - A zero return value for getnameinfo() indicates successful - completion; a non-zero return value indicates failure. - - Upon successful completion, getnameinfo() shall return the node and - service names, if requested, in the buffers provided. The returned - names are always null-terminated strings. - - - - - - - - - - - - -Gilligan, et al. Informational [Page 30] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - Error Return Values: - - The getnameinfo() function shall fail and return the corresponding - value if: - - [EAI_AGAIN] The name could not be resolved at this time. - Future attempts may succeed. - - [EAI_BADFLAGS] The flags had an invalid value. - - [EAI_FAIL] A non-recoverable error occurred. - - [EAI_FAMILY] The address family was not recognized or the address - length was invalid for the specified family. - - [EAI_MEMORY] There was a memory allocation failure. - - [EAI_NONAME] The name does not resolve for the supplied parameters. - NI_NAMEREQD is set and the host's name cannot be - located, or both nodename and servname were null. - - [EAI_OVERFLOW] An argument buffer overflowed. - - [EAI_SYSTEM] A system error occurred. The error code can be found - in errno. - -6.3 Address Conversion Functions - - The two IPv4 functions inet_addr() and inet_ntoa() convert an IPv4 - address between binary and text form. IPv6 applications need similar - functions. The following two functions convert both IPv6 and IPv4 - addresses: - - #include <arpa/inet.h> - - int inet_pton(int af, const char *src, void *dst); - - const char *inet_ntop(int af, const void *src, - char *dst, socklen_t size); - - The inet_pton() function shall convert an address in its standard - text presentation form into its numeric binary form. 
The af argument - shall specify the family of the address. The AF_INET and AF_INET6 - address families shall be supported. The src argument points to the - string being passed in. The dst argument points to a buffer into - which the function stores the numeric address; this shall be large - enough to hold the numeric address (32 bits for AF_INET, 128 bits for - AF_INET6). The inet_pton() function shall return 1 if the conversion - - - -Gilligan, et al. Informational [Page 31] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - succeeds, with the address pointed to by dst in network byte order. - It shall return 0 if the input is not a valid IPv4 dotted-decimal - string or a valid IPv6 address string, or -1 with errno set to - EAFNOSUPPORT if the af argument is unknown. - - If the af argument of inet_pton() is AF_INET, the src string shall be - in the standard IPv4 dotted-decimal form: - - ddd.ddd.ddd.ddd - - where "ddd" is a one to three digit decimal number between 0 and 255. - The inet_pton() function does not accept other formats (such as the - octal numbers, hexadecimal numbers, and fewer than four numbers that - inet_addr() accepts). - - If the af argument of inet_pton() is AF_INET6, the src string shall - be in one of the standard IPv6 text forms defined in Section 2.2 of - the addressing architecture specification [2]. - - The inet_ntop() function shall convert a numeric address into a text - string suitable for presentation. The af argument shall specify the - family of the address. This can be AF_INET or AF_INET6. The src - argument points to a buffer holding an IPv4 address if the af - argument is AF_INET, or an IPv6 address if the af argument is - AF_INET6; the address must be in network byte order. The dst - argument points to a buffer where the function stores the resulting - text string; it shall not be NULL. 
The size argument specifies the - size of this buffer, which shall be large enough to hold the text - string (INET_ADDRSTRLEN characters for IPv4, INET6_ADDRSTRLEN - characters for IPv6). - - In order to allow applications to easily declare buffers of the - proper size to store IPv4 and IPv6 addresses in string form, the - following two constants are defined in <netinet/in.h>: - - #define INET_ADDRSTRLEN 16 - #define INET6_ADDRSTRLEN 46 - - The inet_ntop() function shall return a pointer to the buffer - containing the text string if the conversion succeeds, and NULL - otherwise. Upon failure, errno is set to EAFNOSUPPORT if the af - argument is invalid or ENOSPC if the size of the result buffer is - inadequate. - - - - - - - - -Gilligan, et al. Informational [Page 32] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -6.4 Address Testing Macros - - The following macros can be used to test for special IPv6 addresses. - - #include <netinet/in.h> - - int IN6_IS_ADDR_UNSPECIFIED (const struct in6_addr *); - int IN6_IS_ADDR_LOOPBACK (const struct in6_addr *); - int IN6_IS_ADDR_MULTICAST (const struct in6_addr *); - int IN6_IS_ADDR_LINKLOCAL (const struct in6_addr *); - int IN6_IS_ADDR_SITELOCAL (const struct in6_addr *); - int IN6_IS_ADDR_V4MAPPED (const struct in6_addr *); - int IN6_IS_ADDR_V4COMPAT (const struct in6_addr *); - - int IN6_IS_ADDR_MC_NODELOCAL(const struct in6_addr *); - int IN6_IS_ADDR_MC_LINKLOCAL(const struct in6_addr *); - int IN6_IS_ADDR_MC_SITELOCAL(const struct in6_addr *); - int IN6_IS_ADDR_MC_ORGLOCAL (const struct in6_addr *); - int IN6_IS_ADDR_MC_GLOBAL (const struct in6_addr *); - - The first seven macros return true if the address is of the specified - type, or false otherwise. The last five test the scope of a - multicast address and return true if the address is a multicast - address of the specified scope or false if the address is either not - a multicast address or not of the specified scope. 
- - Note that IN6_IS_ADDR_LINKLOCAL and IN6_IS_ADDR_SITELOCAL return true - only for the two types of local-use IPv6 unicast addresses (Link- - Local and Site-Local) defined in [2], and that by this definition, - the IN6_IS_ADDR_LINKLOCAL macro returns false for the IPv6 loopback - address (::1). These two macros do not return true for IPv6 - multicast addresses of either link-local scope or site-local scope. - -7. Summary of New Definitions - - The following list summarizes the constants, structure, and extern - definitions discussed in this memo, sorted by header. - - IF_NAMESIZE - struct if_nameindex{}; - - AI_ADDRCONFIG - AI_ALL - AI_CANONNAME - AI_NUMERICHOST - AI_NUMERICSERV - AI_PASSIVE - AI_V4MAPPED - - - -Gilligan, et al. Informational [Page 33] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - EAI_AGAIN - EAI_BADFLAGS - EAI_FAIL - EAI_FAMILY - EAI_MEMORY - EAI_NONAME - EAI_OVERFLOW - EAI_SERVICE - EAI_SOCKTYPE - EAI_SYSTEM - NI_DGRAM - NI_NAMEREQD - NI_NOFQDN - NI_NUMERICHOST - NI_NUMERICSERV - struct addrinfo{}; - - IN6ADDR_ANY_INIT - IN6ADDR_LOOPBACK_INIT - INET6_ADDRSTRLEN - INET_ADDRSTRLEN - IPPROTO_IPV6 - IPV6_JOIN_GROUP - IPV6_LEAVE_GROUP - IPV6_MULTICAST_HOPS - IPV6_MULTICAST_IF - IPV6_MULTICAST_LOOP - IPV6_UNICAST_HOPS - IPV6_V6ONLY - SIN6_LEN - extern const struct in6_addr in6addr_any; - extern const struct in6_addr in6addr_loopback; - struct in6_addr{}; - struct ipv6_mreq{}; - struct sockaddr_in6{}; - - AF_INET6 - PF_INET6 - struct sockaddr_storage; - - The following list summarizes the function and macro prototypes - discussed in this memo, sorted by header. - - int inet_pton(int, const char *, void *); - const char *inet_ntop(int, const void *, - char *, socklen_t); - - - - - -Gilligan, et al. 
Informational [Page 34] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - char *if_indextoname(unsigned int, char *); - unsigned int if_nametoindex(const char *); - void if_freenameindex(struct if_nameindex *); - struct if_nameindex *if_nameindex(void); - - int getaddrinfo(const char *, const char *, - const struct addrinfo *, - struct addrinfo **); - int getnameinfo(const struct sockaddr *, socklen_t, - char *, socklen_t, char *, socklen_t, int); - void freeaddrinfo(struct addrinfo *); - const char *gai_strerror(int); - - int IN6_IS_ADDR_LINKLOCAL(const struct in6_addr *); - int IN6_IS_ADDR_LOOPBACK(const struct in6_addr *); - int IN6_IS_ADDR_MC_GLOBAL(const struct in6_addr *); - int IN6_IS_ADDR_MC_LINKLOCAL(const struct in6_addr *); - int IN6_IS_ADDR_MC_NODELOCAL(const struct in6_addr *); - int IN6_IS_ADDR_MC_ORGLOCAL(const struct in6_addr *); - int IN6_IS_ADDR_MC_SITELOCAL(const struct in6_addr *); - int IN6_IS_ADDR_MULTICAST(const struct in6_addr *); - int IN6_IS_ADDR_SITELOCAL(const struct in6_addr *); - int IN6_IS_ADDR_UNSPECIFIED(const struct in6_addr *); - int IN6_IS_ADDR_V4COMPAT(const struct in6_addr *); - int IN6_IS_ADDR_V4MAPPED(const struct in6_addr *); - -8. Security Considerations - - IPv6 provides a number of new security mechanisms, many of which need - to be accessible to applications. Companion memos detailing the - extensions to the socket interfaces to support IPv6 security are - being written. - -9. Changes from RFC 2553 - - 1. Add brief description of the history of this API and its relation - to the Open Group/IEEE/ISO standards. - - 2. Alignments with [3]. - - 3. Removed all references to getipnodebyname() and getipnodebyaddr(), - which are deprecated in favor of getaddrinfo() and getnameinfo(). - - 4. Added IPV6_V6ONLY IP level socket option to permit nodes to not - process IPv4 packets as IPv4 Mapped addresses in implementations. - - 5. Added SIIT to references and added new contributors. - - - - -Gilligan, et al. 
Informational [Page 35] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - - 6. In previous versions of this specification, the sin6_flowinfo - field was associated with the IPv6 traffic class and flow label, - but its usage was not completely specified. The complete - definition of the sin6_flowinfo field, including its association - with the traffic class or flow label, is now deferred to a future - specification. - -10. Acknowledgments - - This specification's evolution and completeness were significantly - influenced by the efforts of Richard Stevens, who has passed on. - Richard's wisdom and talent made the specification what it is today. - The co-authors will long think of Richard with great respect. - - Thanks to the many people who made suggestions and provided feedback - to this document, including: - - Werner Almesberger, Ran Atkinson, Fred Baker, Dave Borman, Andrew - Cherenson, Alex Conta, Alan Cox, Steve Deering, Richard Draves, - Francis Dupont, Robert Elz, Brian Haberman, Jun-ichiro itojun Hagino, - Marc Hasson, Tom Herbert, Bob Hinden, Wan-Yen Hsu, Christian Huitema, - Koji Imada, Markus Jork, Ron Lee, Alan Lloyd, Charles Lynn, Dan - McDonald, Dave Mitton, Finnbarr Murphy, Thomas Narten, Josh Osborne, - Craig Partridge, Jean-Luc Richier, Bill Sommerfield, Erik Scoredos, - Keith Sklower, JINMEI Tatuya, Dave Thaler, Matt Thomas, Harvey - Thompson, Dean D. Throop, Karen Tracey, Glenn Trewitt, Paul Vixie, - David Waitzman, Carl Williams, Kazu Yamamoto, Vlad Yasevich, Stig - Venaas, and Brian Zill. - - The getaddrinfo() and getnameinfo() functions are taken from an - earlier document by Keith Sklower. As noted in that document, - William Durst, Steven Wise, Michael Karels, and Eric Allman provided - many useful discussions on the subject of protocol-independent name- - to-address translation, and reviewed early versions of Keith - Sklower's original proposal. Eric Allman implemented the first - prototype of getaddrinfo(). 
The observation that specifying the pair - of name and service would suffice for connecting to a service - independent of protocol details was made by Marshall Rose in a - proposal to X/Open for a "Uniform Network Interface". - - Craig Metz, Jack McCann, Erik Nordmark, Tim Hartrick, and Mukesh - Kacker made many contributions to this document. Ramesh Govindan - made a number of contributions and co-authored an earlier version of - this memo. - - - - - - - -Gilligan, et al. Informational [Page 36] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -11. References - - [1] Deering, S. and R. Hinden, "Internet Protocol, Version 6 (IPv6) - Specification", RFC 2460, December 1998. - - [2] Hinden, R. and S. Deering, "IP Version 6 Addressing - Architecture", RFC 2373, July 1998. - - [3] IEEE Std. 1003.1-2001 Standard for Information Technology -- - Portable Operating System Interface (POSIX). Open Group - Technical Standard: Base Specifications, Issue 6, December 2001. - ISO/IEC 9945:2002. http://www.opengroup.org/austin - - [4] Stevens, W. and M. Thomas, "Advanced Sockets API for IPv6", RFC - 2292, February 1998. - - [5] Nordmark, E., "Stateless IP/ICMP Translation Algorithm (SIIT)", - RFC 2765, February 2000. - - [6] The Open Group Base Working Group - http://www.opengroup.org/platform/base.html - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Gilligan, et al. Informational [Page 37] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -12. Authors' Addresses - - Bob Gilligan - Intransa, Inc. - 2870 Zanker Rd. 
- San Jose, CA 95134 - - Phone: 408-678-8647 - EMail: gilligan@intransa.com - - - Susan Thomson - Cisco Systems - 499 Thornall Street, 8th floor - Edison, NJ 08837 - - Phone: 732-635-3086 - EMail: sethomso@cisco.com - - - Jim Bound - Hewlett-Packard Company - 110 Spitbrook Road ZKO3-3/W20 - Nashua, NH 03062 - - Phone: 603-884-0062 - EMail: Jim.Bound@hp.com - - - Jack McCann - Hewlett-Packard Company - 110 Spitbrook Road ZKO3-3/W20 - Nashua, NH 03062 - - Phone: 603-884-2608 - EMail: Jack.McCann@hp.com - - - - - - - - - - - - - - - -Gilligan, et al. Informational [Page 38] - -RFC 3493 Basic Socket Interface Extensions for IPv6 February 2003 - - -13. Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Gilligan, et al. Informational [Page 39] - diff --git a/kernel/picotcp/RFC/rfc3517.txt b/kernel/picotcp/RFC/rfc3517.txt deleted file mode 100644 index f5eeb61..0000000 --- a/kernel/picotcp/RFC/rfc3517.txt +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - -Network Working Group E. Blanton -Request for Comments: 3517 Purdue University -Category: Standards Track M. Allman - BBN/NASA GRC - K. Fall - Intel Research - L. Wang - University of Kentucky - April 2003 - - - A Conservative Selective Acknowledgment (SACK)-based - Loss Recovery Algorithm for TCP - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - This document presents a conservative loss recovery algorithm for TCP - that is based on the use of the selective acknowledgment (SACK) TCP - option. The algorithm presented in this document conforms to the - spirit of the current congestion control specification (RFC 2581), - but allows TCP senders to recover more effectively when multiple - segments are lost from a single flight of data. 
- -Terminology - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in BCP 14, RFC 2119 - [RFC2119]. - - - - - - - - - - -Blanton, et al. Standards Track [Page 1] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - -1 Introduction - - This document presents a conservative loss recovery algorithm for TCP - that is based on the use of the selective acknowledgment (SACK) TCP - option. While the TCP SACK [RFC2018] is being steadily deployed in - the Internet [All00], there is evidence that hosts are not using the - SACK information when making retransmission and congestion control - decisions [PF01]. The goal of this document is to outline one - straightforward method for TCP implementations to use SACK - information to increase performance. - - [RFC2581] allows advanced loss recovery algorithms to be used by TCP - [RFC793] provided that they follow the spirit of TCP's congestion - control algorithms [RFC2581, RFC2914]. [RFC2582] outlines one such - advanced recovery algorithm called NewReno. This document outlines a - loss recovery algorithm that uses the SACK [RFC2018] TCP option to - enhance TCP's loss recovery. The algorithm outlined in this - document, heavily based on the algorithm detailed in [FF96], is a - conservative replacement of the fast recovery algorithm [Jac90, - RFC2581]. The algorithm specified in this document is a - straightforward SACK-based loss recovery strategy that follows the - guidelines set in [RFC2581] and can safely be used in TCP - implementations. Alternate SACK-based loss recovery methods can be - used in TCP as implementers see fit (as long as the alternate - algorithms follow the guidelines provided in [RFC2581]). 
Please - note, however, that the SACK-based decisions in this document (such - as what segments are to be sent at what time) are largely decoupled - from the congestion control algorithms, and as such can be treated as - separate issues if so desired. - -2 Definitions - - The reader is expected to be familiar with the definitions given in - [RFC2581]. - - The reader is assumed to be familiar with selective acknowledgments - as specified in [RFC2018]. - - For the purposes of explaining the SACK-based loss recovery algorithm - we define four variables that a TCP sender stores: - - "HighACK" is the sequence number of the highest byte of data that - has been cumulatively ACKed at a given point. - - "HighData" is the highest sequence number transmitted at a given - point. - - - - - -Blanton, et al. Standards Track [Page 2] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - - "HighRxt" is the highest sequence number which has been - retransmitted during the current loss recovery phase. - - "Pipe" is a sender's estimate of the number of bytes outstanding - in the network. This is used during recovery for limiting the - sender's sending rate. The pipe variable allows TCP to use a - fundamentally different congestion control than specified in - [RFC2581]. The algorithm is often referred to as the "pipe - algorithm". - - For the purposes of this specification we define a "duplicate - acknowledgment" as a segment that arrives with no data and an - acknowledgment (ACK) number that is equal to the current value of - HighACK, as described in [RFC2581]. - - We define a variable "DupThresh" that holds the number of duplicate - acknowledgments required to trigger a retransmission. Per [RFC2581] - this threshold is defined to be 3 duplicate acknowledgments. - However, implementers should consult any updates to [RFC2581] to - determine the current value for DupThresh (or method for determining - its value). 
- - Finally, a range of sequence numbers [A,B] is said to "cover" - sequence number S if A <= S <= B. - -3 Keeping Track of SACK Information - - For a TCP sender to implement the algorithm defined in the next - section it must keep a data structure to store incoming selective - acknowledgment information on a per connection basis. Such a data - structure is commonly called the "scoreboard". The specifics of the - scoreboard data structure are out of scope for this document (as long - as the implementation can perform all functions required by this - specification). - - Note that this document refers to keeping account of (marking) - individual octets of data transferred across a TCP connection. A - real-world implementation of the scoreboard would likely prefer to - manage this data as sequence number ranges. The algorithms presented - here allow this, but require arbitrary sequence number ranges to be - marked as having been selectively acknowledged. - - - - - - - - - - -Blanton, et al. Standards Track [Page 3] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - -4 Processing and Acting Upon SACK Information - - For the purposes of the algorithm defined in this document the - scoreboard SHOULD implement the following functions: - - Update (): - - Given the information provided in an ACK, each octet that is - cumulatively ACKed or SACKed should be marked accordingly in the - scoreboard data structure, and the total number of octets SACKed - should be recorded. - - Note: SACK information is advisory and therefore SACKed data MUST - NOT be removed from TCP's retransmission buffer until the data is - cumulatively acknowledged [RFC2018]. - - IsLost (SeqNum): - - This routine returns whether the given sequence number is - considered to be lost. The routine returns true when either - DupThresh discontiguous SACKed sequences have arrived above - 'SeqNum' or (DupThresh * SMSS) bytes with sequence numbers greater - than 'SeqNum' have been SACKed. 
Otherwise, the routine returns - false. - - SetPipe (): - - This routine traverses the sequence space from HighACK to HighData - and MUST set the "pipe" variable to an estimate of the number of - octets that are currently in transit between the TCP sender and - the TCP receiver. After initializing pipe to zero the following - steps are taken for each octet 'S1' in the sequence space between - HighACK and HighData that has not been SACKed: - - (a) If IsLost (S1) returns false: - - Pipe is incremented by 1 octet. - - The effect of this condition is that pipe is incremented for - packets that have not been SACKed and have not been determined - to have been lost (i.e., those segments that are still assumed - to be in the network). - - (b) If S1 <= HighRxt: - - Pipe is incremented by 1 octet. - - - - - -Blanton, et al. Standards Track [Page 4] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - - The effect of this condition is that pipe is incremented for - the retransmission of the octet. - - Note that octets retransmitted without being considered lost are - counted twice by the above mechanism. - - NextSeg (): - - This routine uses the scoreboard data structure maintained by the - Update() function to determine what to transmit based on the SACK - information that has arrived from the data receiver (and hence - been marked in the scoreboard). NextSeg () MUST return the - sequence number range of the next segment that is to be - transmitted, per the following rules: - - (1) If there exists a smallest unSACKed sequence number 'S2' that - meets the following three criteria for determining loss, the - sequence range of one segment of up to SMSS octets starting - with S2 MUST be returned. - - (1.a) S2 is greater than HighRxt. - - (1.b) S2 is less than the highest octet covered by any - received SACK. - - (1.c) IsLost (S2) returns true. 
- - (2) If no sequence number 'S2' per rule (1) exists but there - exists available unsent data and the receiver's advertised - window allows, the sequence range of one segment of up to SMSS - octets of previously unsent data starting with sequence number - HighData+1 MUST be returned. - - (3) If the conditions for rules (1) and (2) fail, but there exists - an unSACKed sequence number 'S3' that meets the criteria for - detecting loss given in steps (1.a) and (1.b) above - (specifically excluding step (1.c)) then one segment of up to - SMSS octets starting with S3 MAY be returned. - - Note that rule (3) is a sort of retransmission "last resort". - It allows for retransmission of sequence numbers even when the - sender has less certainty a segment has been lost than as with - rule (1). Retransmitting segments via rule (3) will help - sustain TCP's ACK clock and therefore can potentially help - avoid retransmission timeouts. However, in sending these - segments the sender has two copies of the same data considered - to be in the network (and also in the Pipe estimate). When an - ACK or SACK arrives covering this retransmitted segment, the - - - -Blanton, et al. Standards Track [Page 5] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - - sender cannot be sure exactly how much data left the network - (one of the two transmissions of the packet or both - transmissions of the packet). Therefore the sender may - underestimate Pipe by considering both segments to have left - the network when it is possible that only one of the two has. - - We believe that the triggering of rule (3) will be rare and - that the implications are likely limited to corner cases - relative to the entire recovery algorithm. Therefore we leave - the decision of whether or not to use rule (3) to - implementors. - - (4) If the conditions for each of (1), (2), and (3) are not met, - then NextSeg () MUST indicate failure, and no segment is - returned. 
- - Note: The SACK-based loss recovery algorithm outlined in this - document requires more computational resources than previous TCP loss - recovery strategies. However, we believe the scoreboard data - structure can be implemented in a reasonably efficient manner (both - in terms of computation complexity and memory usage) in most TCP - implementations. - -5 Algorithm Details - - Upon the receipt of any ACK containing SACK information, the - scoreboard MUST be updated via the Update () routine. - - Upon the receipt of the first (DupThresh - 1) duplicate ACKs, the - scoreboard is to be updated as normal. Note: The first and second - duplicate ACKs can also be used to trigger the transmission of - previously unsent segments using the Limited Transmit algorithm - [RFC3042]. - - When a TCP sender receives the duplicate ACK corresponding to - DupThresh ACKs, the scoreboard MUST be updated with the new SACK - information (via Update ()). If no previous loss event has occurred - on the connection or the cumulative acknowledgment point is beyond - the last value of RecoveryPoint, a loss recovery phase SHOULD be - initiated, per the fast retransmit algorithm outlined in [RFC2581]. - The following steps MUST be taken: - - (1) RecoveryPoint = HighData - - When the TCP sender receives a cumulative ACK for this data octet - the loss recovery phase is terminated. - - - - - -Blanton, et al. Standards Track [Page 6] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - - (2) ssthresh = cwnd = (FlightSize / 2) - - The congestion window (cwnd) and slow start threshold (ssthresh) - are reduced to half of FlightSize per [RFC2581]. - - (3) Retransmit the first data segment presumed dropped -- the segment - starting with sequence number HighACK + 1. To prevent repeated - retransmission of the same data, set HighRxt to the highest - sequence number in the retransmitted segment. 
- - (4) Run SetPipe () - - Set a "pipe" variable to the number of outstanding octets - currently "in the pipe"; this is the data which has been sent by - the TCP sender but for which no cumulative or selective - acknowledgment has been received and the data has not been - determined to have been dropped in the network. It is assumed - that the data is still traversing the network path. - - (5) In order to take advantage of potential additional available - cwnd, proceed to step (C) below. - - Once a TCP is in the loss recovery phase the following procedure MUST - be used for each arriving ACK: - - (A) An incoming cumulative ACK for a sequence number greater than - RecoveryPoint signals the end of loss recovery and the loss - recovery phase MUST be terminated. Any information contained in - the scoreboard for sequence numbers greater than the new value of - HighACK SHOULD NOT be cleared when leaving the loss recovery - phase. - - (B) Upon receipt of an ACK that does not cover RecoveryPoint the - following actions MUST be taken: - - (B.1) Use Update () to record the new SACK information conveyed - by the incoming ACK. - - (B.2) Use SetPipe () to re-calculate the number of octets still - in the network. - - (C) If cwnd - pipe >= 1 SMSS the sender SHOULD transmit one or more - segments as follows: - - (C.1) The scoreboard MUST be queried via NextSeg () for the - sequence number range of the next segment to transmit (if any), - - - - - -Blanton, et al. Standards Track [Page 7] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - - and the given segment sent. If NextSeg () returns failure (no - data to send) return without sending anything (i.e., terminate - steps C.1 -- C.5). - - (C.2) If any of the data octets sent in (C.1) are below HighData, - HighRxt MUST be set to the highest sequence number of the - retransmitted segment. 
- - (C.3) If any of the data octets sent in (C.1) are above HighData, - HighData must be updated to reflect the transmission of - previously unsent data. - - (C.4) The estimate of the amount of data outstanding in the - network must be updated by incrementing pipe by the number of - octets transmitted in (C.1). - - (C.5) If cwnd - pipe >= 1 SMSS, return to (C.1) - -5.1 Retransmission Timeouts - - In order to avoid memory deadlocks, the TCP receiver is allowed to - discard data that has already been selectively acknowledged. As a - result, [RFC2018] suggests that a TCP sender SHOULD expunge the SACK - information gathered from a receiver upon a retransmission timeout - "since the timeout might indicate that the data receiver has - reneged." Additionally, a TCP sender MUST "ignore prior SACK - information in determining which data to retransmit." However, a - SACK TCP sender SHOULD still use all SACK information made available - during the slow start phase of loss recovery following an RTO. - - If an RTO occurs during loss recovery as specified in this document, - RecoveryPoint MUST be set to HighData. Further, the new value of - RecoveryPoint MUST be preserved and the loss recovery algorithm - outlined in this document MUST be terminated. In addition, a new - recovery phase (as described in section 5) MUST NOT be initiated - until HighACK is greater than or equal to the new value of - RecoveryPoint. - - As described in Sections 4 and 5, Update () SHOULD continue to be - used appropriately upon receipt of ACKs. This will allow the slow - start recovery period to benefit from all available information - provided by the receiver, despite the fact that SACK information was - expunged due to the RTO. - - If there are segments missing from the receiver's buffer following - processing of the retransmitted segment, the corresponding ACK will - contain SACK information. 
In this case, a TCP sender SHOULD use this - SACK information when determining what data should be sent in each - - - -Blanton, et al. Standards Track [Page 8] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - - segment of the slow start. The exact algorithm for this selection is - not specified in this document (specifically NextSeg () is - inappropriate during slow start after an RTO). A relatively - straightforward approach to "filling in" the sequence space reported - as missing should be a reasonable approach. - -6 Managing the RTO Timer - - The standard TCP RTO estimator is defined in [RFC2988]. Due to the - fact that the SACK algorithm in this document can have an impact on - the behavior of the estimator, implementers may wish to consider how - the timer is managed. [RFC2988] calls for the RTO timer to be - re-armed each time an ACK arrives that advances the cumulative ACK - point. Because the algorithm presented in this document can keep the - ACK clock going through a fairly significant loss event, - (comparatively longer than the algorithm described in [RFC2581]), on - some networks the loss event could last longer than the RTO. In this - case the RTO timer would expire prematurely and a segment that need - not be retransmitted would be resent. - - Therefore we give implementers the latitude to use the standard - [RFC2988] style RTO management or, optionally, a more careful variant - that re-arms the RTO timer on each retransmission that is sent during - recovery MAY be used. This provides a more conservative timer than - specified in [RFC2988], and so may not always be an attractive - alternative. However, in some cases it may prevent needless - retransmissions, go-back-N transmission and further reduction of the - congestion window. 
- -7 Research - - The algorithm specified in this document is analyzed in [FF96], which - shows that the above algorithm is effective in reducing transfer time - over standard TCP Reno [RFC2581] when multiple segments are dropped - from a window of data (especially as the number of drops increases). - [AHKO97] shows that the algorithm defined in this document can - greatly improve throughput in connections traversing satellite - channels. - -8 Security Considerations - - The algorithm presented in this paper shares security considerations - with [RFC2581]. A key difference is that an algorithm based on SACKs - is more robust against attackers forging duplicate ACKs to force the - TCP sender to reduce cwnd. With SACKs, TCP senders have an - additional check on whether or not a particular ACK is legitimate. - While not fool-proof, SACK does provide some amount of protection in - this area. - - - -Blanton, et al. Standards Track [Page 9] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - -Acknowledgments - - The authors wish to thank Sally Floyd for encouraging this document - and commenting on early drafts. The algorithm described in this - document is loosely based on an algorithm outlined by Kevin Fall and - Sally Floyd in [FF96], although the authors of this document assume - responsibility for any mistakes in the above text. Murali Bashyam, - Ken Calvert, Tom Henderson, Reiner Ludwig, Jamshid Mahdavi, Matt - Mathis, Shawn Ostermann, Vern Paxson and Venkat Venkatsubra provided - valuable feedback on earlier versions of this document. We thank - Matt Mathis and Jamshid Mahdavi for implementing the scoreboard in ns - and hence guiding our thinking in keeping track of SACK state. - - The first author would like to thank Ohio University and the Ohio - University Internetworking Research Group for supporting the bulk of - his work on this project. 
- -Normative References - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. - - [RFC2026] Bradner, S., "The Internet Standards Process -- Revision - 3", BCP 9, RFC 2026, October 1996. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2581] Allman, M., Paxson, V. and R. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - -Informative References - - [AHKO97] Mark Allman, Chris Hayes, Hans Kruse, Shawn Ostermann. TCP - Performance Over Satellite Links. Proceedings of the Fifth - International Conference on Telecommunications Systems, - Nashville, TN, March, 1997. - - [All00] Mark Allman. A Web Server's View of the Transport Layer. - ACM Computer Communication Review, 30(5), October 2000. - - [FF96] Kevin Fall and Sally Floyd. Simulation-based Comparisons - of Tahoe, Reno and SACK TCP. Computer Communication - Review, July 1996. - - - - -Blanton, et al. Standards Track [Page 10] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - - [Jac90] Van Jacobson. Modified TCP Congestion Avoidance Algorithm. - Technical Report, LBL, April 1990. - - [PF01] Jitendra Padhye, Sally Floyd. Identifying the TCP Behavior - of Web Servers, ACM SIGCOMM, August 2001. - - [RFC2582] Floyd, S. and T. Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [RFC2914] Floyd, S., "Congestion Control Principles", BCP 41, RFC - 2914, September 2000. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [RFC3042] Allman, M., Balakrishnan, H, and S. Floyd, "Enhancing TCP's - Loss Recovery Using Limited Transmit", RFC 3042, January - 2001. 
- -Intellectual Property Rights Notice - - The IETF takes no position regarding the validity or scope of any - intellectual property or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; neither does it represent that it - has made any effort to identify any such rights. Information on the - IETF's procedures with respect to rights in standards-track and - standards-related documentation can be found in BCP-11. Copies of - claims of rights made available for publication and any assurances of - licenses to be made available, or the result of an attempt made to - obtain a general license or permission for the use of such - proprietary rights by implementors or users of this specification can - be obtained from the IETF Secretariat. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights which may cover technology that may be required to practice - this standard. Please address the information to the IETF Executive - Director. - - - - - - - - - - - -Blanton, et al. Standards Track [Page 11] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - -Authors' Addresses - - Ethan Blanton - Purdue University Computer Sciences - 1398 Computer Science Building - West Lafayette, IN 47907 - - EMail: eblanton@cs.purdue.edu - - - Mark Allman - BBN Technologies/NASA Glenn Research Center - Lewis Field - 21000 Brookpark Rd. 
MS 54-5 - Cleveland, OH 44135 - - Phone: 216-433-6586 - Fax: 216-433-8705 - EMail: mallman@bbn.com - http://roland.grc.nasa.gov/~mallman - - - Kevin Fall - Intel Research - 2150 Shattuck Ave., PH Suite - Berkeley, CA 94704 - - EMail: kfall@intel-research.net - - - Lili Wang - Laboratory for Advanced Networking - 210 Hardymon Building - University of Kentucky - Lexington, KY 40506-0495 - - EMail: lwang0@uky.edu - - - - - - - - - - - - - - -Blanton, et al. Standards Track [Page 12] - -RFC 3517 SACK-based Loss Recovery for TCP April 2003 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. 
- - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Blanton, et al. Standards Track [Page 13] - diff --git a/kernel/picotcp/RFC/rfc3522.txt b/kernel/picotcp/RFC/rfc3522.txt deleted file mode 100644 index 4f48546..0000000 --- a/kernel/picotcp/RFC/rfc3522.txt +++ /dev/null @@ -1,787 +0,0 @@ - - - - - - -Network Working Group R. Ludwig -Request for Comments: 3522 M. Meyer -Category: Experimental Ericsson Research - April 2003 - - - The Eifel Detection Algorithm for TCP - -Status of this Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - The Eifel detection algorithm allows a TCP sender to detect a - posteriori whether it has entered loss recovery unnecessarily. It - requires that the TCP Timestamps option defined in RFC 1323 be - enabled for a connection. The Eifel detection algorithm makes use of - the fact that the TCP Timestamps option eliminates the retransmission - ambiguity in TCP. Based on the timestamp of the first acceptable ACK - that arrives during loss recovery, it decides whether loss recovery - was entered unnecessarily. The Eifel detection algorithm provides a - basis for future TCP enhancements. 
This includes response algorithms - to back out of loss recovery by restoring a TCP sender's congestion - control state. - -Terminology - - The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, - SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this - document, are to be interpreted as described in [RFC2119]. - - We refer to the first-time transmission of an octet as the 'original - transmit'. A subsequent transmission of the same octet is referred - to as a 'retransmit'. In most cases, this terminology can likewise - be applied to data segments as opposed to octets. However, with - repacketization, a segment can contain both first-time transmissions - and retransmissions of octets. In that case, this terminology is - only consistent when applied to octets. For the Eifel detection - algorithm, this makes no difference as it also operates correctly - when repacketization occurs. - - - -Ludwig & Meyer Experimental [Page 1] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - We use the term 'acceptable ACK' as defined in [RFC793]. That is an - ACK that acknowledges previously unacknowledged data. We use the - term 'duplicate ACK', and the variable 'dupacks' as defined in - [WS95]. The variable 'dupacks' is a counter of duplicate ACKs that - have already been received by a TCP sender before the fast retransmit - is sent. We use the variable 'DupThresh' to refer to the so-called - duplicate acknowledgement threshold, i.e., the number of duplicate - ACKs that need to arrive at a TCP sender to trigger a fast - retransmit. Currently, DupThresh is specified as a fixed value of - three [RFC2581]. Future TCPs might implement an adaptive DupThresh. - -1. Introduction - - The retransmission ambiguity problem [Zh86], [KP87] is a TCP sender's - inability to distinguish whether the first acceptable ACK that - arrives after a retransmit was sent in response to the original - transmit or the retransmit. 
This problem occurs after a timeout- - based retransmit and after a fast retransmit. The Eifel detection - algorithm uses the TCP Timestamps option defined in [RFC1323] to - eliminate the retransmission ambiguity. It thereby allows a TCP - sender to detect a posteriori whether it has entered loss recovery - unnecessarily. - - This added capability of a TCP sender is useful in environments where - TCP's loss recovery and congestion control algorithms may often get - falsely triggered. This can be caused by packet reordering, packet - duplication, or a sudden delay increase in the data or the ACK path - that results in a spurious timeout. For example, such sudden delay - increases can often occur in wide-area wireless access networks due - to handovers, resource preemption due to higher priority traffic - (e.g., voice), or because the mobile transmitter traverses through a - radio coverage hole (e.g., see [Gu01]). In such wireless networks, - the often unnecessary go-back-N retransmits that typically occur - after a spurious timeout create a serious problem. They decrease - end-to-end throughput, are useless load upon the network, and waste - transmission (battery) power. Note that across such networks the use - of timestamps is recommended anyway [RFC3481]. - - Based on the Eifel detection algorithm, a TCP sender may then choose - to implement dedicated response algorithms. One goal of such a - response algorithm would be to alleviate the consequences of a - falsely triggered loss recovery. This may include restoring the TCP - sender's congestion control state, and avoiding the mentioned - unnecessary go-back-N retransmits. Another goal would be to adapt - protocol parameters such as the duplicate acknowledgement threshold - [RFC2581], and the RTT estimators [RFC2988]. This is to reduce the - risk of falsely triggering TCP's loss recovery again as the - connection progresses. 
However, such response algorithms are outside - - - -Ludwig & Meyer Experimental [Page 2] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - the scope of this document. Note: The original proposal, the "Eifel - algorithm" [LK00], comprises both a detection and a response - algorithm. This document only defines the detection part. The - response part is defined in [LG03]. - - A key feature of the Eifel detection algorithm is that it already - detects, upon the first acceptable ACK that arrives during loss - recovery, whether a fast retransmit or a timeout was spurious. This - is crucial to be able to avoid the mentioned go-back-N retransmits. - Another feature is that the Eifel detection algorithm is fairly - robust against the loss of ACKs. - - Also the DSACK option [RFC2883] can be used to detect a posteriori - whether a TCP sender has entered loss recovery unnecessarily [BA02]. - However, the first ACK carrying a DSACK option usually arrives at a - TCP sender only after loss recovery has already terminated. Thus, - the DSACK option cannot be used to eliminate the retransmission - ambiguity. Consequently, it cannot be used to avoid the mentioned - unnecessary go-back-N retransmits. Moreover, a DSACK-based detection - algorithm is less robust against ACK losses. A recent proposal based - on neither the TCP timestamps nor the DSACK option does not have the - limitation of DSACK-based schemes, but only addresses the case of - spurious timeouts [SK03]. - -2. Events that Falsely Trigger TCP Loss Recovery - - The following events may falsely trigger a TCP sender's loss recovery - and congestion control algorithms. This causes a so-called spurious - retransmit, and an unnecessary reduction of the TCP sender's - congestion window and slow start threshold [RFC2581]. - - - Spurious timeout - - - Packet reordering - - - Packet duplication - - A spurious timeout is a timeout that would not have occurred had the - sender "waited longer". 
This may be caused by increased delay that - suddenly occurs in the data and/or the ACK path. That in turn might - cause an acceptable ACK to arrive too late, i.e., only after a TCP - sender's retransmission timer has expired. For the purpose of - specifying the algorithm in Section 3, we define this case as SPUR_TO - (equal 1). - - Note: There is another case where a timeout would not have - occurred had the sender "waited longer": the retransmission timer - expires, and afterwards the TCP sender receives the duplicate ACK - - - -Ludwig & Meyer Experimental [Page 3] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - that would have triggered a fast retransmit of the oldest - outstanding segment. We call this a 'fast timeout', since in - competition with the fast retransmit algorithm the timeout was - faster. However, a fast timeout is not spurious since apparently - a segment was in fact lost, i.e., loss recovery was initiated - rightfully. In this document, we do not consider fast timeouts. - - Packet reordering in the network may occur because IP [RFC791] does - not guarantee in-order delivery of packets. Additionally, a TCP - receiver generates a duplicate ACK for each segment that arrives - out-of-order. This results in a spurious fast retransmit if three or - more data segments arrive out-of-order at a TCP receiver, and at - least three of the resulting duplicate ACKs arrive at the TCP sender. - This assumes that the duplicate acknowledgement threshold is set to - three as defined in [RFC2581]. - - Packet duplication may occur because a receiving IP does not (cannot) - remove packets that have been duplicated in the network. A TCP - receiver in turn also generates a duplicate ACK for each duplicate - segment. As with packet reordering, this results in a spurious fast - retransmit if duplication of data segments or ACKs results in three - or more duplicate ACKs to arrive at a TCP sender. 
Again, this - assumes that the duplicate acknowledgement threshold is set to three. - - The negative impact on TCP performance caused by packet reordering - and packet duplication is commonly the same: a single spurious - retransmit (the fast retransmit), and the unnecessary halving of a - TCP sender's congestion window as a result of the subsequent fast - recovery phase [RFC2581]. - - The negative impact on TCP performance caused by a spurious timeout - is more severe. First, the timeout event itself causes a single - spurious retransmit, and unnecessarily forces a TCP sender into slow - start [RFC2581]. Then, as the connection progresses, a chain - reaction gets triggered that further decreases TCP's performance. - Since the timeout was spurious, at least some ACKs for original - transmits typically arrive at the TCP sender before the ACK for the - retransmit arrives. (This is unless severe packet reordering - coincided with the spurious timeout in such a way that the ACK for - the retransmit is the first acceptable ACK to arrive at the TCP - sender.) Those ACKs for original transmits then trigger an implicit - go-back-N loss recovery at the TCP sender [LK00]. Assuming that none - of the outstanding segments and none of the corresponding ACKs were - lost, all outstanding segments get retransmitted unnecessarily. In - fact, during this phase, a TCP sender violates the packet - conservation principle [Jac88]. This is because the unnecessary go- - back-N retransmits are sent during slow start. Thus, for each packet - that leaves the network and that belongs to the first half of the - - - -Ludwig & Meyer Experimental [Page 4] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - original flight, two useless retransmits are sent into the network. - In addition, some TCPs suffer from a spurious fast retransmit. 
This - is because the unnecessary go-back-N retransmits arrive as duplicates - at the TCP receiver, which in turn triggers a series of duplicate - ACKs. Note that this last spurious fast retransmit could be avoided - with the careful variant of 'bugfix' [RFC2582]. - - More detailed explanations, including TCP trace plots that visualize - the effects of spurious timeouts and packet reordering, can be found - in the original proposal [LK00]. - -3. The Eifel Detection Algorithm - -3.1 The Idea - - The goal of the Eifel detection algorithm is to allow a TCP sender to - detect a posteriori whether it has entered loss recovery - unnecessarily. Furthermore, the TCP sender should be able to make - this decision upon the first acceptable ACK that arrives after the - timeout-based retransmit or the fast retransmit has been sent. This - in turn requires extra information in ACKs by which the TCP sender - can unambiguously distinguish whether that first acceptable ACK was - sent in response to the original transmit or the retransmit. Such - extra information is provided by the TCP Timestamps option [RFC1323]. - Generally speaking, timestamps are monotonously increasing "serial - numbers" added into every segment that are then echoed within the - corresponding ACKs. This is exploited by the Eifel detection - algorithm in the following way. - - Given that timestamps are enabled for a connection, a TCP sender - always stores the timestamp of the retransmit sent in the beginning - of loss recovery, i.e., the timestamp of the timeout-based retransmit - or the fast retransmit. If the timestamp of the first acceptable - ACK, that arrives after the retransmit was sent, is smaller then the - stored timestamp of that retransmit, then that ACK must have been - sent in response to an original transmit. Hence, the TCP sender must - have entered loss recovery unnecessarily. 
- - The fact that the Eifel detection algorithm decides upon the first - acceptable ACK is crucial to allow future response algorithms to - avoid the unnecessary go-back-N retransmits that typically occur - after a spurious timeout. Also, if loss recovery was entered - unnecessarily, a window worth of ACKs are outstanding that all carry - a timestamp that is smaller than the stored timestamp of the - retransmit. The arrival of any one of those ACKs is sufficient for - the Eifel detection algorithm to work. Hence, the solution is fairly - - - - - -Ludwig & Meyer Experimental [Page 5] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - robust against ACK losses. Even the ACK sent in response to the - retransmit, i.e., the one that carries the stored timestamp, may get - lost without compromising the algorithm. - -3.2 The Algorithm - - Given that the TCP Timestamps option [RFC1323] is enabled for a - connection, a TCP sender MAY use the Eifel detection algorithm as - defined in this subsection. - - If the Eifel detection algorithm is used, the following steps MUST be - taken by a TCP sender, but only upon initiation of loss recovery, - i.e., when either the timeout-based retransmit or the fast retransmit - is sent. The Eifel detection algorithm MUST NOT be reinitiated after - loss recovery has already started. In particular, it must not be - reinitiated upon subsequent timeouts for the same segment, and not - upon retransmitting segments other than the oldest outstanding - segment, e.g., during selective loss recovery. - - (1) Set a "SpuriousRecovery" variable to FALSE (equal 0). - - (2) Set a "RetransmitTS" variable to the value of the - Timestamp Value field of the Timestamps option included in - the retransmit sent when loss recovery is initiated. A - TCP sender must ensure that RetransmitTS does not get - overwritten as loss recovery progresses, e.g., in case of - a second timeout and subsequent second retransmit of the - same octet. 
- - (3) Wait for the arrival of an acceptable ACK. When an - acceptable ACK has arrived, proceed to step (4). - - (4) If the value of the Timestamp Echo Reply field of the - acceptable ACK's Timestamps option is smaller than the - value of RetransmitTS, then proceed to step (5), - - else proceed to step (DONE). - - (5) If the acceptable ACK carries a DSACK option [RFC2883], - then proceed to step (DONE), - - else if during the lifetime of the TCP connection the TCP - sender has previously received an ACK with a DSACK option, - or the acceptable ACK does not acknowledge all outstanding - data, then proceed to step (6), - - else proceed to step (DONE). - - - - -Ludwig & Meyer Experimental [Page 6] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - (6) If the loss recovery has been initiated with a timeout- - based retransmit, then set - SpuriousRecovery <- SPUR_TO (equal 1), - - else set - SpuriousRecovery <- dupacks+1 - - (RESP) Do nothing (Placeholder for a response algorithm). - - (DONE) No further processing. - - The comparison "smaller than" in step (4) is conservative. In - theory, if the timestamp clock is slow or the network is fast, - RetransmitTS could at most be equal to the timestamp echoed by an ACK - sent in response to an original transmit. In that case, it is - assumed that the loss recovery was not falsely triggered. - - Note that the condition "if during the lifetime of the TCP connection - the TCP sender has previously received an ACK with a DSACK option" in - step (5) would be true in case the TCP receiver would signal in the - SYN that it is DSACK-enabled. But unfortunately, this is not - required by [RFC2883]. - -3.3 A Corner Case: "Timeout due to loss of all ACKs" (step 5) - - Even though the oldest outstanding segment arrived at a TCP receiver, - the TCP sender is forced into a timeout if all ACKs are lost. - Although the resulting retransmit is unnecessary, such a timeout is - unavoidable. 
It should therefore not be considered spurious. - Moreover, the subsequent reduction of the congestion window is an - appropriate response to the potentially heavy congestion in the ACK - path. The original proposal [LK00] does not handle this case well. - It effectively disables this implicit form of congestion control for - the ACK path, which otherwise does not exist in TCP. This problem is - fixed by step (5) of the Eifel detection algorithm as explained in - the remainder of this section. - - If all ACKs are lost while the oldest outstanding segment arrived at - the TCP receiver, the retransmit arrives as a duplicate. In response - to duplicates, RFC 1323 mandates that the timestamp of the last - segment that arrived in-sequence should be echoed. That timestamp is - carried by the first acceptable ACK that arrives at the TCP sender - after loss recovery was entered, and is commonly smaller than the - timestamp carried by the retransmit. Consequently, the Eifel - detection algorithm misinterprets such a timeout as being spurious, - unless the TCP receiver is DSACK-enabled [RFC2883]. In that case, - the acceptable ACK carries a DSACK option, and the Eifel algorithm is - terminated through the first part of step (5). - - - -Ludwig & Meyer Experimental [Page 7] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - Note: Not all TCP implementations strictly follow RFC 1323. In - response to a duplicate data segment, some TCP receivers echo the - timestamp of the duplicate. With such TCP receivers, the corner - case discussed in this section does not apply. The timestamp - carried by the retransmit would be echoed in the first acceptable - ACK, and the Eifel detection algorithm would be terminated through - step (4). Thus, even though all ACKs were lost and independent of - whether the DSACK option was enabled for a connection, the Eifel - detection algorithm would have no effect. 
- - With TCP receivers that are not DSACK-enabled, disabling the - mentioned implicit congestion control for the ACK path is not a - problem as long as data segments are lost, in addition to the entire - flight of ACKs. The Eifel detection algorithm misinterprets such a - timeout as being spurious, and the Eifel response algorithm would - reverse the congestion control state. Still, the TCP sender would - respond to congestion (in the data path) as soon as it finds out - about the first loss in the outstanding flight. I.e., the TCP sender - would still halve its congestion window for that flight of packets. - If no data segment is lost while the entire flight of ACKs is lost, - the first acceptable ACK that arrives at the TCP sender after loss - recovery was entered acknowledges all outstanding data. In that - case, the Eifel algorithm is terminated through the second part of - step (5). - - Note that there is little concern about violating the packet - conservation principle when entering slow start after an unavoidable - timeout caused by the loss of an entire flight of ACKs, i.e., when - the Eifel detection algorithm was terminated through step (5). This - is because in that case, the acceptable ACK corresponds to the - retransmit, which is a strong indication that the pipe has drained - entirely, i.e., that no more original transmits are in the network. - This is different with spurious timeouts as discussed in Section 2. - -3.4 Protecting Against Misbehaving TCP Receivers (the Safe Variant) - - A TCP receiver can easily make a genuine retransmit appear to the TCP - sender as a spurious retransmit by forging echoed timestamps. This - may pose a security concern. - - Fortunately, there is a way to modify the Eifel detection algorithm - in a way that makes it robust against lying TCP receivers. The idea - is to use timestamps as a segment's "secret" that a TCP receiver only - gets to know if it receives the segment. 
Conversely, a TCP receiver - will not know the timestamp of a segment that was lost. Hence, to - "prove" that it received the original transmit of a segment that a - TCP sender retransmitted, the TCP receiver would need to return the - timestamp of that original transmit. The Eifel detection algorithm - - - -Ludwig & Meyer Experimental [Page 8] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - could then be modified to only decide that loss recovery has been - unnecessarily entered if the first acceptable ACK echoes the - timestamp of the original transmit. - - Hence, implementers may choose to implement the algorithm with the - following modifications. - - Step (2) is replaced with step (2'): - - (2') Set a "RetransmitTS" variable to the value of the - Timestamp Value field of the Timestamps option that was - included in the original transmit corresponding to the - retransmit. Note: This step requires that the TCP sender - stores the timestamps of all outstanding original - transmits. - - Step (4) is replaced with step (4'): - - (4') If the value of the Timestamp Echo Reply field of the - acceptable ACK's Timestamps option is equal to the value - of the variable RetransmitTS, then proceed to step (5), - - else proceed to step (DONE). - - These modifications come at a cost: the modified algorithm is fairly - sensitive against ACK losses since it relies on the arrival of the - acceptable ACK that corresponds to the original transmit. - - Note: The first acceptable ACK that arrives after loss recovery - has been unnecessarily entered should echo the timestamp of the - original transmit. This assumes that the ACK corresponding to the - original transmit was not lost, that that ACK was not reordered in - the network, and that the TCP receiver does not forge timestamps - but complies with RFC 1323. 
In case of a spurious fast - retransmit, this is implied by the rules for generating ACKs for - data segments that fill in all or part of a gap in the sequence - space (see section 4.2 of [RFC2581]) and by the rules for echoing - timestamps in that case (see rule (C) in section 3.4 of - [RFC1323]). In case of a spurious timeout, it is likely that the - delay that has caused the spurious timeout has also caused the TCP - receiver's delayed ACK timer [RFC1122] to expire before the - original transmit arrives. Also, in this case the rules for - generating ACKs and the rules for echoing timestamps (see rule (A) - in section 3.4 of [RFC1323]) ensure that the original transmit's - timestamp is echoed. - - - - - - -Ludwig & Meyer Experimental [Page 9] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - A remaining problem is that a TCP receiver might guess a lost - segment's timestamp from observing the timestamps of recently - received segments. For example, if segment N was lost while segment - N-1 and N+1 have arrived, a TCP receiver could guess the timestamp - that lies in the middle of the timestamps of segments N-1 and N+1, - and echo it in the ACK sent in response to the retransmit of segment - N. Especially if the TCP sender implements timestamps with a coarse - granularity, a misbehaving TCP receiver is likely to be successful - with such an approach. In fact, with the 500 ms granularity - suggested in [WS95], it even becomes quite likely that the timestamps - of segments N-1, N, N+1 are identical. - - One way to reduce this risk is to implement fine grained timestamps. - Note that the granularity of the timestamps is independent of the - granularity of the retransmission timer. For example, some TCP - implementations run a timestamp clock that ticks every millisecond. - This should make it more difficult for a TCP receiver to guess the - timestamp of a lost segment. 
Alternatively, it might be possible to - combine the timestamps with a nonce, as is done for the Explicit - Congestion Notification (ECN) [RFC3168]. One would need to take - care, though, that the timestamps of consecutive segments remain - monotonously increasing and do not interfere with the RTT timing - defined in [RFC1323]. - -4. IPR Considerations - - The IETF has been notified of intellectual property rights claimed in - regard to some or all of the specification contained in this - document. For more information consult the online list of claimed - rights at http://www.ietf.org/ipr. - - The IETF takes no position regarding the validity or scope of any - intellectual property or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; neither does it represent that it - has made any effort to identify any such rights. Information on the - IETF's procedures with respect to rights in standards-track and - standards-related documentation can be found in BCP-11. Copies of - claims of rights made available for publication and any assurances of - licenses to be made available, or the result of an attempt made to - obtain a general license or permission for the use of such - proprietary rights by implementors or users of this specification can - be obtained from the IETF Secretariat. - - - - - - - -Ludwig & Meyer Experimental [Page 10] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - -5. Security Considerations - - There do not seem to be any security considerations associated with - the Eifel detection algorithm. This is because the Eifel detection - algorithm does not alter the existing protocol state at a TCP sender. - Note that the Eifel detection algorithm only requires changes to the - implementation of a TCP sender. 
- - Moreover, a variant of the Eifel detection algorithm has been - proposed in Section 3.4 that makes it robust against lying TCP - receivers. This may become relevant when the Eifel detection - algorithm is combined with a response algorithm such as the Eifel - response algorithm [LG03]. - -Acknowledgments - - Many thanks to Keith Sklower, Randy Katz, Stephan Baucke, Sally - Floyd, Vern Paxson, Mark Allman, Ethan Blanton, Andrei Gurtov, Pasi - Sarolahti, and Alexey Kuznetsov for useful discussions that - contributed to this work. - -Normative References - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2883] Floyd, S., Mahdavi, J., Mathis, M., Podolsky, M. and A. - Romanow, "An Extension to the Selective Acknowledgement - (SACK) Option for TCP", RFC 2883, July 2000. - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for - High Performance", RFC 1323, May 1992. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October 1996. - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - - - - - - - - - -Ludwig & Meyer Experimental [Page 11] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - -Informative References - - [BA02] Blanton, E. and M. Allman, "Using TCP DSACKs and SCTP - Duplicate TSNs to Detect Spurious Retransmissions", Work in - Progress. - - [RFC1122] Braden, R., "Requirements for Internet Hosts - - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC2582] Floyd, S. and T. Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [Gu01] Gurtov, A., "Effect of Delays on TCP Performance", In - Proceedings of IFIP Personal Wireless Communications, - August 2001. 
- - [RFC3481] Inamura, H., Montenegro, G., Ludwig, R., Gurtov, A. and F. - Khafizov, "TCP over Second (2.5G) and Third (3G) Generation - Wireless Networks", RFC 3481, February 2003. - - [Jac88] Jacobson, V., "Congestion Avoidance and Control", In - Proceedings of ACM SIGCOMM 88. - - [KP87] Karn, P. and C. Partridge, "Improving Round-Trip Time - Estimates in Reliable Transport Protocols", In Proceedings - of ACM SIGCOMM 87. - - [LK00] Ludwig, R. and R. H. Katz, "The Eifel Algorithm: Making TCP - Robust Against Spurious Retransmissions", ACM Computer - Communication Review, Vol. 30, No. 1, January 2000. - - [LG03] Ludwig, R. and A. Gurtov, "The Eifel Response Algorithm for - TCP", Work in Progress. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [RFC791] Postel, J., "Internet Protocol", STD 5, RFC 791, September - 1981. - - [RFC3168] Ramakrishnan, K., Floyd, S. and D. Black, "The Addition of - Explicit Congestion Notification (ECN) to IP", RFC 3168, - September 2001. - - [SK03] Sarolahti, P. and M. Kojo, "F-RTO: A TCP RTO Recovery - Algorithm for Avoiding Unnecessary Retransmissions", Work - in Progress. - - - - -Ludwig & Meyer Experimental [Page 12] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - - [WS95] Wright, G. R. and W. R. Stevens, "TCP/IP Illustrated, - Volume 2 (The Implementation)", Addison Wesley, January - 1995. - - [Zh86] Zhang, L., "Why TCP Timers Don't Work Well", In Proceedings - of ACM SIGCOMM 86. 
- -Authors' Addresses - - Reiner Ludwig - Ericsson Research - Ericsson Allee 1 - 52134 Herzogenrath, Germany - - EMail: Reiner.Ludwig@eed.ericsson.se - - - Michael Meyer - Ericsson Research - Ericsson Allee 1 - 52134 Herzogenrath, Germany - - EMail: Michael.Meyer@eed.ericsson.se - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Ludwig & Meyer Experimental [Page 13] - -RFC 3522 The Eifel Detection Algorithm for TCP April 2003 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Ludwig & Meyer Experimental [Page 14] - diff --git a/kernel/picotcp/RFC/rfc3540.txt b/kernel/picotcp/RFC/rfc3540.txt deleted file mode 100644 index f781c50..0000000 --- a/kernel/picotcp/RFC/rfc3540.txt +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - -Network Working Group N. Spring -Request for Comments: 3540 D. Wetherall -Category: Experimental D. Ely - University of Washington - June 2003 - - - Robust Explicit Congestion Notification (ECN) - Signaling with Nonces - -Status of this Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - This note describes the Explicit Congestion Notification (ECN)-nonce, - an optional addition to ECN that protects against accidental or - malicious concealment of marked packets from the TCP sender. It - improves the robustness of congestion control by preventing receivers - from exploiting ECN to gain an unfair share of network bandwidth. - The ECN-nonce uses the two ECN-Capable Transport (ECT)codepoints in - the ECN field of the IP header, and requires a flag in the TCP - header. It is computationally efficient for both routers and hosts. - -1. Introduction - - Statement of Intent - - This specification describes an optional addition to Explicit - Congestion Notification [RFC3168] improving its robustness against - malicious or accidental concealment of marked packets. It has not - been deployed widely. One goal of publication as an Experimental - RFC is to be prudent, and encourage use and deployment prior to - publication in the standards track. 
Another consideration is to - give time for firewall developers to recognize and accept the - pattern presented by the nonce. It is the intent of the Transport - Area Working Group to re-submit this specification as an IETF - Proposed Standard in the future after more experience has been - gained. - - - - -Spring, et. al. Experimental [Page 1] - -RFC 3540 Robust ECN Signaling June 2003 - - - The correct operation of ECN requires the cooperation of the receiver - to return Congestion Experienced signals to the sender, but the - protocol lacks a mechanism to enforce this cooperation. This raises - the possibility that an unscrupulous or poorly implemented receiver - could always clear ECN-Echo and simply not return congestion signals - to the sender. This would give the receiver a performance advantage - at the expense of competing connections that behave properly. More - generally, any device along the path (NAT box, firewall, QOS - bandwidth shapers, and so forth) could remove congestion marks with - impunity. - - The above behaviors may or may not constitute a threat to the - operation of congestion control in the Internet. However, given the - central role of congestion control, it is prudent to design the ECN - signaling loop to be robust against as many threats as possible. In - this way, ECN can provide a clear incentive for improvement over the - prior state-of-the-art without potential incentives for abuse. The - ECN-nonce is a simple, efficient mechanism to eliminate the potential - abuse of ECN. - - The ECN-nonce enables the sender to verify the correct behavior of - the ECN receiver and that there is no other interference that - conceals marked (or dropped) packets in the signaling path. The ECN- - nonce protects against both implementation errors and deliberate - abuse. The ECN-nonce: - - - catches a misbehaving receiver with a high probability, and never - implicates an innocent receiver. 
- - - does not change other aspects of ECN, nor does it reduce the - benefits of ECN for behaving receivers. - - - is cheap in both per-packet overhead (one TCP header flag) and - processing requirements. - - - is simple and, to the best of our knowledge, not prone to other - attacks. - - We also note that use of the ECN-nonce has two additional benefits, - even when only drop-tail routers are used. First, packet drops - cannot be concealed from the sender. Second, it prevents optimistic - acknowledgements [Savage], in which TCP segments are acknowledged - before they have been received. These benefits also serve to - increase the robustness of congestion control from attacks. We do - not elaborate on these benefits in this document. - - The rest of this document describes the ECN-nonce. We present an - overview followed by detailed behavior at senders and receivers. - - - -Spring, et. al. Experimental [Page 2] - -RFC 3540 Robust ECN Signaling June 2003 - - - The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, - SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this - document, are to be interpreted as described in [RFC2119]. - -2. Overview - - The ECN-nonce builds on the existing ECN-Echo and Congestion Window - Reduced (CWR) signaling mechanism. Familiarity with ECN [ECN] is - assumed. For simplicity, we describe the ECN-nonce in one direction - only, though it is run in both directions in parallel. - - The ECN protocol for TCP remains unchanged, except for the definition - of a new field in the TCP header. As in [RFC3168], ECT(0) or ECT(1) - (ECN-Capable Transport) is set in the ECN field of the IP header on - outgoing packets. Congested routers change this field to CE - (Congestion Experienced). When TCP receivers notice CE, the ECE - (ECN-Echo) flag is set in subsequent acknowledgements until receiving - a CWR flag. The CWR flag is sent on new data whenever the sender - reacts to congestion. 
- - The ECN-nonce adds to this protocol, and enables the receiver to - demonstrate to the sender that segments being acknowledged were - received unmarked. A random one-bit value (a nonce) is encoded in - the two ECT codepoints. The one-bit sum of these nonces is returned - in a TCP header flag, the nonce sum (NS) bit. Packet marking erases - the nonce value in the ECT codepoints because CE overwrites both ECN - IP header bits. Since each nonce is required to calculate the sum, - the correct nonce sum implies receipt of only unmarked packets. Not - only are receivers prevented from concealing marked packets, middle- - boxes along the network path cannot unmark a packet without - successfully guessing the value of the original nonce. - - The sender can verify the nonce sum returned by the receiver to - ensure that congestion indications in the form of marked (or dropped) - packets are not being concealed. Because the nonce sum is only one - bit long, senders have a 50-50 chance of catching a lying receiver - whenever an acknowledgement conceals a mark. Because each - acknowledgement is an independent trial, cheaters will be caught - quickly if there are repeated congestion signals. - - The following paragraphs describe aspects of the ECN-nonce protocol - in greater detail. - - - - - - - - - -Spring, et. al. Experimental [Page 3] - -RFC 3540 Robust ECN Signaling June 2003 - - - Each acknowledgement carries a nonce sum, which is the one bit sum - (exclusive-or, or parity) of nonces over the byte range represented - by the acknowledgement. The sum is used because not every packet is - acknowledged individually, nor are packets acknowledged reliably. If - a sum were not used, the nonce in an unmarked packet could be echoed - to prove to the sender that the individual packet arrived unmarked. - However, since these acks are not reliably delivered, the sender - could not distinguish a lost ACK from one that was never sent in - order to conceal a marked packet. 
The nonce sum prevents the - receiver from concealing individual marked packets by not - acknowledging them. Because the nonce and nonce sum are both one bit - quantities, the sum is no easier to guess than the individual nonces. - We show the nonce sum calculation below in Figure 1. - - Sender Receiver - initial sum = 1 - -- 1:4 ECT(0) --> NS = 1 + 0(1:4) = 1(:4) - <- ACK 4, NS=1 --- - -- 4:8 ECT(1) --> NS = 1(:4) + 1(4:8) = 0(:8) - <- ACK 8, NS=0 --- - -- 8:12 ECT(1) -> NS = 0(:8) + 1(8:12) = 1(:12) - <- ACK 12, NS=1 -- - -- 12:16 ECT(1) -> NS = 1(:12) + 1(12:16) = 0(:16) - <- ACK 16, NS=0 -- - - Figure 1: The calculation of nonce sums at the receiver. - - After congestion has occurred and packets have been marked or lost, - resynchronization of the sender and receiver nonce sums is needed. - When packets are marked, the nonce is cleared, and the sum of the - nonces at the receiver will no longer match the sum at the sender. - Once nonces have been lost, the difference between sender and - receiver nonce sums is constant until there is further loss. This - means that it is possible to resynchronize the sender and receiver - after congestion by having the sender set its nonce sum to that of - the receiver. Because congestion indications do not need to be - conveyed more frequently than once per round trip, the sender - suspends checking while the CWR signal is being delivered and resets - its nonce sum to the receiver's when new data is acknowledged. This - has the benefit that the receiver is not explicitly involved in the - re-synchronization process. The resynchronization process is shown - in Figure 2 below. Note that the nonce sum returned in ACK 12 (NS=0) - differs from that in the previous example (NS=1), and it continues to - differ for ACK 16. - - - - - - - -Spring, et. al. 
Experimental [Page 4] - -RFC 3540 Robust ECN Signaling June 2003 - - - Sender Receiver - initial sum = 1 - -- 1:4 ECT(0) -> NS = 1 + 0(1:4) = 1(:4) - <- ACK 4, NS=1 -- - -- 4:8 ECT(1) -> CE -> NS = 1(:4) + ?(4:8) = 1(:8) - <- ACK 8, ECE NS=1 -- - -- 8:12 ECT(1), CWR -> NS = 1(:8) + 1(8:12) = 0(:12) - <- ACK 12, NS=0 -- - -- 12:16 ECT(1) -> NS = 0(:12) + 1(12:16) = 1(:16) - <- ACK 16, NS=1 -- - - Figure 2: The calculation of nonce sums at the receiver when a - packet (4:8) is marked. The receiver may calculate the wrong - nonce sum when the original nonce information is lost after a - packet is marked. - - Third, we need to reconcile that nonces are sent with packets but - acknowledgements cover byte ranges. Acknowledged byte boundaries - need not match the transmitted boundaries, and information can be - retransmitted in packets with different byte boundaries. We discuss - the first issue, how a receiver sets a nonce when acknowledging part - of a segment, in Section 6.1. The second question, what nonce to send - when retransmitting smaller segments as a large segment, has a simple - answer: ECN is disabled for retransmissions, so can carry no nonce. - Because retransmissions are associated with congestion events, nonce - checking is suspended until after CWR is acknowledged and the - congestion event is over. - - The next sections describe the detailed behavior of senders, routers - and receivers, starting with sender transmit behavior, then around - the ECN signaling loop, and finish with sender acknowledgement - processing. - -3. Sender Behavior (Transmit) - - Senders manage CWR and ECN-Echo as before. In addition, they must - place nonces on packets as they are transmitted and check the - validity of the nonce sums in acknowledgments as they are received. - This section describes the transmit process. 
- - To place a one bit nonce value on every ECN-capable IP packet, the - sender uses the two ECT codepoints: ECT(0) represents a nonce of 0, - and ECT(1) a nonce of 1. As in ECN, retransmissions are not ECN- - capable, so carry no nonce. - - The sender maintains a mapping from each packet's end sequence number - to the expected nonce sum (not the nonce placed in the original - transmission) in the acknowledgement bearing that sequence number. - - - -Spring, et. al. Experimental [Page 5] - -RFC 3540 Robust ECN Signaling June 2003 - - -4. Router Behavior - - Routers behave as specified in [RFC3168]. By marking packets to - signal congestion, the original value of the nonce, in ECT(0) or - ECT(1), is removed. Neither the receiver nor any other party can - unmark the packet without successfully guessing the value of the - original nonce. - -5. Receiver Behavior (Receive and Transmit) - - ECN-nonce receivers maintain the nonce sum as in-order packets arrive - and return the current nonce sum in each acknowledgement. Receiver - behavior is otherwise unchanged from [RFC3168]. Returning the nonce - sum is optional, but recommended, as senders are allowed to - discontinue sending ECN-capable packets to receivers that do not - support the ECN-nonce. - - As packets are removed from the queue of out-of-order packets to be - acknowledged, the nonce is recovered from the IP header. The nonce - is added to the current nonce sum as the acknowledgement sequence - number is advanced for the recent packet. - - In the case of marked packets, one or more nonce values may be - unknown to the receiver. In this case the missing nonce values are - ignored when calculating the sum (or equivalently a value of zero is - assumed) and ECN-Echo will be set to signal congestion to the sender. - - Returning the nonce sum corresponding to a given acknowledgement is - straightforward. It is carried in a single "NS" (Nonce Sum) bit in - the TCP header. 
This bit is adjacent to the CWR and ECN-Echo bits, - set as Bit 7 in byte 13 of the TCP header, as shown below: - - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | N | C | E | U | A | P | R | S | F | - | Header Length | Reserved | S | W | C | R | C | S | S | Y | I | - | | | | R | E | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - Figure 3: The new definition of bytes 13 and 14 of the TCP Header. - - The initial nonce sum is 1, and is included in the SYN/ACK and ACK of - the three way TCP handshake. This allows the other endpoint to infer - nonce support, but is not a negotiation, in that the receiver of the - SYN/ACK need not check if NS is set to decide whether to set NS in - the subsequent ACK. - - - - - -Spring, et. al. Experimental [Page 6] - -RFC 3540 Robust ECN Signaling June 2003 - - -6. Sender Behavior (Receive) - - This section completes the description of sender behavior by - describing how senders check the validity of the nonce sums. - - The nonce sum is checked when an acknowledgement of new data is - received, except during congestion recovery when additional ECN-Echo - signals would be ignored. Checking consists of comparing the correct - nonce sum stored in a buffer to that carried in the acknowledgement, - with a correction described in the following subsection. - - If ECN-Echo is not set, the receiver claims to have received no - marked packets, and can therefore compute and return the correct - nonce sum. To conceal a mark, the receiver must successfully guess - the sum of the nonces that it did not receive, because at least one - packet was marked and the corresponding nonce was erased. Provided - the individual nonces are equally likely to be 0 or 1, their sum is - equally likely to be 0 or 1. In other words, any guess is equally - likely to be wrong and has a 50-50 chance of being caught by the - sender. 
Because each new acknowledgement is an independent trial, a - cheating receiver is likely to be caught after a small number of - lies. - - If ECN-Echo is set, the receiver is sending a congestion signal and - it is not necessary to check the nonce sum. The congestion window - will be halved, CWR will be set on the next packet with new data - sent, and ECN-Echo will be cleared once the CWR signal is received, - as in [RFC3168]. During this recovery process, the sum may be - incorrect because one or more nonces were not received. This does - not matter during recovery, because TCP invokes congestion mechanisms - at most once per RTT, whether there are one or more losses during - that period. - -6.1. Resynchronization After Loss or Mark - - After recovery, it is necessary to re-synchronize the sender and - receiver nonce sums so that further acknowledgments can be checked. - When the receiver's sum is incorrect, it will remain incorrect until - further loss. - - This leads to a simple re-synchronization mechanism where the sender - resets its nonce sum to that of the receiver when it receives an - acknowledgment for new data sent after the congestion window was - reduced. When responding to explicit congestion signals, this will - be the first acknowledgement without the ECN-Echo flag set: the - acknowledgement of the packet containing the CWR flag. - - - - - -Spring, et. al. 
Experimental [Page 7] - -RFC 3540 Robust ECN Signaling June 2003 - - - Sender Receiver - initial sum = 1 - -- 1:4 ECT(0) -> NS = 1 + 0(1:4) = 1(:4) - <- ACK 4, NS=1 -- - -- 4:8 ECT(1) -> LOST - -- 8:12 ECT(1) -> nonce sum calculation deferred - until in-order data received - <- ACK 4, NS=0 -- - -- 12:16 ECT(1) -> nonce sum calculation deferred - <- ACK 4, NS=0 -- - -- 4:8 retransmit -> NS = 1(:4) + ?(4:8) + - 1(8:12) + 1(12:16) = 1(:16) - <- ACK 16, NS=1 -- - -- 16:20 ECT(1) CWR -> - <- ACK 20, NS=0 -- NS = 1(:16) + 1(16:20) = 0(:20) - - Figure 4: The calculation of nonce sums at the receiver when a - packet is lost, and resynchronization after loss. The nonce sum - is not changed until the cumulative acknowledgement is advanced. - - In practice, resynchronization can be accomplished by storing a bit - that has the value one if the expected nonce sum stored by the sender - and the received nonce sum in the acknowledgement of CWR differ, and - zero otherwise. This synchronization offset bit can then be used in - the comparison between expected nonce sum and received nonce sum. - - The sender should ignore the nonce sum returned on any - acknowledgements bearing the ECN-echo flag. - - When an acknowledgment covers only a portion of a segment, such as - when a middlebox resegments at the TCP layer instead of fragmenting - IP packets, the sender should accept the nonce sum expected at the - next segment boundary. In other words, an acknowledgement covering - part of an original segment will include the nonce sum expected when - the entire segment is acknowledged. - - Finally, in ECN, senders can choose not to indicate ECN capability on - some packets for any reason. An ECN-nonce sender must resynchronize - after sending such ECN-incapable packets, as though a CWR had been - sent with the first new data after the ECN-incapable packets. The - sender loses protection for any unacknowledged packets until - resynchronization occurs. - - - - - - - - - -Spring, et. al. 
Experimental [Page 8] - -RFC 3540 Robust ECN Signaling June 2003 - - -6.2. Sender Behavior - Incorrect Nonce Received - - The sender's response to an incorrect nonce is a matter of policy. - It is separate from the checking mechanism and does not need to be - handled uniformly by senders. Further, checking received nonce sums - at all is optional, and may be disabled. - - If the receiver has never sent a non-zero nonce sum, the sender can - infer that the receiver does not understand the nonce, and rate limit - the connection, place it in a lower-priority queue, or cease setting - ECT in outgoing segments. - - If the received nonce sum has been set in a previous acknowledgement, - the sender might infer that a network device has interfered with - correct ECN signaling between ECN-nonce supporting endpoints. The - minimum response to an incorrect nonce is the same as the response to - a received ECE. However, to compensate for hidden congestion - signals, the sender might reduce the congestion window to one segment - and cease setting ECT in outgoing segments. An incorrect nonce sum - is a sign of misbehavior or error between ECN-nonce supporting - endpoints. - -6.2.1. Using the ECN-nonce to Protect Against Other Misbehaviors - - The ECN-nonce can provide robustness beyond checking that marked - packets are signaled to the sender. It also ensures that dropped - packets cannot be concealed from the sender (because their nonces - have been lost). Drops could potentially be concealed by a faulty - TCP implementation, certain attacks, or even a hypothetical TCP - accelerator. Such an accelerator could gamble that it can either - successfully "fast start" to a preset bandwidth quickly, retry with - another connection, or provide reliability at the application level. - If robustness against these faults is also desired, then the ECN- - nonce should not be disabled. 
Instead, reducing the congestion - window to one, or using a low-priority queue, would penalize faulty - operation while providing continued checking. - - The ECN-nonce can also detect misbehavior in Eifel [Eifel], a - recently proposed mechanism for removing the retransmission ambiguity - to improve TCP performance. A misbehaving receiver might claim to - have received only original transmissions to convince the sender to - undo congestion actions. Since retransmissions are sent without ECT, - and thus no nonce, returning the correct nonce sum confirms that only - original transmissions were received. - - - - - - - -Spring, et. al. Experimental [Page 9] - -RFC 3540 Robust ECN Signaling June 2003 - - -7. Interactions - -7.1. Path MTU Discovery - - As described in RFC3168, use of the Don't Fragment bit with ECN is - recommended. Receivers that receive unmarked fragments can - reconstruct the original nonce to conceal a marked fragment. The - ECN-nonce cannot protect against misbehaving receivers that conceal - marked fragments, so some protection is lost in situations where Path - MTU discovery is disabled. - - When responding to a small path MTU, the sender will retransmit a - smaller frame in place of a larger one. Since these smaller packets - are retransmissions, they will be ECN-incapable and bear no nonce. - The sender should resynchronize on the first newly transmitted - packet. - -7.2. SACK - - Selective acknowledgements allow receivers to acknowledge out of - order segments as an optimization. It is not necessary to modify the - selective acknowledgment option to fit per-range nonce sums, because - SACKs cannot be used by a receiver to hide a congestion signal. The - nonce sum corresponds only to the data acknowledged by the cumulative - acknowledgement. - -7.3. IPv6 - - Although the IPv4 header is protected by a checksum, this is not the - case with IPv6, making undetected bit errors in the IPv6 header more - likely. 
Bit errors that compromise the integrity of the congestion - notification fields may cause an incorrect nonce to be received, and - an incorrect nonce sum to be returned. - -8. Security Considerations - - The random one-bit nonces need not be from a cryptographic-quality - pseudo-random number generator. A strong random number generator - would compromise performance. Consequently, the sequence of random - nonces should not be used for any other purpose. - - Conversely, the pseudo-random bit sequence should not be generated by - a linear feedback shift register [Schneier], or similar scheme that - would allow an adversary who has seen several previous bits to infer - the generation function and thus its future output. - - - - - - -Spring, et. al. Experimental [Page 10] - -RFC 3540 Robust ECN Signaling June 2003 - - - Although the ECN-nonce protects against concealment of congestion - signals and optimistic acknowledgement, it provides no additional - protection for the integrity of the connection. - -9. IANA Considerations - - The Nonce Sum (NS) is carried in a reserved TCP header bit that must - be allocated. This document describes the use of Bit 7, adjacent to - the other header bits used by ECN. - - The codepoint for the NS flag in the TCP header is specified by the - Standards Action of this RFC, as is required by RFC 2780. The IANA - has added the following to the registry for "TCP Header Flags": - - RFC 3540 defines bit 7 from the Reserved field to be used for the - Nonce Sum, as follows: - - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - | | | N | C | E | U | A | P | R | S | F | - | Header Length | Reserved | S | W | C | R | C | S | S | Y | I | - | | | | R | E | G | K | H | T | N | N | - +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - TCP Header Flags - - Bit Name Reference - --- ---- --------- - 7 NS (Nonce Sum) [RFC 3540] - -10. 
Conclusion - - The ECN-nonce is a simple modification to the ECN signaling mechanism - that improves ECN's robustness by preventing receivers from - concealing marked (or dropped) packets. The intent of this work is - to help improve the robustness of congestion control in the Internet. - The modification retains the character and simplicity of existing ECN - signaling. It is also practical for deployment in the Internet. It - uses the ECT(0) and ECT(1) codepoints and one TCP header flag (as - well as CWR and ECN-Echo) and has simple processing rules. - - - - - - - - - - - -Spring, et. al. Experimental [Page 11] - -RFC 3540 Robust ECN Signaling June 2003 - - -11. References - - [ECN] "The ECN Web Page", URL - "http://www.icir.org/floyd/ecn.html". - - [RFC3168] Ramakrishnan, K., Floyd, S. and D. Black, "The addition of - explicit congestion notification (ECN) to IP", RFC 3168, - September 2001. - - [Eifel] R. Ludwig and R. Katz. The Eifel Algorithm: Making TCP - Robust Against Spurious Retransmissions. Computer - Communications Review, January, 2000. - - [B97] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [Savage] S. Savage, N. Cardwell, D. Wetherall, T. Anderson. TCP - congestion control with a misbehaving receiver. SIGCOMM - CCR, October 1999. - - [Schneier] Bruce Schneier. Applied Cryptography, 2nd ed., 1996 - -12. Acknowledgements - - This note grew out of research done by Stefan Savage, David Ely, - David Wetherall, Tom Anderson and Neil Spring. We are very grateful - for feedback and assistance from Sally Floyd. - -13. 
Authors' Addresses - - Neil Spring - EMail: nspring@cs.washington.edu - - - David Wetherall - Department of Computer Science and Engineering, Box 352350 - University of Washington - Seattle WA 98195-2350 - EMail: djw@cs.washington.edu - - - David Ely - Computer Science and Engineering, 352350 - University of Washington - Seattle, WA 98195-2350 - EMail: ely@cs.washington.edu - - - - - -Spring, et. al. Experimental [Page 12] - -RFC 3540 Robust ECN Signaling June 2003 - - -14. Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assigns. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Spring, et. al. Experimental [Page 13] - diff --git a/kernel/picotcp/RFC/rfc3562.txt b/kernel/picotcp/RFC/rfc3562.txt deleted file mode 100644 index 7a23a5b..0000000 --- a/kernel/picotcp/RFC/rfc3562.txt +++ /dev/null @@ -1,395 +0,0 @@ - - - - - - -Network Working Group M. Leech -Request for Comments: 3562 Nortel Networks -Category:Informational July 2003 - - - Key Management Considerations for - the TCP MD5 Signature Option - -Status of this Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - The TCP MD5 Signature Option (RFC 2385), used predominantly by BGP, - has seen significant deployment in critical areas of Internet - infrastructure. The security of this option relies heavily on the - quality of the keying material used to compute the MD5 signature. - This document addresses the security requirements of that keying - material. - -1. Introduction - - The security of various cryptographic functions lies both in the - strength of the functions themselves against various forms of attack, - and also, perhaps more importantly, in the keying material that is - used with them. While theoretical attacks against the simple MAC - construction used in RFC 2385 are possible [MDXMAC], the number of - text-MAC pairs required to mount a forgery make it vastly more - probable that key-guessing is the main threat against RFC 2385. 
- - We show a quantitative approach to determining the security - requirements of keys used with [RFC2385], which tends to suggest the - following: - - o Key lengths SHOULD be between 12 and 24 bytes, with larger keys - having effectively zero additional computational costs when - compared to shorter keys. - - - - - - - -Leech Informational [Page 1] - -RFC 3562 Considerations for the TCP MD5 Signature Option July 2003 - - - o Key sharing SHOULD be limited so that keys aren't shared among - multiple BGP peering arrangements. - - o Keys SHOULD be changed at least every 90 days. - -1.1. Requirements Keywords - - The keywords "MUST", "MUST NOT", "REQUIRED", "SHOULD", "SHOULD NOT", - and "MAY" that appear in this document are to be interpreted as - described in [RFC2119]. - -2. Performance assumptions - - The most recent performance study of MD5 that this author was able to - find was undertaken by J. Touch at ISI. The results of this study - were documented in [RFC1810]. The assumption is that Moores Law - applies to the data in the study, which at the time showed a - best-possible *software* performance for MD5 of 87Mbits/second. - Projecting this number forward to the ca 2002 timeframe of this - document, would suggest a number near 2.1Gbits/second. - - For purposes of simplification, we will assume that our key-guessing - attacker will attack short packets only. A likely minimal packet is - an ACK, with no data. This leads to having to compute the MD5 over - about 40 bytes of data, along with some reasonable maximum number of - key bytes. MD5 effectively pads its input to 512-bit boundaries (64 - bytes) (it's actually more complicated than that, but this - simplifying assumption will suffice for this analysis). That means - that a minimum MD5 "block" is 64 bytes, so for a ca 2002-scaled - software performance of 2.1Gbits/second, we get a single-CPU software - MD5 performance near 4.1e6 single-block MD5 operations per second. 
- - These numbers are, of course, assuming that any key-guessing attacker - is resource-constrained to a single CPU. In reality, distributed - cryptographic key-guessing attacks have been remarkably successful in - the recent past. - - It may be instructive to look at recent Internet worm infections, to - determine what the probable maximum number of hosts that could be - surreptitiously marshalled for a key-guessing attack against MD5. - CAIDA [CAIDA2001] has reported that the Code Red worm infected over - 350,000 Internet hosts in the first 14 hours of operation. It seems - reasonable to assume that a worm whose "payload" is a mechanism for - quietly performing a key-guessing attack (perhaps using idle CPU - cycles of the infected host) could be at least as effective as Code - Red was. If one assumes that such a worm were engineered to be - maximally stealthy, then steady-state infection could conceivably - reach 1 million hosts or more. That changes our single-CPU - - - -Leech Informational [Page 2] - -RFC 3562 Considerations for the TCP MD5 Signature Option July 2003 - - - performance from 4.1e6 operations per second, to somewhere between - 1.0e11 and 1.0e13 MD5 operations per second. - - In 1997, John Gilmore, and the Electronic Frontier Foundation [EFF98] - developed a special-purpose machine, for an investment of - approximately USD$250,000. This machine was able to mount a - key-guessing attack against DES, and compute a key in under 1 week. - Given Moores Law, the same investment today would yield a machine - that could do the same work approximately 8 times faster. It seems - reasonable to assume that a similar hardware approach could be - brought to bear on key-guessing attacks against MD5, for similar key - lengths to DES, with somewhat-reduced performance (MD5 performance in - hardware may be as much as 2-3 times slower than DES). - -3. 
Key Lifetimes - - Operational experience with RFC 2385 would suggest that keys used - with this option may have lifetimes on the order of months. It would - seem prudent, then, to choose a minimum key length that guarantees - that key-guessing runtimes are some small multiple of the key-change - interval under best-case (for the attacker) practical attack - performance assumptions. - - The keys used with RFC 2385 are intended only to provide - authentication, and not confidentiality. Consequently, the ability - of an attacker to determine the key used for old traffic (traffic - emitted before a key-change event) is not considered a threat. - -3. Key Entropy - - If we make an assumption that key-change intervals are 90 days, and - that the reasonable upper-bound for software-based attack performance - is 1.0e13 MD5 operations per second, then the minimum required key - entropy is approximately 68 bits. It is reasonable to round this - number up to at least 80 bits, or 10 bytes. If one assumes that - hardware-based attacks are likely, using an EFF-like development - process, but with small-country-sized budgets, then the minimum key - size steps up considerably to around 83 bits, or 11 bytes. Since 11 - is such an ugly number, rounding up to 12 bytes is reasonable. - - In order to achieve this much entropy with an English-language key, - one needs to remember that English has an entropy of approximately - 1.3 bits per character. Other human languages are similar. This - means that a key derived from a human language would need to be - approximately 61 bytes long to produce 80 bits of entropy, and 73 - bytes to produce 96 bits of entropy. - - - - - -Leech Informational [Page 3] - -RFC 3562 Considerations for the TCP MD5 Signature Option July 2003 - - - A more reasonable approach would be to use the techniques described - in [RFC1750] to produce a high quality random key of 96 bits or more. 
- - It has previously been noted that an attacker will tend to choose - short packets to mount an attack on, since that increases the - key-guessing performance for the attacker. It has also been noted - that MD5 operations are effectively computed in blocks of 64 bytes. - Given that the shortest packet an attacker could reasonably use would - consist of 40 bytes of IP+TCP header data, with no payload, the - remaining 24 bytes of the MD5 block can reasonably be used for keying - material without added CPU cost for routers, but substantially - increase the burden on the attacker. While this practice will tend - to increase the CPU burden for ordinary short BGP packets, since it - will tend to cause the MD5 calculations to overflow into a second MD5 - block, it isn't currently seen to be a significant extra burden to - BGP routing machinery. - - The most reasonable practice, then, would be to choose the largest - possible key length smaller than 25 bytes that is operationally - reasonable, but at least 12 bytes. - - Some implementations restrict the key to a string of ASCII - characters, much like simple passwords, usually of 8 bytes or less. - The very real risk is that such keys are quite vulnerable to - key-guessing attacks, as outlined above. The worst-case scenario - would occur when the ASCII key/password is a human-language word, or - pseudo-word. Such keys/passwords contain, at most, 12 bits of - entropy. In such cases, dictionary driven attacks can yield results - in a fraction of the time that a brute-force approach would take. - Such implementations SHOULD permit users to enter a direct binary key - using the command line interface. One possible implementation would - be to establish a convention that an ASCII key beginning with the - prefix "0x" be interpreted as a string of bytes represented in - hexadecimal. Ideally, such byte strings will have been derived from - a random source, as outlined in [RFC1750]. 
Implementations SHOULD - NOT limit the length of the key unnecessarily, and SHOULD allow keys - of at least 16 bytes, to allow for the inevitable threat from Moores - Law. - -4. Key management practices - - In current operational use, TCP MD5 Signature keys [RFC2385] may be - shared among significant numbers of systems. Conventional wisdom in - cryptography and security is that such sharing increases the - probability of accidental or deliberate exposure of keys. The more - frequently such keying material is handled, the more likely it is to - be accidentally exposed to unauthorized parties. - - - - -Leech Informational [Page 4] - -RFC 3562 Considerations for the TCP MD5 Signature Option July 2003 - - - Since it is possible for anyone in possession of a key to forge - packets as if they originated with any of the other keyholders, the - most reasonable security practice would be to limit keys to use - between exactly two parties. Current implementations may make this - difficult, but it is the most secure approach when key lifetimes are - long. Reducing key lifetimes can partially mitigate widescale - key-sharing, by limiting the window of opportunity for a "rogue" - keyholder. - - Keying material is extremely sensitive data, and as such, should be - handled with reasonable caution. When keys are transported - electronically, including when configuring network elements like - routers, secure handling techniques MUST be used. Use of protocols - such as S/MIME [RFC2633], TLS [RFC2246], Secure Shell (SSH) SHOULD be - used where appropriate, to protect the transport of the key. - -5. Security Considerations - - This document is entirely about security requirements for keying - material used with RFC 2385. - - No new security exposures are created by this document. - -6. Acknowledgements - - Steve Bellovin, Ran Atkinson, and Randy Bush provided valuable - commentary in the development of this document. - -7. References - - [RFC1771] Rekhter, Y. and T. 
Li, "A Border Gateway Protocol 4 - (BGP-4)", RFC 1771, March 1995. - - [RFC1810] Touch, J., "Report on MD5 Performance", RFC 1810, June - 1995. - - [RFC2385] Heffernan, A., "Protection of BGP Sessions via the TCP - MD5 Signature Option", RFC 2385, August 1998. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [MDXMAC] Van Oorschot, P. and B. Preneel, "MDx-MAC and Building - Fast MACs from Hash Functions". Proceedings Crypto '95, - Springer-Verlag LNCS, August 1995. - - [RFC1750] Eastlake, D., Crocker, S. and J. Schiller, "Randomness - Recommendations for Security", RFC 1750, December 1994. - - - -Leech Informational [Page 5] - -RFC 3562 Considerations for the TCP MD5 Signature Option July 2003 - - - [EFF98] "Cracking DES: Secrets of Encryption Research, Wiretap - Politics, and Chip Design". Electronic Frontier - Foundation, 1998. - - [RFC2633] Ramsdell, B., "S/MIME Version 3 Message Specification", - RFC 2633, June 1999. - - [RFC2246] Dierks, T. and C. Allen, "The TLS Protocol Version 1.0", - RFC 2246, January 1999. - - [CAIDA2001] "CAIDA Analysis of Code Red" - http://www.caida.org/analysis/security/code-red/ - -8. Author's Address - - Marcus D. Leech - Nortel Networks - P.O. Box 3511, Station C - Ottawa, ON - Canada, K1Y 4H7 - - Phone: +1 613-763-9145 - EMail: mleech@nortelnetworks.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Leech Informational [Page 6] - -RFC 3562 Considerations for the TCP MD5 Signature Option July 2003 - - -9. Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. 
- - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assignees. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Leech Informational [Page 7] - diff --git a/kernel/picotcp/RFC/rfc3649.txt b/kernel/picotcp/RFC/rfc3649.txt deleted file mode 100644 index 6a20e0d..0000000 --- a/kernel/picotcp/RFC/rfc3649.txt +++ /dev/null @@ -1,1907 +0,0 @@ - - - - - - -Network Working Group S. 
Floyd -Request for Comments: 3649 ICSI -Category: Experimental December 2003 - - - HighSpeed TCP for Large Congestion Windows - -Status of this Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2003). All Rights Reserved. - -Abstract - - The proposals in this document are experimental. While they may be - deployed in the current Internet, they do not represent a consensus - that this is the best method for high-speed congestion control. In - particular, we note that alternative experimental proposals are - likely to be forthcoming, and it is not well understood how the - proposals in this document will interact with such alternative - proposals. - - This document proposes HighSpeed TCP, a modification to TCP's - congestion control mechanism for use with TCP connections with large - congestion windows. The congestion control mechanisms of the current - Standard TCP constrains the congestion windows that can be achieved - by TCP in realistic environments. For example, for a Standard TCP - connection with 1500-byte packets and a 100 ms round-trip time, - achieving a steady-state throughput of 10 Gbps would require an - average congestion window of 83,333 segments, and a packet drop rate - of at most one congestion event every 5,000,000,000 packets (or - equivalently, at most one congestion event every 1 2/3 hours). This - is widely acknowledged as an unrealistic constraint. To address this - limitation of TCP, this document proposes HighSpeed TCP, and solicits - experimentation and feedback from the wider community. - - - - - - - - - - -Floyd Experimental [Page 1] - -RFC 3649 HighSpeed TCP December 2003 - - -Table of Contents - - 1. Introduction. . . . . . . . . . . . . . . . . . . . . . . . . . 2 - 2. 
The Problem Description.. . . . . . . . . . . . . . . . . . . . 3 - 3. Design Guidelines.. . . . . . . . . . . . . . . . . . . . . . . 4 - 4. Non-Goals.. . . . . . . . . . . . . . . . . . . . . . . . . . . 5 - 5. Modifying the TCP Response Function.. . . . . . . . . . . . . . 6 - 6. Fairness Implications of the HighSpeed Response - Function. . . . . . . . . . . . . . . . . . . . . . . . . . . . 9 - 7. Translating the HighSpeed Response Function into - Congestion Control Parameters . . . . . . . . . . . . . . . . . 12 - 8. An alternate, linear response functions.. . . . . . . . . . . . 13 - 9. Tradeoffs for Choosing Congestion Control Parameters. . . . . . 16 - 9.1. The Number of Round-Trip Times between Loss Events . . . . 17 - 9.2. The Number of Packet Drops per Loss Event, with Drop-Tail. 17 - 10. Related Issues . . . . . . . . . . . . . . . . . . . . . . . . 18 - 10.1. Slow-Start. . . . . . . . . . . . . . . . . . . . . . . . 18 - 10.2. Limiting burstiness on short time scales. . . . . . . . . 19 - 10.3. Other limitations on window size. . . . . . . . . . . . . 19 - 10.4. Implementation issues.. . . . . . . . . . . . . . . . . . 19 - 11. Deployment issues. . . . . . . . . . . . . . . . . . . . . . . 20 - 11.1. Deployment issues of HighSpeed TCP. . . . . . . . . . . . 20 - 11.2. Deployment issues of Scalable TCP . . . . . . . . . . . . 22 - 12. Related Work in HighSpeed TCP. . . . . . . . . . . . . . . . . 23 - 13. Relationship to other Work.. . . . . . . . . . . . . . . . . . 25 - 14. Conclusions. . . . . . . . . . . . . . . . . . . . . . . . . . 25 - 15. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 25 - 16. Normative References . . . . . . . . . . . . . . . . . . . . . 26 - 17. Informative References . . . . . . . . . . . . . . . . . . . . 26 - 18. Security Considerations. . . . . . . . . . . . . . . . . . . . 28 - 19. IANA Considerations. . . . . . . . . . . . . . . . . . . . . . 28 - A. TCP's Loss Event Rate in Steady-State. . . . . . . . . 
. . . . 29 - B. A table for a(w) and b(w). . . . . . . . . . . . . . . . . . . 30 - C. Exploring the time to converge to fairness . . . . . . . . . . 32 - Author's Address . . . . . . . . . . . . . . . . . . . . . . . 33 - Full Copyright Statement . . . . . . . . . . . . . . . . . . . 34 - -1. Introduction - - This document proposes HighSpeed TCP, a modification to TCP's - congestion control mechanism for use with TCP connections with large - congestion windows. In a steady-state environment, with a packet - loss rate p, the current Standard TCP's average congestion window is - roughly 1.2/sqrt(p) segments. This places a serious constraint on - the congestion windows that can be achieved by TCP in realistic - environments. For example, for a Standard TCP connection with 1500- - byte packets and a 100 ms round-trip time, achieving a steady-state - throughput of 10 Gbps would require an average congestion window of - - - -Floyd Experimental [Page 2] - -RFC 3649 HighSpeed TCP December 2003 - - - 83,333 segments, and a packet drop rate of at most one congestion - event every 5,000,000,000 packets (or equivalently, at most one - congestion event every 1 2/3 hours). The average packet drop rate of - at most 2*10^(-10) needed for full link utilization in this - environment corresponds to a bit error rate of at most 2*10^(-14), - and this is an unrealistic requirement for current networks. - - To address this fundamental limitation of TCP and of the TCP response - function (the function mapping the steady-state packet drop rate to - TCP's average sending rate in packets per round-trip time), this - document describes a modified TCP response function for regimes with - higher congestion windows. This document also solicits - experimentation and feedback on HighSpeed TCP from the wider - community. 
- - Because HighSpeed TCP's modified response function would only take - effect with higher congestion windows, HighSpeed TCP does not modify - TCP behavior in environments with heavy congestion, and therefore - does not introduce any new dangers of congestion collapse. However, - if relative fairness between HighSpeed TCP connections is to be - preserved, then in our view any modification to the TCP response - function should be addressed in the IETF, rather than made as ad hoc - decisions by individual implementors or TCP senders. Modifications - to the TCP response function would also have implications for - transport protocols that use TFRC and other forms of equation-based - congestion control, as these congestion control mechanisms directly - use the TCP response function [RFC3448]. - - This proposal for HighSpeed TCP focuses specifically on a proposed - change to the TCP response function, and its implications for TCP. - This document does not address what we view as a separate fundamental - issue, of the mechanisms required to enable best-effort connections - to *start* with large initial windows. In our view, while HighSpeed - TCP proposes a somewhat fundamental change to the TCP response - function, at the same time it is a relatively simple change to - implement in a single TCP sender, and presents no dangers in terms of - congestion collapse. In contrast, in our view, the problem of - enabling connections to *start* with large initial windows is - inherently more risky and structurally more difficult, requiring some - form of explicit feedback from all of the routers along the path. - This is another reason why we would propose addressing the problem of - starting with large initial windows separately, and on a separate - timetable, from the problem of modifying the TCP response function. - - - - - - - - -Floyd Experimental [Page 3] - -RFC 3649 HighSpeed TCP December 2003 - - -2. 
The Problem Description - - This section describes the number of round-trip times between - congestion events required for a Standard TCP flow to achieve an - average throughput of B bps, given packets of D bytes and a round- - trip time of R seconds. A congestion event refers to a window of - data with one or more dropped or ECN-marked packets (where ECN stands - for Explicit Congestion Notification). - - From Appendix A, achieving an average TCP throughput of B bps - requires a loss event at most every BR/(12D) round-trip times. This - is illustrated in Table 1, for R = 0.1 seconds and D = 1500 bytes. - The table also gives the average congestion window W of BR/(8D), and - the steady-state packet drop rate P of 1.5/W^2. - - TCP Throughput (Mbps) RTTs Between Losses W P - --------------------- ------------------- ---- ----- - 1 5.5 8.3 0.02 - 10 55.5 83.3 0.0002 - 100 555.5 833.3 0.000002 - 1000 5555.5 8333.3 0.00000002 - 10000 55555.5 83333.3 0.0000000002 - - Table 1: RTTs Between Congestion Events for Standard TCP, for - 1500-Byte Packets and a Round-Trip Time of 0.1 Seconds. - - This document proposes HighSpeed TCP, a minimal modification to TCP's - increase and decrease parameters, for TCP connections with larger - congestion windows, to allow TCP to achieve high throughput with more - realistic requirements for the steady-state packet drop rate. - Equivalently, HighSpeed TCP has more realistic requirements for the - number of round-trip times between loss events. - -3. Design Guidelines - - Our proposal for HighSpeed TCP is motivated by the following - requirements: - - * Achieve high per-connection throughput without requiring - unrealistically low packet loss rates. - - * Reach high throughput reasonably quickly when in slow-start. - - * Reach high throughput without overly long delays when recovering - from multiple retransmit timeouts, or when ramping-up from a - period with small congestion windows. 
- - - - - -Floyd Experimental [Page 4] - -RFC 3649 HighSpeed TCP December 2003 - - - * No additional feedback or support required from routers: - - For example, the goal is for acceptable performance in both ECN- - capable and non-ECN-capable environments, and with Drop-Tail as well - as with Active Queue Management such as RED in the routers. - - * No additional feedback required from TCP receivers. - - * TCP-compatible performance in environments with moderate or high - congestion (e.g., packet drop rates of 1% or higher): - - Equivalently, the requirement is that there be no additional load on - the network (in terms of increased packet drop rates) in environments - with moderate or high congestion. - - * Performance at least as good as Standard TCP in environments with - moderate or high congestion. - - * Acceptable transient performance, in terms of increases in the - congestion window in one round-trip time, responses to severe - congestion, and convergence times to fairness. - - Currently, users wishing to achieve throughputs of 1 Gbps or more - typically open up multiple TCP connections in parallel, or use MulTCP - [CO98,GRK99], which behaves roughly like the aggregate of N virtual - TCP connections. While this approach suffices for the occasional - user on well-provisioned links, it leaves the parameter N to be - determined by the user, and results in more aggressive performance - and higher steady-state packet drop rates if used in environments - with periods of moderate or high congestion. We believe that a new - approach is needed that offers more flexibility, more effectively - scales to a wide range of available bandwidths, and competes more - fairly with Standard TCP in congested environments. - -4. Non-Goals - - The following are explicitly *not* goals of our work: - - * Non-goal: TCP-compatible performance in environments with very low - packet drop rates. 
- - We note that our proposal does not require, or deliver, TCP- - compatible performance in environments with very low packet drop - rates, e.g., with packet loss rates of 10^-5 or 10^-6. As we discuss - later in this document, we assume that Standard TCP is unable to make - effective use of the available bandwidth in environments with loss - - - - - -Floyd Experimental [Page 5] - -RFC 3649 HighSpeed TCP December 2003 - - - rates of 10^-6 in any case, so that it is acceptable and appropriate - for HighSpeed TCP to perform more aggressively than Standard TCP in - such an environment. - - * Non-goal: Ramping-up more quickly than allowed by slow-start. - - It is our belief that ramping-up more quickly than allowed by slow- - start would necessitate more explicit feedback from routers along the - path. The proposal for HighSpeed TCP is focused on changes to TCP - that could be effectively deployed in the current Internet - environment. - - * Non-goal: Avoiding oscillations in environments with only one-way, - long-lived flows all with the same round-trip times. - - While we agree that attention to oscillatory behavior is useful, - avoiding oscillations in aggregate throughput has not been our - primary consideration, particularly for simplified environments - limited to one-way, long-lived flows all with the same, large round- - trip times. Our assessment is that some oscillatory behavior in - these extreme environments is an acceptable price to pay for the - other benefits of HighSpeed TCP. - -5. Modifying the TCP Response Function - - The TCP response function, w = 1.2/sqrt(p), gives TCP's average - congestion window w in MSS-sized segments, as a function of the - steady-state packet drop rate p [FF98]. 
This TCP response function - is a direct consequence of TCP's Additive Increase Multiplicative - Decrease (AIMD) mechanisms of increasing the congestion window by - roughly one segment per round-trip time in the absence of congestion, - and halving the congestion window in response to a round-trip time - with a congestion event. This response function for Standard TCP is - reflected in the table below. In this proposal we restrict our - attention to TCP performance in environments with packet loss rates - of at most 10^-2, and so we can ignore the more complex response - functions that are required to model TCP performance in more - congested environments with retransmit timeouts. From Appendix A, an - average congestion window of W corresponds to an average of 2/3 W - round-trip times between loss events for Standard TCP (with the - congestion window varying from 2/3 W to 4/3 W). - - - - - - - - - - -Floyd Experimental [Page 6] - -RFC 3649 HighSpeed TCP December 2003 - - - Packet Drop Rate P Congestion Window W RTTs Between Losses - ------------------ ------------------- ------------------- - 10^-2 12 8 - 10^-3 38 25 - 10^-4 120 80 - 10^-5 379 252 - 10^-6 1200 800 - 10^-7 3795 2530 - 10^-8 12000 8000 - 10^-9 37948 25298 - 10^-10 120000 80000 - - Table 2: TCP Response Function for Standard TCP. The average - congestion window W in MSS-sized segments is given as a function of - the packet drop rate P. - - To specify a modified response function for HighSpeed TCP, we use - three parameters, Low_Window, High_Window, and High_P. To ensure TCP - compatibility, the HighSpeed response function uses the same response - function as Standard TCP when the current congestion window is at - most Low_Window, and uses the HighSpeed response function when the - current congestion window is greater than Low_Window. In this - document we set Low_Window to 38 MSS-sized segments, corresponding to - a packet drop rate of 10^-3 for TCP. 
- - To specify the upper end of the HighSpeed response function, we - specify the packet drop rate needed in the HighSpeed response - function to achieve an average congestion window of 83000 segments. - This is roughly the window needed to sustain 10 Gbps throughput, for - a TCP connection with the default packet size and round-trip time - used earlier in this document. For High_Window set to 83000, we - specify High_P of 10^-7; that is, with HighSpeed TCP a packet drop - rate of 10^-7 allows the HighSpeed TCP connection to achieve an - average congestion window of 83000 segments. We believe that this - loss rate sets an achievable target for high-speed environments, - while still allowing acceptable fairness for the HighSpeed response - function when competing with Standard TCP in environments with packet - drop rates of 10^-4 or 10^-5. - - For simplicity, for the HighSpeed response function we maintain the - property that the response function gives a straight line on a log- - log scale (as does the response function for Standard TCP, for low to - moderate congestion). This results in the following response - function, for values of the average congestion window W greater than - Low_Window: - - W = (p/Low_P)^S Low_Window, - - - - -Floyd Experimental [Page 7] - -RFC 3649 HighSpeed TCP December 2003 - - - for Low_P the packet drop rate corresponding to Low_Window, and for S - as following constant [FRS02]: - - S = (log High_Window - log Low_Window)/(log High_P - log Low_P). - - (In this paper, "log x" refers to the log base 10.) For example, for - Low_Window set to 38, we have Low_P of 10^-3 (for compatibility with - Standard TCP). Thus, for High_Window set to 83000 and High_P set to - 10^-7, we get the following response function: - - W = 0.12/p^0.835. (1) - - This HighSpeed response function is illustrated in Table 3 below. - For HighSpeed TCP, the number of round-trip times between losses, - 1/(pW), equals 12.7 W^0.2, for W > 38 segments. 
- - Packet Drop Rate P Congestion Window W RTTs Between Losses - ------------------ ------------------- ------------------- - 10^-2 12 8 - 10^-3 38 25 - 10^-4 263 38 - 10^-5 1795 57 - 10^-6 12279 83 - 10^-7 83981 123 - 10^-8 574356 180 - 10^-9 3928088 264 - 10^-10 26864653 388 - - Table 3: TCP Response Function for HighSpeed TCP. The average - congestion window W in MSS-sized segments is given as a function of - the packet drop rate P. - - We believe that the problem of backward compatibility with Standard - TCP requires a response function that is quite close to that of - Standard TCP for loss rates of 10^-1, 10^-2, or 10^-3. We believe, - however, that such stringent TCP-compatibility is not required for - smaller loss rates, and that an appropriate response function is one - that gives a plausible packet drop rate for a connection throughput - of 10 Gbps. This also gives a slowly increasing number of round-trip - times between loss events as a function of a decreasing packet drop - rate. - - Another way to look at the HighSpeed response function is to consider - that HighSpeed TCP is roughly emulating the congestion control - response of N parallel TCP connections, where N is initially one, and - where N increases as a function of the HighSpeed TCP's congestion - window. Thus for the HighSpeed response function in Equation (1) - above, the response function can be viewed as equivalent to that of - - - -Floyd Experimental [Page 8] - -RFC 3649 HighSpeed TCP December 2003 - - - N(W) parallel TCP connections, where N(W) varies as a function of the - congestion window W. Recall that for a single standard TCP - connection, the average congestion window equals 1.2/sqrt(p). For N - parallel TCP connections, the aggregate congestion window for the N - connections equals N*1.2/sqrt(p). 
From the HighSpeed response - function in Equation (1) and the relationship above, we can derive - the following: - - N(W) = 0.23*W^(0.4) - - for N(W) the number of parallel TCP connections emulated by the - HighSpeed TCP response function, and for N(W) >= 1. This is shown in - Table 4 below. - - Congestion Window W Number N(W) of Parallel TCPs - ------------------- ------------------------- - 1 1 - 10 1 - 100 1.4 - 1,000 3.6 - 10,000 9.2 - 100,000 23.0 - - Table 4: Number N(W) of parallel TCP connections roughly emulated by - the HighSpeed TCP response function. - - In this document, we do not attempt to seriously evaluate the - HighSpeed response function for congestion windows greater than - 100,000 packets. We believe that we will learn more about the - requirements for sustaining the throughput of best-effort connections - in that range as we gain more experience with HighSpeed TCP with - congestion windows of thousands and tens of thousands of packets. - There also might be limitations to the per-connection throughput that - can be realistically achieved for best-effort traffic, in terms of - congestion window of hundreds of thousands of packets or more, in the - absence of additional support or feedback from the routers along the - path. - -6. Fairness Implications of the HighSpeed Response Function - - The Standard and Highspeed Response Functions can be used directly to - infer the relative fairness between flows using the two response - functions. For example, given a packet drop rate P, assume that - Standard TCP has an average congestion window of W_Standard, and - HighSpeed TCP has a higher average congestion window of W_HighSpeed. - - - - - - -Floyd Experimental [Page 9] - -RFC 3649 HighSpeed TCP December 2003 - - - In this case, a single HighSpeed TCP connection is receiving - W_HighSpeed/W_Standard times the throughput of a single Standard TCP - connection competing in the same environment. 
- - This relative fairness is illustrated below in Table 5, for the - parameters used for the Highspeed response function in the section - above. The second column gives the relative fairness, for the - steady-state packet drop rate specified in the first column. To help - calibrate, the third column gives the aggregate average congestion - window for the two TCP connections, and the fourth column gives the - bandwidth that would be needed by the two connections to achieve that - aggregate window and packet drop rate, given 100 ms round-trip times - and 1500-byte packets. - - Packet Drop Rate P Fairness Aggregate Window Bandwidth - ------------------ -------- ---------------- --------- - 10^-2 1.0 24 2.8 Mbps - 10^-3 1.0 76 9.1 Mbps - 10^-4 2.2 383 45.9 Mbps - 10^-5 4.7 2174 260.8 Mbps - 10^-6 10.2 13479 1.6 Gbps - 10^-7 22.1 87776 10.5 Gbps - - Table 5: Relative Fairness between the HighSpeed and Standard - Response Functions. - - Thus, for packet drop rates of 10^-4, a flow with the HighSpeed - response function can expect to receive 2.2 times the throughput of a - flow using the Standard response function, given the same round-trip - times and packet sizes. With packet drop rates of 10^-6 (or 10^-7), - the unfairness is more severe, and we have entered the regime where a - Standard TCP connection requires at most one congestion event every - 800 (or 2530) round-trip times in order to make use of the available - bandwidth. Our judgement would be that there are not a lot of TCP - connections effectively operating in this regime today, with - congestion windows of thousands of packets, and that therefore the - benefits of the HighSpeed response function would outweigh the - unfairness that would be experienced by Standard TCP in this regime. - However, one purpose of this document is to solicit feedback on this - issue. The parameter Low_Window determines directly the point of - divergence between the Standard and HighSpeed Response Functions. 
- - The third column of Table 5, the Aggregate Window, gives the - aggregate congestion window of the two competing TCP connections, - with HighSpeed and Standard TCP, given the packet drop rate specified - in the first column. From Table 5, a HighSpeed TCP connection would - receive ten times the bandwidth of a Standard TCP in an environment - with a packet drop rate of 10^-6. This would occur when the two - - - -Floyd Experimental [Page 10] - -RFC 3649 HighSpeed TCP December 2003 - - - flows sharing a single pipe achieved an aggregate window of 13479 - packets. Given a round-trip time of 100 ms and a packet size of 1500 - bytes, this would occur with an available bandwidth for the two - competing flows of 1.6 Gbps. - - Next we consider the time that it takes a standard or HighSpeed TCP - flow to converge to fairness against a pre-existing HighSpeed TCP - flow. The worst case for convergence to fairness occurs when a new - flow is starting up, competing against a high-bandwidth existing - flow, and the new flow suffers a packet drop and exits slow-start - while its window is still small. In the worst case, consider that - the new flow has entered the congestion avoidance phase while its - window is only one packet. A standard TCP flow in congestion - avoidance increases its window by at most one packet per round-trip - time, and after N round-trip times has only achieved a window of N - packets (when starting with a window of 1 in the first round-trip - time). In contrast, a HighSpeed TCP flow increases much faster than - a standard TCP flow while in the congestion avoidance phase, and we - can expect its convergence to fairness to be much better. This is - shown in Table 6 below. The script used to generate this table is - given in Appendix C. 
- - RTT HS_Window Standard_TCP_Window - --- --------- ------------------- - 100 131 100 - 200 475 200 - 300 1131 300 - 400 2160 400 - 500 3601 500 - 600 5477 600 - 700 7799 700 - 800 10567 800 - 900 13774 900 - 1000 17409 1000 - 1100 21455 1100 - 1200 25893 1200 - 1300 30701 1300 - 1400 35856 1400 - 1500 41336 1500 - 1600 47115 1600 - 1700 53170 1700 - 1800 59477 1800 - 1900 66013 1900 - 2000 72754 2000 - - Table 6: For a HighSpeed and a Standard TCP connection, the - congestion window during congestion avoidance phase (starting with a - congestion window of 1 packet during RTT 1). - - - -Floyd Experimental [Page 11] - -RFC 3649 HighSpeed TCP December 2003 - - - The classic paper on relative fairness is from Chiu and Jain [CJ89]. - This paper shows that AIMD (Additive Increase Multiplicative - Decrease) converges to fairness in an environment with synchronized - congestion events. From [CJ89], it is easy to see that MIMD and AIAD - do not converge to fairness in this environment. However, the - results of [CJ89] do not apply to an asynchronous environment such as - that of the current Internet, where the frequency of congestion - feedback can be different for different flows. For example, it has - been shown that MIMD converges to fair states in a model with - proportional instead of synchronous feedback in terms of packet drops - [GV02]. Thus, we are not concerned about abandoning a strict model - of AIMD for HighSpeed TCP. However, we note that in an environment - with Drop-Tail queue management, there is likely to be some - synchronization of packet drops. In this environment, the model of - completely synchronous feedback does not hold, but the model of - completely asynchronous feedback is not accurate either. Fairness in - Drop-Tail environments is discussed in more detail in Sections 9 and - 12. - -7. 
Translating the HighSpeed Response Function into Congestion Control - Parameters - - For equation-based congestion control such as TFRC, the HighSpeed - Response Function above could be used directly by the TFRC congestion - control mechanism. However, for TCP the HighSpeed response function - has to be translated into additive increase and multiplicative - decrease parameters. The HighSpeed response function cannot be - achieved by TCP with an additive increase of one segment per round- - trip time and a multiplicative decrease of halving the current - congestion window; HighSpeed TCP will have to modify either the - increase or the decrease parameter, or both. We have concluded that - HighSpeed TCP is most likely to achieve an acceptable compromise - between moderate increases and timely decreases by modifying both the - increase and the decrease parameter. - - That is, for HighSpeed TCP let the congestion window increase by a(w) - segments per round-trip time in the absence of congestion, and let - the congestion window decrease to w(1-b(w)) segments in response to a - round-trip time with one or more loss events. Thus, in response to a - single acknowledgement HighSpeed TCP increases its congestion window - in segments as follows: - - w <- w + a(w)/w. - - In response to a congestion event, HighSpeed TCP decreases as - follows: - - w <- (1-b(w))w. - - - -Floyd Experimental [Page 12] - -RFC 3649 HighSpeed TCP December 2003 - - - For Standard TCP, a(w) = 1 and b(w) = 1/2, regardless of the value of - w. HighSpeed TCP uses the same values of a(w) and b(w) for w <= - Low_Window. This section specifies a(w) and b(w) for HighSpeed TCP - for larger values of w. - - For w = High_Window, we have specified a loss rate of High_P. From - [FRS02], or from elementary calculations, this requires the following - relationship between a(w) and b(w) for w = High_Window: - - a(w) = High_Window^2 * High_P * 2 * b(w)/(2-b(w)). 
(2) - - We use the parameter High_Decrease to specify the decrease parameter - b(w) for w = High_Window, and use Equation (2) to derive the increase - parameter a(w) for w = High_Window. Along with High_P = 10^-7 and - High_Window = 83000, for example, we specify High_Decrease = 0.1, - specifying that b(83000) = 0.1, giving a decrease of 10% after a - congestion event. Equation (2) then gives a(83000) = 72, for an - increase of 72 segments, or just under 0.1%, within a round-trip - time, for w = 83000. - - This moderate decrease strikes us as acceptable, particularly when - coupled with the role of TCP's ACK-clocking in limiting the sending - rate in response to more severe congestion [BBFS01]. A more severe - decrease would require a more aggressive increase in the congestion - window for a round-trip time without congestion. In particular, a - decrease factor High_Decrease of 0.5, as in Standard TCP, would - require an increase of 459 segments per round-trip time when w = - 83000. - - Given decrease parameters of b(w) = 1/2 for w = Low_Window, and b(w) - = High_Decrease for w = High_Window, we are left to specify the value - of b(w) for other values of w > Low_Window. From [FRS02], we let - b(w) vary linearly as the log of w, as follows: - - b(w) = (High_Decrease - 0.5) (log(w)-log(W)) / (log(W_1)-log(W)) + - 0.5, - - for W = Low_window and W_1 = High_window. The increase parameter - a(w) can then be computed as follows: - - a(w) = w^2 * p(w) * 2 * b(w)/(2-b(w)), - - for p(w) the packet drop rate for congestion window w. From - inverting Equation (1), we get p(w) as follows: - - p(w) = 0.078/w^1.2. - - - - - -Floyd Experimental [Page 13] - -RFC 3649 HighSpeed TCP December 2003 - - - We assume that experimental implementations of HighSpeed TCP for - further investigation will use a pre-computed look-up table for - finding a(w) and b(w). For example, the implementation from Tom - Dunigan adjusts the a(w) and b(w) parameters every 0.1 seconds. 
In - the appendix we give such a table for our default values of - Low_Window = 38, High_Window = 83,000, High_P = 10^-7, and - High_Decrease = 0.1. These are also the default values in the NS - simulator; example simulations in NS can be run with the command - "./test-all-tcpHighspeed" in the directory tcl/test. - -8. An alternate, linear response function - - In this section we explore an alternate, linear response function for - HighSpeed TCP that has been proposed by a number of other people, in - particular by Glenn Vinnicombe and Tom Kelly. Similarly, it has been - suggested by others that a less "ad-hoc" guideline for a response - function for HighSpeed TCP would be to specify a constant value for - the number of round-trip times between congestion events. - - Assume that we keep the value of Low_Window as 38 MSS-sized segments, - indicating when the HighSpeed response function diverges from the - current TCP response function, but that we modify the High_Window and - High_P parameters that specify the upper range of the HighSpeed - response function. In particular, consider the response function - given by High_Window = 380,000 and High_P = 10^-7, with Low_Window = - 38 and Low_P = 10^-3 as before. - - Using the equations in Section 5, this would give the following - Linear response function, for w > Low_Window: - - W = 0.038/p. - - This Linear HighSpeed response function is illustrated in Table 7 - below. For HighSpeed TCP, the number of round-trip times between - losses, 1/(pW), equals 1/0.038, or equivalently, 26, for W > 38 - segments. 
- - - - - - - - - - - - - - - -Floyd Experimental [Page 14] - -RFC 3649 HighSpeed TCP December 2003 - - - Packet Drop Rate P Congestion Window W RTTs Between Losses - ------------------ ------------------- ------------------- - 10^-2 12 8 - 10^-3 38 26 - 10^-4 380 26 - 10^-5 3800 26 - 10^-6 38000 26 - 10^-7 380000 26 - 10^-8 3800000 26 - 10^-9 38000000 26 - 10^-10 380000000 26 - - Table 7: An Alternate, Linear TCP Response Function for HighSpeed - TCP. The average congestion window W in MSS-sized segments is given - as a function of the packet drop rate P. - - Given a constant decrease b(w) of 1/2, this would give an increase - a(w) of w/Low_Window, or equivalently, a constant increase of - 1/Low_Window packets per acknowledgement, for w > Low_Window. - Another possibility is Scalable TCP [K03], which uses a fixed - decrease b(w) of 1/8 and a fixed increase per acknowledgement of - 0.01. This gives an increase a(w) per window of 0.005 w, for a TCP - with delayed acknowledgements, for pure MIMD. - - The relative fairness between the alternate Linear response function - and the standard TCP response function is illustrated below in Table - 8. - - Packet Drop Rate P Fairness Aggregate Window Bandwidth - ------------------ -------- ---------------- --------- - 10^-2 1.0 24 2.8 Mbps - 10^-3 1.0 76 9.1 Mbps - 10^-4 3.2 500 60.0 Mbps - 10^-5 15.1 4179 501.4 Mbps - 10^-6 31.6 39200 4.7 Gbps - 10^-7 100.1 383795 46.0 Gbps - - Table 8: Relative Fairness between the Linear HighSpeed and Standard - Response Functions. - - One attraction of the linear response function is that it is scale- - invariant, with a fixed increase in the congestion window per - acknowledgement, and a fixed number of round-trip times between loss - events. 
My own assumption would be that having a fixed length for - the congestion epoch in round-trip times, regardless of the packet - drop rate, would be a poor fit for an imprecise and imperfect world - with routers with a range of queue management mechanisms, such as the - Drop-Tail queue management that is common today. For example, a - - - -Floyd Experimental [Page 15] - -RFC 3649 HighSpeed TCP December 2003 - - - response function with a fixed length for the congestion epoch in - round-trip times might give less clearly-differentiated feedback in - an environment with steady-state background losses at fixed intervals - for all flows (as might occur with a wireless link with occasional - short error bursts, giving losses for all flows every N seconds - regardless of their sending rate). - - While it is not a goal to have perfect fairness in an environment - with synchronized losses, it would be good to have moderately - acceptable performance in this regime. This goal might argue against - a response function with a constant number of round-trip times - between congestion events. However, this is a question that could - clearly use additional research and investigation. In addition, - flows with different round-trip times would have different time - durations for congestion epochs even in the model with a linear - response function. - - The third column of Table 8, the Aggregate Window, gives the - aggregate congestion window of two competing TCP connections, one - with Linear HighSpeed TCP and one with Standard TCP, given the packet - drop rate specified in the first column. From Table 8, a Linear - HighSpeed TCP connection would receive fifteen times the bandwidth of - a Standard TCP in an environment with a packet drop rate of 10^-5. - This would occur when the two flows sharing a single pipe achieved an - aggregate window of 4179 packets. 
Given a round-trip time of 100 ms - and a packet size of 1500 bytes, this would occur with an available - bandwidth for the two competing flows of 501 Mbps. Thus, because the - Linear HighSpeed TCP is more aggressive than the HighSpeed TCP - proposed above, it also is less fair when competing with Standard TCP - in a high-bandwidth environment. - -9. Tradeoffs for Choosing Congestion Control Parameters - - A range of metrics can be used for evaluating choices for congestion - control parameters for HighSpeed TCP. My assumption in this section - is that for a response function of the form w = c/p^d, for constant c - and exponent d, the only response functions that would be considered - are response functions with 1/2 <= d <= 1. The two ends of this - spectrum are represented by current TCP, with d = 1/2, and by the - linear response function described in Section 8 above, with d = 1. - HighSpeed TCP lies somewhere in the middle of the spectrum, with d = - 0.835. - - Response functions with exponents less than 1/2 can be eliminated - from consideration because they would be even worse than standard TCP - in accommodating connections with high congestion windows. - - - - - -Floyd Experimental [Page 16] - -RFC 3649 HighSpeed TCP December 2003 - - -9.1. The Number of Round-Trip Times between Loss Events - - Response functions with exponents greater than 1 can be eliminated - from consideration because for these response functions, the number - of round-trip times between loss events decreases as congestion - decreases. For a response function of w = c/p^d, with one loss event - or congestion event every 1/p packets, the number of round-trip times - between loss events is w^((1/d)-1)/c^(1/d). Thus, for standard TCP - the number of round-trip times between loss events is linear in w. 
- In contrast, one attraction of the linear response function, as - described in Section 8 above, is that it is scale-invariant, in terms - of a fixed increase in the congestion window per acknowledgement, and - a fixed number of round-trip times between loss events. - - However, for a response function with d > 1, the number of round- - trip times between loss events would be proportional to w^((1/d)-1), - for a negative exponent ((1/d)-1), getting smaller as w increases. - This would seem undesirable. - -9.2. The Number of Packet Drops per Loss Event, with Drop-Tail - - A TCP connection increases its sending rate by a(w) packets per - round-trip time, and in a Drop-Tail environment, this is likely to - result in a(w) dropped packets during a single loss event. One - attraction of standard TCP is that it has a fixed increase per - round-trip time of one packet, minimizing the number of packets that - would be dropped in a Drop-Tail environment. For an environment with - some form of Active Queue Management, and in particular for an - environment that uses ECN, the number of packets dropped in a single - congestion event would not be a problem. However, even in these - environments, larger increases in the sending rate per round-trip - time result in larger stresses on the ability of the queues in the - router to absorb the fluctuations. - - HighSpeed TCP plays a middle ground between the metrics of a moderate - number of round-trip times between loss events, and a moderate - increase in the sending rate per round-trip time. As shown in - Appendix B, for a congestion window of 83,000 packets, HighSpeed TCP - increases its sending rate by 70 packets per round-trip time, - resulting in at most 70 packet drops when the buffer overflows in a - Drop-Tail environment. This increased aggressiveness is the price - paid by HighSpeed TCP for its increased scalability. 
A large number - of packets dropped per congestion event could result in synchronized - drops from multiple flows, with a possible loss of throughput as a - result. - - - - - - -Floyd Experimental [Page 17] - -RFC 3649 HighSpeed TCP December 2003 - - - Scalable TCP has an increase a(w) of 0.005 w packets per round-trip - time. For a congestion window of 83,000 packets, this gives an - increase of 415 packets per round-trip time, resulting in roughly 415 - packet drops per congestion event in a Drop-Tail environment. - - Thus, HighSpeed TCP and its variants place increased demands on queue - management in routers, relative to Standard TCP. (This is rather - similar to the increased demands on queue management that would - result from using N parallel TCP connections instead of a single - Standard TCP connection.) - -10. Related Issues - -10.1. Slow-Start - - A companion internet-draft on "Limited Slow-Start for TCP with Large - Congestion Windows" [F02b] proposes a modification to TCP's slow- - start procedure that can significantly improve the performance of TCP - connections slow-starting up to large congestion windows. For TCP - connections that are able to use congestion windows of thousands (or - tens of thousands) of MSS-sized segments (for MSS the sender's - MAXIMUM SEGMENT SIZE), the current slow-start procedure can result in - increasing the congestion window by thousands of segments in a single - round-trip time. Such an increase can easily result in thousands of - packets being dropped in one round-trip time. This is often - counter-productive for the TCP flow itself, and is also hard on the - rest of the traffic sharing the congested link. - - [F02b] proposes Limited Slow-Start, limiting the number of segments - by which the congestion window is increased for one window of data - during slow-start, in order to improve performance for TCP - connections with large congestion windows. 
We have separated out - Limited Slow-Start to a separate draft because it can be used both - with Standard or with HighSpeed TCP. - - Limited Slow-Start is illustrated in the NS simulator, for snapshots - after May 1, 2002, in the tests "./test-all-tcpHighspeed tcp1A" and - "./test-all-tcpHighspeed tcpHighspeed1" in the subdirectory - "tcl/lib". - - In order for best-effort flows to safely start-up faster than slow- - start, e.g., in future high-bandwidth networks, we believe that it - would be necessary for the flow to have explicit feedback from the - routers along the path. There are a number of proposals for this, - ranging from a minimal proposal for an IP option that allows TCP SYN - packets to collect information from routers along the path about the - allowed initial sending rate [J02], to proposals with more power that - require more fine-tuned and continuous feedback from routers. These - - - -Floyd Experimental [Page 18] - -RFC 3649 HighSpeed TCP December 2003 - - - proposals are all somewhat longer-term proposals than the HighSpeed - TCP proposal in this document, requiring longer lead times and more - coordination for deployment, and will be discussed in later - documents. - -10.2. Limiting burstiness on short time scales - - Because the congestion window achieved by a HighSpeed TCP connection - could be quite large, there is a possibility for the sender to send a - large burst of packets in response to a single acknowledgement. This - could happen, for example, when there is congestion or reordering on - the reverse path, and the sender receives an acknowledgement - acknowledging hundreds or thousands of new packets. Such a burst - would also result if the application was idle for a short period of - time less than a round-trip time, and then suddenly had lots of data - available to send. In this case, it would be useful for the - HighSpeed TCP connection to have some method for limiting bursts. 
- - In this document, we do not specify TCP mechanisms for reducing the - short-term burstiness. One possible mechanism is to use some form of - rate-based pacing, and another possibility is to use maxburst, which - limits the number of packets that are sent in response to a single - acknowledgement. We would caution, however, against a permanent - reduction in the congestion window as a mechanism for limiting - short-term bursts. Such a mechanism has been deployed in some TCP - stacks, and our view would be that using permanent reductions of the - congestion window to reduce transient bursts would be a bad idea - [Fl03]. - -10.3. Other limitations on window size - - The TCP header uses a 16-bit field to report the receive window size - to the sender. Unmodified, this allows a window size of at most - 2**16 = 65K bytes. With window scaling, the maximum window size is - 2**30 = 1073M bytes [RFC 1323]. Given 1500-byte packets, this allows - a window of up to 715,000 packets. - -10.4. Implementation issues - - One implementation issue that has been raised with HighSpeed TCP is - that with congestion windows of 4MB or more, the handling of - successive SACK packets after a packet is dropped becomes very time- - consuming at the TCP sender [S03]. Tom Kelly's Scalable TCP includes - a "SACK Fast Path" patch that addresses this problem. - - The issues addressed in the Web100 project, the Net100 project, and - related projects about the tuning necessary to achieve high bandwidth - data rates with TCP apply to HighSpeed TCP as well [Net100, Web100]. - - - -Floyd Experimental [Page 19] - -RFC 3649 HighSpeed TCP December 2003 - - -11. Deployment issues - -11.1. Deployment issues of HighSpeed TCP - - We do not claim that the HighSpeed TCP modification to TCP described - in this paper is an optimal transport protocol for high-bandwidth - environments. 
Based on our experiences with HighSpeed TCP in the NS - simulator [NS], on simulation studies [SA03], and on experimental - reports [ABLLS03,D02,CC03,F03], we believe that HighSpeed TCP - improves the performance of TCP in high-bandwidth environments, and - we are documenting it for the benefit of the IETF community. We - encourage the use of HighSpeed TCP, and of its underlying response - function, and we further encourage feedback about operational - experiences with this or related modifications. - - We note that in environments typical of much of the current Internet, - HighSpeed TCP behaves exactly as does Standard TCP today. This is - the case any time the congestion window is less than 38 segments. - - Bandwidth Avg Cwnd w (pkts) Increase a(w) Decrease b(w) - --------- ----------------- ------------- ------------- - 1.5 Mbps 12.5 1 0.50 - 10 Mbps 83 1 0.50 - 100 Mbps 833 6 0.35 - 1 Gbps 8333 26 0.22 - 10 Gbps 83333 70 0.10 - - Table 9: Performance of a HighSpeed TCP connection - - To help calibrate, Table 9 considers a TCP connection with 1500-byte - packets, an RTT of 100 ms (including average queueing delay), and no - competing traffic, and shows the average congestion window if that - TCP connection had a pipe all to itself and fully used the link - bandwidth, for a range of bandwidths for the pipe. This assumes that - the TCP connection would use Table 12 in determining its increase and - decrease parameters. The first column of Table 9 gives the - bandwidth, and the second column gives the average congestion window - w needed to utilize that bandwidth. The third column shows the - increase a(w) in segments per RTT for window w. The fourth column - shows the decrease b(w) for that window w (where the TCP sender - decreases the congestion window from w to w(1-b(w)) segments after a - loss event). 
When a loss occurs we note that the actual congestion - window is likely to be greater than the average congestion window w - in column 2, so the decrease parameter used could be slightly smaller - than the one given in column 4 of Table 9. - - Table 9 shows that a HighSpeed TCP over a 10 Mbps link behaves - exactly the same as a Standard TCP connection, even in the absence of - - - -Floyd Experimental [Page 20] - -RFC 3649 HighSpeed TCP December 2003 - - - competing traffic. One can think of the congestion window staying - generally in the range of 55 to 110 segments, with the HighSpeed TCP - behavior being exactly the same as the behavior of Standard TCP. (If - the congestion window is ever 128 segments or more, then the - HighSpeed TCP increases by two segments per RTT instead of by one, - and uses a decrease parameter of 0.44 instead of 0.50.) - - Table 9 shows that for a HighSpeed TCP connection over a 100 Mbps - link, with no competing traffic, HighSpeed TCP behaves roughly as - aggressively as six parallel TCP connections, increasing its - congestion window by roughly six segments per round-trip time, and - with a decrease parameter of roughly 1/3 (corresponding to decreasing - down to 2/3-rds of its old congestion window, rather than to half, in - response to a loss event). - - For a Standard TCP connection in this environment, the congestion - window could be thought of as generally varying in the range of 550 - to 1100 segments, with an average packet drop rate of 2.2 * 10^-6 - (corresponding to a bit error rate of 1.8 * 10^-10), or equivalently, - roughly 55 seconds between congestion events. 
While a Standard TCP - connection could sustain such a low packet drop rate in a carefully - controlled environment with minimal competing traffic, we would - contend that in an uncontrolled best-effort environment with even a - small amount of competing traffic, the occasional congestion events - from smaller competing flows could easily be sufficient to prevent a - Standard TCP flow with no lower-speed bottlenecks from fully - utilizing the available bandwidth of the underutilized 100 Mbps link. - - That is, we would contend that in the environment of 100 Mbps links - with a significant amount of available bandwidth, Standard TCP would - sometimes be unable to fully utilize the link bandwidth, and that - HighSpeed TCP would be an improvement in this regard. We would - further contend that in this environment, the behavior of HighSpeed - TCP is sufficiently close to that of Standard TCP that HighSpeed TCP - would be safe to deploy in the current Internet. We note that - HighSpeed TCP can only use high congestion windows if allowed by the - receiver's advertised window size. As a result, even if HighSpeed - TCP was ubiquitously deployed in the Internet, the impact would be - limited to those TCP connections with an advertised window from the - receiver of 118 MSS or larger. - - We do not believe that the deployment of HighSpeed TCP would serve as - a block to the possible deployment of alternate experimental - protocols for high-speed congestion control, such as Scalable TCP, - XCP [KHR02], or FAST TCP [JWL03]. In particular, we don't expect - HighSpeed TCP to interact any more poorly with alternative - experimental proposals than would the N parallel TCP connections - commonly used today in the absence of HighSpeed TCP. - - - -Floyd Experimental [Page 21] - -RFC 3649 HighSpeed TCP December 2003 - - -11.2. 
Deployment issues of Scalable TCP - - We believe that Scalable TCP and HighSpeed TCP have sufficiently - similar response functions that they could easily coexist in the - Internet. However, we have not investigated Scalable TCP - sufficiently to be able to claim, in this document, that Scalable TCP - is safe for a widespread deployment in the current Internet. - - Bandwidth Avg Cwnd w (pkts) Increase a(w) Decrease b(w) - --------- ----------------- ------------- ------------- - 1.5 Mbps 12.5 1 0.50 - 10 Mbps 83 0.4 0.125 - 100 Mbps 833 4.1 0.125 - 1 Gbps 8333 41.6 0.125 - 10 Gbps 83333 416.5 0.125 - - Table 10: Performance of a Scalable TCP connection. - - Table 10 shows the performance of a Scalable TCP connection with - 1500-byte packets, an RTT of 100 ms (including average queueing - delay), and no competing traffic. The TCP connection is assumed to - use delayed acknowledgements. The first column of Table 10 gives the - bandwidth, the second column gives the average congestion window - needed to utilize that bandwidth, and the third and fourth columns - give the increase and decrease parameters. - - Note that even in an environment with a 10 Mbps link, Scalable TCP's - behavior is considerably different from that of Standard TCP. The - increase parameter is smaller than that of Standard TCP, and the - decrease is smaller also, 1/8-th instead of 1/2. That is, for 10 - Mbps links, Scalable TCP increases less aggressively than Standard - TCP or HighSpeed TCP, but decreases less aggressively as well. - - In an environment with a 100 Mbps link, Scalable TCP has an increase - parameter of roughly four segments per round-trip time, with the same - decrease parameter of 1/8-th. A comparison of Tables 9 and 10 shows - that for this scenario of 100 Mbps links, HighSpeed TCP increases - more aggressively than Scalable TCP. - - Next we consider the relative fairness between Standard TCP, - HighSpeed TCP and Scalable TCP. 
The relative fairness between - HighSpeed TCP and Standard TCP was shown in Table 5 earlier in this - document, and the relative fairness between Scalable TCP and Standard - TCP was shown in Table 8. Following the approach in Section 6, for a - given packet drop rate p, for p < 10^-3, we can estimate the relative - fairness between Scalable and HighSpeed TCP as - W_Scalable/W_HighSpeed. This relative fairness is shown in Table 11 - below. The bandwidth in the last column of Table 11 is the aggregate - - - -Floyd Experimental [Page 22] - -RFC 3649 HighSpeed TCP December 2003 - - - bandwidth of the two competing flows given 100 ms round-trip times - and 1500-byte packets. - - Packet Drop Rate P Fairness Aggregate Window Bandwidth - ------------------ -------- ---------------- --------- - 10^-2 1.0 24 2.8 Mbps - 10^-3 1.0 76 9.1 Mbps - 10^-4 1.4 643 77.1 Mbps - 10^-5 2.1 5595 671.4 Mbps - 10^-6 3.1 50279 6.0 Gbps - 10^-7 4.5 463981 55.7 Gbps - - Table 11: Relative Fairness between the Scalable and HighSpeed - Response Functions. - - The second row of Table 11 shows that for a Scalable TCP and a - HighSpeed TCP flow competing in an environment with 100 ms RTTs and a - 10 Mbps pipe, the two flows would receive essentially the same - bandwidth. The next row shows that for a Scalable TCP and a - HighSpeed TCP flow competing in an environment with 100 ms RTTs and a - 100 Mbps pipe, the Scalable TCP flow would receive roughly 50% more - bandwidth than would HighSpeed TCP. Table 11 shows the relative - fairness in higher-bandwidth environments as well. This relative - fairness seems sufficient that there should be no problems with - Scalable TCP and HighSpeed TCP coexisting in the same environment as - Experimental variants of TCP. - - We note that one question that requires more investigation with - Scalable TCP is that of convergence to fairness in environments with - Drop-Tail queue management. - -12. 
Related Work in HighSpeed TCP - - HighSpeed TCP has been separately investigated in simulations by - Sylvia Ratnasamy and by Evandro de Souza [SA03]. The simulations in - [SA03] verify the fairness properties of HighSpeed TCP when sharing a - link with Standard TCP. - - These simulations explore the relative fairness of HighSpeed TCP - flows when competing with Standard TCP. The simulation environment - includes background forward and reverse-path TCP traffic limited by - the TCP receive window, along with a small amount of forward and - reverse-path traffic from the web traffic generator. Most of the - simulations so far explore performance on a simple dumbbell topology - with a 1 Gbps link with a propagation delay of 50 ms. Simulations - have been run with Adaptive RED and with DropTail queue management. - - - - - -Floyd Experimental [Page 23] - -RFC 3649 HighSpeed TCP December 2003 - - - The simulations in [SA03] explore performance with a varying number - of competing flows, with the competing traffic being all standard - TCP; all HighSpeed TCP; or a mix of standard and HighSpeed TCP. For - the simulations in [SA03] with RED queue management, the relative - fairness between standard and HighSpeed TCP is consistent with the - relative fairness predicted in Table 5. For the simulations with - Drop Tail queues, the relative fairness is more skewed, with the - HighSpeed TCP flows receiving an even larger share of the link - bandwidth. This is not surprising; with Active Queue Management at - the congested link, the fraction of packet drops received by each - flow should be roughly proportional to that flow's share of the link - bandwidth, while this property no longer holds with Drop Tail queue - management. We also note that relative fairness in simulations with - Drop Tail queue management can sometimes depend on small details of - the simulation scenario, and that Drop Tail simulations need special - care to avoid phase effects [F92]. 
- - [SA03] explores the bandwidth `stolen' by HighSpeed TCP from standard - TCP by exploring the fraction of the link bandwidth N standard TCP - flows receive when competing against N other standard TCP flows, and - comparing this to the fraction of the link bandwidth the N standard - TCP flows receive when competing against N HighSpeed TCP flows. For - the 1 Gbps simulation scenarios dominated by long-lived traffic, a - small number of standard TCP flows are able to achieve high link - utilization, and the HighSpeed TCP flows can be viewed as stealing - bandwidth from the competing standard TCP flows, as predicted in - Section 6 on the Fairness Implications of the HighSpeed Response - Function. However, [SA03] shows that when even a small fraction of - the link bandwidth is used by more bursty, short TCP connections, the - standard TCP flows are unable to achieve high link utilization, and - the HighSpeed TCP flows in this case are not `stealing' bandwidth - from the standard TCP flows, but instead are using bandwidth that - otherwise would not be utilized. - - The conclusions of [SA03] are that "HighSpeed TCP behaved as forseen - by its response function, and appears to be a real and viable option - for use on high-speed wide area TCP connections." - - Future work that could be explored in more detail includes - convergence times after new flows start-up; recovery time after a - transient outage; the response to sudden severe congestion, and - investigations of the potential for oscillations. We invite - contributions from others in this work. - - - - - - - - -Floyd Experimental [Page 24] - -RFC 3649 HighSpeed TCP December 2003 - - -13. Relationship to other Work - - Our assumption is that HighSpeed TCP will be used with the TCP SACK - option, and also with the increased Initial Window of three or four - segments, as allowed by [RFC3390]. 
For paths that have substantial - reordering, TCP performance would be greatly improved by some of the - mechanisms still in the research stages for robust performance in the - presence of reordered packets. - - Our view is that HighSpeed TCP is largely orthogonal to proposals for - higher PMTU (Path MTU) values [M02]. Unlike changes to the PMTU, - HighSpeed TCP does not require any changes in the network or at the - TCP receiver, and works well in the current Internet. Our assumption - is that HighSpeed TCP would be useful even with larger values for the - PMTU. Unlike the current congestion window, the PMTU gives no - information about the bandwidth-delay product available to that - particular flow. - - A related approach is that of a virtual MTU, where the actual MTU of - the path might be limited [VMSS,S02]. The virtual MTU approach has - not been fully investigated, and we do not explore the virtual MTU - approach further in this document. - -14. Conclusions - - This document has proposed HighSpeed TCP, a modification to TCP's - congestion control mechanism for use with TCP connections with large - congestion windows. We have explored this proposal in simulations, - and others have explored HighSpeed TCP with experiments, and we - believe HighSpeed TCP to be safe to deploy on the current Internet. - We would welcome additional analysis, simulations, and particularly, - experimentation. More information on simulations and experiments is - available from the HighSpeed TCP Web Page [HSTCP]. There are several - independent implementations of HighSpeed TCP [D02,F03] and of - Scalable TCP [K03] for further investigation. - -15. Acknowledgements - - The HighSpeed TCP proposal is from joint work with Sylvia Ratnasamy - and Scott Shenker (and was initiated by Scott Shenker). Additional - investigations of HighSpeed TCP were joint work with Evandro de Souza - and Deb Agarwal. 
We thank Tom Dunigan for the implementation in the - Linux 2.4.16 Web100 kernel, and for resulting experimentation with - HighSpeed TCP. We are grateful to the End-to-End Research Group, the - members of the Transport Area Working Group, and to members of the - IPAM program in Large Scale Communication Networks for feedback. We - thank Glenn Vinnicombe for framing the Linear response function in - the parameters of HighSpeed TCP. We are also grateful for - - - -Floyd Experimental [Page 25] - -RFC 3649 HighSpeed TCP December 2003 - - - contributions and feedback from the following individuals: Les - Cottrell, Mitchell Erblich, Jeffrey Hsu, Tom Kelly, Chuck Jackson, - Matt Mathis, Jitendra Padhye, Andrew Reiter, Stanislav Shalunov, Alex - Solan, Paul Sutter, Brian Tierney, Joe Touch. - -16. Normative References - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - -17. Informative References - - [ABLLS03] A. Antony, J. Blom, C. de Laat, J. Lee, and W. Sjouw, - "Microscopic Examination of TCP Flows over Transatlantic - Links", iGrid2002 special issue, Future Generation - Computer Systems, volume 19 issue 6 (2003), URL - "http://www.science.uva.nl/~delaat/techrep-2003-2- - tcp.pdf". - - [BBFS01] Deepak Bansal, Hari Balakrishnan, Sally Floyd, and Scott - Shenker, "Dynamic Behavior of Slowly-Responsive Congestion - Control Algorithms", SIGCOMM 2001, August 2001. - - [CC03] Fabrizio Coccetti and Les Cottrell, "TCP Stack - Measurements on Lightly Loaded Testbeds", 2003. URL - "http://www-iepm.slac.stanford.edu/monitoring/bulk/fast/". - - [CJ89] D. Chiu and R. Jain, "Analysis of the Increase and - Decrease Algorithms for Congestion Avoidance in Computer - Networks", Computer Networks and ISDN Systems, Vol. 17, - pp. 1-14, 1989. - - [CO98] J. Crowcroft and P. Oechslin, "Differentiated End-to-end - Services using a Weighted Proportional Fair Share TCP", - Computer Communication Review, 28(3):53--69, 1998. 
- - [D02] Tom Dunigan, "Floyd's TCP slow-start and AIMD mods", URL - "http://www.csm.ornl.gov/~dunigan/net100/floyd.html". - - [F03] Gareth Fairey, "High-Speed TCP", 2003. URL - "http://www.hep.man.ac.uk/u/garethf/hstcp/". - - [F92] S. Floyd and V. Jacobson, "On Traffic Phase Effects in - Packet-Switched Gateways, Internetworking: Research and - Experience", V.3 N.3, September 1992, p.115-156. URL - "http://www.icir.org/floyd/papers.html". - - - - - -Floyd Experimental [Page 26] - -RFC 3649 HighSpeed TCP December 2003 - - - [Fl03] Sally Floyd, "Re: [Tsvwg] taking NewReno (RFC 2582) to - Proposed Standard", Email to the tsvwg mailing list, May - 14, 2003. - - URLs "http://www1.ietf.org/mail-archive/working- - groups/tsvwg/current/msg04086.html" and - "http://www1.ietf.org/mail-archive/working- - groups/tsvwg/current/msg04087.html". - - [FF98] Floyd, S., and Fall, K., "Promoting the Use of End-to-End - Congestion Control in the Internet", IEEE/ACM Transactions - on Networking, August 1999. - - [FRS02] Sally Floyd, Sylvia Ratnasamy, and Scott Shenker, - "Modifying TCP's Congestion Control for High Speeds", May - 2002. URL "http://www.icir.org/floyd/notes.html". - - [GRK99] Panos Gevros, Fulvio Risso and Peter Kirstein, "Analysis - of a Method for Differential TCP Service". In Proceedings - of the IEEE GLOBECOM'99, Symposium on Global Internet , - December 1999, Rio de Janeiro, Brazil. - - [GV02] S. Gorinsky and H. Vin, "Extended Analysis of Binary - Adjustment Algorithms", Technical Report TR2002-39, - Department of Computer Sciences, The University of Texas - at Austin, August 2002. URL - "http://www.cs.utexas.edu/users/gorinsky/pubs.html". - - [HSTCP] HighSpeed TCP Web Page, URL - "http://www.icir.org/floyd/hstcp.html". - - [J02] Amit Jain and Sally Floyd, "Quick-Start for TCP and IP", - Work in Progress, 2002. - - [JWL03] Cheng Jin, David X. Wei and Steven H. Low, "FAST TCP for - High-speed Long-distance Networks", Work in Progress, June - 2003. 
- - [K03] Tom Kelly, "Scalable TCP: Improving Performance in - HighSpeed Wide Area Networks", February 2003. URL - "http://www-lce.eng.cam.ac.uk/~ctk21/scalable/". - - [KHR02] Dina Katabi, Mark Handley, and Charlie Rohrs, "Congestion - Control for High Bandwidth-Delay Product Networks", - SIGCOMM 2002. - - [M02] Matt Mathis, "Raising the Internet MTU", Web Page, URL - "http://www.psc.edu/~mathis/MTU/". - - - -Floyd Experimental [Page 27] - -RFC 3649 HighSpeed TCP December 2003 - - - [Net100] The DOE/MICS Net100 project. URL - "http://www.csm.ornl.gov/~dunigan/net100/". - - [NS] The NS Simulator, "http://www.isi.edu/nsnam/ns/". - - [RFC 1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [RFC3390] Allman, M., Floyd, S. and C., Partridge, "Increasing TCP's - Initial Window", RFC 3390, October 2002. - - [RFC3448] Handley, M., Padhye, J., Floyd, S. and J. Widmer, "TCP - Friendly Rate Control (TFRC): Protocol Specification", RFC - 3448, January 2003. - - [SA03] Souza, E. and D.A., Agarwal, "A HighSpeed TCP Study: - Characteristics and Deployment Issues", LBNL Technical - Report LBNL-53215. URL - "http://www.icir.org/floyd/hstcp.html". - - [S02] Stanislav Shalunov, "TCP Armonk", Work in Progress, 2002, - URL "http://www.internet2.edu/~shalunov/tcpar/". - - [S03] Alex Solan, private communication, 2003. - - [VMSS] "Web100 at ORNL", Web Page, - "http://www.csm.ornl.gov/~dunigan/netperf/web100.html". - - [Web100] The Web100 project. URL "http://www.web100.org/". - -18. Security Considerations - - This proposal makes no changes to the underlying security of TCP. - -19. IANA Considerations - - There are no IANA considerations regarding this document. - - - - - - - - - - - - - - -Floyd Experimental [Page 28] - -RFC 3649 HighSpeed TCP December 2003 - - -A. 
TCP's Loss Event Rate in Steady-State - - This section gives the number of round-trip times between congestion - events for a TCP flow with D-byte packets, for D=1500, as a function - of the connection's average throughput B in bps. To achieve this - average throughput B, a TCP connection with round-trip time R in - seconds requires an average congestion window w of BR/(8D) segments. - - In steady-state, TCP's average congestion window w is roughly - 1.2/sqrt(p) segments. This is equivalent to a loss event at most - once every 1/p packets, or at most once every 1/(pw) = w/1.5 round- - trip times. Substituting for w, this is a loss event at most every - (BR)/(12D) round-trip times. - - As an example, for R = 0.1 seconds and D = 1500 bytes, this gives - B/180000 round-trip times between loss events. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Floyd Experimental [Page 29] - -RFC 3649 HighSpeed TCP December 2003 - - -B. A table for a(w) and b(w). - - This section gives a table for the increase and decrease parameters - a(w) and b(w) for HighSpeed TCP, for the default values of Low_Window - = 38, High_Window = 83000, High_P = 10^-7, and High_Decrease = 0.1. 
- - w a(w) b(w) - ---- ---- ---- - 38 1 0.50 - 118 2 0.44 - 221 3 0.41 - 347 4 0.38 - 495 5 0.37 - 663 6 0.35 - 851 7 0.34 - 1058 8 0.33 - 1284 9 0.32 - 1529 10 0.31 - 1793 11 0.30 - 2076 12 0.29 - 2378 13 0.28 - 2699 14 0.28 - 3039 15 0.27 - 3399 16 0.27 - 3778 17 0.26 - 4177 18 0.26 - 4596 19 0.25 - 5036 20 0.25 - 5497 21 0.24 - 5979 22 0.24 - 6483 23 0.23 - 7009 24 0.23 - 7558 25 0.22 - 8130 26 0.22 - 8726 27 0.22 - 9346 28 0.21 - 9991 29 0.21 - 10661 30 0.21 - 11358 31 0.20 - 12082 32 0.20 - 12834 33 0.20 - 13614 34 0.19 - 14424 35 0.19 - 15265 36 0.19 - 16137 37 0.19 - 17042 38 0.18 - 17981 39 0.18 - 18955 40 0.18 - - - -Floyd Experimental [Page 30] - -RFC 3649 HighSpeed TCP December 2003 - - - 19965 41 0.17 - 21013 42 0.17 - 22101 43 0.17 - 23230 44 0.17 - 24402 45 0.16 - 25618 46 0.16 - 26881 47 0.16 - 28193 48 0.16 - 29557 49 0.15 - 30975 50 0.15 - 32450 51 0.15 - 33986 52 0.15 - 35586 53 0.14 - 37253 54 0.14 - 38992 55 0.14 - 40808 56 0.14 - 42707 57 0.13 - 44694 58 0.13 - 46776 59 0.13 - 48961 60 0.13 - 51258 61 0.13 - 53677 62 0.12 - 56230 63 0.12 - 58932 64 0.12 - 61799 65 0.12 - 64851 66 0.11 - 68113 67 0.11 - 71617 68 0.11 - 75401 69 0.10 - 79517 70 0.10 - 84035 71 0.10 - 89053 72 0.10 - 94717 73 0.09 - - Table 12: Parameters for HighSpeed TCP. - - - - - - - - - - - - - - - - -Floyd Experimental [Page 31] - -RFC 3649 HighSpeed TCP December 2003 - - - This table was computed with the following Perl program: - - $top = 100000; - $num = 38; - if ($num == 38) { - print " w a(w) b(w)\n"; - print " ---- ---- ----\n"; - print " 38 1 0.50\n"; - $oldb = 0.50; - $olda = 1; - } - while ($num < $top) { - $bw = (0.1 -0.5)*(log($num)-log(38))/(log(83000)-log(38))+0.5; - $aw = ($num**2*2.0*$bw) / ((2.0-$bw)*$num**1.2*12.8); - if ($aw > $olda + 1) { - printf "%6d %5d %3.2f0, $num, $aw, $bw; - $olda = $aw; - } - $num ++; - } - - Table 13: Perl Program for computing parameters for HighSpeed TCP. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Floyd Experimental [Page 32] - -RFC 3649 HighSpeed TCP December 2003 - - -C. Exploring the time to converge to fairness. - - This section gives the Perl program used to compute the congestion - window growth during congestion avoidance. - - $top = 2001; - $hswin = 1; - $regwin = 1; - $rtt = 1; - $lastrtt = 0; - $rttstep = 100; - if ($hswin == 1) { - print " RTT HS_Window Standard_TCP_Window0; - print " --- --------- -------------------0; - } - while ($rtt < $top) { - $bw = (0.1 -0.5)*(log($hswin)-log(38))/(log(83000)-log(38))+0.5; - $aw = ($hswin**2*2.0*$bw) / ((2.0-$bw)*$hswin**1.2*12.8); - if ($aw < 1) { - $aw = 1; - } - if ($rtt >= $lastrtt + $rttstep) { - printf "%5d %9d %10d0, $rtt, $hswin, $regwin; - $lastrtt = $rtt; - } - $hswin += $aw; - $regwin += 1; - $rtt ++; - } - - Table 14: Perl Program for computing the window in congestion - avoidance. - -Author's Address - - Sally Floyd - ICIR (ICSI Center for Internet Research) - - Phone: +1 (510) 666-2989 - EMail: floyd@acm.org - URL: http://www.icir.org/floyd/ - - - - - - - - - - -Floyd Experimental [Page 33] - -RFC 3649 HighSpeed TCP December 2003 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2003). All Rights Reserved. - - This document and translations of it may be copied and furnished to - others, and derivative works that comment on or otherwise explain it - or assist in its implementation may be prepared, copied, published - and distributed, in whole or in part, without restriction of any - kind, provided that the above copyright notice and this paragraph are - included on all such copies and derivative works. 
However, this - document itself may not be modified in any way, such as by removing - the copyright notice or references to the Internet Society or other - Internet organizations, except as needed for the purpose of - developing Internet standards in which case the procedures for - copyrights defined in the Internet Standards process must be - followed, or as required to translate it into languages other than - English. - - The limited permissions granted above are perpetual and will not be - revoked by the Internet Society or its successors or assignees. - - This document and the information contained herein is provided on an - "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING - TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING - BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION - HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF - MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - - - - - - - - - - - -Floyd Experimental [Page 34] - diff --git a/kernel/picotcp/RFC/rfc3708.txt b/kernel/picotcp/RFC/rfc3708.txt deleted file mode 100644 index 6bd7489..0000000 --- a/kernel/picotcp/RFC/rfc3708.txt +++ /dev/null @@ -1,507 +0,0 @@ - - - - - - -Network Working Group E. Blanton -Request for Comments: 3708 Purdue University -Category: Experimental M. Allman - ICIR - February 2004 - - - Using TCP Duplicate Selective Acknowledgement (DSACKs) and - Stream Control Transmission Protocol (SCTP) Duplicate - Transmission Sequence Numbers (TSNs) to Detect Spurious - Retransmissions - -Status of this Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. 
- -Copyright Notice - - Copyright (C) The Internet Society (2004). All Rights Reserved. - -Abstract - - TCP and Stream Control Transmission Protocol (SCTP) provide - notification of duplicate segment receipt through Duplicate Selective - Acknowledgement (DSACKs) and Duplicate Transmission Sequence Number - (TSN) notification, respectively. This document presents - conservative methods of using this information to identify - unnecessary retransmissions for various applications. - -1. Introduction - - TCP [RFC793] and SCTP [RFC2960] provide notification of duplicate - segment receipt through duplicate selective acknowledgment (DSACK) - [RFC2883] and Duplicate TSN notifications, respectively. Using this - information, a TCP or SCTP sender can generally determine when a - retransmission was sent in error. This document presents two methods - for using duplicate notifications. The first method is simple and - can be used for accounting applications. The second method is a - conservative algorithm to disambiguate unnecessary retransmissions - from loss events for the purpose of undoing unnecessary congestion - control changes. - - - - - - - -Blanton & Allman Experimental [Page 1] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - - This document is intended to outline reasonable and safe algorithms - for detecting spurious retransmissions and discuss some of the - considerations involved. It is not intended to describe the only - possible method for achieving the goal, although the guidelines in - this document should be taken into consideration when designing - alternate algorithms. Additionally, this document does not outline - what a TCP or SCTP sender may do after a spurious retransmission is - detected. A number of proposals have been developed (e.g., - [RFC3522], [SK03], [BDA03]), but it is not yet clear which of these - proposals are appropriate. 
In addition, they all rely on detecting - spurious retransmits and so can share the algorithm specified in this - document. - - Finally, we note that to simplify the text much of the following - discussion is in terms of TCP DSACKs, while applying to both TCP and - SCTP. - - Terminology - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in RFC 2119 [RFC2119]. - -2. Counting Duplicate Notifications - - For certain applications a straight count of duplicate notifications - will suffice. For instance, if a stack simply wants to know (for - some reason) the number of spuriously retransmitted segments, - counting all duplicate notifications for retransmitted segments - should work well. Another application of this strategy is to monitor - and adapt transport algorithms so that the transport is not sending - large amounts of spurious data into the network. For instance, - monitoring duplicate notifications could be used by the Early - Retransmit [AAAB03] algorithm to determine whether fast - retransmitting [RFC2581] segments with a lower than normal duplicate - ACK threshold is working, or if segment reordering is causing - spurious retransmits. - - More speculatively, duplicate notification has been proposed as an - integral part of estimating TCP's total loss rate [AEO03] for the - purposes of mitigating the impact of corruption-based losses on - transport protocol performance. [EOA03] proposes altering the - transport's congestion response to the fraction of losses that are - actually due to congestion by requiring the network to provide the - corruption-based loss rate and making the transport sender estimate - the total loss rate. Duplicate notifications are a key part of - estimating the total loss rate accurately [AEO03]. 
- - - - -Blanton & Allman Experimental [Page 2] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - -3. Congestion/Duplicate Disambiguation Algorithm - - When the purpose of detecting spurious retransmissions is to "undo" - unnecessary changes made to the congestion control state, as - suggested in [RFC2883], the data sender ideally needs to determine: - - (a) That spurious retransmissions in a particular window of data do - not mask real segment loss (congestion). - - For example, assume segments N and N+1 are retransmitted even - though only segment N was dropped by the network (thus, segment - N+1 was needlessly retransmitted). When the sender receives the - notification that segment N+1 arrived more than once it can - conclude that segment N+1 was needlessly resent. However, it - cannot conclude that it is appropriate to revert the congestion - control state because the window of data contained at least one - valid congestion indication (i.e., segment N was lost). - - (b) That network duplication is not the cause of the duplicate - notification. - - Determining whether a duplicate notification is caused by network - duplication of a packet or a spurious retransmit is a nearly - impossible task in theory. Since [Pax97] shows that packet - duplication by the network is rare, the algorithm in this section - simply ceases to function when network duplication is detected - (by receiving a duplication notification for a segment that was - not retransmitted by the sender). - - The algorithm specified below gives reasonable, but not complete, - protection against both of these cases. - - We assume the TCP sender has a data structure to hold selective - acknowledgment information (e.g., as outlined in [RFC3517]). The - following steps require an extension of such a 'scoreboard' to - incorporate a slightly longer history of retransmissions than called - for in [RFC3517]. 
The following steps MUST be taken upon the receipt - of each DSACK or duplicate TSN notification: - - (A) Check the corresponding sequence range or TSN to determine - whether the segment has been retransmitted. - - (A.1) If the SACK scoreboard is empty (i.e., the TCP sender has - received no SACK information from the receiver) and the - left edge of the incoming DSACK is equal to SND.UNA, - processing of this DSACK MUST be terminated and the - congestion control state MUST NOT be reverted during the - current window of data. This clause intends to cover the - - - -Blanton & Allman Experimental [Page 3] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - - case when an entire window of acknowledgments have been - dropped by the network. In such a case, the reverse path - seems to be in a congested state and so reducing TCP's - sending rate is the conservative approach. - - (A.2) If the segment was retransmitted exactly one time, mark it - as a duplicate. - - (A.3) If the segment was retransmitted more than once processing - of this DSACK MUST be terminated and the congestion control - state MUST NOT be reverted to its previous state during the - current window of data. - - (A.4) If the segment was not retransmitted the incoming DSACK - indicates that the network duplicated the segment in - question. Processing of this DSACK MUST be terminated. In - addition, the algorithm specified in this document MUST NOT - be used for the remainder of the connection, as future - DSACK reports may be indicating network duplication rather - than unnecessary retransmission. Note that some techniques - to further disambiguate network duplication from - unnecessary retransmission (e.g., the TCP timestamp option - [RFC1323]) may be used to refine the algorithm in this - document further. Using such a technique in conjunction - with an algorithm similar to the one presented herein may - allow for the continued use of the algorithm in the face of - duplicated segments. 
We do not delve into such an - algorithm in this document due to the current rarity of - network duplication. However, future work should include - tackling this problem. - - (B) Assuming processing is allowed to continue (per the (A) rules), - check all retransmitted segments in the previous window of data. - - (B.1) If all segments or chunks marked as retransmitted have also - been marked as acknowledged and duplicated, we conclude - that all retransmissions in the previous window of data - were spurious and no loss occurred. - - (B.2) If any segment or chunk is still marked as retransmitted - but not marked as duplicate, there are outstanding - retransmissions that could indicate loss within this window - of data. We can make no conclusions based on this - particular DSACK/duplicate TSN notification. - - In addition to keeping the state mentioned in [RFC3517] (for TCP) and - [RFC2960] (for SCTP), an implementation of this algorithm must track - - - - -Blanton & Allman Experimental [Page 4] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - - all sequence numbers or TSNs that have been acknowledged as - duplicates. - -4. Related Work - - In addition to the mechanism for detecting spurious retransmits - outlined in this document, several other proposals for finding - needless retransmits have been developed. - - [BA02] uses the algorithm outlined in this document as the basis for - investigating several methods to make TCP more robust to reordered - packets. - - The Eifel detection algorithm [RFC3522] uses the TCP timestamp option - [RFC1323] to determine whether the ACK for a given retransmit is for - the original transmission or a retransmission. More generally, - [LK00] outlines the benefits of detecting spurious retransmits and - reverting from needless congestion control changes using the - timestamp-based scheme or a mechanism that uses a "retransmit bit" to - flag retransmits (and ACKs of retransmits). 
The Eifel detection - algorithm can detect spurious retransmits more rapidly than a DSACK- - based scheme. However, the tradeoff is that the overhead of the 12- - byte timestamp option must be incurred in every packet transmitted - for Eifel to function. - - The F-RTO scheme [SK03] slightly alters TCP's sending pattern - immediately following a retransmission timeout and then observes the - pattern of the returning ACKs. This pattern can indicate whether the - retransmitted segment was needed. The advantage of F-RTO is that the - algorithm only needs to be implemented on the sender side of the TCP - connection and that nothing extra needs to cross the network (e.g., - DSACKs, timestamps, special flags, etc.). The downside is that the - algorithm is a heuristic that can be confused by network pathologies - (e.g., duplication or reordering of key packets). Finally, note that - F-RTO only works for spurious retransmits triggered by the - transport's retransmission timer. - - Finally, [AP99] briefly investigates using the time between - retransmitting a segment via the retransmission timeout and the - arrival of the next ACK as an indicator of whether the retransmit was - needed. The scheme compares this time delta with a fraction (f) of - the minimum RTT observed thus far on the connection. If the time - delta is less than f*minRTT then the retransmit is labeled spurious. - When f=1/2 the algorithm identifies roughly 59% of the needless - retransmission timeouts and identifies needed retransmits only 2.5% - of the time. As with F-RTO, this scheme only detects spurious - retransmits sent by the transport's retransmission timer. - - - - -Blanton & Allman Experimental [Page 5] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - -5. 
Security Considerations - - It is possible for the receiver to falsely indicate spurious - retransmissions in the case of actual loss, potentially causing a TCP - or SCTP sender to inaccurately conclude that no loss took place (and - possibly cause inappropriate changes to the sender's congestion - control state). - - Consider the following scenario: A receiver watches every segment or - chunk that arrives and acknowledges any segment that arrives out of - order by more than some threshold amount as a duplicate, assuming - that it is a retransmission. A sender using the above algorithm will - assume that the retransmission was spurious. - - The ECN nonce sum proposal [RFC3540] could possibly help mitigate the - ability of the receiver to hide real losses from the sender with - modest extension. In the common case of receiving an original - transmission and a spurious retransmit a receiver will have received - the nonce from the original transmission and therefore can "prove" to - the sender that the duplication notification is valid. In the case - when the receiver did not receive the original and is trying to - improperly induce the sender into transmitting at an inappropriately - high rate, the receiver will not know the ECN nonce from the original - segment and therefore will probabilistically not be able to fool the - sender for long. [RFC3540] calls for disabling nonce sums on - duplicate ACKs, which means that the nonce sum is not directly - suitable for use as a mitigation to the problem of receivers lying - about DSACK information. However, future efforts may be able to use - [RFC3540] as a starting point for building protection should it be - needed. - -6. Acknowledgments - - Sourabh Ladha and Reiner Ludwig made several useful comments on an - earlier version of this document. The second author thanks BBN - Technologies and NASA's Glenn Research Center for supporting this - work. - -7. References - -7.1. 
Normative References - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - - - -Blanton & Allman Experimental [Page 6] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - - [RFC2883] Floyd, S., Mahdavi, J., Mathis, M. and M. Podolsky, "An - Extension to the Selective Acknowledgement (SACK) Option - for TCP", RFC 2883, July 2000. - - [RFC2960] Stewart, R., Xie, Q., Morneault, K., Sharp, C., - Schwarzbauer, H., Taylor, T., Rytina, I., Kalla, M., Zhang, - L. and V. Paxson, "Stream Control Transmission Protocol", - RFC 2960, October 2000. - -7.2. Informative References - - [AAAB03] Allman, M., Avrachenkov, K., Ayesta, U. and J. Blanton, - "Early Retransmit for TCP", Work in Progress, June 2003. - - [AEO03] Allman, M., Eddy, E. and S. Ostermann, "Estimating Loss - Rates With TCP", Work in Progress, August 2003. - - [AP99] Allman, M. and V. Paxson, "On Estimating End-to-End Network - Path Properties", SIGCOMM 99. - - [BA02] Blanton, E. and M. Allman. On Making TCP More Robust to - Packet Reordering. ACM Computer Communication Review, - 32(1), January 2002. - - [BDA03] Blanton, E., Dimond, R. and M. Allman, "Practices for TCP - Senders in the Face of Segment Reordering", Work in - Progress, February 2003. - - [EOA03] Eddy, W., Ostermann, S. and M. Allman, "New Techniques for - Making Transport Protocols Robust to Corruption-Based - Loss", Work in Progress, July 2003. - - [LK00] R. Ludwig, R. H. Katz. The Eifel Algorithm: Making TCP - Robust Against Spurious Retransmissions. ACM Computer - Communication Review, 30(1), January 2000. - - [Pax97] V. Paxson. End-to-End Internet Packet Dynamics. In ACM - SIGCOMM, September 1997. - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [RFC3517] Blanton, E., Allman, M., Fall, K. and L. 
Wang, "A - Conservative Selective Acknowledgment (SACK)-based Loss - Recovery Algorithm for TCP", RFC 3517, April 2003. - - [RFC3522] Ludwig, R. and M. Meyer, "The Eifel Detection Algorithm for - TCP," RFC 3522, April 2003. - - - -Blanton & Allman Experimental [Page 7] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - - [RFC3540] Spring, N., Wetherall, D. and D. Ely, "Robust Explicit - Congestion Notification (ECN) Signaling with Nonces", RFC - 3540, June 2003. - - [SK03] Sarolahti, P. and M. Kojo, "F-RTO: An Algorithm for - Detecting Spurious Retransmission Timeouts with TCP and - SCTP", Work in Progress, June 2003. - -8. Authors' Addresses - - Ethan Blanton - Purdue University Computer Sciences - 1398 Computer Science Building - West Lafayette, IN 47907 - - EMail: eblanton@cs.purdue.edu - - - Mark Allman - ICSI Center for Internet Research - 1947 Center Street, Suite 600 - Berkeley, CA 94704-1198 - Phone: 216-243-7361 - - EMail: mallman@icir.org - http://www.icir.org/mallman/ - - - - - - - - - - - - - - - - - - - - - - - - - -Blanton & Allman Experimental [Page 8] - -RFC 3708 TCP DSACKs and SCTP Duplicate TSNs February 2004 - - -9. Full Copyright Statement - - Copyright (C) The Internet Society (2004). This document is subject - to the rights, licenses and restrictions contained in BCP 78 and - except as set forth therein, the authors retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE - REPRESENTS OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE - INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF - THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
- -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed - to pertain to the implementation or use of the technology - described in this document or the extent to which any license - under such rights might or might not be available; nor does it - represent that it has made any independent effort to identify any - such rights. Information on the procedures with respect to - rights in RFC documents can be found in BCP 78 and BCP 79. - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use - of such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository - at http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention - any copyrights, patents or patent applications, or other - proprietary rights that may cover technology that may be required - to implement this standard. Please address the information to the - IETF at ietf-ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - -Blanton & Allman Experimental [Page 9] - diff --git a/kernel/picotcp/RFC/rfc3742.txt b/kernel/picotcp/RFC/rfc3742.txt deleted file mode 100644 index 95018d9..0000000 --- a/kernel/picotcp/RFC/rfc3742.txt +++ /dev/null @@ -1,395 +0,0 @@ - - - - - - -Network Working Group S. Floyd -Request for Comments: 3742 ICSI -Category: Experimental March 2004 - - - Limited Slow-Start for TCP with Large Congestion Windows - -Status of this Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. 
- -Copyright Notice - - Copyright (C) The Internet Society (2004). All Rights Reserved. - -Abstract - - This document describes an optional modification for TCP's slow-start - for use with TCP connections with large congestion windows. For TCP - connections that are able to use congestion windows of thousands (or - tens of thousands) of MSS-sized segments (for MSS the sender's - MAXIMUM SEGMENT SIZE), the current slow-start procedure can result in - increasing the congestion window by thousands of segments in a single - round-trip time. Such an increase can easily result in thousands of - packets being dropped in one round-trip time. This is often - counter-productive for the TCP flow itself, and is also hard on the - rest of the traffic sharing the congested link. This note describes - Limited Slow-Start as an optional mechanism for limiting the number - of segments by which the congestion window is increased for one - window of data during slow-start, in order to improve performance for - TCP connections with large congestion windows. - -1. Introduction - - This note describes an optional modification for TCP's slow-start for - use with TCP connections with large congestion windows. For TCP - connections that are able to use congestion windows of thousands (or - tens of thousands) of MSS-sized segments (for MSS the sender's - MAXIMUM SEGMENT SIZE), the current slow-start procedure can result in - increasing the congestion window by thousands of segments in a single - round-trip time. Such an increase can easily result in thousands of - packets being dropped in one round-trip time. This is often - counter-productive for the TCP flow itself, and is also hard on the - rest of the traffic sharing the congested link. 
This note describes - Limited Slow-Start, limiting the number of segments by which the - - - -Floyd Experimental [Page 1] - -RFC 3742 TCP's Slow-Start with Large Congestion Windows March 2004 - - - congestion window is increased for one window of data during slow- - start, in order to improve performance for TCP connections with large - congestion windows. - - When slow-start results in a large increase in the congestion window - in one round-trip time, a large number of packets might be dropped in - the network (even with carefully-tuned active queue management - mechanisms in the routers). This drop of a large number of packets - in the network can result in unnecessary retransmit timeouts for the - TCP connection. The TCP connection could end up in the congestion - avoidance phase with a very small congestion window, and could take a - large number of round-trip times to recover its old congestion - window. This poor performance is illustrated in [F02]. - -2. The Proposal for Limited Slow-Start - - Limited Slow-Start introduces a parameter, "max_ssthresh", and - modifies the slow-start mechanism for values of the congestion window - where "cwnd" is greater than "max_ssthresh". That is, during Slow- - Start, when - - cwnd <= max_ssthresh, - - cwnd is increased by one MSS (MAXIMUM SEGMENT SIZE) for every - arriving ACK (acknowledgement) during slow-start, as is always the - case. During Limited Slow-Start, when - - max_ssthresh < cwnd <= ssthresh, - - the invariant is maintained so that the congestion window is - increased during slow-start by at most max_ssthresh/2 MSS per round- - trip time. This is done as follows: - - For each arriving ACK in slow-start: - If (cwnd <= max_ssthresh) - cwnd += MSS; - else - K = int(cwnd/(0.5 max_ssthresh)); - cwnd += int(MSS/K); - - Thus, during Limited Slow-Start the window is increased by 1/K MSS - for each arriving ACK, for K = int(cwnd/(0.5 max_ssthresh)), instead - of by 1 MSS as in standard slow-start [RFC2581]. 
- - - - - - - - -Floyd Experimental [Page 2] - -RFC 3742 TCP's Slow-Start with Large Congestion Windows March 2004 - - - When - - ssthresh < cwnd, - - slow-start is exited, and the sender is in the Congestion Avoidance - phase. - - Our recommendation would be for max_ssthresh to be set to 100 MSS. - (This is illustrated in the NS [NS] simulator, for snapshots after - May 1, 2002, in the tests "./test-all-tcpHighspeed tcp1A" and - "./test-all-tcpHighspeed tcpHighspeed1" in the subdirectory - "tcl/lib". Setting max_ssthresh to Infinity causes the TCP - connection in NS not to use Limited Slow-Start.) - - With Limited Slow-Start, when the congestion window is greater than - max_ssthresh, the window is increased by at most 1/2 MSS for each - arriving ACK; when the congestion window is greater than 1.5 - max_ssthresh, the window is increased by at most 1/3 MSS for each - arriving ACK, and so on. - - With Limited Slow-Start it takes: - - log(max_ssthresh) - - round-trip times to reach a congestion window of max_ssthresh, and it - takes: - - log(max_ssthresh) + (cwnd - max_ssthresh)/(max_ssthresh/2) - - round-trip times to reach a congestion window of cwnd, for a - congestion window greater than max_ssthresh. - - Thus, with Limited Slow-Start with max_ssthresh set to 100 MSS, it - would take 836 round-trip times to reach a congestion window of - 83,000 packets, compared to 16 round-trip times without Limited - Slow-Start (assuming no packet drops). With Limited Slow-Start, the - largest transient queue during slow-start would be 100 packets; - without Limited Slow-Start, the transient queue during Slow-Start - would reach more than 32,000 packets. - - By limiting the maximum increase in the congestion window in a - round-trip time, Limited Slow-Start can reduce the number of drops - during slow-start, and improve the performance of TCP connections - with large congestion windows. 
- - - - - - - -Floyd Experimental [Page 3] - -RFC 3742 TCP's Slow-Start with Large Congestion Windows March 2004 - - -3. Experimental Results - - Tom Dunigan has added Limited Slow-Start to the Linux 2.4.16 Web100 - kernel, and performed experiments comparing TCP with and without - Limited Slow-Start [D02]. Results so far show improved performance - for TCPs using Limited Slow-Start. There are also several - experiments comparing different values for max_ssthresh. - -4. Related Proposals - - There has been considerable research on mechanisms for the TCP sender - to learn about the limitations of the available bandwidth, and to - exit slow-start before receiving a congestion indication from the - network [VEGAS,H96]. Other proposals set TCP's slow-start parameter - ssthresh based on information from previous TCP connections to the - same destination [WS95,G00]. This document proposes a simple - limitation on slow-start that can be effective in some cases even in - the absence of such mechanisms. The max_ssthresh parameter does not - replace ssthresh, but is an additional parameter. Thus, Limited - Slow-Start could be used in addition to mechanisms for setting - ssthresh. - - Rate-based pacing has also been proposed to improve the performance - of TCP during slow-start [VH97,AD98,KCRP99,ASA00]. We believe that - rate-based pacing could be of significant benefit, and could be used - in addition to the Limited Slow-Start in this proposal. - - Appropriate Byte Counting [RFC3465] proposes that TCP increase its - congestion window as a function of the number of bytes acknowledged, - rather than as a function of the number of ACKs received. - Appropriate Byte Counting is largely orthogonal to this proposal for - Limited Slow-Start. - - Limited Slow-Start is also orthogonal to other proposals to change - mechanisms for exiting slow-start. 
For example, FACK TCP includes an - overdamping mechanism to decrease the congestion window somewhat more - aggressively when a loss occurs during slow-start [MM96]. It is also - true that larger values for the MSS would reduce the size of the - congestion window in units of MSS needed to fill a given pipe, and - therefore would reduce the size of the transient queue in units of - MSS. - -5. Acknowledgements - - This proposal is part of a larger proposal for HighSpeed TCP for TCP - connections with large congestion windows, and resulted from - simulations done by Evandro de Souza, in joint work with Deb Agarwal. - This proposal for Limited Slow-Start draws in part from discussions - - - -Floyd Experimental [Page 4] - -RFC 3742 TCP's Slow-Start with Large Congestion Windows March 2004 - - - with Tom Kelly, who has used a similar modified slow-start in his own - research with congestion control for high-bandwidth connections. We - also thank Tom Dunigan for his experiments with Limited Slow-Start. - - We thank Andrei Gurtov, Reiner Ludwig, members of the End-to-End - Research Group, and members of the Transport Area Working Group, for - feedback on this document. - -6. Security Considerations - - This proposal makes no changes to the underlying security of TCP. - -7. References - -7.1. Normative References - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC3465] Allman, M., "TCP Congestion Control with Appropriate Byte - Counting (ABC)", RFC 3465, February 2003. - -7.2. Informative References - - [AD98] Mohit Aron and Peter Druschel, "TCP: Improving Start-up - Dynamics by Adaptive Timers and Congestion Control"", - TR98-318, Rice University, 1998. URL "http://cs- - tr.cs.rice.edu/Dienst/UI/2.0/Describe/ncstrl.rice_cs/TR98- - 318/". - - [ASA00] A. Aggarwal, S. Savage, and T. 
Anderson, "Understanding the - Performance of TCP Pacing", Proceedings of the 2000 IEEE - Infocom Conference, Tel-Aviv, Israel, March, 2000. URL - "http://www.cs.ucsd.edu/~savage/". - - [D02] T. Dunigan, "Floyd's TCP slow-start and AIMD mods", 2002. - URL "http://www.csm.ornl.gov/~dunigan/net100/floyd.html". - - [F02] S. Floyd, "Performance Problems with TCP's Slow-Start", - 2002. URL "http://www.icir.org/floyd/hstcp/slowstart/". - - [G00] A. Gurtov, "TCP Performance in the Presence of Congestion - and Corruption Losses", Master's Thesis, University of - Helsinki, Department of Computer Science, Helsinki, - December 2000. URL - "http://www.cs.helsinki.fi/u/gurtov/papers/ms_thesis.html". - - - - - -Floyd Experimental [Page 5] - -RFC 3742 TCP's Slow-Start with Large Congestion Windows March 2004 - - - [H96] J. C. Hoe, "Improving the Start-up Behavior of a Congestion - Control Scheme for TCP", SIGCOMM 96, 1996. URL - "http://www.acm.org/sigcomm/sigcomm96/program.html". - - [KCRP99] J. Kulik, R. Coulter, D. Rockwell, and C. Partridge, "A - Simulation Study of Paced TCP", BBN Technical Memorandum - No. 1218, 1999. URL - "http://www.ir.bbn.com/documents/techmemos/index.html". - - [MM96] M. Mathis and J. Mahdavi, "Forward Acknowledgment: Refining - TCP Congestion Control", SIGCOMM, August 1996. - - [NS] The Network Simulator (NS). URL - "http://www.isi.edu/nsnam/ns/". - - [VEGAS] Vegas Web Page, University of Arizona. URL - "http://www.cs.arizona.edu/protocols/". - - [VH97] Vikram Visweswaraiah and John Heidemann, "Rate Based Pacing - for TCP", 1997. URL - "http://www.isi.edu/lsam/publications/rate_based_pacing/". - - [WS95] G. Wright and W. Stevens, "TCP/IP Illustrated", Volume 2, - Addison-Wesley Publishing Company, 1995. 
- -Authors' Address - - Sally Floyd - ICIR (ICSI Center for Internet Research) - - Phone: +1 (510) 666-2989 - EMail: floyd@icir.org - URL: http://www.icir.org/floyd/ - - - - - - - - - - - - - - - - - - -Floyd Experimental [Page 6] - -RFC 3742 TCP's Slow-Start with Large Congestion Windows March 2004 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2004). This document is subject - to the rights, licenses and restrictions contained in BCP 78 and - except as set forth therein, the authors retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE - REPRESENTS OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE - INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF - THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed - to pertain to the implementation or use of the technology - described in this document or the extent to which any license - under such rights might or might not be available; nor does it - represent that it has made any independent effort to identify any - such rights. Information on the procedures with respect to - rights in RFC documents can be found in BCP 78 and BCP 79. - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use - of such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository - at http://www.ietf.org/ipr. 
- - The IETF invites any interested party to bring to its attention - any copyrights, patents or patent applications, or other - proprietary rights that may cover technology that may be required - to implement this standard. Please address the information to the - IETF at ietf-ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - -Floyd Experimental [Page 7] - diff --git a/kernel/picotcp/RFC/rfc3782.txt b/kernel/picotcp/RFC/rfc3782.txt deleted file mode 100644 index 7df0e7f..0000000 --- a/kernel/picotcp/RFC/rfc3782.txt +++ /dev/null @@ -1,1067 +0,0 @@ - - - - - - -Network Working Group S. Floyd -Request for Comments: 3782 ICSI -Obsoletes: 2582 T. Henderson -Category: Standards Track Boeing - A. Gurtov - TeliaSonera - April 2004 - - - The NewReno Modification to TCP's Fast Recovery Algorithm - -Status of this Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2004). All Rights Reserved. - -Abstract - - The purpose of this document is to advance NewReno TCP's Fast - Retransmit and Fast Recovery algorithms in RFC 2582 from Experimental - to Standards Track status. - - The main change in this document relative to RFC 2582 is to specify - the Careful variant of NewReno's Fast Retransmit and Fast Recovery - algorithms. The base algorithm described in RFC 2582 did not attempt - to avoid unnecessary multiple Fast Retransmits that can occur after a - timeout. However, RFC 2582 also defined "Careful" and "Less Careful" - variants that avoid these unnecessary Fast Retransmits, and - recommended the Careful variant. 
This document specifies the - previously-named "Careful" variant as the basic version of NewReno - TCP. - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 1] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - -1. Introduction - - For the typical implementation of the TCP Fast Recovery algorithm - described in [RFC2581] (first implemented in the 1990 BSD Reno - release, and referred to as the Reno algorithm in [FF96]), the TCP - data sender only retransmits a packet after a retransmit timeout has - occurred, or after three duplicate acknowledgements have arrived - triggering the Fast Retransmit algorithm. A single retransmit - timeout might result in the retransmission of several data packets, - but each invocation of the Fast Retransmit algorithm in RFC 2581 - leads to the retransmission of only a single data packet. - - Problems can arise, therefore, when multiple packets are dropped from - a single window of data and the Fast Retransmit and Fast Recovery - algorithms are invoked. In this case, if the SACK option is - available, the TCP sender has the information to make intelligent - decisions about which packets to retransmit and which packets not to - retransmit during Fast Recovery. This document applies only for TCP - connections that are unable to use the TCP Selective Acknowledgement - (SACK) option, either because the option is not locally supported or - because the TCP peer did not indicate a willingness to use SACK. - - In the absence of SACK, there is little information available to the - TCP sender in making retransmission decisions during Fast Recovery. - From the three duplicate acknowledgements, the sender infers a packet - loss, and retransmits the indicated packet. After this, the data - sender could receive additional duplicate acknowledgements, as the - data receiver acknowledges additional data packets that were already - in flight when the sender entered Fast Retransmit. 
- - In the case of multiple packets dropped from a single window of data, - the first new information available to the sender comes when the - sender receives an acknowledgement for the retransmitted packet (that - is, the packet retransmitted when Fast Retransmit was first entered). - If there is a single packet drop and no reordering, then the - acknowledgement for this packet will acknowledge all of the packets - transmitted before Fast Retransmit was entered. However, if there - are multiple packet drops, then the acknowledgement for the - retransmitted packet will acknowledge some but not all of the packets - transmitted before the Fast Retransmit. We call this acknowledgement - a partial acknowledgment. - - Along with several other suggestions, [Hoe95] suggested that during - Fast Recovery the TCP data sender responds to a partial - acknowledgment by inferring that the next in-sequence packet has been - lost, and retransmitting that packet. This document describes a - modification to the Fast Recovery algorithm in RFC 2581 that - incorporates a response to partial acknowledgements received during - - - -Floyd, et al. Standards Track [Page 2] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - Fast Recovery. We call this modified Fast Recovery algorithm - NewReno, because it is a slight but significant variation of the - basic Reno algorithm in RFC 2581. This document does not discuss the - other suggestions in [Hoe95] and [Hoe96], such as a change to the - ssthresh parameter during Slow-Start, or the proposal to send a new - packet for every two duplicate acknowledgements during Fast Recovery. - The version of NewReno in this document also draws on other - discussions of NewReno in the literature [LM97, Hen98]. - - We do not claim that the NewReno version of Fast Recovery described - here is an optimal modification of Fast Recovery for responding to - partial acknowledgements, for TCP connections that are unable to use - SACK. 
Based on our experiences with the NewReno modification in the - NS simulator [NS] and with numerous implementations of NewReno, we - believe that this modification improves the performance of the Fast - Retransmit and Fast Recovery algorithms in a wide variety of - scenarios. - -2. Terminology and Definitions - - In this document, the key words "MUST", "MUST NOT", "REQUIRED", - "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", - and "OPTIONAL" are to be interpreted as described in BCP 14, RFC 2119 - [RFC2119]. This RFC indicates requirement levels for compliant TCP - implementations implementing the NewReno Fast Retransmit and Fast - Recovery algorithms described in this document. - - This document assumes that the reader is familiar with the terms - SENDER MAXIMUM SEGMENT SIZE (SMSS), CONGESTION WINDOW (cwnd), and - FLIGHT SIZE (FlightSize) defined in [RFC2581]. FLIGHT SIZE is - defined as in [RFC2581] as follows: - - FLIGHT SIZE: - The amount of data that has been sent but not yet acknowledged. - -3. The Fast Retransmit and Fast Recovery Algorithms in NewReno - - The standard implementation of the Fast Retransmit and Fast Recovery - algorithms is given in [RFC2581]. This section specifies the basic - NewReno algorithm. Sections 4 through 6 describe some optional - variants, and the motivations behind them, that an implementor may - want to consider when tuning performance for certain network - scenarios. Sections 7 and 8 provide some guidance to implementors - based on experience with NewReno implementations. - - The NewReno modification concerns the Fast Recovery procedure that - begins when three duplicate ACKs are received and ends when either a - retransmission timeout occurs or an ACK arrives that acknowledges all - - - -Floyd, et al. Standards Track [Page 3] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - of the data up to and including the data that was outstanding when - the Fast Recovery procedure began. 
- - The NewReno algorithm specified in this document differs from the - implementation in [RFC2581] in the introduction of the variable - "recover" in step 1, in the response to a partial or new - acknowledgement in step 5, and in modifications to step 1 and the - addition of step 6 for avoiding multiple Fast Retransmits caused by - the retransmission of packets already received by the receiver. - - The algorithm specified in this document uses a variable "recover", - whose initial value is the initial send sequence number. - - 1) Three duplicate ACKs: - When the third duplicate ACK is received and the sender is not - already in the Fast Recovery procedure, check to see if the - Cumulative Acknowledgement field covers more than "recover". If - so, go to Step 1A. Otherwise, go to Step 1B. - - 1A) Invoking Fast Retransmit: - If so, then set ssthresh to no more than the value given in - equation 1 below. (This is equation 3 from [RFC2581]). - - ssthresh = max (FlightSize / 2, 2*SMSS) (1) - - In addition, record the highest sequence number transmitted in - the variable "recover", and go to Step 2. - - 1B) Not invoking Fast Retransmit: - Do not enter the Fast Retransmit and Fast Recovery procedure. In - particular, do not change ssthresh, do not go to Step 2 to - retransmit the "lost" segment, and do not execute Step 3 upon - subsequent duplicate ACKs. - - 2) Entering Fast Retransmit: - Retransmit the lost segment and set cwnd to ssthresh plus 3*SMSS. - This artificially "inflates" the congestion window by the number - of segments (three) that have left the network and the receiver - has buffered. - - 3) Fast Recovery: - For each additional duplicate ACK received while in Fast - Recovery, increment cwnd by SMSS. This artificially inflates the - congestion window in order to reflect the additional segment that - has left the network. - - - - - - -Floyd, et al. 
Standards Track [Page 4] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - 4) Fast Recovery, continued: - Transmit a segment, if allowed by the new value of cwnd and the - receiver's advertised window. - - 5) When an ACK arrives that acknowledges new data, this ACK could be - the acknowledgment elicited by the retransmission from step 2, or - elicited by a later retransmission. - - Full acknowledgements: - If this ACK acknowledges all of the data up to and including - "recover", then the ACK acknowledges all the intermediate - segments sent between the original transmission of the lost - segment and the receipt of the third duplicate ACK. Set cwnd to - either (1) min (ssthresh, FlightSize + SMSS) or (2) ssthresh, - where ssthresh is the value set in step 1; this is termed - "deflating" the window. (We note that "FlightSize" in step 1 - referred to the amount of data outstanding in step 1, when Fast - Recovery was entered, while "FlightSize" in step 5 refers to the - amount of data outstanding in step 5, when Fast Recovery is - exited.) If the second option is selected, the implementation is - encouraged to take measures to avoid a possible burst of data, in - case the amount of data outstanding in the network is much less - than the new congestion window allows. A simple mechanism is to - limit the number of data packets that can be sent in response to - a single acknowledgement; this is known as "maxburst_" in the NS - simulator. Exit the Fast Recovery procedure. - - Partial acknowledgements: - If this ACK does *not* acknowledge all of the data up to and - including "recover", then this is a partial ACK. In this case, - retransmit the first unacknowledged segment. Deflate the - congestion window by the amount of new data acknowledged by the - cumulative acknowledgement field. If the partial ACK - acknowledges at least one SMSS of new data, then add back SMSS - bytes to the congestion window. 
As in Step 3, this artificially - inflates the congestion window in order to reflect the additional - segment that has left the network. Send a new segment if - permitted by the new value of cwnd. This "partial window - deflation" attempts to ensure that, when Fast Recovery eventually - ends, approximately ssthresh amount of data will be outstanding - in the network. Do not exit the Fast Recovery procedure (i.e., - if any duplicate ACKs subsequently arrive, execute Steps 3 and 4 - above). - - For the first partial ACK that arrives during Fast Recovery, also - reset the retransmit timer. Timer management is discussed in - more detail in Section 4. - - - - -Floyd, et al. Standards Track [Page 5] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - 6) Retransmit timeouts: - After a retransmit timeout, record the highest sequence number - transmitted in the variable "recover" and exit the Fast Recovery - procedure if applicable. - - Step 1 specifies a check that the Cumulative Acknowledgement field - covers more than "recover". Because the acknowledgement field - contains the sequence number that the sender next expects to receive, - the acknowledgement "ack_number" covers more than "recover" when: - - ack_number - 1 > recover; - - i.e., at least one byte more of data is acknowledged beyond the - highest byte that was outstanding when Fast Retransmit was last - entered. - - Note that in Step 5, the congestion window is deflated after a - partial acknowledgement is received. The congestion window was - likely to have been inflated considerably when the partial - acknowledgement was received. In addition, depending on the original - pattern of packet losses, the partial acknowledgement might - acknowledge nearly a window of data. In this case, if the congestion - window was not deflated, the data sender might be able to send nearly - a window of data back-to-back. 
- - This document does not specify the sender's response to duplicate - ACKs when the Fast Retransmit/Fast Recovery algorithm is not invoked. - This is addressed in other documents, such as those describing the - Limited Transmit procedure [RFC3042]. This document also does not - address issues of adjusting the duplicate acknowledgement threshold, - but assumes the threshold specified in the IETF standards; the - current standard is RFC 2581, which specifies a threshold of three - duplicate acknowledgements. - - As a final note, we would observe that in the absence of the SACK - option, the data sender is working from limited information. When - the issue of recovery from multiple dropped packets from a single - window of data is of particular importance, the best alternative - would be to use the SACK option. - -4. Resetting the Retransmit Timer in Response to Partial - Acknowledgements - - One possible variant to the response to partial acknowledgements - specified in Section 3 concerns when to reset the retransmit timer - after a partial acknowledgement. The algorithm in Section 3, Step 5, - resets the retransmit timer only after the first partial ACK. In - this case, if a large number of packets were dropped from a window of - - - -Floyd, et al. Standards Track [Page 6] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - data, the TCP data sender's retransmit timer will ultimately expire, - and the TCP data sender will invoke Slow-Start. (This is illustrated - on page 12 of [F98].) We call this the Impatient variant of NewReno. - We note that the Impatient variant in Section 3 doesn't follow the - recommended algorithm in RFC 2988 of restarting the retransmit timer - after every packet transmission or retransmission [RFC2988, Step - 5.1]. - - In contrast, the NewReno simulations in [FF96] illustrate the - algorithm described above with the modification that the retransmit - timer is reset after each partial acknowledgement. 
We call this the - Slow-but-Steady variant of NewReno. In this case, for a window with - a large number of packet drops, the TCP data sender retransmits at - most one packet per roundtrip time. (This behavior is illustrated in - the New-Reno TCP simulation of Figure 5 in [FF96], and on page 11 of - [F98]). - - When N packets have been dropped from a window of data for a large - value of N, the Slow-but-Steady variant can remain in Fast Recovery - for N round-trip times, retransmitting one more dropped packet each - round-trip time; for these scenarios, the Impatient variant gives a - faster recovery and better performance. The tests "ns test-suite- - newreno.tcl impatient1" and "ns test-suite-newreno.tcl slow1" in the - NS simulator illustrate such a scenario, where the Impatient variant - performs better than the Slow-but-Steady variant. The Impatient - variant can be particularly important for TCP connections with large - congestion windows, as illustrated by the tests "ns test-suite- - newreno.tcl impatient4" and "ns test-suite-newreno.tcl slow4" in the - NS simulator. - - One can also construct scenarios where the Slow-but-Steady variant - gives better performance than the Impatient variant. As an example, - this occurs when only a small number of packets are dropped, the RTO - is sufficiently small that the retransmit timer expires, and - performance would have been better without a retransmit timeout. The - tests "ns test-suite-newreno.tcl impatient2" and "ns test-suite- - newreno.tcl slow2" in the NS simulator illustrate such a scenario. - - The Slow-but-Steady variant can also achieve higher goodput than the - Impatient variant, by avoiding unnecessary retransmissions. This - could be of special interest for cellular links, where every - transmission costs battery power and money. The tests "ns test- - suite-newreno.tcl impatient3" and "ns test-suite-newreno.tcl slow3" - in the NS simulator illustrate such a scenario. 
The Slow-but-Steady - variant can also be more robust to delay variation in the network, - where a delay spike might force the Impatient variant into a timeout - and go-back-N recovery. - - - - -Floyd, et al. Standards Track [Page 7] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - Neither of the two variants discussed above are optimal. Our - recommendation is for the Impatient variant, as specified in Section - 3 of this document, because of the poor performance of the Slow-but- - Steady variant for TCP connections with large congestion windows. - - One possibility for a more optimal algorithm would be one that - recovered from multiple packet drops as quickly as does slow-start, - while resetting the retransmit timers after each partial - acknowledgement, as described in the section below. We note, - however, that there is a limitation to the potential performance in - this case in the absence of the SACK option. - -5. Retransmissions after a Partial Acknowledgement - - One possible variant to the response to partial acknowledgements - specified in Section 3 would be to retransmit more than one packet - after each partial acknowledgement, and to reset the retransmit timer - after each retransmission. The algorithm specified in Section 3 - retransmits a single packet after each partial acknowledgement. This - is the most conservative alternative, in that it is the least likely - to result in an unnecessarily-retransmitted packet. A variant that - would recover faster from a window with many packet drops would be to - effectively Slow-Start, retransmitting two packets after each partial - acknowledgement. Such an approach would take less than N roundtrip - times to recover from N losses [Hoe96]. However, in the absence of - SACK, recovering as quickly as slow-start introduces the likelihood - of unnecessarily retransmitting packets, and this could significantly - complicate the recovery mechanisms. 
- - We note that the response to partial acknowledgements specified in - Section 3 of this document and in RFC 2582 differs from the response - in [FF96], even though both approaches only retransmit one packet in - response to a partial acknowledgement. Step 5 of Section 3 specifies - that the TCP sender responds to a partial ACK by deflating the - congestion window by the amount of new data acknowledged, adding back - SMSS bytes if the partial ACK acknowledges at least SMSS bytes of new - data, and sending a new segment if permitted by the new value of - cwnd. Thus, only one previously-sent packet is retransmitted in - response to each partial acknowledgement, but additional new packets - might be transmitted as well, depending on the amount of new data - acknowledged by the partial acknowledgement. In contrast, the - variant of NewReno illustrated in [FF96] simply set the congestion - window to ssthresh when a partial acknowledgement was received. The - approach in [FF96] is more conservative, and does not attempt to - accurately track the actual number of outstanding packets after a - partial acknowledgement is received. While either of these - approaches gives acceptable performance, the variant specified in - Section 3 recovers more smoothly when multiple packets are dropped - - - -Floyd, et al. Standards Track [Page 8] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - from a window of data. (The [FF96] behavior can be seen in the NS - simulator by setting the variable "partial_window_deflation_" for - "Agent/TCP/Newreno" to 0; the behavior specified in Section 3 is - achieved by setting "partial_window_deflation_" to 1.) - -6. 
Avoiding Multiple Fast Retransmits - - This section describes the motivation for the sender's state variable - "recover", and discusses possible heuristics for distinguishing - between a retransmitted packet that was dropped, and three duplicate - acknowledgements from the unnecessary retransmission of three - packets. - - In the absence of the SACK option or timestamps, a duplicate - acknowledgement carries no information to identify the data packet or - packets at the TCP data receiver that triggered that duplicate - acknowledgement. In this case, the TCP data sender is unable to - distinguish between a duplicate acknowledgement that results from a - lost or delayed data packet, and a duplicate acknowledgement that - results from the sender's unnecessary retransmission of a data packet - that had already been received at the TCP data receiver. Because of - this, with the Retransmit and Fast Recovery algorithms in Reno TCP, - multiple segment losses from a single window of data can sometimes - result in unnecessary multiple Fast Retransmits (and multiple - reductions of the congestion window) [F94]. - - With the Fast Retransmit and Fast Recovery algorithms in Reno TCP, - the performance problems caused by multiple Fast Retransmits are - relatively minor compared to the potential problems with Tahoe TCP, - which does not implement Fast Recovery. Nevertheless, unnecessary - Fast Retransmits can occur with Reno TCP unless some explicit - mechanism is added to avoid this, such as the use of the "recover" - variable. (This modification is called "bugfix" in [F98], and is - illustrated on pages 7 and 9 of that document. Unnecessary Fast - Retransmits for Reno without "bugfix" is illustrated on page 6 of - [F98].) - - Section 3 of [RFC2582] defined a default variant of NewReno TCP that - did not use the variable "recover", and did not check if duplicate - ACKs cover the variable "recover" before invoking Fast Retransmit. 
- With this default variant from RFC 2582, the problem of multiple Fast - Retransmits from a single window of data can occur after a Retransmit - Timeout (as in page 8 of [F98]) or in scenarios with reordering (as - in the validation test "./test-all-newreno newreno5_noBF" in - directory "tcl/test" of the NS simulator. This gives performance - similar to that on page 8 of [F03].) RFC 2582 also defined Careful - and Less Careful variants of the NewReno algorithm, and recommended - the Careful variant. - - - -Floyd, et al. Standards Track [Page 9] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - The algorithm specified in Section 3 of this document corresponds to - the Careful variant of NewReno TCP from RFC 2582, and eliminates the - problem of multiple Fast Retransmits. This algorithm uses the - variable "recover", whose initial value is the initial send sequence - number. After each retransmit timeout, the highest sequence number - transmitted so far is recorded in the variable "recover". - - If, after a retransmit timeout, the TCP data sender retransmits three - consecutive packets that have already been received by the data - receiver, then the TCP data sender will receive three duplicate - acknowledgements that do not cover more than "recover". In this - case, the duplicate acknowledgements are not an indication of a new - instance of congestion. They are simply an indication that the - sender has unnecessarily retransmitted at least three packets. - - However, when a retransmitted packet is itself dropped, the sender - can also receive three duplicate acknowledgements that do not cover - more than "recover". In this case, the sender would have been better - off if it had initiated Fast Retransmit. For a TCP that implements - the algorithm specified in Section 3 of this document, the sender - does not infer a packet drop from duplicate acknowledgements in this - scenario. 
As always, the retransmit timer is the backup mechanism - for inferring packet loss in this case. - - There are several heuristics, based on timestamps or on the amount of - advancement of the cumulative acknowledgement field, that allow the - sender to distinguish, in some cases, between three duplicate - acknowledgements following a retransmitted packet that was dropped, - and three duplicate acknowledgements from the unnecessary - retransmission of three packets [Gur03, GF04]. The TCP sender MAY - use such a heuristic to decide to invoke a Fast Retransmit in some - cases, even when the three duplicate acknowledgements do not cover - more than "recover". - - For example, when three duplicate acknowledgements are caused by the - unnecessary retransmission of three packets, this is likely to be - accompanied by the cumulative acknowledgement field advancing by at - least four segments. Similarly, a heuristic based on timestamps uses - the fact that when there is a hole in the sequence space, the - timestamp echoed in the duplicate acknowledgement is the timestamp of - the most recent data packet that advanced the cumulative - acknowledgement field [RFC1323]. If timestamps are used, and the - sender stores the timestamp of the last acknowledged segment, then - the timestamp echoed by duplicate acknowledgements can be used to - distinguish between a retransmitted packet that was dropped and three - duplicate acknowledgements from the unnecessary retransmission of - three packets. The heuristics are illustrated in the NS simulator in - the validation test "./test-all-newreno". - - - -Floyd, et al. Standards Track [Page 10] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - -6.1. 
ACK Heuristic - - If the ACK-based heuristic is used, then following the advancement of - the cumulative acknowledgement field, the sender stores the value of - the previous cumulative acknowledgement as prev_highest_ack, and - stores the latest cumulative ACK as highest_ack. In addition, the - following step is performed if Step 1 in Section 3 fails, before - proceeding to Step 1B. - - 1*) If the Cumulative Acknowledgement field didn't cover more than - "recover", check to see if the congestion window is greater than - SMSS bytes and the difference between highest_ack and - prev_highest_ack is at most 4*SMSS bytes. If true, duplicate - ACKs indicate a lost segment (proceed to Step 1A in Section 3). - Otherwise, duplicate ACKs likely result from unnecessary - retransmissions (proceed to Step 1B in Section 3). - - The congestion window check serves to protect against fast retransmit - immediately after a retransmit timeout, similar to the - "exitFastRetrans_" variable in NS. Examples of applying the ACK - heuristic are in validation tests "./test-all-newreno - newreno_rto_loss_ack" and "./test-all-newreno newreno_rto_dup_ack" in - directory "tcl/test" of the NS simulator. - - If several ACKs are lost, the sender can see a jump in the cumulative - ACK of more than three segments, and the heuristic can fail. A - validation test for this scenario is "./test-all-newreno - newreno_rto_loss_ackf". RFC 2581 recommends that a receiver should - send duplicate ACKs for every out-of-order data packet, such as a - data packet received during Fast Recovery. The ACK heuristic is more - likely to fail if the receiver does not follow this advice, because - then a smaller number of ACK losses are needed to produce a - sufficient jump in the cumulative ACK. - -6.2. Timestamp Heuristic - - If this heuristic is used, the sender stores the timestamp of the - last acknowledged segment. 
In addition, the second paragraph of step - 1 in Section 3 is replaced as follows: - - 1**) If the Cumulative Acknowledgement field didn't cover more than - "recover", check to see if the echoed timestamp in the last - non-duplicate acknowledgment equals the stored timestamp. If - true, duplicate ACKs indicate a lost segment (proceed to Step 1A - in Section 3). Otherwise, duplicate ACKs likely result from - unnecessary retransmissions (proceed to Step 1B in Section 3). - - - - - -Floyd, et al. Standards Track [Page 11] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - Examples of applying the timestamp heuristic are in validation tests - "./test-all-newreno newreno_rto_loss_tsh" and "./test-all-newreno - newreno_rto_dup_tsh". The timestamp heuristic works correctly, both - when the receiver echoes timestamps as specified by [RFC1323], and by - its revision attempts. However, if the receiver arbitrarily echoes - timestamps, the heuristic can fail. The heuristic can also fail if a - timeout was spurious and returning ACKs are not from retransmitted - segments. This can be prevented by detection algorithms such as - [RFC3522]. - -7. Implementation Issues for the Data Receiver - - [RFC2581] specifies that "Out-of-order data segments SHOULD be - acknowledged immediately, in order to accelerate loss recovery." - Neal Cardwell has noted that some data receivers do not send an - immediate acknowledgement when they send a partial acknowledgment, - but instead wait first for their delayed acknowledgement timer to - expire [C98]. As [C98] notes, this severely limits the potential - benefit of NewReno by delaying the receipt of the partial - acknowledgement at the data sender. Echoing RFC 2581, our - recommendation is that the data receiver send an immediate - acknowledgement for an out-of-order segment, even when that out-of- - order segment fills a hole in the buffer. - -8. 
Implementation Issues for the Data Sender - - In Section 3, Step 5 above, it is noted that implementations should - take measures to avoid a possible burst of data when leaving Fast - Recovery, in case the amount of new data that the sender is eligible - to send due to the new value of the congestion window is large. This - can arise during NewReno when ACKs are lost or treated as pure window - updates, thereby causing the sender to underestimate the number of - new segments that can be sent during the recovery procedure. - Specifically, bursts can occur when the FlightSize is much less than - the new congestion window when exiting from Fast Recovery. One - simple mechanism to avoid a burst of data when leaving Fast Recovery - is to limit the number of data packets that can be sent in response - to a single acknowledgment. (This is known as "maxburst_" in the ns - simulator.) Other possible mechanisms for avoiding bursts include - rate-based pacing, or setting the slow-start threshold to the - resultant congestion window and then resetting the congestion window - to FlightSize. A recommendation on the general mechanism to avoid - excessively bursty sending patterns is outside the scope of this - document. - - An implementation may want to use a separate flag to record whether - or not it is presently in the Fast Recovery procedure. The use of - the value of the duplicate acknowledgment counter for this purpose is - - - -Floyd, et al. Standards Track [Page 12] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - not reliable because it can be reset upon window updates and out-of- - order acknowledgments. 
- - When not in Fast Recovery, the value of the state variable "recover" - should be pulled along with the value of the state variable for - acknowledgments (typically, "snd_una") so that, when large amounts of - data have been sent and acked, the sequence space does not wrap and - falsely indicate that Fast Recovery should not be entered (Section 3, - step 1, last paragraph). - - It is important for the sender to respond correctly to duplicate ACKs - received when the sender is no longer in Fast Recovery (e.g., because - of a Retransmit Timeout). The Limited Transmit procedure [RFC3042] - describes possible responses to the first and second duplicate - acknowledgements. When three or more duplicate acknowledgements are - received, the Cumulative Acknowledgement field doesn't cover more - than "recover", and a new Fast Recovery is not invoked, it is - important that the sender not execute the Fast Recovery steps (3) and - (4) in Section 3. Otherwise, the sender could end up in a chain of - spurious timeouts. We mention this only because several NewReno - implementations had this bug, including the implementation in the NS - simulator. (This bug in the NS simulator was fixed in July 2003, - with the variable "exitFastRetrans_".) - -9. Simulations - - Simulations with NewReno are illustrated with the validation test - "tcl/test/test-all-newreno" in the NS simulator. The command - "../../ns test-suite-newreno.tcl reno" shows a simulation with Reno - TCP, illustrating the data sender's lack of response to a partial - acknowledgement. In contrast, the command "../../ns test-suite- - newreno.tcl newreno_B" shows a simulation with the same scenario - using the NewReno algorithms described in this paper. - -10. 
Comparisons between Reno and NewReno TCP - - As we stated in the introduction, we believe that the NewReno - modification described in this document improves the performance of - the Fast Retransmit and Fast Recovery algorithms of Reno TCP in a - wide variety of scenarios. This has been discussed in some depth in - [FF96], which illustrates Reno TCP's poor performance when multiple - packets are dropped from a window of data and also illustrates - NewReno TCP's good performance in that scenario. - - We do, however, know of one scenario where Reno TCP gives better - performance than NewReno TCP, that we describe here for the sake of - completeness. Consider a scenario with no packet loss, but with - sufficient reordering so that the TCP sender receives three duplicate - - - -Floyd, et al. Standards Track [Page 13] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - acknowledgements. This will trigger the Fast Retransmit and Fast - Recovery algorithms. With Reno TCP or with Sack TCP, this will - result in the unnecessary retransmission of a single packet, combined - with a halving of the congestion window (shown on pages 4 and 6 of - [F03]). With NewReno TCP, however, this reordering will also result - in the unnecessary retransmission of an entire window of data (shown - on page 5 of [F03]). - - While Reno TCP performs better than NewReno TCP in the presence of - reordering, NewReno's superior performance in the presence of - multiple packet drops generally outweighs its less optimal - performance in the presence of reordering. (Sack TCP is the - preferred solution, with good performance in both scenarios.) This - document recommends the Fast Retransmit and Fast Recovery algorithms - of NewReno TCP instead of those of Reno TCP for those TCP connections - that do not support SACK. 
We would also note that NewReno's Fast - Retransmit and Fast Recovery mechanisms are widely deployed in TCP - implementations in the Internet today, as documented in [PF01]. For - example, tests of TCP implementations in several thousand web servers - in 2001 showed that for those TCP connections where the web browser - was not SACK-capable, more web servers used the Fast Retransmit and - Fast Recovery algorithms of NewReno than those of Reno or Tahoe TCP - [PF01]. - -11. Changes Relative to RFC 2582 - - The purpose of this document is to advance the NewReno's Fast - Retransmit and Fast Recovery algorithms in RFC 2582 to Standards - Track. - - The main change in this document relative to RFC 2582 is to specify - the Careful variant of NewReno's Fast Retransmit and Fast Recovery - algorithms. The base algorithm described in RFC 2582 did not attempt - to avoid unnecessary multiple Fast Retransmits that can occur after a - timeout (described in more detail in the section above). However, - RFC 2582 also defined "Careful" and "Less Careful" variants that - avoid these unnecessary Fast Retransmits, and recommended the Careful - variant. This document specifies the previously-named "Careful" - variant as the basic version of NewReno. As described below, this - algorithm uses a variable "recover", whose initial value is the send - sequence number. - - The algorithm specified in Section 3 checks whether the - acknowledgement field of a partial acknowledgement covers *more* than - "recover", as defined in Section 3. Another possible variant would - be to simply require that the acknowledgement field covers *more than - or equal to* "recover" before initiating another Fast Retransmit. We - called this the Less Careful variant in RFC 2582. - - - -Floyd, et al. 
Standards Track [Page 14] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - There are two separate scenarios in which the TCP sender could - receive three duplicate acknowledgements acknowledging "recover" but - no more than "recover". One scenario would be that the data sender - transmitted four packets with sequence numbers higher than "recover", - that the first packet was dropped in the network, and the following - three packets triggered three duplicate acknowledgements - acknowledging "recover". The second scenario would be that the - sender unnecessarily retransmitted three packets below "recover", and - that these three packets triggered three duplicate acknowledgements - acknowledging "recover". In the absence of SACK, the TCP sender is - unable to distinguish between these two scenarios. - - For the Careful variant of Fast Retransmit, the data sender would - have to wait for a retransmit timeout in the first scenario, but - would not have an unnecessary Fast Retransmit in the second scenario. - For the Less Careful variant to Fast Retransmit, the data sender - would Fast Retransmit as desired in the first scenario, and would - unnecessarily Fast Retransmit in the second scenario. This document - only specifies the Careful variant in Section 3. Unnecessary Fast - Retransmits with the Less Careful variant in scenarios with - reordering are illustrated in page 8 of [F03]. - - The document also specifies two heuristics that the TCP sender MAY - use to decide to invoke Fast Retransmit even when the three duplicate - acknowledgements do not cover more than "recover". These heuristics, - an ACK-based heuristic and a timestamp heuristic, are described in - Sections 6.1 and 6.2 respectively. - -12. Conclusions - - This document specifies the NewReno Fast Retransmit and Fast Recovery - algorithms for TCP. 
This NewReno modification to TCP can even be - important for TCP implementations that support the SACK option, - because the SACK option can only be used for TCP connections when - both TCP end-nodes support the SACK option. NewReno performs better - than Reno (RFC 2581) in a number of scenarios discussed herein. - - A number of options to the basic algorithm presented in Section 3 are - also described. These include the handling of the retransmission - timer (Section 4), the response to partial acknowledgments (Section - 5), and the value of the congestion window when leaving Fast Recovery - (section 3, step 5). Our belief is that the differences between - these variants of NewReno are small compared to the differences - between Reno and NewReno. That is, the important thing is to - implement NewReno instead of Reno, for a TCP connection without SACK; - it is less important exactly which of the variants of NewReno is - implemented. - - - - -Floyd, et al. Standards Track [Page 15] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - -13. Security Considerations - - RFC 2581 discusses general security considerations concerning TCP - congestion control. This document describes a specific algorithm - that conforms with the congestion control requirements of RFC 2581, - and so those considerations apply to this algorithm, too. There are - no known additional security concerns for this specific algorithm. - -14. Acknowledgements - - Many thanks to Anil Agarwal, Mark Allman, Armando Caro, Jeffrey Hsu, - Vern Paxson, Kacheong Poon, Keyur Shah, and Bernie Volz for detailed - feedback on this document or on its precursor, RFC 2582. - -15. References - -15.1. Normative References - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October 1996. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. 
- - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2582] Floyd, S. and T. Henderson, "The NewReno Modification to - TCP's Fast Recovery Algorithm", RFC 2582, April 1999. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [RFC3042] Allman, M., Balakrishnan, H. and S. Floyd, "Enhancing TCP's - Loss Recovery Using Limited Transmit", RFC 3042, January - 2001. - -15.2. Informative References - - [C98] Cardwell, N., "delayed ACKs for retransmitted packets: - ouch!". November 1998, Email to the tcpimpl mailing list, - Message-ID "Pine.LNX.4.02A.9811021421340.26785- - 100000@sake.cs.washington.edu", archived at "http://tcp- - impl.lerc.nasa.gov/tcp-impl". - - - - - - - -Floyd, et al. Standards Track [Page 16] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - [F98] Floyd, S., Revisions to RFC 2001, "Presentation to the - TCPIMPL Working Group", August 1998. URLs - "ftp://ftp.ee.lbl.gov/talks/sf-tcpimpl-aug98.ps" and - "ftp://ftp.ee.lbl.gov/talks/sf-tcpimpl-aug98.pdf". - - [F03] Floyd, S., "Moving NewReno from Experimental to Proposed - Standard? Presentation to the TSVWG Working Group", March - 2003. URLs "http://www.icir.org/floyd/talks/newreno- - Mar03.ps" and "http://www.icir.org/floyd/talks/newreno- - Mar03.pdf". - - [FF96] Fall, K. and S. Floyd, "Simulation-based Comparisons of - Tahoe, Reno and SACK TCP", Computer Communication Review, - July 1996. URL "ftp://ftp.ee.lbl.gov/papers/sacks.ps.Z". - - [F94] Floyd, S., "TCP and Successive Fast Retransmits", Technical - report, October 1994. URL - "ftp://ftp.ee.lbl.gov/papers/fastretrans.ps". - - [GF04] Gurtov, A. and S. Floyd, "Resolving Acknowledgment - Ambiguity in non-SACK TCP", Next Generation Teletraffic and - Wired/Wireless Advanced Networking (NEW2AN'04), February - 2004. URL "http://www.cs.helsinki.fi/u/gurtov/papers/ - heuristics.html". 
- - [Gur03] Gurtov, A., "[Tsvwg] resolving the problem of unnecessary - fast retransmits in go-back-N", email to the tsvwg mailing - list, message ID <3F25B467.9020609@cs.helsinki.fi>, July - 28, 2003. URL "http://www1.ietf.org/mail-archive/working- - groups/tsvwg/current/msg04334.html". - - [Hen98] Henderson, T., Re: NewReno and the 2001 Revision. September - 1998. Email to the tcpimpl mailing list, Message ID - "Pine.BSI.3.95.980923224136.26134A- - 100000@raptor.CS.Berkeley.EDU", archived at "http://tcp- - impl.lerc.nasa.gov/tcp-impl". - - [Hoe95] Hoe, J., "Startup Dynamics of TCP's Congestion Control and - Avoidance Schemes", Master's Thesis, MIT, 1995. - - [Hoe96] Hoe, J., "Improving the Start-up Behavior of a Congestion - Control Scheme for TCP", ACM SIGCOMM, August 1996. URL - "http://www.acm.org/sigcomm/sigcomm96/program.html". - - [LM97] Lin, D. and R. Morris, "Dynamics of Random Early - Detection", SIGCOMM 97, September 1997. URL - "http://www.acm.org/sigcomm/sigcomm97/program.html". - - - - -Floyd, et al. Standards Track [Page 17] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - - [NS] The Network Simulator (NS). URL - "http://www.isi.edu/nsnam/ns/". - - [PF01] Padhye, J. and S. Floyd, "Identifying the TCP Behavior of - Web Servers", June 2001, SIGCOMM 2001. - - [RFC1323] Jacobson, V., Braden, R. and D. Borman, "TCP Extensions for - High Performance", RFC 1323, May 1992. - - [RFC3517] Blanton, E., Allman, M., Fall, K. and L. Wang, "A - Conservative Selective Acknowledgment (SACK)-based Loss - Recovery Algorithm for TCP", RFC 3517, April 2003. - - [RFC3522] Ludwig, R. and M. Meyer, "The Eifel Detection Algorithm for - TCP", RFC 3522, April 2003. 
- -Authors' Addresses - - Sally Floyd - International Computer Science Institute - - Phone: +1 (510) 666-2989 - EMail: floyd@acm.org - URL: http://www.icir.org/floyd/ - - - Tom Henderson - The Boeing Company - - EMail: thomas.r.henderson@boeing.com - - - Andrei Gurtov - TeliaSonera - - EMail: andrei.gurtov@teliasonera.com - - - - - - - - - - - - - - - -Floyd, et al. Standards Track [Page 18] - -RFC 3782 NewReno Modification to Fast Recovery Algorithm April 2004 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2004). This document is subject - to the rights, licenses and restrictions contained in BCP 78, and - except as set forth therein, the authors retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE - REPRESENTS OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE - INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF - THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed - to pertain to the implementation or use of the technology - described in this document or the extent to which any license - under such rights might or might not be available; nor does it - represent that it has made any independent effort to identify any - such rights. Information on the procedures with respect to - rights in RFC documents can be found in BCP 78 and BCP 79. 
- - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use - of such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository - at http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention - any copyrights, patents or patent applications, or other - proprietary rights that may cover technology that may be required - to implement this standard. Please address the information to the - IETF at ietf-ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - -Floyd, et al. Standards Track [Page 19] - diff --git a/kernel/picotcp/RFC/rfc3819.txt b/kernel/picotcp/RFC/rfc3819.txt deleted file mode 100644 index b4a5e8b..0000000 --- a/kernel/picotcp/RFC/rfc3819.txt +++ /dev/null @@ -1,3363 +0,0 @@ - - - - - - -Network Working Group P. Karn, Ed. -Request for Comments: 3819 Qualcomm -BCP: 89 C. Bormann -Category: Best Current Practice Universitaet Bremen TZI - G. Fairhurst - University of Aberdeen - D. Grossman - Motorola, Inc. - R. Ludwig - Ericsson Research - J. Mahdavi - Novell - G. Montenegro - Sun Microsystems Laboratories, Europe - J. Touch - USC/ISI - L. Wood - Cisco Systems - July 2004 - - - Advice for Internet Subnetwork Designers - -Status of this Memo - - This document specifies an Internet Best Current Practices for the - Internet Community, and requests discussion and suggestions for - improvements. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2004). 
- -Abstract - - This document provides advice to the designers of digital - communication equipment, link-layer protocols, and packet-switched - local networks (collectively referred to as subnetworks), who wish to - support the Internet protocols but may be unfamiliar with the - Internet architecture and the implications of their design choices on - the performance and efficiency of the Internet. - - - - - - - - - - -Karn, et al. Best Current Practice [Page 1] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -Table of Contents - - 1. Introduction and Overview. . . . . . . . . . . . . . . . . . . 2 - 2. Maximum Transmission Units (MTUs) and IP Fragmentation . . . . 4 - 2.1. Choosing the MTU in Slow Networks. . . . . . . . . . . . 6 - 3. Framing on Connection-Oriented Subnetworks . . . . . . . . . . 7 - 4. Connection-Oriented Subnetworks. . . . . . . . . . . . . . . . 9 - 5. Broadcasting and Discovery . . . . . . . . . . . . . . . . . . 10 - 6. Multicasting . . . . . . . . . . . . . . . . . . . . . . . . . 11 - 7. Bandwidth on Demand (BoD) Subnets. . . . . . . . . . . . . . . 13 - 8. Reliability and Error Control. . . . . . . . . . . . . . . . . 14 - 8.1. TCP vs Link-Layer Retransmission . . . . . . . . . . . . 14 - 8.2. Recovery from Subnetwork Outages . . . . . . . . . . . . 17 - 8.3. CRCs, Checksums and Error Detection. . . . . . . . . . . 18 - 8.4. How TCP Works. . . . . . . . . . . . . . . . . . . . . . 20 - 8.5. TCP Performance Characteristics. . . . . . . . . . . . . 22 - 8.5.1. The Formulae . . . . . . . . . . . . . . . . . . 22 - 8.5.2. Assumptions. . . . . . . . . . . . . . . . . . . 23 - 8.5.3. Analysis of Link-Layer Effects on TCP - Performance. . . . . . . . . . . . . . . . . . . 24 - 9. Quality-of-Service (QoS) Considerations. . . . . . . . . . . . 26 - 10. Fairness vs Performance. . . . . . . . . . . . . . . . . . . . 29 - 11. Delay Characteristics. . . . . . . . . . . . . . . . . . . . . 30 - 12. Bandwidth Asymmetries. . . . . . . . 
. . . . . . . . . . . . . 31 - 13. Buffering, Flow and Congestion Control . . . . . . . . . . . . 31 - 14. Compression. . . . . . . . . . . . . . . . . . . . . . . . . . 34 - 15. Packet Reordering. . . . . . . . . . . . . . . . . . . . . . . 36 - 16. Mobility . . . . . . . . . . . . . . . . . . . . . . . . . . . 37 - 17. Routing. . . . . . . . . . . . . . . . . . . . . . . . . . . . 39 - 18. Security Considerations. . . . . . . . . . . . . . . . . . . . 41 - 19. Contributors . . . . . . . . . . . . . . . . . . . . . . . . . 44 - 20. Informative References . . . . . . . . . . . . . . . . . . . . 45 - 21. Contributors' Addresses. . . . . . . . . . . . . . . . . . . . 57 - 22. Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . 58 - 23. Full Copyright Statement . . . . . . . . . . . . . . . . . . . 60 - -1. Introduction and Overview - - IP, the Internet Protocol [RFC791] [RFC2460], is the core protocol of - the Internet. IP defines a simple "connectionless" packet-switched - network. The success of the Internet is largely attributed to IP's - simplicity, the "end-to-end principle" [SRC81] on which the Internet - is based, and the resulting ease of carrying IP on a wide variety of - subnetworks, not necessarily designed with IP in mind. A subnetwork - refers to any network operating immediately below the IP layer to - connect two or more systems using IP (i.e., end hosts or routers). - In its simplest form, this may be a direct connection between the IP - systems (e.g., using a length of cable or a wireless medium). - - - -Karn, et al. Best Current Practice [Page 2] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - This document defines a subnetwork as a layer 2 network, which is a - network that does not rely upon the services of IP routers to forward - packets between parts of the subnetwork. However, IP routers may - bridge frames at Layer 2 between parts of a subnetwork. 
Sometimes, - it is convenient to aggregate a group of such subnetworks into a - single logical subnetwork. IP routing protocols (e.g., OSPF, IS-IS, - and PIM) can be configured to support this aggregation, but typically - present a layer-3 subnetwork rather than a layer-2 subnetwork. This - may also result in a specific packet passing several times over the - same layer-2 subnetwork via an intermediate layer-3 gateway (router). - Because that aggregation requires layer-3 components, issues thereof - are beyond the scope of this document. - - However, while many subnetworks carry IP, they do not necessarily do - so with maximum efficiency, minimum complexity, or cost, nor do they - implement certain features to efficiently support newer Internet - features of increasing importance, such as multicasting or quality of - service. - - With the explosive growth of the Internet, IP packets comprise an - increasingly large fraction of the traffic carried by the world's - telecommunications networks. It therefore makes sense to optimize - both existing and new subnetwork technologies for IP as much as - possible. - - Optimizing a subnetwork for IP involves three complementary - considerations: - - 1. Providing functionality sufficient to carry IP. - - 2. Eliminating unnecessary functions that increase cost or - complexity. - - 3. Choosing subnetwork parameters that maximize the performance of - the Internet protocols. - - Because IP is so simple, consideration 2 is more of an issue than - consideration 1. That is to say, subnetwork designers make many more - errors of commission than errors of omission. However, certain - enhancements to Internet features, such as multicasting and quality- - of-service, benefit significantly from support given by the - underlying subnetworks beyond that necessary to carry "traditional" - unicast, best-effort IP. - - - - - - - - -Karn, et al. 
Best Current Practice [Page 3] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - A major consideration in the efficient design of any layered - communication network is the appropriate layer(s) in which to - implement a given function. This issue was first addressed in the - seminal paper, "End-to-End Arguments in System Design" [SRC81]. That - paper argued that many functions can be implemented properly *only* - on an end-to-end basis, i.e., at the highest protocol layers, outside - the subnetwork. These functions include ensuring the reliable - delivery of data and the use of cryptography to provide - confidentiality and message integrity. - - Such functions cannot be provided solely by the concatenation of - hop-by-hop services; duplicating these functions at the lower - protocol layers (i.e., within the subnetwork) can be needlessly - redundant or even harmful to cost and performance. - - However, partial duplication of functionality in a lower layer can - *sometimes* be justified by performance, security, or availability - considerations. Examples include link-layer retransmission to - improve the performance of an unusually lossy channel, e.g., mobile - radio, link-level encryption intended to thwart traffic analysis, and - redundant transmission links to improve availability, increase - throughput, or to guarantee performance for certain classes of - traffic. Duplication of protocol functions should be done only with - an understanding of system-level implications, including possible - interactions with higher-layer mechanisms. - - The original architecture of the Internet was influenced by the - end-to-end principle [SRC81], and has been, in our view, part of the - reason for the Internet's success. - - The remainder of this document discusses the various subnetwork - design issues that the authors consider relevant to efficient IP - support. - -2. 
Maximum Transmission Units (MTUs) and IP Fragmentation - - IPv4 packets (datagrams) vary in size, from 20 bytes (the size of the - IPv4 header alone) to a maximum of 65535 bytes. Subnetworks need not - support maximum-sized (64KB) IP packets, as IP provides a scheme that - breaks packets that are too large for a given subnetwork into - fragments that travel as independent IP packets and are reassembled - at the destination. The maximum packet size supported by a - subnetwork is known as its Maximum Transmission Unit (MTU). - - Subnetworks may, but are not required to, indicate the length of each - packet they carry. One example is Ethernet with the widely used DIX - [DIX82] (not IEEE 802.3 [IEEE8023]) header, which lacks a length - - - - -Karn, et al. Best Current Practice [Page 4] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - field to indicate the true data length when the packet is padded to a - minimum of 60 bytes. This is not a problem for uncompressed IP - because each IP packet carries its own length field. - - If optional header compression [RFC1144] [RFC2507] [RFC2508] - [RFC3095] is used, however, it is required that the link framing - indicate frame length because that is needed for the reconstruction - of the original header. - - In IP version 4 (the version now in widespread use), fragmentation - can occur at either the sending host or in an intermediate router, - and fragments can be further fragmented at subsequent routers if - necessary. - - In IP version 6 [RFC2460], fragmentation can occur only at the - sending host; it cannot occur in a router (called "router - fragmentation" in this document). - - Both IPv4 and IPv6 provide a "path MTU discovery" procedure [RFC1191] - [RFC1435] [RFC1981] that allows the sending host to avoid - fragmentation by discovering the minimum MTU along a given path and - reduce its packet sizes accordingly. This procedure is optional in - IPv4 and IPv6. 
- - Path MTU discovery is widely deployed, but it sometimes encounters - problems. Some routers fail to generate the ICMP messages that - convey path MTU information to the sender, and sometimes the ICMP - messages are blocked by overly restrictive firewalls. The result can - be a "Path MTU Black Hole" [RFC2923] [RFC1435]. - - The Path MTU Discovery procedure, the persistence of path MTU black - holes, and the deletion of router fragmentation in IPv6 reflect a - consensus of the Internet technical community that router - fragmentation is best avoided. This requires that subnetworks - support MTUs that are "reasonably" large. All IPv4 end hosts are - required to accept and reassemble IP packets of size 576 bytes - [RFC791], but such a small value would clearly be inefficient. - Because IPv6 omits fragmentation by routers, [RFC2460] specifies a - larger minimum MTU of 1280 bytes. Any subnetwork with an internal - packet payload smaller than 1280 bytes must implement a mechanism - that performs fragmentation/reassembly of IP packets to/from - subnetwork frames if it is to support IPv6. - - If a subnetwork cannot directly support a "reasonable" MTU with - native framing mechanisms, it should internally fragment. That is, - it should transparently break IP packets into internal data elements - and reassemble them at the other end of the subnetwork. - - - - -Karn, et al. Best Current Practice [Page 5] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - This leaves the question of what is a "reasonable" MTU. Ethernet (10 - and 100 Mb/s) has an MTU of 1500 bytes, and because of the ubiquity - of Ethernet few Internet paths currently have MTUs larger than this - value. This severely limits the utility of larger MTUs provided by - other subnetworks. 
Meanwhile, larger MTUs are increasingly desirable - on high-speed subnetworks to reduce the per-packet processing - overhead in host computers, and implementers are encouraged to - provide them even though they may not be usable when Ethernet is also - in the path. - - Various "tunneling" schemes, such as GRE [RFC2784] or IP Security in - tunnel mode [RFC2406], treat IP as a subnetwork for IP. Since - tunneling adds header overhead, it can trigger fragmentation, even - when the same physical subnetworks (e.g., Ethernet) are used on both - sides of the host performing IPsec encapsulation. Tunneling has made - it more difficult to avoid router fragmentation and has increased the - incidence of path MTU black holes [RFC2401] [RFC2923]. Larger - subnetwork MTUs may help to alleviate this problem. - -2.1. Choosing the MTU in Slow Networks - - In slow networks, the largest possible packet may take a considerable - amount of time to send. This is known as channelisation or - serialisation delay. Total end-to-end interactive response time - should not exceed the well-known human factors limit of 100 to 200 - ms. This includes all sources of delay: electromagnetic propagation - delay, queuing delay, serialisation delay, and the store-and-forward - time, i.e., the time to transmit a packet at link speed. - - At low link speeds, store-and-forward delays can dominate total - end-to-end delay; these are in turn directly influenced by the - maximum transmission unit (MTU) size. Even when an interactive - packet is given a higher queuing priority, it may have to wait for a - large bulk transfer packet to finish transmission. This worst-case - wait can be set by an appropriate choice of MTU. - - For example, if the MTU is set to 1500 bytes, then an MTU-sized - packet will take about 8 milliseconds to send on a T1 (1.536 Mb/s) - link. But if the link speed is 19.2kb/s, then the transmission time - becomes 625 ms -- well above our 100-200ms limit. 
A 256-byte MTU - would lower this delay to a little over 100 ms. However, care should - be taken not to lower the MTU excessively, as this will increase - header overhead and trigger frequent router fragmentation (if Path - MTU discovery is not in use). This is likely to be the case with - multicast, where Path MTU discovery is ineffective. - - One way to limit delay for interactive traffic without imposing a - small MTU is to give priority to this traffic and to preempt (abort) - - - -Karn, et al. Best Current Practice [Page 6] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - the transmission of a lower-priority packet when a higher priority - packet arrives in the queue. However, the link resources used to - send the aborted packet are lost, and overall throughput will - decrease. - - Another way to limit delay is to implement a link-level multiplexing - scheme that allows several packets to be in progress simultaneously, - with transmission priority given to segments of higher-priority IP - packets. For links using the Point-To-Point Protocol (PPP) - [RFC1661], multi-class multilink [RFC2686] [RFC2687] [RFC2689] - provides such a facility. - - ATM (asynchronous transfer mode), where SNDUs are fragmented and - interleaved across smaller 53-byte ATM cells, is another example of - this technique. However, ATM is generally used on high-speed links - where the store-and-forward delays are already minimal, and it - introduces significant (~9%) increases in overhead due to the - addition of a 5-byte cell header to each 48-byte cell payload. - - A third example is the Data-Over-Cable Service Interface - Specification (DOCSIS) with typical upstream bandwidths of 2.56 Mb/s - or 5.12 Mb/s. To reduce the impact of a 1500-byte MTU in DOCSIS 1.0 - [DOCSIS1], a data link layer fragmentation mechanism is specified in - DOCSIS 1.1 [DOCSIS2]. 
To accommodate the installed base, DOCSIS 1.1 - must be backward compatible with DOCSIS 1.0 cable modems, which - generally do not support fragmentation. Under the co-existence of - DOCSIS 1.0 and DOCSIS 1.1, the unfragmented large data packets from - DOCSIS 1.0 cable modems may affect the quality of service for voice - packets from DOCSIS 1.1 cable modems. In this case, it has been - shown in [DOCSIS3] that the use of bandwidth allocation algorithms - can mitigate this effect. - - To summarize, there is a fundamental tradeoff between efficiency and - latency in the design of a subnetwork, and the designer should keep - this tradeoff in mind. - -3. Framing on Connection-Oriented Subnetworks - - IP requires that subnetworks mark the beginning and end of each - variable-length, asynchronous IP packet. Some examples of links and - subnetworks that do not provide this as an intrinsic feature include: - - 1. leased lines carrying a synchronous bit stream; - - 2. ISDN B-channels carrying a synchronous octet stream; - - 3. dialup telephone modems carrying an asynchronous octet stream; - - - - -Karn, et al. Best Current Practice [Page 7] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - and - - 4. Asynchronous Transfer Mode (ATM) networks carrying an - asynchronous stream of fixed-sized "cells". - - The Internet community has defined packet framing methods for all - these subnetworks. The Point-To-Point Protocol (PPP) [RFC1661], - which uses a variant of HDLC, is applicable to bit synchronous, - octet-synchronous, and octet asynchronous links (i.e., examples 1-3 - above). PPP is one preferred framing method for IP, since a large - number of systems interoperate with PPP. ATM has its own framing - methods, described in [RFC2684] [RFC2364]. - - At high speeds, a subnetwork should provide a framed interface - capable of carrying asynchronous, variable-length IP datagrams. 
The - maximum packet size supported by this interface is discussed above in - the MTU/Fragmentation section. The subnetwork may implement this - facility in any convenient manner. - - IP packet boundaries need not coincide with any framing or - synchronization mechanisms internal to the subnetwork. When the - subnetwork implements variable sized data units, the most - straightforward approach is to place exactly one IP packet into each - subnetwork data unit (SNDU), and to rely on the subnetwork's existing - ability to delimit SNDUs to also delimit IP packets. A good example - is Ethernet. However, some subnetworks have SNDUs of one or more - fixed sizes, as dictated by switching, forward error correction - and/or interleaving considerations. Examples of such subnetworks - include ATM, with a single cell payload size of 48 octets plus a 5- - octet header, and IS-95 digital cellular, with two "rate sets" of - four fixed frame sizes each that may be selected on 20 millisecond - boundaries. - - Because IP packets are of variable length, they may not necessarily - fit into an integer multiple of fixed-sized SNDUs. An "adaptation - layer" is needed to convert IP packets into SNDUs while marking the - boundary between each IP packet in some manner. - - There are several approaches to this problem. The first is to encode - each IP packet into one or more SNDUs with no SNDU containing pieces - of more than one IP packet, and to pad out the last SNDU of the - packet as needed. Bits in a control header added to each SNDU - indicate where the data segment belongs in the IP packet. If the - subnetwork provides in-order, at-most-once delivery, the header can - be as simple as a pair of bits indicating whether the SNDU is the - first and/or the last in the IP packet. Alternatively, for - subnetworks that do not reorder the fragments of an SNDU, only the - last SNDU of the packet could be marked, as this would implicitly - - - -Karn, et al. 
Best Current Practice [Page 8] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - indicate the next SNDU as the first in a new IP packet. The AAL5 - (ATM Adaptation Layer 5) scheme used with ATM is an example of this - approach, though it adds other features, including a payload length - field and a payload CRC. - - In AAL5, the ATM User-User Indication, which is encoded in the - Payload Type field of an ATM cell, indicates the last cell of a - packet. The packet trailer is located at the end of the SNDU and - contains the packet length and a CRC. - - Another framing technique is to insert per-segment overhead to - indicate the presence of a segment option. When present, the option - carries a pointer to the end of the packet. This differs from AAL5 - in that it permits another packet to follow within the same segment. - MPEG-2 Transport Streams [EN301192] [ISO13818] support this style of - fragmentation, and may either use padding (limiting each MPEG - transport stream packet to carry only part of one IP packet), or - allow a second IP packet to start in the same Transport Stream packet - (no padding). - - A third approach is to insert a special flag sequence into the data - stream between each IP packet, and to pack the resulting data stream - into SNDUs without regard to SNDU boundaries. This may have - implications when frames are lost. The flag sequence can also pad - unused space at the end of an SNDU. If the special flag appears in - the user data, it is escaped to an alternate sequence (usually larger - than a flag) to avoid being misinterpreted as a flag. The HDLC-based - framing schemes used in PPP are all examples of this approach. - - All three adaptation schemes introduce overhead; how much depends on - the distribution of IP packet sizes, the size(s) of the SNDUs, and in - the HDLC-like approaches, the content of the IP packet (since flag- - like sequences occurring in the packet must be escaped, which expands - them). 
The designer must also weigh implementation complexity and - performance in the choice and design of an adaptation layer. - -4. Connection-Oriented Subnetworks - - IP has no notion of a "connection"; it is a purely connectionless - protocol. When a connection is required by an application, it is - usually provided by TCP [RFC793], the Transmission Control Protocol, - running atop IP on an end-to-end basis. - - Connection-oriented subnetworks can be (and are widely) used to carry - IP, but often with considerable complexity. Subnetworks consisting - of few nodes can simply open a permanent connection between each pair - of nodes. This is frequently done with ATM. However, the number of - connections increases as the square of the number of nodes, so this - - - -Karn, et al. Best Current Practice [Page 9] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - is clearly impractical for large subnetworks. A "shim" layer between - IP and the subnetwork is therefore required to manage connections. - This is one of the most common functions of a Subnetwork Dependent - Convergence Function (SNDCF) sublayer between IP and a subnetwork. - - SNDCFs typically open subnetwork connections as needed when an IP - packet is queued for transmission and close them after an idle - timeout. There is no relation between subnetwork connections and any - connections that may exist at higher layers (e.g., TCP). - - Because Internet traffic is typically bursty and transaction- - oriented, it is often difficult to pick an optimal idle timeout. If - the timeout is too short, subnetwork connections are opened and - closed rapidly, possibly over-stressing the subnetwork connection - management system (especially if it was designed for voice traffic - call holding times). If the timeout is too long, subnetwork - connections are idle much of the time, wasting any resources - dedicated to them by the subnetwork. 
- - Purely connectionless subnets (such as Ethernet), which have no state - and dynamically share resources, are optimal for supporting best- - effort IP, which is stateless and dynamically shares resources. - Connection-oriented packet networks (such as ATM and Frame Relay), - which have state and dynamically share resources, are less optimal, - since best-effort IP does not benefit from the overhead of creating - and maintaining state. Connection-oriented circuit-switched networks - (including the PSTN and ISDN) have state and statically allocate - resources for a call, and thus require state creation and maintenance - overhead, but do not benefit from the efficiencies of statistical - multiplexing sharing of capacity inherent in IP. - - In any event, if an SNDCF that opens and closes subnet connections is - used to support IP, care should be taken to make sure that connection - processing in the subnet can keep up with relatively short holding - times. - -5. Broadcasting and Discovery - - Subnetworks fall into two categories: point-to-point and shared. A - point-to-point subnet has exactly two endpoint components (hosts or - routers); a shared link has more than two endpoint components, using - either an inherently broadcast medium (e.g., Ethernet, radio) or a - switching layer hidden from the network layer (e.g., switched - Ethernet, Myrinet [MYR95], ATM). Switched subnetworks handle - broadcast by copying broadcast packets, providing each interface that - supports one, or more, systems (hosts or routers) with a copy of each - packet. - - - - -Karn, et al. Best Current Practice [Page 10] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - Several Internet protocols for IPv4 make use of broadcast - capabilities, including link-layer address lookup (ARP), auto- - configuration (RARP, BOOTP, DHCP), and routing (RIP). - - A lack of broadcast capability can impede the performance of these - protocols, or render them inoperable (e.g., DHCP). 
ARP-like link - address lookup can be provided by a centralized database, but at the - expense of potentially higher response latency and the need for nodes - to have explicit knowledge of the ARP server address. Shared links - should support native, link-layer subnet broadcast. - - A corresponding set of IPv6 protocols uses multicasting (see next - section) instead of broadcasting to provide similar functions with - improved scaling in large networks. - -6. Multicasting - - The Internet model includes "multicasting", where IP packets are sent - to all the members of a multicast group [RFC1112] [RFC3376] - [RFC2710]. Multicast is an option in IPv4, but a standard feature of - IPv6. IPv4 multicast is currently used by multimedia, - teleconferencing, gaming, and file distribution (web, peer-to-peer - sharing) applications, as well as by some key network and host - protocols (e.g., RIPv2, OSPF, NTP). IPv6 additionally relies on - multicast for network configuration (DHCP-like autoconfiguration) and - link-layer address discovery [RFC2461] (replacing ARP). In the case - of IPv6, this can allow autoconfiguration and address discovery to - span across routers, whereas the IPv4 broadcast-based services cannot - without ad-hoc router support [RFC1812]. - - Multicast-enabled IP routers organize each multicast group into a - spanning tree, and route multicast packets by making copies of each - multicast packet and forwarding the copies to each output interface - that includes at least one downstream member of the multicast group. - - Multicasting is considerably more efficient when a subnetwork - explicitly supports it. For example, a router relaying a multicast - packet onto an Ethernet segment need send only one copy of the - packet, no matter how many members of the multicast group are - connected to the segment. 
Without native multicast support, routers - and switches on shared links would need to use broadcast with - software filters, such that every multicast packet sent incurs - software overhead for every node on the subnetwork, even if a node is - not a member of the multicast group. Alternately, the router would - transmit a separate copy to every member of the multicast group on - the segment, as is done on multicast-incapable switched subnets. - - - - - -Karn, et al. Best Current Practice [Page 11] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - Subnetworks using shared channels (e.g., radio LANs, Ethernets) are - especially suitable for native multicasting, and their designers - should make every effort to support it. This involves designating a - section of the subnetwork's own address space for multicasting. On - these networks, multicast is basically broadcast on the medium, with - Layer-2 receiver filters. - - Subnet interfaces also need to be designed to accept packets - addressed to some number of multicast addresses, in addition to the - unicast packets specifically addressed to them. The number of - multicast addresses that needs to be supported by a host depends on - the requirements of the associated host; at least several dozen will - meet most current needs. - - On low-speed networks, the multicast address recognition function may - be readily implemented in host software, but on high-speed networks, - it should be implemented in subnetwork hardware. This hardware need - not be complete; for example, many Ethernet interfaces implement a - "hashing" function where the IP layer receives all of the multicast - (and unicast) traffic to which the associated host subscribes, plus - some small fraction of multicast traffic to which the host does not - subscribe. Host/router software then has to discard the unwanted - packets that pass the Layer-2 multicast address filter [RFC1112]. 
- - There does not need to be a one-to-one mapping between a Layer-2 - multicast address and an IP multicast address. An address overlap - may significantly degrade the filtering capability of a receiver's - hardware multicast address filter. A subnetwork supporting only - broadcast should use this service for multicast and must rely on - software filtering. - - Switched subnetworks must also provide a mechanism for copying - multicast packets to ensure the packets reach at least all members of - a multicast group. One option is to "flood" multicast packets in the - same manner as broadcast. This can lead to unnecessary transmissions - on some subnetwork links (notably non-multicast-aware Ethernet - switches). Some subnetworks therefore allow multicast filter tables - to control which links receive packets belonging to a specific group. - To configure this automatically requires access to Layer-3 group - membership information (e.g., IGMP [RFC3376], or MLD [RFC2710]). - Various implementation options currently exist to provide a subnet - node with a list of mappings of multicast addresses to - ports/interfaces. These employ a range of approaches, including - signaling from end hosts (e.g., IEEE 802 GARP/GMRP [802.1p]), - signaling from switches (e.g., CGMP [CGMP] and RGMP [RFC3488]), - interception and proxy of IP group membership packets (e.g., IGMP/MLD - Proxy [MAGMA-PROXY]), and enabling Layer-2 devices to - snoop/inspect/peek into forwarded Layer-3 protocol headers (e.g., - - - -Karn, et al. Best Current Practice [Page 12] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - IGMP, MLD, PIM) so that they may infer Layer-3 multicast group - membership [MAGMA-SNOOP]. These approaches differ in their - complexity, flexibility, and ability to support new protocols. - -7. Bandwidth on Demand (BoD) Subnets - - Some subnets allow a number of subnet nodes to share a channel - efficiently by assigning transmission opportunities dynamically. 
- Transmission opportunities are requested by a subnet node when it has - packets to send. The subnet schedules and grants transmission - opportunities sufficient to allow the transmitting subnet node to - send one or more packets (or packet fragments). We call these - subnets Bandwidth on Demand (BoD) subnets. Examples of BoD subnets - include Demand Assignment Multiple Access (DAMA) satellite and - terrestrial wireless networks, IEEE 802.11 point coordination - function (PCF) mode, and DOCSIS. A connection-oriented network (such - as the PSTN, ATM or Frame Relay) reserves resources on a much longer - timescale, and is therefore not a BoD subnet in our taxonomy. - - The design parameters for BoD are similar to those in connection- - oriented subnetworks, although the implementations may vary - significantly. In BoD, the user typically requests access to the - shared channel for some duration. Access may be allocated for a - period of time at a specific rate, for a certain number of packets, - or until the user releases the channel. Access may be coordinated - through a central management entity or with a distributed algorithm - amongst the users. Examples of the resource that may be shared - include a terrestrial wireless hop, an upstream channel in a cable - television system, a satellite uplink, and an end-to-end satellite - channel. - - Long-delay BoD subnets pose problems similar to connection-oriented - subnets in anticipating traffic. While connection-oriented subnets - hold idle channels open expecting new data to arrive, BoD subnets - request channel access based on buffer occupancy (or expected buffer - occupancy) on the sending port. Poor performance will likely result - if the sender does not anticipate additional traffic arriving at that - port during the time it takes to grant a transmission request. 
It is - recommended that the algorithm have the capability to extend a hold - on the channel for data that has arrived after the original request - was generated (this may be done by piggybacking new requests on user - data). - - There is a wide variety of BoD protocols available. However, there - has been relatively little comprehensive research on the interactions - between BoD mechanisms and Internet protocol performance. Research - on some specific mechanisms is available (e.g., [AR02]). One item - that has been studied is TCP's retransmission timer [KY02]. BoD - - - -Karn, et al. Best Current Practice [Page 13] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - systems can cause spurious timeouts when adjusting from a relatively - high data rate, to a relatively low data rate. In this case, TCP's - transmitted data takes longer to get through the network than - predicted by the TCP sender's computed retransmission timeout. - Therefore, the TCP sender is prone to resending a segment - prematurely. - -8. Reliability and Error Control - - In the Internet architecture, the ultimate responsibility for error - recovery is at the end points [SRC81]. The Internet may occasionally - drop, corrupt, duplicate, or reorder packets, and the transport - protocol (e.g., TCP) or application (e.g., if UDP is used as the - transport protocol) must recover from these errors on an end-to-end - basis [RFC3155]. Error recovery in the subnetwork is therefore - justifiable only to the extent that it can enhance overall - performance. It is important to recognize that a subnetwork can go - too far in attempting to provide error recovery services in the - Internet environment. Subnet reliability should be "lightweight", - i.e., it only has to be "good enough", *not* perfect. - - In this section, we discuss how to analyze characteristics of a - subnetwork to determine what is "good enough". 
The discussion below - focuses on TCP, which is the most widely-used transport protocol in - the Internet. It is widely believed (and is a stated goal within the - IETF) that non-TCP transport protocols should attempt to be "TCP- - friendly" and have many of the same performance characteristics. - Thus, the discussion below should be applicable, even to portions of - the Internet where TCP may not be the predominant protocol. - -8.1. TCP vs Link-Layer Retransmission - - Error recovery involves the generation and transmission of redundant - information computed from user data. Depending on how much redundant - information is sent and how it is generated, the receiver can use it - to reliably detect transmission errors, correct up to some maximum - number of transmission errors, or both. The general approach is - known as Error Control Coding, or ECC. - - The use of ECC to detect transmission errors so that retransmissions - (hopefully without errors) can be requested is widely known as "ARQ" - (Automatic Repeat Request). - - When enough ECC information is available to permit the receiver to - correct some transmission errors without a retransmission, the - approach is known as Forward Error Correction (FEC). Due to the - greater complexity of the required ECC and the need to tailor its - design to the characteristics of a specific modem and channel, FEC - - - -Karn, et al. Best Current Practice [Page 14] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - has traditionally been implemented in special-purpose hardware - integral to a modem. This effectively makes it part of the physical - layer. - - Unlike ARQ, FEC was rarely used for telecommunications outside of - space links prior to the 1990s. It is now nearly universal in - telephone, cable and DSL modems, digital satellite links, and digital - mobile telephones. FEC is also heavily used in optical and magnetic - storage where "retransmissions" are not possible. 
- - Some systems use hybrid combinations of ARQ layered atop FEC; V.90 - dialup modems (in the upstream direction) with V.42 error control are - one example. Most errors are corrected by the trellis (FEC) code - within the V.90 modem, and most remaining errors are detected and - corrected by the ARQ mechanisms in V.42. - - Work is now underway to apply FEC above the physical layer, primarily - in connection with reliable multicasting [RFC3048] [RFC3450-RFC3453] - where conventional ARQ mechanisms are inefficient or difficult to - implement. However, in this discussion, we will assume that if FEC - is present, it is implemented within the physical layer. - - Depending on the layer in which it is implemented, error control can - operate on an end-to-end basis or over a shorter span, such as a - single link. TCP is the most important example of an end-to-end - protocol that uses an ARQ strategy. - - Many link-layer protocols use ARQ, usually some flavor of HDLC - [ISO3309]. Examples include the X.25 link layer, the AX.25 protocol - used in amateur packet radio, 802.11 wireless LANs, and the reliable - link layer specified in IEEE 802.2. - - Only end-to-end error recovery can ensure reliable service to the - application (see Section 8). However, some subnetworks (e.g., many - wireless links) also have link-layer error recovery as a performance - enhancement [RFC3366]. For example, many cellular links have small - physical frame sizes (< 100 bytes) and relatively high frame loss - rates. Relying solely on end-to-end error recovery can clearly yield - a performance degradation, as retransmissions across the end-to-end - path take much longer to be received than when link layer - retransmissions are used. Thus, link-layer error recovery can often - increase end-to-end performance. As a result, link-layer and end- - to-end recovery often co-exist; this can lead to the possibility of - inefficient interactions between the two layers of ARQ protocols. 
- - This inter-layer "competition" might lead to the following wasteful - situation. When the link layer retransmits (parts of) a packet, the - link latency momentarily increases. Since TCP bases its - - - -Karn, et al. Best Current Practice [Page 15] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - retransmission timeout on prior measurements of total end-to-end - latency, including that of the link in question, this sudden increase - in latency may trigger an unnecessary retransmission by TCP of a - packet that the link layer is still retransmitting. Such spurious - end-to-end retransmissions generate unnecessary load and reduce end- - to-end throughput. As a result, the link layer may even have - multiple copies of the same packet in the same link queue at the same - time. In general, one could say the competing error recovery is - caused by an inner control loop (link-layer error recovery) reacting - to the same signal as an outer control loop (end-to-end error - recovery) without any coordination between the loops. Note that this - is solely an efficiency issue; TCP continues to provide reliable - end-to-end delivery over such links. - - This raises the question of how persistent a link-layer sender should - be in performing retransmission [RFC3366]. We define the link-layer - (LL) ARQ persistency as the maximum time that a particular link will - spend trying to transfer a packet before it can be discarded. This - deliberately simplified definition says nothing about the maximum - number of retransmissions, retransmission strategies, queue sizes, - queuing disciplines, transmission delays, or the like. The reason we - use the term LL ARQ persistency, instead of a term such as "maximum - link-layer packet holding time," is that the definition closely - relates to link-layer error recovery. 
For example, on links that - implement straightforward error recovery strategies, LL ARQ - persistency will often correspond to a maximum number of - retransmissions permitted per link-layer frame. - - For link layers that do not or cannot differentiate between flows - (e.g., due to network layer encryption), the LL ARQ persistency - should be small. This avoids any harmful effects or performance - degradation resulting from indiscriminate high persistence. A - detailed discussion of these issues is provided in [RFC3366]. - - However, when a link layer can identify individual flows and apply - ARQ selectively [LKJK02], then the link ARQ persistency should be - high for a flow using reliable unicast transport protocols (e.g., - TCP) and must be low for all other flows. Setting the link ARQ - persistency larger than the largest link outage allows TCP to rapidly - restore transmission without needing to wait for a retransmission - time out. This generally improves TCP performance in the face of - transient outages. However, excessively high persistence may be - disadvantageous; a practical upper limit of 30-60 seconds may be - desirable. Implementation of such schemes remains a research issue. - (See also the following section "Recovery from Subnetwork Outages"). - - - - - - -Karn, et al. Best Current Practice [Page 16] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - Many subnetwork designers have opportunities to reduce the - probability of packet loss, e.g., with FEC, ARQ, and interleaving, at - the cost of increased delay. TCP performance improves with - decreasing loss but worsens with increasing end-to-end delay, so it - is important to find the proper balance through analysis and - simulation. - -8.2. Recovery from Subnetwork Outages - - Some types of subnetworks, particularly mobile radio, are subject to - frequent temporary outages. 
For example, an active cellular data - user may drive or walk into an area (such as a tunnel) that is out of - range of any base station. No packets will be delivered successfully - until the user returns to an area with coverage. - - The Internet protocols currently provide no standard way for a - subnetwork to explicitly notify an upper layer protocol (e.g., TCP) - that it is experiencing an outage rather than severe congestion. - - Under these circumstances TCP will, after each unsuccessful - retransmission, wait even longer before trying again; this is its - "exponential back-off" algorithm. Furthermore, TCP will not discover - that the subnetwork outage has ended until its next retransmission - attempt. If TCP has backed off, this may take some time. This can - lead to extremely poor TCP performance over such subnetworks. - - It is therefore highly desirable that a subnetwork subject to outages - does not silently discard packets during an outage. Ideally, the - subnetwork should define an interface to the next higher layer (i.e., - IP) that allows it to refuse packets during an outage, and to - automatically ask IP for new packets when it is again able to deliver - them. If it cannot do this, then the subnetwork should hold onto at - least some of the packets it accepts during an outage and attempt to - deliver them when the outage ends. When packets are discarded, IP - should be notified so that the appropriate ICMP messages can be sent. - - Note that it is *not* necessary to completely avoid dropping packets - during an outage. The purpose of holding onto a packet during an - outage, either in the subnetwork or at the IP layer, is so that its - eventual delivery will implicitly notify TCP that the subnetwork is - again operational. This is to enhance performance, not to ensure - reliability -- reliability, as discussed earlier, can only be ensured - on an end-to-end basis. 
- - Only a few packets per TCP connection, including ACKs, need be held - in this way to cause the TCP sender to recover from the additional - losses once the flow resumes [RFC3366]. - - - - -Karn, et al. Best Current Practice [Page 17] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - Because it would be a layering violation (and possibly a performance - hit) for IP or a subnetwork layer to look at TCP headers (which would - in any event be impossible if IPsec encryption [RFC2401] is in use), - it would be reasonable for the IP or subnetwork layers to choose, as - a design parameter, some small number of packets that will be - retained during an outage. - -8.3. CRCs, Checksums and Error Detection - - The TCP [RFC793], UDP [RFC768], ICMP, and IPv4 [RFC791] protocols all - use the same simple 16-bit 1's complement checksum algorithm - [RFC1071] to detect corrupted packets. The IPv4 header checksum - protects only the IPv4 header, while the TCP, ICMP, and UDP checksums - provide end-to-end error detection for both the transport pseudo - header (including network and transport layer information) and the - transport payload data. Protection of the data is optional for - applications using UDP [RFC768] for IPv4, but is required for IPv6. - - The Internet checksum is not very strong from a coding theory - standpoint, but it is easy to compute in software, and various - proposals to replace the Internet checksums with stronger checksums - have failed. However, it is known that undetected errors can and do - occur in packets received by end hosts [SP2000]. - - To reduce processing costs, IPv6 has no IP header checksum. The - destination host detects "important" errors in the IP header, such as - the delivery of the packet to the wrong destination. This is done by - including the IP source and destination addresses (pseudo header) in - the computation of the checksum in the TCP or UDP header, a practice - already performed in IPv4. 
Errors in other IPv6 header fields may go - undetected within the network; this was considered a reasonable price - to pay for a considerable reduction in the processing required by - each router, and it was assumed that subnetworks would use a strong - link CRC. - - One way to provide additional protection for an IPv4 or IPv6 header - is by the authentication and packet integrity services of the IP - Security (IPsec) protocol [RFC2401]. However, this may not be a - choice available to the subnetwork designer. - - Most subnetworks implement error detection just above the physical - layer. Packets corrupted in transmission are detected and discarded - before delivery to the IP layer. A 16-bit cyclic redundancy check - (CRC) is usually the minimum for error detection. This is - significantly more robust against most patterns of errors than the - 16-bit Internet checksum. Note that the error detection properties - of a specific CRC code diminish with increasing frame size. The - Point-to-Point Protocol [RFC1662] requires support of a 16-bit CRC - - - -Karn, et al. Best Current Practice [Page 18] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - for each link frame, with a 32-bit CRC as an option. (PPP is often - used in conjunction with a dialup modem, which provides its own error - control). Other subnetworks, including 802.3/Ethernet, AAL5/ATM, - FDDI, Token Ring, and PPP over SONET/SDH all use a 32-bit CRC. Many - subnetworks can also use other mechanisms to enhance the error - detection capability of the link CRC (e.g., FEC in dialup modems, - mobile radio and satellite channels). - - Any new subnetwork designed to carry IP should therefore provide - error detection for each IP packet that is at least as strong as the - 32-bit CRC specified in [ISO3309]. 
While this will achieve a very - low undetected packet error rate due to transmission errors, it will - not (and need not) achieve a very low packet loss rate as the - Internet protocols are better suited to dealing with lost packets - than to dealing with corrupted packets [SRC81]. - - Packet corruption may be, and is, also caused by bugs in host and - router hardware and software. Even if every subnetwork implemented - strong error detection, it is still essential that end-to-end - checksums are used at the receiving end host [SP2000]. - - Designers of complex subnetworks consisting of internal links and - packet switches should consider implementing error detection on an - edge-to-edge basis to cover an entire SNDU (or IP packet). A CRC - would be generated at the entry point to the subnetwork and checked - at the exit endpoint. This may be used instead of, or in combination - with, error detection at the interface to each physical link. An - edge-to-edge check has the significant advantage of protecting - against errors introduced anywhere within the subnetwork, not just - within its transmission links. Examples of this approach include the - way in which the Ethernet CRC-32 is handled by LAN bridges [802.1D]. - ATM AAL5 [ITU-I363] also uses an edge-to-edge CRC-32. - - Some specific applications may be tolerant of residual errors in the - data they exchange, but removal of the link CRC may expose the - network to an undesirable increase in undetected errors in the IP and - transport headers. Applications may also require a high level of - error protection for control information exchanged by protocols - acting above the transport layer. One example is a voice codec, - which is robust against bit errors in the speech samples. For such - mechanisms to work, the receiving application must be able to - tolerate receiving corrupted data. 
This also requires that an - application uses a mechanism to signal that payload corruption is - permitted and to indicate the coverage (headers and data) required to - be protected by the subnetwork CRC. The UDP-Lite protocol [RFC3828] - is the first Internet standards track transport protocol supporting - partial payload protection. Receipt of corrupt data by arbitrary - - - - -Karn, et al. Best Current Practice [Page 19] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - application protocols carries a serious danger that a subnet delivers - data with errors that remain undetected by the application and hence - corrupt the communicated data [SRC81]. - -8.4. How TCP Works - - One of TCP's functions is end-host based congestion control for the - Internet. This is a critical part of the overall stability of the - Internet, so it is important that link-layer designers understand - TCP's congestion control algorithms. - - TCP assumes that, at the most abstract level, the network consists of - links and queues. Queues provide output-buffering on links that are - momentarily oversubscribed. They smooth instantaneous traffic bursts - to fit the link bandwidth. When demand exceeds link capacity long - enough to fill the queue, packets must be dropped. The traditional - action of dropping the most recent packet ("tail dropping") is no - longer recommended [RFC2309] [RFC2914], but it is still widely - practiced. - - TCP uses sequence numbering and acknowledgments (ACKs) on an - end-to-end basis to provide reliable, sequenced delivery. TCP ACKs - are cumulative, i.e., each implicitly ACKs every segment received so - far. If a packet with an unexpected sequence number is received, the - ACK field in the packets returned by the receiver will cease to - advance. Using an optional enhancement, TCP can send selective - acknowledgments (SACKs) [RFC2018] to indicate which segments have - arrived at the receiver. 
- - Since the most common cause of packet loss is congestion, TCP treats - packet loss as an indication of potential Internet congestion along - the path between TCP end hosts. This happens automatically, and the - subnetwork need not know anything about IP or TCP. A subnetwork node - simply drops packets whenever it must, though some packet-dropping - strategies (e.g., RED) are more fair to competing flows than others. - - TCP recovers from packet losses in two different ways. The most - important mechanism is the retransmission timeout. If an ACK fails - to arrive after a certain period of time, TCP retransmits the oldest - unacked packet. Taking this as a hint that the network is congested, - TCP waits for the retransmission to be ACKed before it continues, and - it gradually increases the number of packets in flight as long as a - timeout does not occur again. - - A retransmission timeout can impose a significant performance - penalty, as the sender is idle during the timeout interval and - restarts with a congestion window of one TCP segment following the - - - - -Karn, et al. Best Current Practice [Page 20] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - timeout. To allow faster recovery from the occasional lost packet in - a bulk transfer, an alternate scheme, known as "fast recovery", was - introduced [RFC2581] [RFC2582] [RFC2914] [TCPF98]. - - Fast recovery relies on the fact that when a single packet is lost in - a bulk transfer, the receiver continues to return ACKs to subsequent - data packets that do not actually acknowledge any newly-received - data. These are known as "duplicate acknowledgments" or "dupacks". - The sending TCP can use dupacks as a hint that a packet has been lost - and retransmit it without waiting for a timeout. Dupacks effectively - constitute a negative acknowledgment (NAK) for the packet sequence - number in the acknowledgment field. 
TCP waits until a certain number - of dupacks (currently 3) are seen prior to assuming a loss has - occurred; this helps avoid an unnecessary retransmission during - out-of-sequence delivery. - - A technique called "Explicit Congestion Notification" (ECN) [RFC3168] - allows routers to directly signal congestion to hosts without - dropping packets. This is done by setting a bit in the IP header. - Since ECN support is likely to remain optional, the lack of an ECN - bit must *never* be interpreted as a lack of congestion. Thus, for - the foreseeable future, TCP must interpret a lost packet as a signal - of congestion. - - The TCP "congestion avoidance" [RFC2581] algorithm maintains a - congestion window (cwnd) controlling the amount of data TCP may have - in flight at any moment. Reducing cwnd reduces the overall bandwidth - obtained by the connection; similarly, raising cwnd increases - performance, up to the limit of the available capacity. - - TCP probes for available network capacity by initially setting cwnd - to one or two packets and then increasing cwnd by one packet for each - ACK returned from the receiver. This is TCP's "slow start" - mechanism. When a packet loss is detected (or congestion is signaled - by other mechanisms), cwnd is reset to one and the slow start process - is repeated until cwnd reaches one half of its previous setting - before the reset. Cwnd continues to increase past this point, but at - a much slower rate than before. If no further losses occur, cwnd - will ultimately reach the window size advertised by the receiver. - - This is an "Additive Increase, Multiplicative Decrease" (AIMD) - algorithm. The steep decrease of cwnd in response to congestion - provides for network stability; the AIMD algorithm also provides for - fairness between long running TCP connections sharing the same path. - - - - - - - -Karn, et al. Best Current Practice [Page 21] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -8.5. 
TCP Performance Characteristics - - Caveat - - Here we present a current "state-of-the-art" understanding of TCP - performance. This analysis attempts to characterize the performance - of TCP connections over links of varying characteristics. - - Link designers may wish to use the techniques in this section to - predict what performance TCP/IP may achieve over a new link-layer - design. Such analysis is encouraged. Because this is a relatively - new analysis, and the theory is based on single-stream TCP - connections under "ideal" conditions, it should be recognized that - the results of such analysis may differ from actual performance in - the Internet. That being said, we have done our best to provide the - designers with helpful information to get an accurate picture of the - capabilities and limitations of TCP under various conditions. - -8.5.1. The Formulae - - The performance of TCP's AIMD Congestion Avoidance algorithm has been - extensively analyzed. The current best formula for the performance - of the specific algorithms used by Reno TCP (i.e., the TCP specified - in [RFC2581]) is given by Padhye, et al. [PFTK98]. This formula is: - - BW = MSS / (RTT*sqrt(1.33*p) + RTO*p*[1+32*p^2]*min[1,3*sqrt(.75*p)]) - - where - - BW is the maximum TCP throughput achievable by an - individual TCP flow - MSS is the TCP segment size being used by the connection - RTT is the end-to-end round trip time of the TCP connection - RTO is the packet timeout (based on RTT) - p is the packet loss rate for the path - (i.e., .01 if there is 1% packet loss) - - Note that the speed of the links making up the Internet path does not - explicitly appear in this formula. Attempting to send faster than - the slowest link in the path causes the queue to grow at the - transmitter driving the bottleneck. This increases the RTT, which in - turn reduces the achievable throughput.
- - This is currently considered to be the best approximate formula for - Reno TCP performance. A further simplification of this formula is - generally made by assuming that RTO is approximately 5*RTT. - - - -Karn, et al. Best Current Practice [Page 22] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - TCP is constantly being improved. A simpler formula, which gives an - upper bound on the performance of any AIMD algorithm which is likely - to be implemented in TCP in the future, was derived by Ott, et al. - [MSMO97]. - - BW = C * MSS / (RTT * sqrt(p)) - - where C is 0.93. - -8.5.2. Assumptions - - Both formulae assume that the TCP Receiver Window is not limiting the - performance of the connection. Because the receiver window is - entirely determined by end-hosts, we assume that hosts will maximize - the announced receiver window to maximize their network performance. - - Both of these formulae allow BW to become infinite if there is no - loss. However, an Internet path will drop packets at bottlenecked - queues if the load is too high. Thus, a completely lossless TCP/IP - network can never occur (unless the network is being underutilized). - - The RTT used is the arithmetic average, including queuing delays. - - The formulae are for a single TCP connection. If a path carries many - TCP connections, each will follow the formulae above independently. - - The formulae assume long-running TCP connections. For connections - that are extremely short (<10 packets) and don't lose any packets, - performance is driven by the TCP slow-start algorithm. For - connections of medium length, where on average only a few segments - are lost, single connection performance will actually be slightly - better than given by the formulae above. - - The difference between the simple and complex formulae above is that - the complex formula includes the effects of TCP retransmission - timeouts.
For very low levels of packet loss (significantly less - than 1%), timeouts are unlikely to occur, and the formulae lead to - very similar results. At higher packet losses (1% and above), the - complex formula gives a more accurate estimate of performance (which - will always be significantly lower than the result from the simple - formula). - - Note that these formulae break down as p approaches 100%. - - - - - - -Karn, et al. Best Current Practice [Page 23] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -8.5.3. Analysis of Link-Layer Effects on TCP Performance - - Consider the following example: - - A designer invents a new wireless link layer which, on average, loses - 1% of IP packets. The link layer supports packets of up to 1040 - bytes, and has a one-way delay of 20 msec. - - If this link were to be used on an Internet path with a round trip - time greater than 80ms, the upper bound may be computed by: - - For MSS, use 1000 bytes to exclude the 40 bytes of minimum IPv4 and - TCP headers. - - For RTT, use 120 msec (80 msec for the Internet part, plus 20 msec - each way for the new wireless link). - - For p, use .01. For C, assume 1. - - The simple formula gives: - - BW = (1000 * 8 bits) / (.120 sec * sqrt(.01)) = 666 kbit/sec - - The more complex formula gives: - - BW = 402.9 kbit/sec - - If this were a 2 Mb/s wireless LAN, the designers might be somewhat - disappointed. - - Some observations on performance: - - 1. We have assumed that the packet losses on the link layer are - interpreted as congestion by TCP. This is a "fact of life" that - must be accepted. - - 2. The equations for TCP performance are all expressed in terms of - packet loss, but many subnetwork designers think in terms of - bit-error ratio. *If* channel bit errors are independent, then - the probability of a packet being corrupted is: - - p = 1 - ([1 - BER]^[FRAME_SIZE*8]) - - Here we assume FRAME_SIZE is in bytes and "^" represents - exponentiation. 
It includes the user data and all headers - (TCP,IP and subnetwork). (Note: this analysis assumes the - - - - - -Karn, et al. Best Current Practice [Page 24] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - subnetwork does not perform ARQ or transparent fragmentation - [RFC3366].) If the inequality - - BER * [FRAME_SIZE*8] << 1 - - holds, the packet loss probability p can be approximated by: - - p = BER * [FRAME_SIZE*8] - - These equations can be used to apply BER to the performance - equations above. - - Note that FRAME_SIZE can vary from one packet to the next. Small - packets (such as TCP acks) generally have a smaller probability - of packet error than, say, a TCP packet carrying one MSS (maximum - segment size) of user data. A flow of small TCP acks can be - expected to be slightly more reliable than a stream of larger TCP - data segments. - - It bears repeating that the above analysis assumes that bit - errors are statistically independent. Because this is not true - for many real links, our computation of p is actually an upper - bound, not the exact probability of packet loss. - - There are many reasons why bit errors are not independent on real - links. Many radio links are affected by propagation fading or by - interference that lasts over many bit times. Also, links with - Forward Error Correction (FEC) generally have very non-uniform - bit error distributions that depend on the type of FEC, but in - general the uncorrected errors tend to occur in bursts even when - channel symbol errors are independent. In all such cases, our - computation of p from BER can only place an upper limit on the - packet loss rate. - - If the distribution of errors under the FEC scheme is known, one - could apply the same type of analysis as above, using the correct - distribution function for the BER. It is more likely in these - FEC cases, however, that empirical methods are needed to - determine the actual packet loss rate. - - 3. 
Note that the packet size plays an important role. If the - subnetwork loss characteristics are such that large packets have - the same probability of loss as smaller packets, then larger - packets will yield improved performance. - - - - - - - -Karn, et al. Best Current Practice [Page 25] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - 4. We have chosen a specific RTT that might occur on a wide-area - Internet path within the USA. It is important to recognize that - a variety of RTT values are experienced in the Internet. - - For example, RTTs are typically less than 10 msec in a wired LAN - environment when communicating with a local host. International - connections may have RTTs of 200 msec or more. Modems and other - low-capacity links can add considerable delay due to their long - packet transmission (serialisation) times. - - Links over geostationary repeater satellites have one-way speed- - of-light delays of around 250ms, a minimum of 125ms propagation - delay up to the satellite and 125ms down. The RTT of an end-to- - end TCP connection that includes such a link can be expected to - be greater than 250ms. - - Queues on heavily-congested links may back up, increasing RTTs. - Finally, virtual private networks (VPNs) and other forms of - encryption and tunneling can add significant end-to-end delay to - network connections. - -9. Quality-of-Service (QoS) considerations - - It is generally recognized that specific service guarantees are - needed to support real-time multimedia, toll-quality telephony, and - other performance-critical applications. The provision of such - Quality of Service guarantees in the Internet is an active area of - research and standardization. The IETF has not converged on a single - service model, set of services, or single mechanism that will offer - useful guarantees to applications and be scalable to the Internet. - Indeed, the IETF does not have a single definition of Quality of - Service. 
[RFC2990] represents a current understanding of the - challenges in architecting QoS for the Internet. - - There are presently two architectural approaches to providing - mechanisms for QoS support in the Internet. - - IP Integrated Services (Intserv) [RFC1633] provides fine-grained - service guarantees to individual flows. Flows are identified by a - flow specification (flowspec), which creates a stateful association - between individual packets by matching fields in the packet header. - Capacity is reserved for the flow, and appropriate traffic - conditioning and scheduling is installed in routers along the path. - The ReSerVation Protocol (RSVP) [RFC2205] [RFC2210] is usually, but - need not necessarily be, used to install the flow QoS state. Intserv - defines two services, in addition to the Default (best effort) - service. - - - - -Karn, et al. Best Current Practice [Page 26] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - 1. Guaranteed Service (GS) [RFC2212] offers hard upper bounds on - delay to flows that conform to a traffic specification (TSpec). - It uses a fluid-flow model to relate the TSpec and reserved - bandwidth (RSpec) to variable delay. Non-conforming packets are - forwarded on a best-effort basis. - - 2. Controlled Load Service (CLS) [RFC2211] offers delay and packet - loss equivalent to that of an unloaded network to flows that - conform to a TSpec, but no hard bounds. Non-conforming packets - are forwarded on a best-effort basis. - - Intserv requires installation of state information in every - participating router. Performance guarantees cannot be made unless - this state is present in every router along the path. This, along - with RSVP processing and the need for usage-based accounting, is - believed to have scalability problems, particularly in the core of - the Internet [RFC2208]. - - IP Differentiated Services (Diffserv) [RFC2475] provides a "toolkit" - offering coarse-grained controls to aggregates of flows. 
Diffserv in - itself does *not* provide QoS guarantees, but can be used to - construct services with QoS guarantees across a Diffserv domain. - Diffserv attempts to address the scaling issues associated with - Intserv by requiring state awareness only at the edge of a Diffserv - domain. At the edge, packets are classified into flows, and the - flows are conditioned (marked, policed, or shaped) to a traffic - conditioning specification (TCS). A Diffserv Codepoint (DSCP), - identifying a per-hop behavior (PHB), is set in each packet header. - The DSCP is carried in the DS-field, subsuming six bits of the former - Type-of-Service (ToS) byte [RFC791] of the IP header [RFC2474]. The - PHB denotes the forwarding behavior to be applied to the packet in - each node in the Diffserv domain. Although there is a "recommended" - DSCP associated with each PHB, the mappings from DSCPs to PHBs are - defined by the DS-domain. In fact, there can be several DSCPs - associated with the same PHB. Diffserv presently defines three PHBs. - - 1. The class selector PHB [RFC2474] replaces the IP precedence field - of the former ToS byte. It offers relative forwarding - priorities. - - 2. The Expedited Forwarding (EF) PHB [RFC3246] [RFC3248] guarantees - that packets will have a well-defined minimum departure rate - which, if not exceeded, ensures that the associated queues are - short or empty. EF is intended to support services that offer - tightly-bounded loss, delay, and delay jitter. - - - - - - -Karn, et al. Best Current Practice [Page 27] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - 3. The Assured Forwarding (AF) PHB group [RFC2597] offers different - levels of forwarding assurance for each aggregated flow of - packets. Each AF group is independently allocated forwarding - resources. 
Packets are marked with one of three drop - precedences; those with the highest drop precedence are dropped - with higher probability than those marked with the lowest drop - precedence. DSCPs are recommended for four independent AF - groups, although a DS domain can have more or fewer AF groups. - - Ongoing work in the IETF is addressing ways to support Intserv with - Diffserv. There is some belief (e.g., as expressed in [RFC2990]) - that such an approach will allow individual flows to receive service - guarantees and scale to the global Internet. - - The QoS guarantees that can be offered by the IP layer are a product - of two factors: - - 1. the concatenation of the QoS guarantees offered by the subnets - along the path of a flow. This implies that a subnet may wish to - offer multiple services (with different QoS guarantees) to the IP - layer, which can then determine which flows use which subnet - service. To put it another way, forwarding behavior in the - subnet needs to be "clued" by the forwarding behavior (service or - PHB) at the IP layer, and - - 2. the operation of a set of cooperating mechanisms, such as - bandwidth reservation and admission control, policy management, - traffic classification, traffic conditioning (marking, policing - and/or shaping), selective discard, queuing, and scheduling. - Note that support for QoS in subnets may require similar - mechanisms, especially when these subnets are general topology - subnets (e.g., ATM, frame relay, or MPLS) or shared media - subnets. - - Many subnetwork designers face inherent tradeoffs between delay, - throughput, reliability, and cost. Other subnetworks have parameters - that manage bandwidth, internal connection state, and the like. - Therefore, the following subnetwork capabilities may be desirable, - although some might be trivial or moot if the subnet is a dedicated - point-to-point link. - - 1.
The subnetwork should have the ability to reserve bandwidth for a - connection or flow and schedule packets accordingly. - - 2. Bandwidth reservations should be based on a one- or two-token - bucket model, depending on whether the service is intended to - support constant-rate or bursty traffic. - - - - -Karn, et al. Best Current Practice [Page 28] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - 3. If a connection or flow does not use its reserved bandwidth at a - given time, the unused bandwidth should be available for other - flows. - - 4. Packets in excess of a connection or flow's agreed rate should be - forwarded as best-effort or discarded, depending on the service - offered by the subnet to the IP layer. - - 5. If a subnet contains error control mechanisms (retransmission - and/or FEC), it should be possible for the IP layer to influence - the inherent tradeoffs between uncorrected errors, packet losses, - and delay. These capabilities at the subnet/IP layer service - boundary correspond to selection of more or less error control - and/or to selection of particular error control mechanisms within - the subnetwork. - - 6. The subnet layer should know, and be able to inform the IP layer, - how much fixed delay and delay jitter it offers for a flow or - connection. If the Intserv model is used, the delay jitter - component may be best expressed in terms of the TSpec/RSpec model - described in [RFC2212]. - - 7. Support of the Diffserv class selectors [RFC2474] suggests that - the subnet might consider mechanisms that support priorities. - -10. Fairness vs Performance - - Subnetwork designers should be aware of the tradeoffs between - fairness and efficiency inherent in many transmission scheduling - algorithms. For example, many local area networks use contention - protocols to resolve access to a shared transmission channel. These - protocols represent overhead. 
While limiting the amount of data that - a subnet node may transmit per contention cycle helps assure timely - access to the channel for each subnet node, it also increases - contention overhead per unit of data sent. - - In some mobile radio networks, capacity is limited by interference, - which in turn depends on average transmitter power. Some receivers - may require considerably more transmitter power (generating more - interference and consuming more channel capacity) than others. - - In each case, the scheduling algorithm designer must balance - competing objectives: providing a fair share of capacity to each - subnet node while maximizing the total capacity of the network. One - approach for balancing performance and fairness is outlined in - [ES00]. - - - - - -Karn, et al. Best Current Practice [Page 29] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -11. Delay Characteristics - - The TCP sender bases its retransmission timeout (RTO) on measurements - of the round trip delay experienced by previous packets. This allows - TCP to adapt automatically to the very wide range of delays found on - the Internet. The recommended algorithms are described in [RFC2988]. - Evaluations of TCP's retransmission timer can be found in [AP99] and - [LS00]. - - These algorithms model the delay along an Internet path as a - normally-distributed random variable with a slowly-varying mean and - standard deviation. TCP estimates these two parameters by - exponentially smoothing individual delay measurements, and it sets - the RTO to the estimated mean delay plus some fixed number of - standard deviations. (The algorithm actually uses mean deviation as - an approximation to standard deviation, because it is easier to - compute.) - - The goal is to compute an RTO that is small enough to detect and - recover from packet losses while minimizing unnecessary ("spurious") - retransmissions when packets are unexpectedly delayed but not lost. 
- Although these goals conflict, the algorithm works well when the - delay variance along the Internet path is low, or the packet loss - rate is low. - - If the path delay variance is high, TCP sets an RTO that is much - larger than the mean of the measured delays. If the packet loss rate - is low, the large RTO is of little consequence, as timeouts occur - only rarely. Conversely, if the path delay variance is low, then TCP - recovers quickly from lost packets; again, the algorithm works well. - However, when delay variance and the packet loss rate are both high, - these algorithms perform poorly, especially when the mean delay is - also high. - - Because TCP uses returning acknowledgments as a "clock" to time the - transmission of additional data, excessively high delays (even if the - delay variance is low) also affect TCP's ability to fully utilize a - high-speed transmission pipe. It also slows the recovery of lost - packets, even when delay variance is small. - - Subnetwork designers should therefore minimize all three parameters - (delay, delay variance, and packet loss) as much as possible. - - In many subnetworks, these parameters are inherently in conflict. - For example, on a mobile radio channel, the subnetwork designer can - use retransmission (ARQ) and/or forward error correction (FEC) to - trade off delay, delay variance, and packet loss in an effort to - improve TCP performance. While ARQ increases delay variance, FEC - - - -Karn, et al. Best Current Practice [Page 30] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - does not. However, FEC (especially when combined with interleaving) - often increases mean delay, even on good channels where ARQ - retransmissions are not needed and ARQ would not increase either the - delay or the delay variance. - - The tradeoffs among these error control mechanisms and their - interactions with TCP can be quite complex, and are the subject of - much ongoing research. 
We therefore recommend that subnetwork - designers provide as much flexibility as possible in the - implementation of these mechanisms, and provide access to them as - discussed above in the section on Quality of Service. - -12. Bandwidth Asymmetries - - Some subnetworks may provide asymmetric bandwidth (or may cause TCP - packet flows to experience asymmetry in the capacity) and the - Internet protocol suite will generally still work fine. However, - there is a case when such a scenario reduces TCP performance. Since - TCP data segments are "clocked" out by returning acknowledgments, TCP - senders are limited by the rate at which ACKs can be returned - [BPK98]. Therefore, when the ratio of the available capacity of the - Internet path carrying the data to the bandwidth of the return path - of the acknowledgments is too large, the slow return of the ACKs - directly impacts performance. Since ACKs are generally smaller than - data segments, TCP can tolerate some asymmetry, but as a general - rule, designers of subnetworks should be aware that subnetworks with - significant asymmetry can result in reduced performance, unless - steps are taken to mitigate this [RFC3449]. - - Several strategies have been identified for reducing the impact of - asymmetry of the network path between two TCP end hosts, e.g., - [RFC3449]. These techniques attempt to reduce the number of ACKs - transmitted over the return path (low bandwidth channel) by changes - at the end host(s), and/or by modification of subnetwork packet - forwarding. While these solutions may mitigate the performance - issues caused by asymmetric subnetworks, they do have associated cost - and may have other implications. A fuller discussion of strategies - and their implications is provided in [RFC3449]. - -13. Buffering, flow and congestion control - - Many subnets include multiple links with varying traffic demands and - possibly different transmission speeds.
At each link there must be a - queuing system, including buffering, scheduling, and a capability to - discard excess subnet packets. These queues may also be part of a - subnet flow control or congestion control scheme. - - - - - -Karn, et al. Best Current Practice [Page 31] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - For the purpose of this discussion, we talk about packets without - regard to whether they refer to a complete IP packet or a subnetwork - frame. At each queue, a packet experiences a delay that depends on - competing traffic and the scheduling discipline, and is subjected to - a local discarding policy. - - Some subnets may have flow or congestion control mechanisms in - addition to packet dropping. Such mechanisms can operate on - components in the subnet layer, such as schedulers, shapers, or - discarders, and can affect the operation of IP forwarders at the - edges of the subnet. However, with the exception of Explicit - Congestion Notification [RFC3168] (discussed below), IP has no way to - pass explicit congestion or flow control signals to TCP. - - TCP traffic, especially aggregated TCP traffic, is bursty. As a - result, instantaneous queue depths can vary dramatically, even in - nominally stable networks. For optimal performance, packets should - be dropped in a controlled fashion, not just when buffer space is - unavailable. How much buffer space should be supplied is still a - matter of debate, but as a rule of thumb, each node should have - enough buffering to hold one link_bandwidth*link_delay product's - worth of data for each TCP connection sharing the link. - - This is often difficult to estimate, since it depends on parameters - beyond the subnetwork's control or knowledge. Internet nodes - generally do not implement admission control policies, and cannot - limit the number of TCP connections that use them. In general, it is - wise to err in favor of too much buffering rather than too little. 
- It may also be useful for subnets to incorporate mechanisms that - measure propagation delays to assist in buffer sizing calculations. - - There is a rough consensus in the research community that active - queue management is important to improving fairness, link - utilization, and throughput [RFC2309]. Although there are questions - and concerns about the effectiveness of active queue management - (e.g., [MBDL99]), it is widely considered an improvement over tail- - drop discard policies. - - One form of active queue management is the Random Early Detection - (RED) algorithm [RED93], a family of related algorithms. In one - version of RED, an exponentially-weighted moving average of the queue - depth is maintained: - - When this average queue depth is between a maximum threshold - max_th and a minimum threshold min_th, the probability of packets - that are dropped is proportional to the amount by which the - average queue depth exceeds min_th. - - - - -Karn, et al. Best Current Practice [Page 32] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - When this average queue depth is equal to max_th, the drop - probability is equal to a configurable parameter max_p. - - When this average queue depth is greater than max_th, packets are - always dropped. - - Numerous variants on RED appear in the literature, and there are - other active queue management algorithms which claim various - advantages over RED [GM02]. - - With an active queue management algorithm, dropped packets become a - feedback signal to trigger more appropriate congestion behavior by - the TCPs in the end hosts. Randomization of dropping tends to break - up the observed tendency of TCP windows belonging to different TCP - connections to become synchronized by correlated drops, and it also - imposes a degree of fairness on those connections that implement TCP - congestion avoidance properly. 
Another important property of active - queue management algorithms is that they attempt to keep average - queue depths short while accommodating large short-term bursts. - - Since TCP neither knows nor cares whether congestive packet loss - occurs at the IP layer or in a subnet, it may be advisable for - subnets that perform queuing and discarding to consider implementing - some form of active queue management. This is especially true if - large aggregates of TCP connections are likely to share the same - queue. However, active queue management may be less effective in the - case of many queues carrying smaller aggregates of TCP connections, - e.g., in an ATM switch that implements per-VC queuing. - - Note that the performance of active queue management algorithms is - highly sensitive to settings of configurable parameters, and also to - factors such as RTT [MBB00] [FB00]. - - Some subnets, most notably ATM, perform segmentation and reassembly - at the subnetwork edges. Care should be taken here in designing - discard policies. If the subnet discards a fragment of an IP packet, - then the remaining fragments become an unproductive load on the - subnet that can markedly degrade end-to-end performance [RF95]. - Subnetworks should therefore attempt to discard these extra fragments - whenever one of them must be discarded. If the IP packet has already - been partially forwarded when discarding becomes necessary, then - every remaining fragment except the one marking the end of the IP - packet should also be discarded. For ATM subnets, this specifically - means using Early Packet Discard and Partial Packet Discard [ATMFTM]. - - Some subnets include flow control mechanisms that effectively require - that the rate of traffic flows be shaped upon entry to the subnet. - One example of such a subnet mechanism is in the ATM Available Bit - - - -Karn, et al. 
Best Current Practice [Page 33] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - rate (ABR) service category [ATMFTM]. Such flow control mechanisms - have the effect of making the subnet nearly lossless by pushing - congestion into the IP routers at the edges of the subnet. In such a - case, adequate buffering and discard policies are needed in these - routers to deal with a subnet that appears to have varying bandwidth. - Whether there is a benefit in this kind of flow control is - controversial; there are numerous simulation and analytical studies - that go both ways. It appears that some of the issues leading to - such different results include sensitivity to ABR parameters, use of - binary rather than explicit rate feedback, use (or not) of per-VC - queuing, and the specific ATM switch algorithms selected for the - study. Anecdotally, some large networks that used IP over ABR to - carry TCP traffic have claimed it to be successful, but have - published no results. - - Another possible approach to flow control in the subnet would be to - work with TCP Explicit Congestion Notification (ECN) semantics - [RFC3168] through utilizing explicit congestion indicators in subnet - frames. Routers at the edges of the subnet, rather than shaping, - would set the explicit congestion bit in those IP packets that are - received in subnet frames that have an ECN indication. Nodes in the - subnet would need to implement an active queue management protocol - that marks subnet frames instead of dropping them. - - ECN is currently a proposed standard, but it is not yet widely - deployed. - -14. Compression - - Application data compression is a function that can usually be - omitted in the subnetwork. The endpoints typically have more CPU and - memory resources to run a compression algorithm and a better - understanding of what is being compressed. 
End-to-end compression - benefits every network element in the path, while subnetwork-layer - compression, by definition, benefits only a single subnetwork. - - Data presented to the subnetwork layer may already be in a compressed - format (e.g., a JPEG file), compressed at the application layer - (e.g., the optional "gzip", "compress", and "deflate" compression in - HTTP/1.1 [RFC2616]), or compressed at the IP layer (the IP Payload - Compression Protocol [RFC3173] supports DEFLATE [RFC2394] and LZS - [RFC2395]). Compression at the subnetwork edges is of no benefit for - any of these cases. - - The subnetwork may also process data that has been encrypted by the - application (OpenPGP [RFC2440] or S/MIME [RFC2633]), just above TCP - (SSL, TLS [RFC2246]), or just above IP (IPsec ESP [RFC2406]). - - - - -Karn, et al. Best Current Practice [Page 34] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - Ciphers generate high-entropy bit streams lacking any patterns that - can be exploited by a compression algorithm. - - However, much data is still transmitted uncompressed over the - Internet, so subnetwork compression may be beneficial. Any - subnetwork compression algorithm must not expand uncompressible data, - e.g., data that has already been compressed or encrypted. - - We make a strong recommendation that subnetworks operating at low - speed or with small MTUs compress IP and transport-level headers (TCP - and UDP) using several header compression schemes developed within - the IETF [RFC3150]. An uncompressed 40-byte TCP/IP header takes - about 33 milliseconds to send at 9600 bps. "VJ" TCP/IP header - compression [RFC1144] compresses most headers to 3-5 bytes, reducing - transmission time to several milliseconds on dialup modem links. - This is especially beneficial for small, latency-sensitive packets in - interactive sessions. 
- - Similarly, RTP compression schemes, such as CRTP [RFC2508] and ROHC - [RFC3095], compress most IP/UDP/RTP headers to 1-4 bytes. The - resulting savings are especially significant when audio packets are - kept small to minimize store-and-forward latency. - - Designers should consider the effect of the subnetwork error rate on - the performance of header compression. TCP ordinarily recovers from - lost packets by retransmitting only those packets that were actually - lost; packets arriving correctly after a packet loss are kept on a - resequencing queue and do not need to be retransmitted. In VJ TCP/IP - [RFC1144] header compression, however, the receiver cannot explicitly - notify a sender of data corruption and subsequent loss of - synchronization between compressor and decompressor. It relies - instead on TCP retransmission to re-synchronize the decompressor. - After a packet is lost, the decompressor must discard every - subsequent packet, even if the subnetwork makes no further errors, - until the sending TCP retransmits to re-synchronize the decompressor. - This effect can substantially magnify the effect of subnetwork packet - losses if the sending TCP window is large, as it will often be on a - path with a large bandwidth*delay product [LRKOJ99]. - - Alternate header compression schemes, such as those described in - [RFC2507], include an explicit request for retransmission of an - uncompressed packet to allow decompressor resynchronization without - waiting for a TCP retransmission. However, these schemes are not yet - in widespread use. - - Both TCP header compression schemes do not compress widely-used TCP - options such as selective acknowledgements (SACK). Both fail to - compress TCP traffic that makes use of explicit congestion - - - -Karn, et al. Best Current Practice [Page 35] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - notification (ECN). 
Work is under way in the IETF ROHC WG to address - these shortcomings in a ROHC header compression scheme for TCP - [RFC3095] [RFC3096]. - - The subnetwork error rate also is important for RTP header - compression. CRTP uses delta encoding, so a packet loss on the link - causes uncertainty about the subsequent packets, which often must be - discarded until the decompressor has notified the compressor and the - compressor has sent re-synchronizing information. This typically - takes slightly more than the end-to-end path round-trip time. For - links that combine significant error rates with latencies that - require multiple packets to be in flight at a time, this leads to - significant error propagation, i.e., subsequent losses caused by an - initial loss. - - For links that are both high-latency (multiple packets in flight from - a typical RTP stream) and error-prone, RTP ROHC provides a more - robust way of RTP header compression, at a cost of higher complexity - at the compressor and decompressor. For example, within a talk - spurt, only extended losses of (depending on the mode chosen) 12-64 - packets typically cause error propagation. - -15. Packet Reordering - - The Internet architecture does not guarantee that packets will arrive - in the same order in which they were originally transmitted; - transport protocols like TCP must take this into account. - - However, reordering does come at a cost with TCP as it is currently - defined. Because TCP returns a cumulative acknowledgment (ACK) - indicating the last in-order segment that has arrived, out-of-order - segments cause a TCP receiver to transmit a duplicate acknowledgment. - When the TCP sender notices three duplicate acknowledgments, it - assumes that a segment was dropped by the network and uses the fast - retransmit algorithm [Jac90] [RFC2581] to resend the segment. In - addition, the congestion window is reduced by half, effectively - halving TCP's sending rate. 
If a subnetwork reorders segments - significantly such that three duplicate ACKs are generated, the TCP - sender needlessly reduces the congestion window and performance - suffers. - - Packet reordering frequently occurs in parts of the Internet, and it - seems to be difficult or impossible to eliminate [BPS99]. For this - reason, research on improving TCP's behavior in the face of packet - reordering [LK00] [BA02] has begun. - - - - - - -Karn, et al. Best Current Practice [Page 36] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [BPS99] cites reasons why it may even be undesirable to eliminate - reordering. There are situations where average packet latency can be - reduced, link efficiency can be increased, and/or reliability can be - improved if reordering is permitted. Examples include certain high - speed switches within the Internet backbone and the parallel links - used over many Internet paths for load splitting and redundancy. - - This suggests that subnetwork implementers should try to avoid packet - reordering whenever possible, but not if doing so compromises - efficiency, impairs reliability, or increases average packet delay. - - Note that every header compression scheme currently standardized for - the Internet requires in-order packet delivery on the link between - compressor and decompressor. PPP is frequently used to carry - compressed TCP/IP packets; since it was originally designed for - point-to-point and dialup links, it is assumed to provide in-order - delivery. For this reason, subnetwork implementers who provide PPP - interfaces to VPNs and other more complex subnetworks, must also - maintain in-order delivery of PPP frames. - -16. Mobility - - Internet users are increasingly mobile. Not only are many Internet - nodes laptop computers, but pocket organizers and mobile embedded - systems are also becoming nodes on the Internet. 
These nodes may - connect to many different access points on the Internet over time, - and they expect this to be largely transparent to their activities. - Except when they are not connected to the Internet at all, and for - performance differences when they are connected, they expect that - everything will "just work" regardless of their current Internet - attachment point or local subnetwork technology. - - Changing a host's Internet attachment point involves one or more of - the following steps. - - First, if use of the local subnetwork is restricted, the user's - credentials must be verified and access granted. There are many ways - to do this. A trivial example would be an "Internet cafe" that - grants physical access to the subnetwork for a fee. Subnetworks may - implement technical access controls of their own; one example is IEEE - 802.11 Wired Equivalent Privacy [IEEE80211]. It is common - practice for both cellular telephone and Internet service providers - (ISPs) to agree to serve one another's users; RADIUS [RFC2865] is the - standard method for ISPs to exchange authorization information. - - Second, the host may have to be reconfigured with IP parameters - appropriate for the local subnetwork. This usually includes setting - an IP address, default router, and domain name system (DNS) servers. - - - -Karn, et al. Best Current Practice [Page 37] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - On multiple-access networks, the Dynamic Host Configuration Protocol - (DHCP) [RFC2131] is almost universally used for this purpose. On PPP - links, these functions are performed by the IP Control Protocol - (IPCP) [RFC1332]. - - Third, traffic destined for the mobile host must be routed to its - current location. This roaming function is the most common meaning - of the term "Internet mobility".
- - Internet mobility can be provided at any of several layers in the - Internet protocol stack, and there is ongoing debate as to which is - the most appropriate and efficient. Mobility is already a feature of - certain application layer protocols; the Post Office Protocol (POP) - [RFC1939] and the Internet Message Access Protocol (IMAP) [RFC3501] - were created specifically to provide mobility in the receipt of - electronic mail. - - Mobility can also be provided at the IP layer [RFC3344]. This - mechanism provides greater transparency, viz., IP addresses that - remain fixed as the nodes move, but at the cost of potentially - significant network overhead and increased delay because of the sub- - optimal network routing and tunneling involved. - - Some subnetworks may provide internal mobility, transparent to IP, as - a feature of their own internal routing mechanisms. To the extent - that these simplify routing at the IP layer, reduce the need for - mechanisms like Mobile IP, or exploit mechanisms unique to the - subnetwork, this is generally desirable. This is especially true - when the subnetwork covers a relatively small geographic area and the - users move rapidly between the attachment points within that area. - Examples of internal mobility schemes include Ethernet switching and - intra-system handoff in cellular telephony. - - However, if the subnetwork is physically large and connects to other - parts of the Internet at multiple geographic points, care should be - taken to optimize the wide-area routing of packets between nodes on - the external Internet and nodes on the subnet. This is generally - done with "nearest exit" routing strategies. Because a given - subnetwork may be unaware of the actual physical location of a - destination on another subnetwork, it simply routes packets bound for - the other subnetwork to the nearest router between the two. This - implies some awareness of IP addressing and routing within the - subnetwork. 
The subnetwork may wish to use IP routing internally for - wide area routing and restrict subnetwork-specific routing to - constrained geographic areas where the effects of suboptimal routing - are minimized. - - - - - -Karn, et al. Best Current Practice [Page 38] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -17. Routing - - Subnetworks connecting more than two systems must provide their own - internal Layer-2 forwarding mechanisms, either implicitly (e.g., - broadcast) or explicitly (e.g., switched). Since routing is the - major function of the Internet layer, the question naturally arises - as to the interaction between routing at the Internet layer and - routing in the subnet, and proper division of function between the - two. - - Layer-2 subnetworks can be point-to-point, connecting two systems, or - multipoint. Multipoint subnetworks can be broadcast (e.g., shared - media or emulated) or non-broadcast. Generally, IP considers - multipoint subnetworks as broadcast, with shared-medium Ethernet as - the canonical (and historical) example, and point-to-point - subnetworks as a degenerate case. Non-broadcast subnetworks may - require additional mechanisms, e.g., above IP at the routing layer - [RFC2328]. - - IP is ignorant of the topology of the subnetwork layer. In - particular, reconfiguration of subnetwork paths is not tracked by the - IP layer. IP is only affected by whether it can send/receive packets - sent to the remotely connected systems via the subnetwork interface - (i.e., the reachability from one router to another). IP further - considers that subnetworks are largely static -- that both their - membership and existence are stable at routing timescales (tens of - seconds); changes to these are considered re-provisioning, rather - than routing. - - Routing functionality in a subnetwork is related to addressing in - that subnetwork. 
Resolution of addresses on subnetwork links is - required for forwarding IP packets across links (e.g., ARP for IPv4, - or ND for IPv6). There is unlikely to be direct interaction between - subnetwork routing and IP routing. Where broadcast is provided or - explicitly emulated, address resolution can be used directly; where - not provided, the link layer routing may interface to a protocol for - resolution, e.g., to the Next-Hop Resolution Protocol [RFC2332] to - provide context-dependent address resolution capabilities. - - Subnetwork routing can either complement or compete with IP routing. - It complements IP when a subnetwork encapsulates its internal - routing, and where the effects of that routing are not visible at the - IP layer. However, if different paths in the subnetwork have - characteristics that affect IP routing, it can affect or even inhibit - the convergence of IP routing. - - - - - - -Karn, et al. Best Current Practice [Page 39] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - Routing protocols generally consider Layer-2 subnetworks, i.e., with - subnet masks and no intermediate IP hops, to have uniform routing - metrics to all members. Routing can break when a link's - characteristics do not match the routing metric, in this case, e.g., - when some member pairs have different path characteristics. Consider - a virtual Ethernet subnetwork that includes both nearby (sub- - millisecond latency) and remote (100's of milliseconds away) systems. - Presenting that group as a single subnetwork means that some routing - protocols will assume that all pairs have the same delay, and that - that delay is small. Because this is not the case, the routing - tables constructed may be suboptimal or may even fail to converge. - - When a subnetwork is used for transit between a set of routers, it - conventionally provides the equivalent of a full mesh of point-to- - point links. 
Simplicity of the internal subnet structure can be used - (e.g., via NHRP [RFC2332]) to reduce the size of address resolution - tables, but routing exchanges will continue to reflect the full mesh - they emulate. In general, subnetworks should not be used as a - transit among a set of routers where routing protocols would break if - a full mesh of equivalent point-to-point links were used. - - Some subnetworks have special features that allow the use of more - effective or responsive routing mechanisms that cannot be implemented - in IP because of its need for generality. One example is the self- - learning bridge algorithm widely used in Ethernet networks. Learning - bridges perform Layer-2 subnetwork forwarding, avoiding the need for - dynamic routing at each subnetwork hop. Another is the "handoff" - mechanism in cellular telephone networks, particularly the "soft - handoff" scheme in IS-95 CDMA. - - Subnetworks that cover large geographic areas or include links of - widely-varying capabilities should be avoided. IP routing generally - considers all multipoint subnets equivalent to a local, shared-medium - link with uniform metrics between any pair of systems, and ignores - internal subnetwork topology. Where a subnetwork diverges from that - assumption, it is the obligation of subnetwork designers to provide - compensating mechanisms. Not doing so can affect the scalability and - convergence of IP routing, as noted above. - - The subnetwork designer who decides to implement internal routing - should consider whether a custom routing algorithm is warranted, or - if an existing Internet routing algorithm or protocol may suffice. - The designer should consider whether this decision is to reduce the - address resolution table size (possible, but with additional protocol - support required), or is trying to reduce routing table complexity. 
- The latter may be better achieved by partitioning the subnetwork, - either physically or logically, and using network-layer protocols to - support partitioning (e.g., AS's in BGP). Protocols and routing - - - -Karn, et al. Best Current Practice [Page 40] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - algorithms can be notoriously subtle, complex, and difficult to - implement correctly. Much work can be avoided if existing protocols - or implementations can be readily reused. - -18. Security Considerations - - Security has become a high priority in the design and operation of - the Internet. The Internet is vast, and countless organizations and - individuals own and operate its various components. A consensus has - emerged for what might be called a "security placement principle": a - security mechanism is most effective when it is placed as close as - possible to, and under the direct control of the owner of the asset - that it protects. - - A corollary of this principle is that end-to-end security (e.g., - confidentiality, authentication, integrity, and access control) - cannot be ensured with subnetwork security mechanisms. Not only are - end-to-end security mechanisms much more closely associated with the - end-user assets they protect, they are also much more comprehensive. - For example, end-to-end security mechanisms cover gaps that can - appear when otherwise good subnetwork mechanisms are concatenated. - This is an important application of the end-to-end principle [SRC81]. - - Several security mechanisms that can be used end-to-end have already - been deployed in the Internet and are enjoying increasing use. 
The - most important are the Secure Sockets Layer (SSL) [SSL2] [SSL3] and - TLS [RFC2246] primarily used to protect web commerce, Pretty Good - Privacy (PGP) [RFC1991] and S/MIME [RFCs-2630-2634], primarily used - to protect and authenticate email and software distributions, the - Secure Shell (SSH), used for secure remote access and file transfer, - and IPsec [RFC2401], a general purpose encryption and authentication - mechanism that sits just above IP and can be used by any IP - application. (IPsec can actually be used either on an end-to-end - basis or between security gateways that do not include either or both - end systems.) - - Nonetheless, end-to-end security mechanisms are not used as widely as - might be desired. However, the group could not reach consensus on - whether subnetwork designers should be actively encouraged to - implement mechanisms to protect user data. - - The clear consensus of the working group held that subnetwork - security mechanisms, especially when weak or incorrectly implemented - [BGW01], may actually be counterproductive. The argument is that - subnetwork security mechanisms can lull end users into a false sense - of security, diminish the incentive to deploy effective end-to-end - - - - - -Karn, et al. Best Current Practice [Page 41] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - mechanisms, and encourage "risky" uses of the Internet that would not - be made if users understood the inherent limits of subnetwork - security mechanisms. - - The other point of view encourages subnetwork security on the - principle that it is better than the default situation, which all too - often is no security at all. Users of especially vulnerable subnets - (such as consumers who have wireless home networks and/or shared - media Internet access) often have control over at most one endpoint - -- usually a client -- and therefore cannot enforce the use of end- - to-end mechanisms. 
However, subnet security can be entirely adequate - for protecting low-valued assets against the most likely threats. In - any event, subnet mechanisms do not preclude the use of end-to-end - mechanisms, which are typically used to protect highly-valued assets. - This viewpoint recognizes that many security policies implicitly - assume that the entire end-to-end path is composed of a series of - concatenated links that are nominally physically secured. That is, - these policies assume that all endpoints of all links are trusted, - and that access to the physical medium by attackers is difficult. To - meet the assumptions of such policies, explicit mechanisms are needed - for links (especially shared medium links) that lack physical - protection. This, for example, is the rationale that underlies Wired - Equivalent Privacy (WEP) in the IEEE 802.11 [IEEE80211] wireless LAN - standard, and the Baseline Privacy Interface in the DOCSIS [DOCSIS1] - [DOCSIS2] data over cable television networks standards. - - We therefore recommend that subnetwork designers who choose to - implement security mechanisms to protect user data be as candid as - possible with the details of such security mechanisms and the - inherent limits of even the most secure mechanisms when implemented - in a subnetwork rather than on an end-to-end basis. - - In keeping with the "placement principle", a clear consensus exists - for another subnetwork security role: the protection of the - subnetwork itself. Possible threats to subnetwork assets include - theft of service and denial of service; shared media subnets tend to - be especially vulnerable to such attacks. In some cases, mechanisms - that protect subnet assets can also improve (but cannot ensure) end- - to-end security. - - One security service can be provided by the subnetwork that will aid - in the solution of an overall Internet problem: subnetwork security - should provide a mechanism to authenticate the source of a subnetwork - frame. 
This function is missing in some current protocols, e.g., the - use of ARP [RFC826] to associate an IPv4 address with a MAC address. - The IPv6 Neighbor Discovery (ND) [RFC2461] performs a similar - function. - - - - -Karn, et al. Best Current Practice [Page 42] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - There are well-known security flaws with this address resolution - mechanism [Wilbur89]. However, the inclusion of subnetwork frame - source authentication will permit a secure subnetwork address. - - Another potential role for subnetwork security is to protect users - against traffic analysis, i.e., identifying the communicating parties - and determining their communication patterns and volumes even when - their actual contents are protected by strong end-to-end security - mechanisms. Lower-layer security can be more effective against - traffic analysis due to its inherent ability to aggregate the - communications of multiple parties sharing the same physical - facilities while obscuring higher-layer protocol information that - indicates specific end points, such as IP addresses and TCP/UDP port - numbers. - - However, traffic analysis is a notoriously subtle and difficult - threat to understand and defeat, far more so than threats to - confidentiality and integrity. We therefore urge extreme care in the - design of subnetwork security mechanisms specifically intended to - thwart traffic analysis. - - Subnetwork designers must keep in mind that design and implementation - for security is difficult [Schneier00]. [Schneier95] describes - protocols and algorithms which are considered well-understood and - believed to be sound. - - Poor design process, subtle design errors and flawed implementation - can result in gaping vulnerabilities. In recent years, a number of - subnet standards have had problems exposed. The following are - examples of mistakes that have been made: - - 1. Use of weak and untested algorithms [Crypto9912] [BGW01]. 
For a - variety of reasons, algorithms were chosen which had subtle - flaws, making them vulnerable to a variety of attacks. - - 2. Use of "security by obscurity" [Schneier00] [Crypto9912]. One - common mistake is to assume that keeping cryptographic algorithms - secret makes them more secure. This is intuitive, but wrong. - Full public disclosure early in the design process attracts peer - review by knowledgeable cryptographers. Exposure of flaws by - this review far outweighs any imagined benefit from forcing - attackers to reverse engineer security algorithms. - - 3. Inclusion of trapdoors [Schneier00] [Crypto9912]. Trapdoors are - flaws surreptitiously left in an algorithm to allow it to be - broken. This might be done to recover lost keys or to permit - surreptitious access by governmental agencies. Trapdoors can be - discovered and exploited by malicious attackers. - - - -Karn, et al. Best Current Practice [Page 43] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - 4. Sending passwords or other identifying information as clear text. - For many years, analog cellular telephones could be cloned and - used to steal service. The cloners merely eavesdropped on the - registration protocols that exchanged everything in clear text. - - 5. Keys which are common to all systems on a subnet [BGW01]. - - 6. Incorrect use of a sound mechanism. For example [BGW01], one - subnet standard includes an initialization vector which is poorly - designed and poorly specified. A determined attacker can easily - recover multiple ciphertexts encrypted with the same key stream - and perform statistical attacks to decipher them. - - 7. Identifying information sent in clear text that can be resolved - to an individual, identifiable device. This creates a - vulnerability to attacks targeted to that device (or its owner). - - 8. Inability to renew and revoke shared secret information. - - 9. Insufficient key length. - - 10. 
Failure to address "man-in-the-middle" attacks, e.g., with mutual - authentication. - - 11. Failure to provide a form of replay detection, e.g., to prevent a - receiver from accepting packets from an attacker that simply - resends previously captured network traffic. - - 12. Failure to provide integrity mechanisms when providing - confidentiality schemes [Bel98]. - - This list is by no means comprehensive. Design problems are - difficult to avoid, but expert review is generally invaluable in - avoiding problems. - - In addition, well-designed security protocols can be compromised by - implementation defects. Examples of such defects include use of - predictable pseudo-random numbers [RFC1750], vulnerability to buffer - overflow attacks due to unsafe use of certain I/O system calls - [WFBA2000], and inadvertent exposure of secret data. - -19. Contributors - - This document represents a consensus of the members of the IETF - Performance Implications of Link Characteristics (PILC) working - group. - - - - - -Karn, et al. Best Current Practice [Page 44] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - This document would not have been possible without the contributions - of a great number of people in the Performance Implications of Link - Characteristics Working Group. In particular, the following people - provided major contributions of text, editing, and advice on this - document: Mark Allman provided the final editing to complete this - document. Carsten Bormann provided text on robust header - compression. Gorry Fairhurst provided text on broadcast and - multicast issues, routing, and many valuable comments on the entire - document. Aaron Falk provided text on bandwidth on demand. Dan - Grossman provided text on many facets of the document. Reiner Ludwig - provided thorough document review and text on TCP vs. Link-Layer - Retransmission. Jamshid Mahdavi provided text on TCP performance - calculations. 
Saverio Mascolo provided feedback on the document. - Gabriel Montenegro provided feedback on the document. Marie-Jose - Montpetit provided text on bandwidth on demand. Joe Touch provided - text on multicast, broadcast, and routing, and Lloyd Wood provided - many valuable comments on versions of the document. - -20. Informative References - - References of the form RFCnnnn are Internet Request for Comments - (RFC) documents available online at www.rfc-editor.org. - - [802.1D] Information Technology Telecommunications and - information exchange between systems Local and - metropolitan area networks, Common specifications Media - access control (MAC) bridges, IEEE 802.1D, 1998. ISO - 15802-3. - - [802.1p] IEEE, 802.1p, Standard for Local and Metropolitan Area - Networks - Supplement to Media Access Control (MAC) - Bridges: Traffic Class Expediting and Multicast. - - [AP99] Allman, M. and V. Paxson, On Estimating End-to-End - Network Path Properties, In Proceedings of ACM SIGCOMM - 99. - - [AR02] Acar, G. and C. Rosenberg, Weighted Fair Bandwidth-on- - Demand (WFBoD) for Geo-Stationary Satellite Networks - with On-Board Processing, Computer Networks, 39(1), - 2002. - - [ATMFTM] The ATM Forum, "Traffic Management Specification, - Version 4.0", April 1996, document af-tm-0056.000. - http://www.atmforum.com/ - - - - - - -Karn, et al. Best Current Practice [Page 45] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [BA02] Blanton, E. and M. Allman, On Making TCP More Robust to - Packet Reordering. ACM Computer Communication Review, - 32(1), January 2002. - - [Bel98] Bellovin, S., "Cryptography and the Internet", in - Proceedings of CRYPTO '98, August 1998. - http://www.research.att.com/~smb/papers/inet-crypto.pdf - - [BGW01] Borisov, N., Goldberg, I. and D. Wagner, "Intercepting - Mobile Communications: The Insecurity of 802.11," In - Proceedings of ACM MobiCom, July 2001. - - [BPK98] Balakrishnan, H., Padmanabhan, V. and R. Katz, 
"The - Effects of Asymmetry on TCP Performance." ACM Mobile - Networks and Applications (MONET), 1998. - - [BPS99] Bennet, J.C.R., Partridge, C. and N. Shectman, "Packet - Reordering is Not Pathological Network Behavior", - IEEE/ACM Transactions on Networking, Vol. 7, No. 6, - December 1999. - - [CGMP] Farinacci D., Tweedly A. and T. Speakman, "Cisco Group - Management Protocol (CGMP)", 1996/1997. - ftp://ftpeng.cisco.com/ipmulticast/specs/cgmp.txt - - [Crypto9912] Schneier, B., "European Cellular Encryption Algorithms", - Crypto-Gram, December 15, 1999. - http://www.counterpane.com - - [DIX82] Digital Equipment Corp, Intel Corp, Xerox Corp, - Ethernet Local Area Network Specification Version 2.0, - November 1982. - - [DOCSIS1] Data-Over-Cable Service Interface Specifications, Radio - Frequency Interface Specification 1.0, SP-RFI-I05- - 991105, November 1999, Cable Television Laboratories, - Inc. - - [DOCSIS2] Data-Over-Cable Service Interface Specifications, Radio - Frequency Interface Specification 1.1, SP-RFIv1.1-I05- - 000714, July 2000, Cable Television Laboratories, Inc. - - [DOCSIS3] Lai, W.S., "DOCSIS-Based Cable Networks: Impact of - Large Data Packets on Upstream Capacity", 14th ITC - Specialists Seminar on Access Networks and Systems, - Barcelona, Spain, April 25-27, 2001. - - - - - -Karn, et al. Best Current Practice [Page 46] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [EN301192] ETSI, European Broadcasting Union, Digital Video - Broadcasting (DVB); DVB Specification for Data - Broadcasting, European Standard (Telecommunications - Series) EN 301 192 v1.2.1(1999-06). - - [ES00] Eckhardt, D. and P. Steenkiste, "Effort-limited Fair - (ELF) Scheduling for Wireless Networks", Proceedings of - IEEE Infocom 2000. - - [FB00] Firoiu V. and M. Borden, "A Study of Active Queue - Management for Congestion Control" to appear in Infocom - 2000. - - [GM02] Grieco, L. and S. 
Mascolo, "TCP Westwood and Easy RED - to Improve Fairness in High-Speed Networks", - Proceedings of the 7th International Workshop on - Protocols for High-Speed Networks, April 2002. - - [IEEE8023] IEEE 802.3 CSMA/CD Access Method. - http://standards.ieee.org/ - - [IEEE80211] IEEE 802.11 Wireless LAN standard. - http://standards.ieee.org/ - - [ISO3309] ISO/IEC 3309:1991(E), "Information Technology - - Telecommunications and information exchange between - systems - High-level data link control (HDLC) - procedures - Frame structure", International - Organization For Standardization, Fourth edition 1991- - 06-01. - - [ISO13818] ISO/IEC, ISO/IEC 13818-1:2000(E) Information - Technology - Generic coding of moving pictures and - associated audio information: Systems, Second edition, - 2000-12-01 International Organization for - Standardization and International Electrotechnical - Commission. - - [ITU-I363] ITU-T I.363.5 B-ISDN ATM Adaptation Layer Specification - Type AAL5, International Standards Organisation (ISO), - 1996. - - [Jac90] Jacobson, V., Modified TCP Congestion Avoidance - Algorithm. Email to the end2end-interest mailing list, - April 1990. - ftp://ftp.ee.lbl.gov/email/vanj.90apr30.txt - - - - - -Karn, et al. Best Current Practice [Page 47] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [KY02] Khafizov, F. and M. Yavuz, Running TCP Over IS-2000, - Proceedings of IEEE ICC, 2002. - - [LK00] Ludwig, R. and R. H. Katz, "The Eifel Algorithm: Making - TCP Robust Against Spurious Retransmissions", ACM - Computer Communication Review, Vol. 30, No. 1, January - 2000. - - [LKJK02] Ludwig, R., Konrad, A., Joseph, A. D. and R. H. Katz, - "Optimizing the End-to-End Performance of Reliable - Flows over Wireless Links", Kluwer/ACM Wireless - Networks Journal, Vol. 8, Nos. 2/3, pp. 289-299, - March-May 2002. - - [LRKOJ99] Ludwig, R., Rathonyi, B., Konrad, A., Oden, K. and A. - Joseph, Multi-Layer Tracing of TCP over a Reliable - Wireless Link, pp. 
144-154, In Proceedings of ACM - SIGMETRICS 99. - - [LS00] Ludwig, R. and K. Sklower, The Eifel Retransmission - Timer, ACM Computer Communication Review, Vol. 30, No. - 3, July 2000. - - [MAGMA-PROXY] Fenner, B., He, H., Haberman, B. and H. Sandick, - "IGMP/MLD-based Multicast Forwarding ("IGMP/MLD - Proxying")", Work in Progress. - - [MAGMA-SNOOP] Christensen, M., Kimball, K. and F. Solensky, - "Considerations for IGMP and MLD Snooping Switches", - Work in Progress. - - [MBB00] May, M., Bonald, T. and J-C. Bolot, "Analytic - Evaluation of RED Performance", INFOCOM 2000. - - [MBDL99] May, M., Bolot, J., Diot, C. and B. Lyles, "Reasons not - to deploy RED", Proc. of 7th. International Workshop on - Quality of Service (IWQoS'99), June 1999. - - [MSMO97] Mathis, M., Semke, J., Mahdavi, J. and T. Ott, "The - Macroscopic Behavior of the TCP Congestion Avoidance - Algorithm", Computer Communication Review, Vol. 27, - number 3, July 1997. - - [MYR95] Boden, N., Cohen, D., Felderman, R., Kulawik, A., - Seitz, C., et al. MYRINET: A Gigabit per Second Local - Area Network, IEEE-Micro, Vol. 15, No.1, February 1995, - pp. 29-36. - - - - -Karn, et al. Best Current Practice [Page 48] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [PFTK98] Padhye, J., Firoiu, V., Towsley, D. and J. Kurose, - "Modeling TCP Throughput: a Simple Model and its - Empirical Validation", UMASS CMPSCI Tech Report TR98- - 008, Feb. 1998. - - [RED93] Floyd, S. and V. Jacobson, "Random Early Detection - gateways for Congestion Avoidance", IEEE/ACM - Transactions in Networking, Vol. 1 No. 4, August 1993. - http://www.aciri.org/floyd/papers/red/red.html - - [RF95] Romanow, A. and S. Floyd, "Dynamics of TCP Traffic over - ATM Networks". IEEE Journal of Selected Areas in - Communication, Vol.13 No. 4, May 1995, p. 633-641. - - [RFC791] Postel, J., "Internet Protocol", STD 5, RFC 791, - September 1981. 
- - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC768] Postel, J., "User Datagram Protocol", STD 6, RFC 768, - August 1980. - - [RFC826] Plummer, D.C., "Ethernet Address Resolution Protocol: - Or converting network protocol addresses to 48-bit - Ethernet address for transmission on Ethernet - hardware", STD 37, RFC 826, November 1982. - - [RFC1071] Braden, R., Borman, D. and C. Partridge, "Computing the - Internet checksum", RFC 1071, September 1988. - - [RFC1112] Deering, S., "Host Extensions for IP Multicasting", STD - 5, RFC 1112, August 1989. - - [RFC1144] Jacobson, V., "Compressing TCP/IP Headers for Low-Speed - Serial Links", RFC 1144, February 1990. - - [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC - 1191, November 1990. - - [RFC1332] McGregor, C., "The PPP Internet Protocol Control - Protocol (IPCP)", RFC 1332, May 1992. - - [RFC1435] Knowles, S., "IESG Advice from Experience with Path MTU - Discovery", RFC 1435, March 1993. - - - - - - -Karn, et al. Best Current Practice [Page 49] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [RFC1633] Braden, R., Clark, D. and S. Shenker, "Integrated - Services in the Internet Architecture: an Overview", - RFC 1633, June 1994. - - [RFC1661] Simpson, W., "The Point-to-Point Protocol (PPP)", STD - 51, RFC 1661, July 1994. - - [RFC1662] Simpson, W., Ed., "PPP in HDLC-like Framing", STD 51, - RFC 1662, July 1994. - - [RFC1750] Eastlake 3rd, D., Crocker, S. and J. Schiller, - "Randomness Recommendations for Security", RFC 1750, - December 1994. - - [RFC1812] Baker, F., Ed., "Requirements for IP Version 4 - Routers", RFC 1812, June 1995. - - [RFC1939] Myers, J. and M. Rose, "Post Office Protocol - Version - 3", STD 53, RFC 1939, May 1996. - - [RFC1981] McCann, J., Deering, S. and J. Mogul, "Path MTU - Discovery for IP version 6", RFC 1981, August 1996. - - [RFC1991] Atkins, D., Stallings, W. and P. 
Zimmermann, "PGP - Message Exchange Formats", RFC 1991, August 1996. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S. and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October - 1996. - - [RFC2131] Droms, R., "Dynamic Host Configuration Protocol", RFC - 2131, March 1997. - - [RFC2205] Braden, R., Ed., Zhang, L., Berson, S., Herzog, S. and - S. Jamin, "Resource ReSerVation Protocol (RSVP) -- - Version 1 Functional Specification", RFC 2205, - September 1997. - - [RFC2208] Mankin, A., Baker, F., Braden, B., Bradner, S., O'Dell, - M., Romanow, A., Weinrib, A. and L. Zhang, "Resource - ReSerVation Protocol (RSVP) -- Version 1 Applicability - Statement Some Guidelines on Deployment", RFC 2208, - September 1997. - - [RFC2210] Wroclawski, J., "The Use of RSVP with IETF Integrated - Services", RFC 2210, September 1997. - - - - - -Karn, et al. Best Current Practice [Page 50] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [RFC2211] Wroclawski, J., "Specification of the Controlled-Load - Network Element Service", RFC 2211, September 1997. - - [RFC2212] Shenker, S., Partridge, C. and R. Guerin, - "Specification of Guaranteed Quality of Service", RFC - 2212, September 1997. - - [RFC2246] Dierks, T. and C. Allen, "The TLS Protocol Version - 1.0", RFC 2246, January 1999. - - [RFC2309] Braden, B., Clark, D., Crowcroft, J., Davie, B., - Deering, S., Estrin, D., Floyd, S., Jacobson, V., - Minshall, G., Partridge, C., Peterson, L., - Ramakrishnan, K., Shenker, S., Wroclawski, J. and L. - Zhang, "Recommendations on Queue Management and - Congestion Avoidance in the Internet", RFC 2309, April - 1998. - - [RFC2322] van den Hout, K., Koopal, A. and R. van Mook, - "Management of IP numbers by peg-dhcp", RFC 2322, 1 - April 1998. - - [RFC2328] Moy, J., "OSPF Version 2", STD 54, RFC 2328, April - 1998. - - [RFC2332] Luciani, J., Katz, D., Piscitello, D., Cole, B. and N. - Doraswamy, "NBMA Next Hop Resolution Protocol (NHRP)", - RFC 2332, April 1998. 
- - [RFC2364] Gross, G., Kaycee, M., Li, A., Malis, A. and J. - Stephens, "PPP Over AAL5", RFC 2364, July 1998. - - [RFC2394] Pereira, R., "IP Payload Compression Using DEFLATE", - RFC 2394, December 1998. - - [RFC2395] Friend, R. and R. Monsour, "IP Payload Compression - Using LZS", RFC 2395, December 1998. - - [RFC2401] Kent, S. and R. Atkinson, "Security Architecture for - the Internet Protocol", RFC 2401, November 1998. - - [RFC2406] Kent, S. and R. Atkinson, "IP Encapsulating Security - Payload (ESP)", RFC 2406, November 1998. - - [RFC2440] Callas, J., Donnerhacke, L., Finney, H. and R. Thayer, - "OpenPGP Message Format", RFC 2440, November 1998. - - - - - -Karn, et al. Best Current Practice [Page 51] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version - 6 (IPv6) Specification", RFC 2460, December 1998. - - [RFC2461] Narten, T., Nordmark, E. and W. Simpson, "Neighbor - Discovery for IP Version 6 (IPv6)", RFC 2461, December - 1998. - - [RFC2474] Nichols, K., Blake, S., Baker, F. and D. Black, - "Definition of the Differentiated Services Field (DS - Field) in the IPv4 and IPv6 Headers", RFC 2474, - December 1998. - - [RFC2475] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. - and W. Weiss, "An Architecture for Differentiated - Services", RFC 2475, December 1998. - - [RFC2507] Degermark, M., Nordgren, B. and S. Pink, "IP Header - Compression", RFC 2507, February 1999. - - [RFC2508] Casner, S. and V. Jacobson, "Compressing IP/UDP/RTP - Headers for Low-Speed Serial Links", RFC 2508, February - 1999. - - [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2582] Floyd, S. and T. Henderson, "The NewReno Modification - to TCP's Fast Recovery Algorithm", RFC 2582, April - 1999. - - [RFC2597] Heinanen, J., Baker, F., Weiss, W. and J. Wroclawski, - "Assured Forwarding PHB Group", RFC 2597, June 1999. 
- - [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., - Masinter, L., Leach, P. and T. Berners-Lee, "Hypertext - Transfer Protocol -- HTTP/1.1", RFC 2616, June 1999. - - [RFC2630] Housley, R., "Cryptographic Message Syntax", RFC 2630, - June 1999. - - [RFC2631] Rescorla, E., "Diffie-Hellman Key Agreement Method", - RFC 2631, June 1999. - - [RFC2632] Ramsdell, B., Ed., "S/MIME Version 3 Certificate - Handling", RFC 2632, June 1999. - - - - - - -Karn, et al. Best Current Practice [Page 52] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [RFC2633] Ramsdell, B., "S/MIME Version 3 Message Specification", - RFC 2633, June 1999. - - [RFC2634] Hoffman, P., "Enhanced Security Services for S/MIME", - RFC 2634, June 1999. - - [RFC2684] Grossman, D. and J. Heinanen, "Multiprotocol - Encapsulation over ATM Adaptation Layer 5", RFC 2684, - September 1999. - - [RFC2686] Bormann, C., "The Multi-Class Extension to Multi-Link - PPP", RFC 2686, September 1999. - - [RFC2687] Bormann, C., "PPP in a Real-time Oriented HDLC-like - Framing", RFC 2687, September 1999. - - [RFC2689] Bormann, C., "Providing Integrated Services over Low- - bitrate Links", RFC 2689, September 1999. - - [RFC2710] Deering, S., Fenner, W. and B. Haberman, "Multicast - Listener Discovery (MLD) for IPv6", RFC 2710, October - 1999. - - [RFC2784] Farinacci, D., Li, T., Hanks, S., Meyer, D. and P. - Traina, "Generic Routing Encapsulation (GRE)", RFC - 2784, March 2000. - - [RFC2865] Rigney, C., Willens, S., Rubens, A. and W. Simpson, - "Remote Authentication Dial In User Service (RADIUS)", - RFC 2865, June 2000. - - [RFC2914] Floyd, S., "Congestion Control Principles", BCP 41, RFC - 2914, September 2000. - - [RFC2923] Lahey, K., "TCP Problems with Path MTU Discovery", RFC - 2923, September 2000. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's - Retransmission Timer", RFC 2988, November 2000. 
- - [RFC2990] Huston, G., "Next Steps for the IP QoS Architecture", - RFC 2990, November 2000. - - [RFC3048] Whetten, B., Vicisano, L., Kermode, R., Handley, M., - Floyd, S. and M. Luby, "Reliable Multicast Transport - Building Blocks for One-to-Many Bulk-Data Transfer", - RFC 3048, January 2001. - - - - -Karn, et al. Best Current Practice [Page 53] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [RFC3095] Bormann, C., Ed., Burmeister, C., Degermark, M., - Fukushima, H., Hannu, H., Jonsson, L-E., Hakenberg, R., - Koren, T., Le, K., Liu, Z., Martensson, A., Miyazaki, - A., Svanbro, K., Wiebke, T., Yoshimura, T. and H. - Zheng, "RObust Header Compression (ROHC): Framework - and four profiles: RTP, UDP, ESP, and uncompressed", - RFC 3095, July 2001. - - [RFC3096] Degermark, M., Ed., "Requirements for robust IP/UDP/RTP - header compression", RFC 3096, July 2001. - - [RFC3150] Dawkins, S., Montenegro, G., Kojo, M. and V. Magret, - "End-to-end Performance Implications of Slow Links", - BCP 48, RFC 3150, July 2001. - - [RFC3155] Dawkins, S., Montenegro, G., Kojo, M., Magret, V. and - N. Vaidya, "End-to-end Performance Implications of - Links with Errors", BCP 50, RFC 3155, August 2001. - - [RFC3168] Ramakrishnan, K., Floyd, S. and D. Black, "The Addition - of Explicit Congestion Notification (ECN) to IP", RFC - 3168, September 2001. - - [RFC3173] Shacham, A., Monsour, B., Pereira, R. and M. Thomas, - "IP Payload Compression Protocol (IPComp)", RFC 3173, - September 2001. - - [RFC3246] Davie, B., Charny, A., Bennet, J.C.R., Benson, K., Le - Boudec, J.Y., Courtney, W., Davari, S., Firoiu, V. and - D. Stiliadis, "An Expedited Forwarding PHB (Per-Hop - Behavior)", RFC 3246, March 2002. - - [RFC3248] Armitage, G., Carpenter, B., Casati, A., Crowcroft, J., - Halpern, J., Kumar, B. and J. Schnizlein, "A Delay - Bound alternative revision of RFC 2598", RFC 3248, - March 2002. 
- - [RFC3344] Perkins, C., Ed., "IP Mobility Support for IPv4", RFC - 3344, August 2002. - - [RFC3366] Fairhurst, G. and L. Wood, "Advice to link designers on - link Automatic Repeat reQuest (ARQ)", BCP 62, RFC 3366, - August 2002. - - - - - - - - -Karn, et al. Best Current Practice [Page 54] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [RFC3376] Cain, B., Deering, S., Kouvelas, I., Fenner, B. and A. - Thyagarajan, "Internet Group Management Protocol, - Version 3", RFC 3376, October 2002. - - [RFC3449] Balakrishnan, H., Padmanabhan, V., Fairhurst, G. and M. - Sooriyabandara, "TCP Performance Implications of - Network Path Asymmetry", BCP 69, RFC 3449, December - 2002. - - [RFC3450] Luby, M., Gemmell, J., Vicisano, L., Rizzo, L. and J. - Crowcroft, "Asynchronous Layered Coding (ALC) Protocol - Instantiation", RFC 3450, December 2002. - - [RFC3451] Luby, M., Gemmell, J., Vicisano, L., Rizzo, L., - Handley, M. and J. Crowcroft, "Layered Coding Transport - (LCT) Building Block", RFC 3451, December 2002. - - [RFC3452] Luby, M., Vicisano, L., Gemmell, J., Rizzo, L., - Handley, M. and J. Crowcroft, "Forward Error Correction - (FEC) Building Block", RFC 3452, December 2002. - - [RFC3453] Luby, M., Vicisano, L., Gemmell, J., Rizzo, L., - Handley, M. and J. Crowcroft, "The Use of Forward Error - Correction (FEC) in Reliable Multicast", RFC 3453, - December 2002. - - [RFC3488] Wu, I. and T. Eckert, "Cisco Systems Router-port Group - Management Protocol (RGMP)", RFC 3488, February 2003. - - [RFC3501] Crispin, M., "INTERNET MESSAGE ACCESS PROTOCOL - - VERSION 4rev1", RFC 3501, March 2003. - - [RFC3828] Larzon, L-A., Degermark, M., Pink, S., Jonsson, L-E., - Ed. and G. Fairhurst, Ed., "The User Datagram Protocol - (UDP)-Lite Protocol", RFC 3828, June 2004. - - [Schneier95] Schneier, B., Applied Cryptography: Protocols, - Algorithms and Source Code in C (John Wiley and Sons, - October 1995). 
- - [Schneier00] Schneier, B., Secrets and Lies: Digital Security in a - Networked World (John Wiley and Sons, August 2000). - - [SP2000] Stone, J. and C. Partridge, "When the CRC and TCP - Checksum Disagree", ACM SIGCOMM, September 2000. - http://www.acm.org/sigcomm/sigcomm2000/conf/ - paper/sigcomm2000-9-1.pdf - - - - -Karn, et al. Best Current Practice [Page 55] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - [SRC81] Saltzer, J., Reed D. and D. Clark, "End-to-End - Arguments in System Design". Second International - Conference on Distributed Computing Systems (April, - 1981) pages 509-512. Published with minor changes in - ACM Transactions in Computer Systems 2, 4, November, - 1984, pages 277-288. Reprinted in Craig Partridge, - editor Innovations in internetworking. Artech House, - Norwood, MA, 1988, pages 195-206. ISBN 0-89006-337-0. - - [SSL2] Hickman, K., "The SSL Protocol", Netscape - Communications Corp., Feb 9, 1995. - - [SSL3] Frier, A., Karlton, P. and P. Kocher, "The SSL 3.0 - Protocol", Netscape Communications Corp., Nov 18, 1996. - - [TCPF98] Lin, D. and H.T. Kung, "TCP Fast Recovery Strategies: - Analysis and Improvements", IEEE Infocom, March 1998. - http://www.eecs.harvard.edu/networking/papers/infocom- - tcp-final-198.pdf - - [WFBA2000] Wagner, D., Foster, J., Brewer, E. and A. Aiken, "A - First Step Toward Automated Detection of Buffer Overrun - Vulnerabilities", Proceedings of NDSS2000. - http://www.isoc.org/isoc/conferences/ndss/ - 2000/proceedings/039.pdf - - [Wilbur89] Wilbur, Steve R., Jon Crowcroft, and Yuko Murayama. - "MAC layer Security Measures in Local Area Networks", - Local Area Network Security, Workshop LANSEC '89 - Proceedings, Springer-Verlag, April 1989, pp. 53-64. - - - - - - - - - - - - - - - - - - - - - -Karn, et al. Best Current Practice [Page 56] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -21. 
Contributors' Addresses - - Aaron Falk - USC/Information Sciences Institute - 4676 Admiralty Way - Marina Del Rey, CA 90292 - - Phone: 310-448-9327 - EMail: falk@isi.edu - - - Saverio Mascolo - Dipartimento di Elettrotecnica ed Elettronica, - Politecnico di Bari Via Orabona 4, 70125 Bari, Italy - - Phone: +39 080 596 3621 - EMail: mascolo@poliba.it - URL: http://www-dee.poliba.it/dee-web/Personale/mascolo.html - - - Marie-Jose Montpetit - MJMontpetit.com - - EMail: marie@mjmontpetit.com - - - - - - - - - - - - - - - - - - - - - - - - - - - -Karn, et al. Best Current Practice [Page 57] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -22. Authors' Addresses - - Phil Karn, Editor - Qualcomm 5775 Morehouse Drive - San Diego CA 92121 - - Phone: 858 587 1121 - EMail: karn@qualcomm.com - - - Carsten Bormann - Universitaet Bremen TZI - Postfach 330440 - D-28334 Bremen, Germany - - Phone: +49 421 218 7024 - Fax: +49 421 218 7000 - EMail: cabo@tzi.org - - - Godred (Gorry) Fairhurst - Department of Engineering, University of Aberdeen, - Aberdeen, AB24 3UE, United Kingdom - - EMail: gorry@erg.abdn.ac.uk - URL: http://www.erg.abdn.ac.uk/users/gorry - - - Dan Grossman - Motorola, Inc. - 111 Locke Drive - Marlboro, MA 01752 - - EMail: Dan.Grossman@motorola.com - - - Reiner Ludwig - Ericsson Research - Ericsson Allee - 1 52134 Herzogenrath, Germany - - Phone: +49 2407 575 719 - EMail: Reiner.Ludwig@ericsson.com - - - - - - - - -Karn, et al. Best Current Practice [Page 58] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - - Jamshid Mahdavi - Novell, Inc. 
- - EMail: jmahdavi@earthlink.net - - - Gabriel Montenegro - Sun Microsystems Laboratories, Europe - 180, Avenue de l'Europe - 38334 Saint Ismier CEDEX - France - - EMail: gab@sun.com - - - Joe Touch - USC/Information Sciences Institute - 4676 Admiralty Way - Marina del Rey CA 90292 - - Phone: 310 448 9151 - EMail: touch@isi.edu - URL: http://www.isi.edu/touch - - - Lloyd Wood - Cisco Systems - 9 New Square Park, Bedfont Lakes - Feltham TW14 8HA - United Kingdom - - Phone: +44 (0)20 8824 4236 - EMail: lwood@cisco.com - URL: http://www.ee.surrey.ac.uk/Personal/L.Wood/ - - - - - - - - - - - - - - - - - -Karn, et al. Best Current Practice [Page 59] - -RFC 3819 Advice for Internet Subnetwork Designers July 2004 - - -23. Full Copyright Statement - - Copyright (C) The Internet Society (2004). This document is subject - to the rights, licenses and restrictions contained in BCP 78, and - except as set forth therein, the authors retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE - REPRESENTS OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE - INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF - THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed - to pertain to the implementation or use of the technology - described in this document or the extent to which any license - under such rights might or might not be available; nor does it - represent that it has made any independent effort to identify any - such rights. Information on the procedures with respect to - rights in RFC documents can be found in BCP 78 and BCP 79. 
- - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use - of such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository - at http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention - any copyrights, patents or patent applications, or other - proprietary rights that may cover technology that may be required - to implement this standard. Please address the information to the - IETF at ietf-ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - - - -Karn, et al. Best Current Practice [Page 60] - diff --git a/kernel/picotcp/RFC/rfc3927.txt b/kernel/picotcp/RFC/rfc3927.txt deleted file mode 100644 index 466b9eb..0000000 --- a/kernel/picotcp/RFC/rfc3927.txt +++ /dev/null @@ -1,1851 +0,0 @@ - - - - - - -Network Working Group S. Cheshire -Request for Comments: 3927 Apple Computer -Category: Standards Track B. Aboba - Microsoft Corporation - E. Guttman - Sun Microsystems - May 2005 - - - Dynamic Configuration of IPv4 Link-Local Addresses - -Status of This Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2005). - - -Abstract - - To participate in wide-area IP networking, a host needs to be - configured with IP addresses for its interfaces, either manually by - the user or automatically from a source on the network such as a - Dynamic Host Configuration Protocol (DHCP) server. 
Unfortunately, - such address configuration information may not always be available. - It is therefore beneficial for a host to be able to depend on a - useful subset of IP networking functions even when no address - configuration is available. This document describes how a host may - automatically configure an interface with an IPv4 address within the - 169.254/16 prefix that is valid for communication with other devices - connected to the same physical (or logical) link. - - IPv4 Link-Local addresses are not suitable for communication with - devices not directly connected to the same physical (or logical) - link, and are only used where stable, routable addresses are not - available (such as on ad hoc or isolated networks). This document - does not recommend that IPv4 Link-Local addresses and routable - addresses be configured simultaneously on the same interface. - - - - - - - -Cheshire, et al. Standards Track [Page 1] - -RFC 3927 IPv4 Link-Local May 2005 - - -Table of Contents - - 1. Introduction. . . . . . . . . . . . . . . . . . . . . . . . . 3 - 1.1. Requirements. . . . . . . . . . . . . . . . . . . . . . 3 - 1.2. Terminology . . . . . . . . . . . . . . . . . . . . . . 3 - 1.3. Applicability . . . . . . . . . . . . . . . . . . . . . 5 - 1.4. Application Layer Protocol Considerations . . . . . . . 6 - 1.5. Autoconfiguration Issues. . . . . . . . . . . . . . . . 7 - 1.6. Alternate Use Prohibition . . . . . . . . . . . . . . . 7 - 1.7. Multiple Interfaces . . . . . . . . . . . . . . . . . . 8 - 1.8. Communication with Routable Addresses . . . . . . . . . 8 - 1.9. When to configure an IPv4 Link-Local Address. . . . . . 8 - 2. Address Selection, Defense and Delivery . . . . . . . . . . . 9 - 2.1. Link-Local Address Selection. . . . . . . . . . . . . . 10 - 2.2. Claiming a Link-Local Address . . . . . . . . . . . . . 11 - 2.3. Shorter Timeouts. . . . . . . . . . . . . . . . . . . . 13 - 2.4. Announcing an Address . . . . . . . . . . . . . . . . . 13 - 2.5. 
Conflict Detection and Defense. . . . . . . . . . . . . 13 - 2.6. Address Usage and Forwarding Rules. . . . . . . . . . . 14 - 2.7. Link-Local Packets Are Not Forwarded. . . . . . . . . . 16 - 2.8. Link-Local Packets are Local. . . . . . . . . . . . . . 16 - 2.9. Higher-Layer Protocol Considerations. . . . . . . . . . 17 - 2.10. Privacy Concerns. . . . . . . . . . . . . . . . . . . . 17 - 2.11. Interaction between DHCPv4 and IPv4 Link-Local - State Machines. . . . . . . . . . . . . . . . . . . . . 17 - 3. Considerations for Multiple Interfaces. . . . . . . . . . . . 18 - 3.1. Scoped Addresses. . . . . . . . . . . . . . . . . . . . 18 - 3.2. Address Ambiguity . . . . . . . . . . . . . . . . . . . 19 - 3.3. Interaction with Hosts with Routable Addresses. . . . . 20 - 3.4. Unintentional Autoimmune Response . . . . . . . . . . . 21 - 4. Healing of Network Partitions . . . . . . . . . . . . . . . . 22 - 5. Security Considerations . . . . . . . . . . . . . . . . . . . 23 - 6. Application Programming Considerations. . . . . . . . . . . . 24 - 6.1. Address Changes, Failure and Recovery . . . . . . . . . 24 - 6.2. Limited Forwarding of Locators. . . . . . . . . . . . . 24 - 6.3. Address Ambiguity . . . . . . . . . . . . . . . . . . . 25 - 7. Router Considerations . . . . . . . . . . . . . . . . . . . . 25 - 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 25 - 9. Constants . . . . . . . . . . . . . . . . . . . . . . . . . . 26 - 10. References. . . . . . . . . . . . . . . . . . . . . . . . . . 26 - 10.1. Normative References. . . . . . . . . . . . . . . . . . 26 - 10.2. Informative References. . . . . . . . . . . . . . . . . 26 - Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . . . 27 - Appendix A - Prior Implementations. . . . . . . . . . . . . . . . 28 - - - - - - - -Cheshire, et al. Standards Track [Page 2] - -RFC 3927 IPv4 Link-Local May 2005 - - -1. 
Introduction - - As the Internet Protocol continues to grow in popularity, it becomes - increasingly valuable to be able to use familiar IP tools such as FTP - not only for global communication, but for local communication as - well. For example, two people with laptop computers supporting IEEE - 802.11 Wireless LANs [802.11] may meet and wish to exchange files. - It is desirable for these people to be able to use IP application - software without the inconvenience of having to manually configure - static IP addresses or set up a DHCP server [RFC2131]. - - This document describes a method by which a host may automatically - configure an interface with an IPv4 address in the 169.254/16 prefix - that is valid for Link-Local communication on that interface. This - is especially valuable in environments where no other configuration - mechanism is available. The IPv4 prefix 169.254/16 is registered - with the IANA for this purpose. Allocation of IPv6 Link-Local - addresses is described in "IPv6 Stateless Address Autoconfiguration" - [RFC2462]. - - Link-Local communication using IPv4 Link-Local addresses is only - suitable for communication with other devices connected to the same - physical (or logical) link. Link-Local communication using IPv4 - Link-Local addresses is not suitable for communication with devices - not directly connected to the same physical (or logical) link. - - Microsoft Windows 98 (and later) and Mac OS 8.5 (and later) already - support this capability. This document standardizes usage, - prescribing rules for how IPv4 Link-Local addresses are to be treated - by hosts and routers. In particular, it describes how routers are to - behave when receiving packets with IPv4 Link-Local addresses in the - source or destination address. With respect to hosts, it discusses - claiming and defending addresses, maintaining Link-Local and routable - IPv4 addresses on the same interface, and multi-homing issues. - -1.1. 
Requirements - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in "Key words for use in - RFCs" [RFC2119]. - -1.2. Terminology - - This document describes Link-Local addressing, for IPv4 communication - between two hosts on a single link. A set of hosts is considered to - be "on the same link", if: - - - - -Cheshire, et al. Standards Track [Page 3] - -RFC 3927 IPv4 Link-Local May 2005 - - - - when any host A from that set sends a packet to any other host B - in that set, using unicast, multicast, or broadcast, the entire - link-layer packet payload arrives unmodified, and - - - a broadcast sent over that link by any host from that set of hosts - can be received by every other host in that set - - The link-layer *header* may be modified, such as in Token Ring Source - Routing [802.5], but not the link-layer *payload*. In particular, if - any device forwarding a packet modifies any part of the IP header or - IP payload then the packet is no longer considered to be on the same - link. This means that the packet may pass through devices such as - repeaters, bridges, hubs or switches and still be considered to be on - the same link for the purpose of this document, but not through a - device such as an IP router that decrements the TTL or otherwise - modifies the IP header. - - This document uses the term "routable address" to refer to all valid - unicast IPv4 addresses outside the 169.254/16 prefix that may be - forwarded via routers. This includes all global IP addresses and - private addresses such as Net 10/8 [RFC1918], but not loopback - addresses such as 127.0.0.1. - - Wherever this document uses the term "host" when describing use of - IPv4 Link-Local addresses, the text applies equally to routers when - they are the source of or intended destination of packets containing - IPv4 Link-Local source or destination addresses. 
- - Wherever this document uses the term "sender IP address" or "target - IP address" in the context of an ARP packet, it is referring to the - fields of the ARP packet identified in the ARP specification [RFC826] - as "ar$spa" (Sender Protocol Address) and "ar$tpa" (Target Protocol - Address) respectively. For the usage of ARP described in this - document, each of these fields always contains an IP address. - - In this document, the term "ARP Probe" is used to refer to an ARP - Request packet, broadcast on the local link, with an all-zero 'sender - IP address'. The 'sender hardware address' MUST contain the hardware - address of the interface sending the packet. The 'target hardware - address' field is ignored and SHOULD be set to all zeroes. The - 'target IP address' field MUST be set to the address being probed. - - In this document, the term "ARP Announcement" is used to refer to an - ARP Request packet, broadcast on the local link, identical to the ARP - Probe described above, except that both the sender and target IP - address fields contain the IP address being announced. - - - - - -Cheshire, et al. Standards Track [Page 4] - -RFC 3927 IPv4 Link-Local May 2005 - - - Constants are introduced in all capital letters. Their values are - given in Section 9. - -1.3. Applicability - - This specification applies to all IEEE 802 Local Area Networks (LANs) - [802], including Ethernet [802.3], Token-Ring [802.5] and IEEE 802.11 - wireless LANs [802.11], as well as to other link-layer technologies - that operate at data rates of at least 1 Mbps, have a round-trip - latency of at most one second, and support ARP [RFC826]. Wherever - this document uses the term "IEEE 802", the text applies equally to - any of these network technologies. 
- - Link-layer technologies that support ARP but operate at rates below 1 - Mbps or latencies above one second may need to specify different - values for the following parameters: - - (a) the number of, and interval between, ARP probes, see PROBE_NUM, - PROBE_MIN, PROBE_MAX defined in Section 2.2.1 - - (b) the number of, and interval between, ARP announcements, see - ANNOUNCE_NUM and ANNOUNCE_INTERVAL defined in Section 2.4 - - (c) the maximum rate at which address claiming may be attempted, see - RATE_LIMIT_INTERVAL and MAX_CONFLICTS defined in Section 2.2.1 - - (d) the time interval between conflicting ARPs below which a host - MUST reconfigure instead of attempting to defend its address, see - DEFEND_INTERVAL defined in Section 2.5 - - Link-layer technologies that do not support ARP may be able to use - other techniques for determining whether a particular IP address is - currently in use. However, the application of claim-and-defend - mechanisms to such networks is outside the scope of this document. - - This specification is intended for use with small ad hoc networks -- - a single link containing only a few hosts. Although 65024 IPv4 - Link-Local addresses are available in principle, attempting to use - all those addresses on a single link would result in a high - probability of address conflicts, requiring a host to take an - inordinate amount of time to find an available address. - - Network operators with more than 1300 hosts on a single link may want - to consider dividing that single link into two or more subnets. A - host connecting to a link that already has 1300 hosts, selecting an - IPv4 Link-Local address at random, has a 98% chance of selecting an - unused IPv4 Link-Local address on the first try. A host has a 99.96% - - - - -Cheshire, et al. Standards Track [Page 5] - -RFC 3927 IPv4 Link-Local May 2005 - - - chance of selecting an unused IPv4 Link-Local address within two - tries. 
The probability that it will have to try more than ten times - is about 1 in 10^17. - -1.4. Application Layer Protocol Considerations - - IPv4 Link-Local addresses and their dynamic configuration have - profound implications upon applications which use them. This is - discussed in Section 6. Many applications fundamentally assume that - addresses of communicating peers are routable, relatively unchanging - and unique. These assumptions no longer hold with IPv4 Link-Local - addresses, or a mixture of Link-Local and routable IPv4 addresses. - - Therefore while many applications will work properly with IPv4 Link- - Local addresses, or a mixture of Link-Local and routable IPv4 - addresses, others may do so only after modification, or will exhibit - reduced or partial functionality. - - In some cases it may be infeasible for the application to be modified - to operate under such conditions. - - IPv4 Link-Local addresses should therefore only be used where stable, - routable addresses are not available (such as on ad hoc or isolated - networks) or in controlled situations where these limitations and - their impact on applications are understood and accepted. This - document does not recommend that IPv4 Link-Local addresses and - routable addresses be configured simultaneously on the same - interface. - - Use of IPv4 Link-Local addresses in off-link communication is likely - to cause application failures. This can occur within any application - that includes embedded addresses, if an IPv4 Link-Local address is - embedded when communicating with a host that is not on the link. - Examples of applications that embed addresses include IPsec, Kerberos - 4/5, FTP, RSVP, SMTP, SIP, X-Windows/Xterm/Telnet, Real Audio, H.323, - and SNMP [RFC3027]. - - To preclude use of IPv4 Link-Local addresses in off-link - communication, the following cautionary measures are advised: - - a. IPv4 Link-Local addresses MUST NOT be configured in the DNS. 
- Mapping from IPv4 addresses to host names is conventionally done - by issuing DNS queries for names of the form, - "x.x.x.x.in-addr.arpa." When used for link-local addresses, which - have significance only on the local link, it is inappropriate to - send such DNS queries beyond the local link. DNS clients MUST NOT - send DNS queries for any name that falls within the - "254.169.in-addr.arpa." domain. - - - -Cheshire, et al. Standards Track [Page 6] - -RFC 3927 IPv4 Link-Local May 2005 - - - DNS recursive name servers receiving queries from non-compliant - clients for names within the "254.169.in-addr.arpa." domain MUST - by default return RCODE 3, authoritatively asserting that no such - name exists in the Domain Name System. - - b. Names that are globally resolvable to routable addresses should be - used within applications whenever they are available. Names that - are resolvable only on the local link (such as through use of - protocols such as Link Local Multicast Name Resolution [LLMNR]) - MUST NOT be used in off-link communication. IPv4 addresses and - names that can only be resolved on the local link SHOULD NOT be - forwarded beyond the local link. IPv4 Link-Local addresses SHOULD - only be sent when a Link-Local address is used as the source - and/or destination address. This strong advice should hinder - limited scope addresses and names from leaving the context in - which they apply. - - c. If names resolvable to globally routable addresses are not - available, but the globally routable addresses are, they should be - used instead of IPv4 Link-Local addresses. - -1.5. Autoconfiguration Issues - - Implementations of IPv4 Link-Local address autoconfiguration MUST - expect address conflicts, and MUST be prepared to handle them - gracefully by automatically selecting a new address whenever a - conflict is detected, as described in Section 2. 
This requirement to - detect and handle address conflicts applies during the entire period - that a host is using a 169.254/16 IPv4 Link-Local address, not just - during initial interface configuration. For example, address - conflicts can occur well after a host has completed booting if two - previously separate networks are joined, as described in Section 4. - -1.6. Alternate Use Prohibition - - Note that addresses in the 169.254/16 prefix SHOULD NOT be configured - manually or by a DHCP server. Manual or DHCP configuration may cause - a host to use an address in the 169.254/16 prefix without following - the special rules regarding duplicate detection and automatic - configuration that pertain to addresses in this prefix. While the - DHCP specification [RFC2131] indicates that a DHCP client SHOULD - probe a newly received address with ARP, this is not mandatory. - Similarly, while the DHCP specification recommends that a DHCP server - SHOULD probe an address using an ICMP Echo Request before allocating - it, this is also not mandatory, and even if the server does this, - IPv4 Link-Local addresses are not routable, so a DHCP server not - directly connected to a link cannot detect whether a host on that - link is already using the desired IPv4 Link-Local address. - - - -Cheshire, et al. Standards Track [Page 7] - -RFC 3927 IPv4 Link-Local May 2005 - - - Administrators wishing to configure their own local addresses (using - manual configuration, a DHCP server, or any other mechanism not - described in this document) should use one of the existing private - address prefixes [RFC1918], not the 169.254/16 prefix. - -1.7. Multiple Interfaces - - Additional considerations apply to hosts that support more than one - active interface where one or more of these interfaces support IPv4 - Link-Local address configuration. These considerations are discussed - in Section 3. - -1.8. 
Communication with Routable Addresses - - There will be cases when devices with a configured Link-Local address - will need to communicate with a device with a routable address - configured on the same physical link, and vice versa. The rules in - Section 2.6 allow this communication. - - This allows, for example, a laptop computer with only a routable - address to communicate with web servers world-wide using its - globally-routable address while at the same time printing those web - pages on a local printer that has only an IPv4 Link-Local address. - -1.9. When to configure an IPv4 Link-Local address - - Having addresses of multiple different scopes assigned to an - interface, with no adequate way to determine in what circumstances - each address should be used, leads to complexity for applications and - confusion for users. A host with an address on a link can - communicate with all other devices on that link, whether those - devices use Link-Local addresses, or routable addresses. For these - reasons, a host SHOULD NOT have both an operable routable address and - an IPv4 Link-Local address configured on the same interface. The - term "operable address" is used to mean an address which works - effectively for communication in the current network context (see - below). When an operable routable address is available on an - interface, the host SHOULD NOT also assign an IPv4 Link-Local address - on that interface. However, during the transition (in either - direction) between using routable and IPv4 Link-Local addresses both - MAY be in use at once subject to these rules: - - 1. The assignment of an IPv4 Link-Local address on an interface is - based solely on the state of the interface, and is independent - of any other protocols such as DHCP. A host MUST NOT alter its - behavior and use of other protocols such as DHCP because the - host has assigned an IPv4 Link-Local address to an interface. - - - - -Cheshire, et al. 
Standards Track [Page 8] - -RFC 3927 IPv4 Link-Local May 2005 - - - 2. If a host finds that an interface that was previously - configured with an IPv4 Link-Local address now has an operable - routable address available, the host MUST use the routable - address when initiating new communications, and MUST cease - advertising the availability of the IPv4 Link-Local address - through whatever mechanisms that address had been made known to - others. The host SHOULD continue to use the IPv4 Link-Local - address for communications already underway, and MAY continue - to accept new communications addressed to the IPv4 Link-Local - address. Ways in which an operable routable address might - become available on an interface include: - - * Manual configuration - * Address assignment through DHCP - * Roaming of the host to a network on which a previously - assigned address becomes operable - - 3. If a host finds that an interface no longer has an operable - routable address available, the host MAY identify a usable IPv4 - Link-Local address (as described in section 2) and assign that - address to the interface. Ways in which an operable routable - address might cease to be available on an interface include: - - * Removal of the address from the interface through - manual configuration - * Expiration of the lease on the address assigned through - DHCP - * Roaming of the host to a new network on which the - address is no longer operable. - - The determination by the system of whether an address is "operable" - is not clear cut and many changes in the system context (e.g., - router changes) may affect the operability of an address. In - particular roaming of a host from one network to another is likely -- - but not certain -- to change the operability of a configured address - but detecting such a move is not always trivial. - - "Detection of Network Attachment (DNA) in IPv4" [DNAv4] provides - further discussion of address assignment and operability - determination. - -2. 
Address Selection, Defense and Delivery - - The following section explains the IPv4 Link-Local address selection - algorithm, how IPv4 Link-Local addresses are defended, and how IPv4 - packets with IPv4 Link-Local addresses are delivered. - - - - - -Cheshire, et al. Standards Track [Page 9] - -RFC 3927 IPv4 Link-Local May 2005 - - - Windows and Mac OS hosts that already implement Link-Local IPv4 - address auto-configuration are compatible with the rules presented in - this section. However, should any interoperability problem be - discovered, this document, not any prior implementation, defines the - standard. - -2.1. Link-Local Address Selection - - When a host wishes to configure an IPv4 Link-Local address, it - selects an address using a pseudo-random number generator with a - uniform distribution in the range from 169.254.1.0 to 169.254.254.255 - inclusive. - - The IPv4 prefix 169.254/16 is registered with the IANA for this - purpose. The first 256 and last 256 addresses in the 169.254/16 - prefix are reserved for future use and MUST NOT be selected by a host - using this dynamic configuration mechanism. - - The pseudo-random number generation algorithm MUST be chosen so that - different hosts do not generate the same sequence of numbers. If the - host has access to persistent information that is different for each - host, such as its IEEE 802 MAC address, then the pseudo-random number - generator SHOULD be seeded using a value derived from this - information. This means that even without using any other persistent - storage, a host will usually select the same IPv4 Link-Local address - each time it is booted, which can be convenient for debugging and - other operational reasons. 
Seeding the pseudo-random number - generator using the real-time clock or any other information which is - (or may be) identical in every host is NOT suitable for this purpose, - because a group of hosts that are all powered on at the same time - might then all generate the same sequence, resulting in a never- - ending series of conflicts as the hosts move in lock-step through - exactly the same pseudo-random sequence, conflicting on every address - they probe. - - Hosts that are equipped with persistent storage MAY, for each - interface, record the IPv4 address they have selected. On booting, - hosts with a previously recorded address SHOULD use that address as - their first candidate when probing. This increases the stability of - addresses. For example, if a group of hosts are powered off at - night, then when they are powered on the next morning they will all - resume using the same addresses, instead of picking different - addresses and potentially having to resolve conflicts that arise. - - - - - - - - -Cheshire, et al. Standards Track [Page 10] - -RFC 3927 IPv4 Link-Local May 2005 - - -2.2. Claiming a Link-Local Address - - After it has selected an IPv4 Link-Local address, a host MUST test to - see if the IPv4 Link-Local address is already in use before beginning - to use it. When a network interface transitions from an inactive to - an active state, the host does not have knowledge of what IPv4 Link- - Local addresses may currently be in use on that link, since the point - of attachment may have changed or the network interface may have been - inactive when a conflicting address was claimed. - - Were the host to immediately begin using an IPv4 Link-Local address - which is already in use by another host, this would be disruptive to - that other host. 
Since it is possible that the host has changed its - point of attachment, a routable address may be obtainable on the new - network, and therefore it cannot be assumed that an IPv4 Link-Local - address is to be preferred. - - Before using the IPv4 Link-Local address (e.g., using it as the - source address in an IPv4 packet, or as the Sender IPv4 address in an - ARP packet) a host MUST perform the probing test described below to - achieve better confidence that using the IPv4 Link-Local address will - not cause disruption. - - Examples of events that involve an interface becoming active include: - - Reboot/startup - Wake from sleep (if network interface was inactive during sleep) - Bringing up previously inactive network interface - IEEE 802 hardware link-state change (appropriate for the - media type and security mechanisms which apply) indicates - that an interface has become active. - Association with a wireless base station or ad hoc network. - - A host MUST NOT perform this check periodically as a matter of - course. This would be a waste of network bandwidth, and is - unnecessary due to the ability of hosts to passively discover - conflicts, as described in Section 2.5. - -2.2.1. Probe details - - On a link-layer such as IEEE 802 that supports ARP, conflict - detection is done using ARP probes. On link-layer technologies that - do not support ARP other techniques may be available for determining - whether a particular IPv4 address is currently in use. However, the - application of claim-and-defend mechanisms to such networks is - outside the scope of this document. - - - - - -Cheshire, et al. Standards Track [Page 11] - -RFC 3927 IPv4 Link-Local May 2005 - - - A host probes to see if an address is already in use by broadcasting - an ARP Request for the desired address. The client MUST fill in the - 'sender hardware address' field of the ARP Request with the hardware - address of the interface through which it is sending the packet. 
The - 'sender IP address' field MUST be set to all zeroes, to avoid - polluting ARP caches in other hosts on the same link in the case - where the address turns out to be already in use by another host. - The 'target hardware address' field is ignored and SHOULD be set to - all zeroes. The 'target IP address' field MUST be set to the address - being probed. An ARP Request constructed this way with an all-zero - 'sender IP address' is referred to as an "ARP Probe". - - When ready to begin probing, the host should then wait for a random - time interval selected uniformly in the range zero to PROBE_WAIT - seconds, and should then send PROBE_NUM probe packets, each of these - probe packets spaced randomly, PROBE_MIN to PROBE_MAX seconds apart. - If during this period, from the beginning of the probing process - until ANNOUNCE_WAIT seconds after the last probe packet is sent, the - host receives any ARP packet (Request *or* Reply) on the interface - where the probe is being performed where the packet's 'sender IP - address' is the address being probed for, then the host MUST treat - this address as being in use by some other host, and MUST select a - new pseudo-random address and repeat the process. In addition, if - during this period the host receives any ARP Probe where the packet's - 'target IP address' is the address being probed for, and the packet's - 'sender hardware address' is not the hardware address of the - interface the host is attempting to configure, then the host MUST - similarly treat this as an address conflict and select a new address - as above. This can occur if two (or more) hosts attempt to configure - the same IPv4 Link-Local address at the same time. 
- - A host should maintain a counter of the number of address conflicts - it has experienced in the process of trying to acquire an address, - and if the number of conflicts exceeds MAX_CONFLICTS then the host - MUST limit the rate at which it probes for new addresses to no more - than one new address per RATE_LIMIT_INTERVAL. This is to prevent - catastrophic ARP storms in pathological failure cases, such as a - rogue host that answers all ARP probes, causing legitimate hosts to - go into an infinite loop attempting to select a usable address. - - If, by ANNOUNCE_WAIT seconds after the transmission of the last ARP - Probe no conflicting ARP Reply or ARP Probe has been received, then - the host has successfully claimed the desired IPv4 Link-Local - address. - - - - - - - -Cheshire, et al. Standards Track [Page 12] - -RFC 3927 IPv4 Link-Local May 2005 - - -2.3. Shorter Timeouts - - Network technologies may emerge for which shorter delays are - appropriate than those required by this document. A subsequent IETF - publication may be produced providing guidelines for different values - for PROBE_WAIT, PROBE_NUM, PROBE_MIN and PROBE_MAX on those - technologies. - -2.4. Announcing an Address - - Having probed to determine a unique address to use, the host MUST - then announce its claimed address by broadcasting ANNOUNCE_NUM ARP - announcements, spaced ANNOUNCE_INTERVAL seconds apart. An ARP - announcement is identical to the ARP Probe described above, except - that now the sender and target IP addresses are both set to the - host's newly selected IPv4 address. The purpose of these ARP - announcements is to make sure that other hosts on the link do not - have stale ARP cache entries left over from some other host that may - previously have been using the same address. - -2.5. Conflict Detection and Defense - - Address conflict detection is not limited to the address selection - phase, when a host is sending ARP probes. 
Address conflict detection - is an ongoing process that is in effect for as long as a host is - using an IPv4 Link-Local address. At any time, if a host receives an - ARP packet (request *or* reply) on an interface where the 'sender IP - address' is the IP address the host has configured for that - interface, but the 'sender hardware address' does not match the - hardware address of that interface, then this is a conflicting ARP - packet, indicating an address conflict. - - A host MUST respond to a conflicting ARP packet as described in - either (a) or (b) below: - - (a) Upon receiving a conflicting ARP packet, a host MAY elect to - immediately configure a new IPv4 Link-Local address as described - above, or - - (b) If a host currently has active TCP connections or other reasons - to prefer to keep the same IPv4 address, and it has not seen any - other conflicting ARP packets within the last DEFEND_INTERVAL - seconds, then it MAY elect to attempt to defend its address by - recording the time that the conflicting ARP packet was received, and - then broadcasting one single ARP announcement, giving its own IP and - hardware addresses as the sender addresses of the ARP. Having done - this, the host can then continue to use the address normally without - any further special action. However, if this is not the first - - - -Cheshire, et al. Standards Track [Page 13] - -RFC 3927 IPv4 Link-Local May 2005 - - - conflicting ARP packet the host has seen, and the time recorded for - the previous conflicting ARP packet is recent, within DEFEND_INTERVAL - seconds, then the host MUST immediately cease using this address and - configure a new IPv4 Link-Local address as described above. This is - necessary to ensure that two hosts do not get stuck in an endless - loop with both hosts trying to defend the same address. - - A host MUST respond to conflicting ARP packets as described in either - (a) or (b) above. A host MUST NOT ignore conflicting ARP packets. 
- - Forced address reconfiguration may be disruptive, causing TCP - connections to be broken. However, it is expected that such - disruptions will be rare, and if inadvertent address duplication - happens, then disruption of communication is inevitable, no matter - how the addresses were assigned. It is not possible for two - different hosts using the same IP address on the same network to - operate reliably. - - Before abandoning an address due to a conflict, hosts SHOULD actively - attempt to reset any existing connections using that address. This - mitigates some security threats posed by address reconfiguration, as - discussed in Section 5. - - Immediately configuring a new address as soon as the conflict is - detected is the best way to restore useful communication as quickly - as possible. The mechanism described above of broadcasting a single - ARP announcement to defend the address mitigates the problem - somewhat, by helping to improve the chance that one of the two - conflicting hosts may be able to retain its address. - - All ARP packets (*replies* as well as requests) that contain a Link- - Local 'sender IP address' MUST be sent using link-layer broadcast - instead of link-layer unicast. This aids timely detection of - duplicate addresses. An example illustrating how this helps is given - in Section 4. - -2.6. Address Usage and Forwarding Rules - - A host implementing this specification has additional rules to - conform to, whether or not it has an interface configured with an - IPv4 Link-Local address. - -2.6.1. Source Address Usage - - Since each interface on a host may have an IPv4 Link-Local address in - addition to zero or more other addresses configured by other means - (e.g., manually or via a DHCP server), a host may have to make a - - - - -Cheshire, et al. Standards Track [Page 14] - -RFC 3927 IPv4 Link-Local May 2005 - - - choice about what source address to use when it sends a packet or - initiates a TCP connection. 
- - Where both an IPv4 Link-Local and a routable address are available on - the same interface, the routable address should be preferred as the - source address for new communications, but packets sent from or to - the IPv4 Link-Local address are still delivered as expected. The - IPv4 Link-Local address may continue to be used as a source address - in communications where switching to a preferred address would cause - communications failure because of the requirements of an upper-layer - protocol (e.g., an existing TCP connection). For more details, see - Section 1.7. - - A multi-homed host needs to select an outgoing interface whether or - not the destination is an IPv4 Link-Local address. Details of that - process are beyond the scope of this specification. After selecting - an interface, the multi-homed host should send packets involving IPv4 - Link-Local addresses as specified in this document, as if the - selected interface were the host's only interface. See Section 3 for - further discussion of multi-homed hosts. - -2.6.2. Forwarding Rules - - Whichever interface is used, if the destination address is in the - 169.254/16 prefix (excluding the address 169.254.255.255, which is - the broadcast address for the Link-Local prefix), then the sender - MUST ARP for the destination address and then send its packet - directly to the destination on the same physical link. This MUST be - done whether the interface is configured with a Link-Local or a - routable IPv4 address. - - In many network stacks, achieving this functionality may be as simple - as adding a routing table entry indicating that 169.254/16 is - directly reachable on the local link. This approach will not work - for routers or multi-homed hosts. Refer to section 3 for more - discussion of multi-homed hosts. - - The host MUST NOT send a packet with an IPv4 Link-Local destination - address to any router for forwarding. 
- - If the destination address is a unicast address outside the - 169.254/16 prefix, then the host SHOULD use an appropriate routable - IPv4 source address, if it can. If for any reason the host chooses - to send the packet with an IPv4 Link-Local source address (e.g., no - routable address is available on the selected interface), then it - MUST ARP for the destination address and then send its packet, with - - - - - -Cheshire, et al. Standards Track [Page 15] - -RFC 3927 IPv4 Link-Local May 2005 - - - an IPv4 Link-Local source address and a routable destination IPv4 - address, directly to its destination on the same physical link. The - host MUST NOT send the packet to any router for forwarding. - - In the case of a device with a single interface and only a Link- - Local IPv4 address, this requirement can be paraphrased as "ARP for - everything". - - In many network stacks, achieving this "ARP for everything" behavior - may be as simple as having no primary IP router configured, having - the primary IP router address configured to 0.0.0.0, or having the - primary IP router address set to be the same as the host's own Link- - Local IPv4 address. For suggested behavior in multi-homed hosts, see - Section 3. - -2.7. Link-Local Packets Are Not Forwarded - - A sensible default for applications which are sending from an IPv4 - Link-Local address is to explicitly set the IPv4 TTL to 1. This is - not appropriate in all cases as some applications may require that - the IPv4 TTL be set to other values. - - An IPv4 packet whose source and/or destination address is in the - 169.254/16 prefix MUST NOT be sent to any router for forwarding, and - any network device receiving such a packet MUST NOT forward it, - regardless of the TTL in the IPv4 header. Similarly, a router or - other host MUST NOT indiscriminately answer all ARP Requests for - addresses in the 169.254/16 prefix.
A router may of course answer - ARP Requests for one or more IPv4 Link-Local address(es) that it has - legitimately claimed for its own use according to the claim-and- - defend protocol described in this document. - - This restriction also applies to multicast packets. IPv4 packets - with a Link-Local source address MUST NOT be forwarded outside the - local link even if they have a multicast destination address. - -2.8. Link-Local Packets are Local - - The non-forwarding rule means that hosts may assume that all - 169.254/16 destination addresses are "on-link" and directly - reachable. The 169.254/16 address prefix MUST NOT be subnetted. - This specification utilizes ARP-based address conflict detection, - which functions by broadcasting on the local subnet. Since such - broadcasts are not forwarded, were subnetting to be allowed then - address conflicts could remain undetected. - - - - - - -Cheshire, et al. Standards Track [Page 16] - -RFC 3927 IPv4 Link-Local May 2005 - - - This does not mean that Link-Local devices are forbidden from any - communication outside the local link. IP hosts that implement both - Link-Local and conventional routable IPv4 addresses may still use - their routable addresses without restriction as they do today. - -2.9. Higher-Layer Protocol Considerations - - Similar considerations apply at layers above IP. - - For example, designers of Web pages (including automatically - generated web pages) SHOULD NOT contain links with embedded IPv4 - Link-Local addresses if those pages are viewable from hosts outside - the local link where the addresses are valid. - - As IPv4 Link-Local addresses may change at any time and have limited - scope, IPv4 Link-Local addresses MUST NOT be stored in the DNS. - -2.10. Privacy Concerns - - Another reason to restrict leakage of IPv4 Link-Local addresses - outside the local link is privacy concerns. 
If IPv4 Link-Local - addresses are derived from a hash of the MAC address, some argue that - they could be indirectly associated with an individual, and thereby - used to track that individual's activities. Within the local link - the hardware addresses in the packets are all directly observable, so - as long as IPv4 Link-Local addresses don't leave the local link they - provide no more information to an intruder than could be gained by - direct observation of hardware addresses. - -2.11. Interaction between DHCPv4 client and IPv4 Link-Local State - Machines - - As documented in Appendix A, early implementations of IPv4 Link-Local - have modified the DHCP state machine. Field experience shows that - these modifications reduce the reliability of the DHCP service. - - A device that implements both IPv4 Link-Local and a DHCPv4 client - should not alter the behavior of the DHCPv4 client to accommodate - IPv4 Link-Local configuration. In particular configuration of an - IPv4 Link-Local address, whether or not a DHCP server is currently - responding, is not sufficient reason to unconfigure a valid DHCP - lease, to stop the DHCP client from attempting to acquire a new IP - address, to change DHCP timeouts or to change the behavior of the - DHCP state machine in any other way. - - Further discussion of this issue is provided in "Detection of Network - Attachment (DNA) in IPv4" [DNAv4]. - - - - -Cheshire, et al. Standards Track [Page 17] - -RFC 3927 IPv4 Link-Local May 2005 - - -3. Considerations for Multiple Interfaces - - The considerations outlined here also apply whenever a host has - multiple IP addresses, whether or not it has multiple physical - interfaces. Other examples of multiple interfaces include different - logical endpoints (tunnels, virtual private networks etc.) and - multiple logical networks on the same physical medium. This is often - referred to as "multi-homing". 
- - Hosts which have more than one active interface and elect to - implement dynamic configuration of IPv4 Link-Local addresses on one - or more of those interfaces will face various problems. This section - lists these problems but does no more than indicate how one might - solve them. At the time of this writing, there is no silver bullet - which solves these problems in all cases, in a general way. - Implementors must think through these issues before implementing the - protocol specified in this document on a system which may have more - than one active interface as part of a TCP/IP stack capable of - multi-homing. - -3.1. Scoped Addresses - - A host may be attached to more than one network at the same time. It - would be nice if there was a single address space used in every - network, but this is not the case. Addresses used in one network, be - it a network behind a NAT or a link on which IPv4 Link-Local - addresses are used, cannot be used in another network and have the - same effect. - - It would also be nice if addresses were not exposed to applications, - but they are. Most software using TCP/IP which awaits messages - receives them from any interface at a particular port number, for a - particular transport protocol. Applications are generally only aware - (and care) that they have received a message. The application knows - the address of the sender to which the application will reply. - - The first scoped address problem is source address selection. A - multi-homed host has more than one address. Which address should be - used as the source address when sending to a particular destination? - This question is usually answered by referring to a routing table, - which expresses on which interface (with which address) to send, and - how to send (should one forward to a router, or send directly). The - choice is made complicated by scoped addresses because the address - range in which the destination lies may be ambiguous.
The table may - not be able to yield a good answer. This problem is bound up with - next-hop selection, which is discussed in Section 3.2. - - - - - -Cheshire, et al. Standards Track [Page 18] - -RFC 3927 IPv4 Link-Local May 2005 - - - The second scoped address problem arises from scoped parameters - leaking outside their scope. This is discussed in Section 7. - - It is possible to overcome these problems. One way is to expose - scope information to applications such that they are always aware of - what scope a peer is in. This way, the correct interface could be - selected, and a safe procedure could be followed with respect to - forwarding addresses and other scoped parameters. There are other - possible approaches. None of these methods have been standardized - for IPv4 nor are they specified in this document. A good API design - could mitigate the problems, either by exposing address scopes to - 'scoped-address aware' applications or by cleverly encapsulating the - scoping information and logic so that applications do the right thing - without being aware of address scoping. - - An implementer could undertake to solve these problems, but cannot - simply ignore them. With sufficient experience, it is hoped that - specifications will emerge explaining how to overcome scoped address - multi-homing problems. - -3.2. Address Ambiguity - - This is a core problem with respect to IPv4 Link-Local destination - addresses being reachable on more than one interface. What should a - host do when it needs to send to Link-Local destination L and L can - be resolved using ARP on more than one link? - - Even if a Link-Local address can be resolved on only one link at a - given moment, there is no guarantee that it will remain unambiguous - in the future. Additional hosts on other interfaces may claim the - address L as well. - - One possibility is to support this only in the case where the - application specifically expresses which interface to send from. 
- - There is no standard or obvious solution to this problem. Existing - application software written for the IPv4 protocol suite is largely - incapable of dealing with address ambiguity. This does not preclude - an implementer from finding a solution, writing applications which - are able to use it, and providing a host which can support dynamic - configuration of IPv4 Link-Local addresses on more than one - interface. This solution will almost surely not be generally - applicable to existing software and transparent to higher layers, - however. - - Given that the IP stack must have the outbound interface associated - with a packet that needs to be sent to a Link-Local destination - address, interface selection must occur. The outbound interface - - - -Cheshire, et al. Standards Track [Page 19] - -RFC 3927 IPv4 Link-Local May 2005 - - - cannot be derived from the packet's header parameters such as source - or destination address (e.g., by using the forwarding table lookup). - Therefore, outbound interface association must be done explicitly - through other means. The specification does not stipulate those - means. - -3.3. Interaction with Hosts with Routable Addresses - - Attention is paid in this specification to transition from the use of - IPv4 Link-Local addresses to routable addresses (see Section 1.5). - The intention is to allow a host with a single interface to first - support Link-Local configuration then gracefully transition to the - use of a routable address. Since the host transitioning to the use - of a routable address may temporarily have more than one address - active, the scoped address issues described in Section 3.1 will - apply. 
When a host acquires a routable address, it does not need to - retain its Link-Local address for the purpose of communicating with - other devices on the link that are themselves using only Link-Local - addresses: any host conforming to this specification knows that - regardless of source address an IPv4 Link-Local destination must be - reached by forwarding directly to the destination, not via a router; - it is not necessary for that host to have a Link-Local source address - in order to send to a Link-Local destination address. - - A host with an IPv4 Link-Local address may send to a destination - which does not have an IPv4 Link-Local address. If the host is not - multi-homed, the procedure is simple and unambiguous: Using ARP and - forwarding directly to on-link destinations is the default route. If - the host is multi-homed, however, the routing policy is more complex, - especially if one of the interfaces is configured with a routable - address and the default route is (sensibly) directed at a router - accessible through that interface. The following example illustrates - this problem and provides a common solution to it. - - i1 +---------+ i2 i3 +-------+ - ROUTER-------= HOST1 =---------= HOST2 | - link1 +---------+ link2 +-------+ - - In the figure above, HOST1 is connected to link1 and link2. - Interface i1 is configured with a routable address, while i2 is an - IPv4 Link-Local address. HOST1 has its default route set to ROUTER's - address, through i1. HOST1 will route to destinations in 169.254/16 - to i2, sending directly to the destination. - - HOST2 has a configured (non-Link-Local) IPv4 address assigned to i3. - - - - - - -Cheshire, et al. Standards Track [Page 20] - -RFC 3927 IPv4 Link-Local May 2005 - - - Using a name resolution or service discovery protocol HOST1 can - discover HOST2's address. Since HOST2's address is not in - 169.254/16, HOST1's routing policy will send datagrams to HOST2 via - i1, to the ROUTER. 
Unless there is a route from ROUTER to HOST2, the - datagrams sent from HOST1 to HOST2 will not reach it. - - One solution to this problem is for a host to attempt to reach any - host locally (using ARP) for which it receives an unreachable ICMP - error message (ICMP message codes 0, 1, 6 or 7 [RFC792]). The host - tries all its attached links in a round robin fashion. This has been - implemented successfully for some IPv6 hosts, to circumvent exactly - this problem. In terms of this example, HOST1 upon failing to reach - HOST2 via the ROUTER, will attempt to forward to HOST2 via i2 and - succeed. - - It may also be possible to overcome this problem using techniques - described in section 3.2, or other means not discussed here. This - specification does not provide a standard solution, nor does it - preclude implementers from supporting multi-homed configurations, - provided that they address the concerns in this section for the - applications which will be supported on the host. - -3.4. Unintentional Autoimmune Response - - Care must be taken if a multi-homed host can support more than one - interface on the same link, all of which support IPv4 Link-Local - autoconfiguration. If these interfaces attempt to allocate the same - address, they will defend the host against itself -- causing the - claiming algorithm to fail. The simplest solution to this problem is - to run the algorithm independently on each interface configured with - IPv4 Link-Local addresses. - - In particular, ARP packets which appear to claim an address which is - assigned to a specific interface, indicate conflict only if they are - received on that interface and their hardware address is of some - other interface. - - If a host has two interfaces on the same link, then claiming and - defending on those interfaces must ensure that they end up with - different addresses just as if they were on different hosts. 
Note - that some of the ways a host may find itself with two interfaces on - the same link may be unexpected and non-obvious, such as when a host - has Ethernet and 802.11 wireless, but those two links are (possibly - even without the knowledge of the host's user) bridged together. - - - - - - - -Cheshire, et al. Standards Track [Page 21] - -RFC 3927 IPv4 Link-Local May 2005 - - -4. Healing of Network Partitions - - Hosts on disjoint network links may configure the same IPv4 Link- - Local address. If these separate network links are later joined or - bridged together, then there may be two hosts which are now on the - same link, trying to use the same address. When either host attempts - to communicate with any other host on the network, it will at some - point broadcast an ARP packet which will enable the hosts in question - to detect that there is an address conflict. - - When these address conflicts are detected, the subsequent forced - reconfiguration may be disruptive, causing TCP connections to be - broken. However, it is expected that such disruptions will be rare. - It should be relatively uncommon for networks to be joined while - hosts on those networks are active. Also, 65024 addresses are - available for IPv4 Link-Local use, so even when two small networks - are joined, the chance of conflict for any given host is fairly - small. - - When joining two large networks (defined as networks with a - substantial number of hosts per segment) there is a greater chance of - conflict. In such networks, it is likely that the joining of - previously separated segments will result in one or more hosts - needing to change their IPv4 Link-Local address, with subsequent loss - of TCP connections. In cases where separation and re-joining is - frequent, as in remotely bridged networks, this could prove - disruptive. 
However, unless the number of hosts on the joined - segments is very large, the traffic resulting from the join and - subsequent address conflict resolution will be small. - - Sending ARP replies that have IPv4 Link-Local sender addresses via - broadcast instead of unicast ensures that these conflicts can be - detected as soon as they become potential problems, but no sooner. - For example, if two disjoint network links are joined, where hosts A - and B have both configured the same Link-Local address, X, they can - remain in this state until A, B or some other host attempts to - initiate communication. If some other host C now sends an ARP - request for address X, and hosts A and B were to both reply with - conventional unicast ARP replies, then host C might be confused, but - A and B still wouldn't know there is a problem because neither would - have seen the other's packet. Sending these replies via broadcast - allows A and B to see each other's conflicting ARP packets and - respond accordingly. - - Note that sending periodic gratuitous ARPs in an attempt to detect - these conflicts sooner is not necessary, wastes network bandwidth, - and may actually be detrimental. For example, if the network links - were joined only briefly, and were separated again before any new - - - -Cheshire, et al. Standards Track [Page 22] - -RFC 3927 IPv4 Link-Local May 2005 - - - communication involving A or B were initiated, then the temporary - conflict would have been benign and no forced reconfiguration would - have been required. Triggering an unnecessary forced reconfiguration - in this case would not serve any useful purpose. Hosts SHOULD NOT - send periodic gratuitous ARPs. - -5. Security Considerations - - The use of IPv4 Link-Local Addresses may open a network host to new - attacks. In particular, a host that previously did not have an IP - address, and no IP stack running, was not susceptible to IP-based - attacks. 
By configuring a working address, the host may now be - vulnerable to IP-based attacks. - - The ARP protocol [RFC826] is insecure. A malicious host may send - fraudulent ARP packets on the network, interfering with the correct - operation of other hosts. For example, it is easy for a host to - answer all ARP requests with replies giving its own hardware address, - thereby claiming ownership of every address on the network. - - NOTE: There are certain kinds of local links, such as wireless LANs, - that provide no physical security. Because of the existence of these - links it would be very unwise for an implementer to assume that when - a device is communicating only on the local link it can dispense with - normal security precautions. Failure to implement appropriate - security measures could expose users to considerable risks. - - A host implementing IPv4 Link-Local configuration has an additional - vulnerability to selective reconfiguration and disruption. It is - possible for an on-link attacker to issue ARP packets which would - cause a host to break all its connections by switching to a new - address. The attacker could force the host implementing IPv4 Link- - Local configuration to select certain addresses, or prevent it from - ever completing address selection. This is a distinct threat from - that posed by spoofed ARPs, described in the preceding paragraph. - - Implementations and users should also note that a node that gives up - an address and reconfigures, as required by section 2.5, allows the - possibility that another node can easily and successfully hijack - existing TCP connections. - - Implementers are advised that the Internet Protocol architecture - expects every networked device or host to implement security which - is adequate to protect the resources to which the device or host has - access, including the network itself, against known or credible - threats. 
Even though use of IPv4 Link-Local addresses may reduce the - - - - - -Cheshire, et al. Standards Track [Page 23] - -RFC 3927 IPv4 Link-Local May 2005 - - - number of threats to which a device is exposed, implementers of - devices supporting the Internet Protocol must not assume that a - customer's local network is free from security risks. - - While there may be particular kinds of devices, or particular - environments, for which the security provided by the network is - adequate to protect the resources that are accessible by the device, - it would be misleading to make a general statement to the effect that - the requirement to provide security is reduced for devices using IPv4 - Link-Local addresses as a sole means of access. - - In all cases, whether or not IPv4 Link-Local addresses are used, it - is necessary for implementers of devices supporting the Internet - Protocol to analyze the known and credible threats to which a - specific host or device might be subjected, and to the extent that it - is feasible, to provide security mechanisms which ameliorate or - reduce the risks associated with such threats. - -6. Application Programming Considerations - - Use of IPv4 Link-Local autoconfigured addresses presents additional - challenges to writers of applications and may result in existing - application software failing. - -6.1. Address Changes, Failure and Recovery - - IPv4 Link-Local addresses used by an application may change over - time. Some application software encountering an address change will - fail. For example, existing client TCP connections will be aborted, - servers whose addresses change will have to be rediscovered, blocked - reads and writes will exit with an error condition, and so on. - - Vendors producing application software which will be used on IP - implementations supporting IPv4 Link-Local address configuration - SHOULD detect and cope with address change events. 
Vendors producing - IPv4 implementations supporting IPv4 Link-Local address configuration - SHOULD expose address change events to applications. - -6.2. Limited Forwarding of Locators - - IPv4 Link-Local addresses MUST NOT be forwarded via an application - protocol (for example in a URL), to a destination that is not on the - same link. This is discussed further in Sections 2.9 and 3. - - Existing distributed application software that forwards address - information may fail. For example, FTP [RFC959] (when not using - passive mode) transmits the IP address of the client. Suppose a - client starts up and obtains its IPv4 configuration at a time when it - - - -Cheshire, et al. Standards Track [Page 24] - -RFC 3927 IPv4 Link-Local May 2005 - - - has only a Link-Local address. Later, the host gets a global IP - address, and the client contacts an FTP server outside the local - link. If the FTP client transmits its old Link-Local address instead - of its new global IP address in the FTP "port" command, then the FTP - server will be unable to open a data connection back to the client, - and the FTP operation will fail. - -6.3. Address Ambiguity - - Application software run on a multi-homed host that supports IPv4 - Link-Local address configuration on more than one interface may fail. - - This is because application software assumes that an IPv4 address is - unambiguous, that it can refer to only one host. IPv4 Link-Local - addresses are unique only on a single link. A host attached to - multiple links can easily encounter a situation where the same - address is present on more than one interface, or first on one - interface, later on another; in any case associated with more than - one host. Most existing software is not prepared for this ambiguity. - In the future, application programming interfaces could be developed - to prevent this problem. This issue is discussed in Section 3. - -7. 
Router Considerations - - A router MUST NOT forward a packet with an IPv4 Link-Local source or - destination address, irrespective of the router's default route - configuration or routes obtained from dynamic routing protocols. - - A router which receives a packet with an IPv4 Link-Local source or - destination address MUST NOT forward the packet. This prevents - forwarding of packets back onto the network segment from which they - originated, or to any other segment. - -8. IANA Considerations - - The IANA has allocated the prefix 169.254/16 for the use described in - this document. The first and last 256 addresses in this range - (169.254.0.x and 169.254.255.x) are allocated by Standards Action, as - defined in "Guidelines for Writing an IANA Considerations Section in RFCs" (BCP 26) [RFC2434]. No - other IANA services are required by this document. - - - - - - - - - - - -Cheshire, et al. Standards Track [Page 25] - -RFC 3927 IPv4 Link-Local May 2005 - - -9. Constants - - The following timing constants are used in this protocol; they are - not intended to be user configurable. - - PROBE_WAIT 1 second (initial random delay) - PROBE_NUM 3 (number of probe packets) - PROBE_MIN 1 second (minimum delay till repeated probe) - PROBE_MAX 2 seconds (maximum delay till repeated probe) - ANNOUNCE_WAIT 2 seconds (delay before announcing) - ANNOUNCE_NUM 2 (number of announcement packets) - ANNOUNCE_INTERVAL 2 seconds (time between announcement packets) - MAX_CONFLICTS 10 (max conflicts before rate limiting) - RATE_LIMIT_INTERVAL 60 seconds (delay between successive attempts) - DEFEND_INTERVAL 10 seconds (minimum interval between defensive - ARPs). - -10. References - -10.1. Normative References - - [RFC792] Postel, J., "Internet Control Message Protocol", STD 5, RFC - 792, September 1981. - - [RFC826] Plummer, D., "Ethernet Address Resolution Protocol: Or - converting network protocol addresses to 48.bit Ethernet - address for transmission on Ethernet hardware", STD 37, RFC - 826, November 1982. 
- - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2434] Narten, T. and H. Alvestrand, "Guidelines for Writing an - IANA Considerations Section in RFCs", BCP 26, RFC 2434, - October 1998. - -10.2. Informative References - - [802] IEEE Standards for Local and Metropolitan Area Networks: - Overview and Architecture, ANSI/IEEE Std 802, 1990. - - [802.3] ISO/IEC 8802-3 Information technology - Telecommunications - and information exchange between systems - Local and - metropolitan area networks - Common specifications - Part - 3: Carrier Sense Multiple Access with Collision Detection - (CSMA/CD) Access Method and Physical Layer Specifications, - (also ANSI/IEEE Std 802.3- 1996), 1996. - - - - -Cheshire, et al. Standards Track [Page 26] - -RFC 3927 IPv4 Link-Local May 2005 - - - [802.5] ISO/IEC 8802-5 Information technology - Telecommunications - and information exchange between systems - Local and - metropolitan area networks - Common specifications - Part - 5: Token ring access method and physical layer - specifications, (also ANSI/IEEE Std 802.5-1998), 1998. - - [802.11] Information technology - Telecommunications and information - exchange between systems - Local and metropolitan area - networks - Specific Requirements Part 11: Wireless LAN - Medium Access Control (MAC) and Physical Layer (PHY) - Specifications, IEEE Std. 802.11-1999, 1999. - - [RFC959] Postel, J. and J. Reynolds, "File Transfer Protocol", STD - 9, RFC 959, October 1985. - - [RFC1918] Rekhter, Y., Moskowitz, B., Karrenberg, D., de Groot, G., - and E. Lear, "Address Allocation for Private Internets", - BCP 5, RFC 1918, February 1996. - - [RFC2131] Droms, R., "Dynamic Host Configuration Protocol", RFC 2131, - March 1997. - - [RFC2462] Thomson, S. and T. Narten, "IPv6 Stateless Address - Autoconfiguration", RFC 2462, December 1998. - - [RFC3027] Holdrege, M. and P. 
Srisuresh, "Protocol Complications with - the IP Network Address Translator", RFC 3027, January 2001. - - [DNAv4] Aboba, B., "Detection of Network Attachment (DNA) in IPv4", - Work in Progress, July 2004. - - [LLMNR] Esibov, L., Aboba, B. and D. Thaler, "Linklocal Multicast - Name Resolution (LLMNR)", Work in Progress, June 2004. - -Acknowledgments - - We would like to thank (in alphabetical order) Jim Busse, Pavani - Diwanji, Donald Eastlake 3rd, Robert Elz, Peter Ford, Spencer - Giacalone, Josh Graessley, Brad Hards, Myron Hattig, Hugh Holbrook, - Christian Huitema, Richard Johnson, Kim Yong-Woon, Mika Liljeberg, - Rod Lopez, Keith Moore, Satish Mundra, Thomas Narten, Erik Nordmark, - Philip Nye, Howard Ridenour, Daniel Senie, Dieter Siegmund, Valery - Smyslov, and Ryan Troll for their contributions. - - - - - - - - -Cheshire, et al. Standards Track [Page 27] - -RFC 3927 IPv4 Link-Local May 2005 - - -Appendix A - Prior Implementations - -A.1. Apple Mac OS 8.x and 9.x. - - Mac OS chooses the IP address on a pseudo-random basis. The selected - address is saved in persistent storage for continued use after - reboot, when possible. - - Mac OS sends nine DHCPDISCOVER packets, with an interval of two - seconds between packets. If no response is received from any of - these requests (18 seconds), it will autoconfigure. - - Upon finding that a selected address is in use, Mac OS will select a - new random address and try again, at a rate limited to no more than - one attempt every two seconds. - - Autoconfigured Mac OS systems check for the presence of a DHCP server - every five minutes. If a DHCP server is found but Mac OS is not - successful in obtaining a new lease, it keeps the existing - autoconfigured IP address. If Mac OS is successful at obtaining a - new lease, it drops all existing connections without warning. This - may cause users to lose sessions in progress. 
Once a new lease is - obtained, Mac OS will not allocate further connections using the - autoconfigured IP address. - - Mac OS systems do not send packets addressed to a Link-Local address - to the default gateway if one is present; these addresses are always - resolved on the local segment. - - Mac OS systems by default send all outgoing unicast packets with a - TTL of 255. All multicast and broadcast packets are also sent with a - TTL of 255 if they have a source address in the 169.254/16 prefix. - - Mac OS implements media sense where the hardware (and driver - software) supports this. As soon as network connectivity is - detected, a DHCPDISCOVER will be sent on the interface. This means - that systems will immediately transition out of autoconfigured mode - as soon as connectivity is restored. - -A.2. Apple Mac OS X Version 10.2 - - Mac OS X chooses the IP address on a pseudo-random basis. The - selected address is saved in memory so that it can be re-used during - subsequent autoconfiguration attempts during a single boot of the - system. - - - - - - -Cheshire, et al. Standards Track [Page 28] - -RFC 3927 IPv4 Link-Local May 2005 - - - Autoconfiguration of a Link-Local address depends on the results of - the DHCP process. DHCP sends two packets, with timeouts of one and - two seconds. If no response is received (three seconds), it begins - autoconfiguration. DHCP continues sending packets in parallel for a - total time of 60 seconds. - - At the start of autoconfiguration, it generates 10 unique random IP - addresses, and probes each one in turn for 2 seconds. It stops - probing after finding an address that is not in use, or the list of - addresses is exhausted. - - If DHCP is not successful, it waits five minutes before starting over - again. Once DHCP is successful, the autoconfigured Link-Local - address is given up. The Link-Local subnet, however, remains - configured. 
- - Autoconfiguration is only attempted on a single interface at any - given moment in time. - - Mac OS X ensures that the connected interface with the highest - priority is associated with the Link-Local subnet. Packets addressed - to a Link-Local address are never sent to the default gateway, if one - is present. Link-local addresses are always resolved on the local - segment. - - Mac OS X implements media sense where the hardware and driver support - it. When the network media indicates that it has been connected, the - autoconfiguration process begins again, and attempts to re-use the - previously assigned Link-Local address. When the network media - indicates that it has been disconnected, the system waits four - seconds before de-configuring the Link-Local address and subnet. If - the connection is restored before that time, the autoconfiguration - process begins again. If the connection is not restored before that - time, the system chooses another interface to autoconfigure. - - Mac OS X by default sends all outgoing unicast packets with a TTL of - 255. All multicast and broadcast packets are also sent with a TTL of - 255 if they have a source address in the 169.254/16 prefix. - -A.3. Microsoft Windows 98/98SE - - Windows 98/98SE systems choose their IPv4 Link-Local address on a - pseudo-random basis. The address selection algorithm is based on - computing a hash on the interface's MAC address, so that a large - collection of hosts should obey the uniform probability distribution - in choosing addresses within the 169.254/16 address space. Deriving - - - - - -Cheshire, et al. Standards Track [Page 29] - -RFC 3927 IPv4 Link-Local May 2005 - - - the initial IPv4 Link-Local address from the interface's MAC address - also ensures that systems rebooting will obtain the same - autoconfigured address, unless a conflict is detected. 
- - When in INIT state, the Windows 98/98SE DHCP Client sends out a total - of 4 DHCPDISCOVERs, with an inter-packet interval of 6 seconds. When - no response is received after all 4 packets (24 seconds), it will - autoconfigure an address. - - The autoconfigure retry count for Windows 98/98SE systems is 10. - After trying 10 autoconfigured IPv4 addresses, and finding all are - taken, the host will boot without an IPv4 address. - - Autoconfigured Windows 98/98SE systems check for the presence of a - DHCP server every five minutes. If a DHCP server is found but - Windows 98 is not successful in obtaining a new lease, it keeps the - existing autoconfigured IPv4 Link-Local address. If Windows 98/98SE - is successful at obtaining a new lease, it drops all existing - connections without warning. This may cause users to lose sessions - in progress. Once a new lease is obtained, Windows 98/98SE will not - allocate further connections using the autoconfigured IPv4 Link-Local - address. - - Windows 98/98SE systems with an IPv4 Link-Local address do not send - packets addressed to an IPv4 Link-Local address to the default - gateway if one is present; these addresses are always resolved on the - local segment. - - Windows 98/98SE systems by default send all outgoing unicast packets - with a TTL of 128. TTL configuration is performed by setting the - Windows Registry Key - HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services:\Tcpip\ - Parameters\DefaultTTL of type REG_DWORD to the appropriate value. - However, this default TTL will apply to all packets. While this - facility could be used to set the default TTL to 255, it cannot be - used to set the default TTL of IPv4 Link-Local packets to one (1), - while allowing other packets to be sent with a TTL larger than one. - - Windows 98/98SE systems do not implement media sense. 
This means - that network connectivity issues (such as a loose cable) may prevent - a system from contacting the DHCP server, thereby causing it to - auto-configure. When the connectivity problem is fixed (such as when - the cable is re-connected) the situation will not immediately correct - itself. Since the system will not sense the re-connection, it will - remain in autoconfigured mode until an attempt is made to reach the - DHCP server. - - - - - -Cheshire, et al. Standards Track [Page 30] - -RFC 3927 IPv4 Link-Local May 2005 - - - The DHCP server included with Windows 98SE Internet Connection - Sharing (ICS) (a NAT implementation) allocates out of the 192.168/16 - private address space by default. - - However, it is possible to change the allocation prefix via a - registry key, and no checks are made to prevent allocation out of the - IPv4 Link-Local prefix. When configured to do so, Windows 98SE ICS - will rewrite packets from the IPv4 Link-Local prefix and forward them - beyond the local link. Windows 98SE ICS does not automatically route - for the IPv4 Link-Local prefix, so that hosts obtaining addresses via - DHCP cannot communicate with autoconfigured-only devices. - - Other home gateways exist that allocate addresses out of the IPv4 - Link-Local prefix by default. Windows 98/98SE systems can use a - 169.254/16 IPv4 Link-Local address as the source address when - communicating with non-Link-Local hosts. Windows 98/98SE does not - support router solicitation/advertisement. Windows 98/98SE systems - will not automatically discover a default gateway when in - autoconfigured mode. - -A.4. Windows XP, 2000, and ME - - The autoconfiguration behavior of Windows XP, Windows 2000, and - Windows ME systems is identical to Windows 98/98SE except in the - following respects: - - Media Sense - Router Discovery - Silent RIP - - Windows XP, 2000, and ME implement media sense. 
As soon as network - connectivity is detected, a DHCPREQUEST or DHCPDISCOVER will be sent - on the interface. This means that systems will immediately - transition out of autoconfigured mode as soon as connectivity is - restored. - - Windows XP, 2000, and ME also support router discovery, although it - is turned off by default. Windows XP and 2000 also support a RIP - listener. This means that they may inadvertently discover a default - gateway while in autoconfigured mode. - - ICS on Windows XP/2000/ME behaves identically to Windows 98SE with - respect to address allocation and NATing of Link-Local prefixes. - - - - - - - - -Cheshire, et al. Standards Track [Page 31] - -RFC 3927 IPv4 Link-Local May 2005 - - -Authors' Addresses - - Stuart Cheshire - Apple Computer, Inc. - 1 Infinite Loop - Cupertino - California 95014, USA - - Phone: +1 408 974 3207 - EMail: rfc@stuartcheshire.org - - - Bernard Aboba - Microsoft Corporation - One Microsoft Way - Redmond, WA 98052 - - Phone: +1 425 818 4011 - EMail: bernarda@microsoft.com - - - Erik Guttman - Sun Microsystems - Eichhoelzelstr. 7 - 74915 Waibstadt Germany - - Phone: +49 7263 911 701 - EMail: erik@spybeam.org - - - - - - - - - - - - - - - - - - - - - - - -Cheshire, et al. Standards Track [Page 32] - -RFC 3927 IPv4 Link-Local May 2005 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2005). - - This document is subject to the rights, licenses and restrictions - contained in BCP 78, and except as set forth therein, the authors - retain all their rights. 
- - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS - OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, - INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE - INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; nor does it represent that it has - made any independent effort to identify any such rights. Information - on the procedures with respect to rights in RFC documents can be - found in BCP 78 and BCP 79. - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use of - such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository at - http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights that may cover technology that may be required to implement - this standard. Please address the information to the IETF at ietf- - ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - -Cheshire, et al. 
Standards Track [Page 33] - diff --git a/kernel/picotcp/RFC/rfc4015.txt b/kernel/picotcp/RFC/rfc4015.txt deleted file mode 100644 index fd527a2..0000000 --- a/kernel/picotcp/RFC/rfc4015.txt +++ /dev/null @@ -1,731 +0,0 @@ - - - - - - -Network Working Group R. Ludwig -Request for Comments: 4015 Ericsson Research -Category: Standards Track A. Gurtov - HIIT - February 2005 - - - The Eifel Response Algorithm for TCP - -Status of This Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2005). - -Abstract - - Based on an appropriate detection algorithm, the Eifel response - algorithm provides a way for a TCP sender to respond to a detected - spurious timeout. It adapts the retransmission timer to avoid - further spurious timeouts and (depending on the detection algorithm) - can avoid the often unnecessary go-back-N retransmits that would - otherwise be sent. In addition, the Eifel response algorithm - restores the congestion control state in such a way that packet - bursts are avoided. - -1. Introduction - - The Eifel response algorithm relies on a detection algorithm such as - the Eifel detection algorithm, defined in [RFC3522]. That document - contains informative background and motivation context that may be - useful for implementers of the Eifel response algorithm, but it is - not necessary to read [RFC3522] in order to implement the Eifel - response algorithm. 
Note that alternative response algorithms have - been proposed [BA02] that could also rely on the Eifel detection - algorithm, and alternative detection algorithms have been proposed - [RFC3708], [SK04] that could work together with the Eifel response - algorithm. - - Based on an appropriate detection algorithm, the Eifel response - algorithm provides a way for a TCP sender to respond to a detected - spurious timeout. It adapts the retransmission timer to avoid - - - -Ludwig & Gurtov Standards Track [Page 1] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - further spurious timeouts and (depending on the detection algorithm) - can avoid the often unnecessary go-back-N retransmits that would - otherwise be sent. In addition, the Eifel response algorithm - restores the congestion control state in such a way that packet - bursts are avoided. - - Note: A previous version of the Eifel response algorithm also - included a response to a detected spurious fast retransmit. - However, as a consensus was not reached about how to adapt the - duplicate acknowledgement threshold in that case, that part of the - algorithm was removed for the time being. - -1.1. Terminology - - The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, - SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this - document, are to be interpreted as described in [RFC2119]. - - We refer to the first-time transmission of an octet as the 'original - transmit'. A subsequent transmission of the same octet is referred - to as a 'retransmit'. In most cases, this terminology can also be - applied to data segments. However, when repacketization occurs, a - segment can contain both first-time transmissions and retransmissions - of octets. In that case, this terminology is only consistent when - applied to octets. For the Eifel detection and response algorithms, - this makes no difference, as they also operate correctly when - repacketization occurs. 
- - We use the term 'acceptable ACK' as defined in [RFC793]. That is an - ACK that acknowledges previously unacknowledged data. We use the - term 'bytes_acked' to refer to the amount (in terms of octets) of - previously unacknowledged data that is acknowledged by the most - recently received acceptable ACK. We use the TCP sender state - variables 'SND.UNA' and 'SND.NXT' as defined in [RFC793]. SND.UNA - holds the segment sequence number of the oldest outstanding segment. - SND.NXT holds the segment sequence number of the next segment the TCP - sender will (re-)transmit. In addition, we define as 'SND.MAX' the - segment sequence number of the next original transmit to be sent. - The definition of SND.MAX is equivalent to the definition of - 'snd_max' in [WS95]. - - We use the TCP sender state variables 'cwnd' (congestion window), and - 'ssthresh' (slow-start threshold), and the term 'FlightSize' as - defined in [RFC2581]. FlightSize is the amount (in terms of octets) - of outstanding data at a given point in time. We use the term - 'Initial Window' (IW) as defined in [RFC3390]. The IW is the size of - the sender's congestion window after the three-way handshake is - completed. We use the TCP sender state variables 'SRTT' and - - - -Ludwig & Gurtov Standards Track [Page 2] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - 'RTTVAR', and the terms 'RTO' and 'G' as defined in [RFC2988]. G is - the clock granularity of the retransmission timer. In addition, we - assume that the TCP sender maintains the value of the latest round- - trip time (RTT) measurement in the (local) variable 'RTT-SAMPLE'. - - We use the TCP sender state variable 'T_last', and the term 'tcpnow' - as used in [RFC2861]. T_last holds the system time when the TCP - sender sent the last data segment, whereas tcpnow is the TCP sender's - current system time. - -2. 
Appropriate Detection Algorithms - - If the Eifel response algorithm is implemented at the TCP sender, it - MUST be implemented together with a detection algorithm that is - specified in a standards track or experimental RFC. - - Designers of detection algorithms who want their algorithms to work - together with the Eifel response algorithm should reuse the variable - "SpuriousRecovery" with the semantics and defined values specified in - [RFC3522]. In addition, we define the constant LATE_SPUR_TO (set - equal to -1) as another possible value of the variable - SpuriousRecovery. Detection algorithms should set the value of - SpuriousRecovery to LATE_SPUR_TO if the detection of a spurious - retransmit is based on the ACK for the retransmit (as opposed to an - ACK for an original transmit). For example, this applies to - detection algorithms that are based on the DSACK option [RFC3708]. - -3. The Eifel Response Algorithm - - The complete algorithm is specified in section 3.1. In sections 3.2 - - 3.6, we discuss the different steps of the algorithm. - -3.1. The Algorithm - - Given that a TCP sender has enabled a detection algorithm that - complies with the requirements set in Section 2, a TCP sender MAY use - the Eifel response algorithm as defined in this subsection. - - If the Eifel response algorithm is used, the following steps MUST be - taken by the TCP sender, but only upon initiation of a timeout-based - loss recovery. That is when the first timeout-based retransmit is - sent. The algorithm MUST NOT be reinitiated after a timeout-based - loss recovery has already been started but not completed. In - particular, it may not be reinitiated upon subsequent timeouts for - the same segment, or upon retransmitting segments other than the - oldest outstanding segment. 
- - - - - -Ludwig & Gurtov Standards Track [Page 3] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - (0) Before the variables cwnd and ssthresh get updated when - loss recovery is initiated, set a "pipe_prev" variable as - follows: - pipe_prev <- max (FlightSize, ssthresh) - - Set a "SRTT_prev" variable and a "RTTVAR_prev" variable as - follows: - SRTT_prev <- SRTT + (2 * G) - RTTVAR_prev <- RTTVAR - - (DET) This is a placeholder for a detection algorithm that must - be executed at this point, and that sets the variable - SpuriousRecovery as outlined in Section 2. If - [RFC3522] is used as the detection algorithm, steps (1) - - (6) of that algorithm go here. - - (7) If SpuriousRecovery equals SPUR_TO, then - proceed to step (8); - - else if SpuriousRecovery equals LATE_SPUR_TO, then - proceed to step (9); - - else - proceed to step (DONE). - - (8) Resume the transmission with previously unsent data: - - Set - SND.NXT <- SND.MAX - - (9) Reverse the congestion control state: - - If the acceptable ACK has the ECN-Echo flag [RFC3168] set, - then - proceed to step (DONE); - - else set - cwnd <- FlightSize + min (bytes_acked, IW) - ssthresh <- pipe_prev - - Proceed to step (DONE). 
- - (10) Interworking with Congestion Window Validation: - - If congestion window validation is implemented according - to [RFC2861], then set - T_last <- tcpnow - - - - -Ludwig & Gurtov Standards Track [Page 4] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - (11) Adapt the conservativeness of the retransmission timer: - - Upon the first RTT-SAMPLE taken from new data; i.e., the - first RTT-SAMPLE that can be derived from an acceptable - ACK for data that was previously unsent when the spurious - timeout occurred, - - if the retransmission timer is implemented according - to [RFC2988], then set - SRTT <- max (SRTT_prev, RTT-SAMPLE) - RTTVAR <- max (RTTVAR_prev, RTT-SAMPLE/2) - RTO <- SRTT + max (G, 4*RTTVAR) - - Run the bounds check on the RTO (rules (2.4) and - (2.5) in [RFC2988]), and restart the - retransmission timer; - - else - appropriately adapt the conservativeness of the - retransmission timer that is implemented. - - (DONE) No further processing. - -3.2. Storing the Current Congestion Control State (Step 0) - - The TCP sender stores in pipe_prev what is considered a safe slow- - start threshold (ssthresh) before loss recovery is initiated; i.e., - before the loss indication is taken into account. This is either the - current FlightSize, if the TCP sender is in congestion avoidance, or - the current ssthresh, if the TCP sender is in slow-start. If the TCP - sender later detects that it has entered loss recovery unnecessarily, - then pipe_prev is used in step (9) to reverse the congestion control - state. Thus, until the loss recovery phase is terminated, pipe_prev - maintains a memory of the congestion control state of the time right - before the loss recovery phase was initiated. A similar approach is - proposed in [RFC2861], where this state is stored in ssthresh - directly after a TCP sender has become idle or application limited. 
- - There had been debates about whether the value of pipe_prev should be - decayed over time; e.g., upon subsequent timeouts for the same - outstanding segment. We do not require decaying pipe_prev for the - Eifel response algorithm and do not believe that such a conservative - approach should be in place. Instead, we follow the idea of - revalidating the congestion window through slow-start, as suggested - in [RFC2861]. That is, in step (9), the cwnd is reset to a value - that avoids large packet bursts, and ssthresh is reset to the value - of pipe_prev. Note that [RFC2581] and [RFC2861] also do not require - - - - -Ludwig & Gurtov Standards Track [Page 5] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - a decaying of ssthresh after it has been reset in response to a loss - indication, or after a TCP sender has become idle or application - limited. - -3.3. Suppressing the Unnecessary go-back-N Retransmits (Step 8) - - Without the use of the TCP timestamps option [RFC1323], the TCP - sender suffers from the retransmission ambiguity problem [Zh86], - [KP87]. Therefore, when the first acceptable ACK arrives after a - spurious timeout, the TCP sender must assume that this ACK was sent - in response to the retransmit when in fact it was sent in response to - an original transmit. Furthermore, the TCP sender must further - assume that all other segments that were outstanding at that point - were lost. - - Note: Except for certain cases where original ACKs were lost, the - first acceptable ACK cannot carry a DSACK option [RFC2883]. - - Consequently, once the TCP sender's state has been updated after the - first acceptable ACK has arrived, SND.NXT equals SND.UNA. This is - what causes the often unnecessary go-back-N retransmits. From that - point on every arriving acceptable ACK that was sent in response to - an original transmit will advance SND.NXT. 
But as long as SND.NXT is - smaller than the value that SND.MAX had when the timeout occurred, - those ACKs will clock out retransmits, whether or not the - corresponding original transmits were lost. - - In fact, during this phase the TCP sender breaks 'packet - conservation' [Jac88]. This is because the go-back-N retransmits are - sent during slow-start. For each original transmit leaving the - network, two retransmits are sent into the network as long as SND.NXT - does not equal SND.MAX (see [LK00] for more detail). - - Once a spurious timeout has been detected (upon receipt of an ACK for - an original transmit), it is safe to let the TCP sender resume the - transmission with previously unsent data. Thus, the Eifel response - algorithm changes the TCP sender's state by setting SND.NXT to - SND.MAX. Note that this step is only executed if the variable - SpuriousRecovery equals SPUR_TO, which in turn requires a detection - algorithm such as the Eifel detection algorithm [RFC3522] or the F- - RTO algorithm [SK04] that detects a spurious retransmit based upon - receiving an ACK for an original transmit (as opposed to the ACK for - the retransmit [RFC3708]). - - - - - - - - -Ludwig & Gurtov Standards Track [Page 6] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - -3.4. Reversing the Congestion Control State (Step 9) - - When a TCP sender enters loss recovery, it reduces cwnd and ssthresh. - However, once the TCP sender detects that the loss recovery has been - falsely triggered, this reduction proves unnecessary. We therefore - believe that it is safe to revert to the previous congestion control - state, following the approach of revalidating the congestion window - as outlined below. This is unless the acceptable ACK signals - congestion through the ECN-Echo flag [RFC3168]. In that case, the - TCP sender MUST refrain from reversing congestion control state. 
- - If the ECN-Echo flag is not set, cwnd is reset to the sum of the - current FlightSize and the minimum of bytes_acked and IW. In some - cases, this can mean that the first few acceptable ACKs that arrive - will not clock out any data segments. Recall that bytes_acked is the - number of bytes that have been acknowledged by the acceptable ACK. - Note that the value of cwnd must not be changed any further for that - ACK, and that the value of FlightSize at this point in time may be - different from the value of FlightSize in step (0). The value of IW - puts a limit on the size of the packet burst that the TCP sender may - send into the network after the Eifel response algorithm has - terminated. The value of IW is considered an acceptable burst size. - It is the amount of data that a TCP sender may send into a yet - "unprobed" network at the beginning of a connection. - - Then ssthresh is reset to the value of pipe_prev. As a result, the - TCP sender either immediately resumes probing the network for more - bandwidth in congestion avoidance, or it slow-starts to what is - considered a safe operating point for the congestion window. - -3.5. Interworking with the CWV Algorithm (Step 10) - - An implementation of the Congestion Window Validation (CWV) algorithm - [RFC2861] could potentially misinterpret a delay spike that caused a - spurious timeout as a phase where the TCP sender had been idle. - Therefore, T_last is reset to prevent the triggering of the CWV - algorithm in this case. - - Note: The term 'idle' implies that the TCP sender has no data - outstanding; i.e., all data sent has been acknowledged [Jac88]. - According to this definition, a TCP sender is not idle while it is - waiting for an acceptable ACK after a timeout. Unfortunately, the - pseudo-code in [RFC2861] does not include a check for the - condition "idle" (SND.UNA == SND.MAX). We therefore had to add - step (10) to the Eifel response algorithm. 
- - - - - - -Ludwig & Gurtov Standards Track [Page 7] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - -3.6. Adapting the Retransmission Timer (Step 11) - - There is currently only one retransmission timer standardized for TCP - [RFC2988]. We therefore only address that timer explicitly. Future - standards that might define alternatives to [RFC2988] should propose - similar measures to adapt the conservativeness of the retransmission - timer. - - A spurious timeout often results from a delay spike, which is a - sudden increase of the RTT that usually cannot be predicted. After a - delay spike, the RTT may have changed permanently; e.g., due to a - path change, or because the available bandwidth on a bandwidth- - dominated path has decreased. This may often occur with wide-area - wireless access links. In this case, the RTT estimators (SRTT and - RTTVAR) should be reinitialized from the first RTT-SAMPLE taken from - new data according to rule (2.2) of [RFC2988]. That is, from the - first RTT-SAMPLE that can be derived from an acceptable ACK for data - that was previously unsent when the spurious timeout occurred. - - However, a delay spike may only indicate a transient phase, after - which the RTT returns to its previous range of values, or even to - smaller values. Also, a spurious timeout may occur because the TCP - sender's RTT estimators were only inaccurate enough that the - retransmission timer expires "a tad too early". We believe that two - times the clock granularity of the retransmission timer (2 * G) is a - reasonable upper bound on "a tad too early". Thus, when the new RTO - is calculated in step (11), we ensure that it is at least (2 * G) - greater (see also step (0)) than the RTO was before the spurious - timeout occurred. - - Note that other TCP sender processing will usually take place between - steps (10) and (11). 
During this phase (i.e., before step (11) has - been reached), the RTO is managed according to the rules of - [RFC2988]. We believe that this is sufficiently conservative for the - following reasons. First, the retransmission timer is restarted upon - the acceptable ACK that was used to detect the spurious timeout. As - a result, the delay spike is already implicitly factored in for - segments outstanding at that time. This is discussed in more detail - in [EL04], where this effect is called the "RTO offset". - Furthermore, if timestamps are enabled, a new and valid RTT-SAMPLE - can be derived from that acceptable ACK. This RTT-SAMPLE must be - relatively large, as it includes the delay spike that caused the - spurious timeout. Consequently, the RTT estimators will be updated - rather conservatively. Without timestamps the RTO will stay - conservatively backed-off due to Karn's algorithm [RFC2988] until the - first RTT-SAMPLE can be derived from an acceptable ACK for data that - was previously unsent when the spurious timeout occurred. - - - - -Ludwig & Gurtov Standards Track [Page 8] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - For the new RTO to become effective, the retransmission timer has to - be restarted. This is consistent with [RFC2988], which recommends - restarting the retransmission timer with the arrival of an acceptable - ACK. - -4. Advanced Loss Recovery is Crucial for the Eifel Response Algorithm - - We have studied environments where spurious timeouts and multiple - losses from the same flight of packets often coincide [GL02], [GL03]. - In such a case, the oldest outstanding segment arrives at the TCP - receiver, but one or more packets from the remaining outstanding - flight are lost. In those environments, end-to-end performance - suffers if the Eifel response algorithm is operated without an - advanced loss recovery scheme such as a SACK-based scheme [RFC3517] - or NewReno [RFC3782]. 
The reason is TCP-Reno's aggressiveness after - a spurious timeout. Even though TCP-Reno breaks 'packet - conservation' (see Section 3.3) when blindly retransmitting all - outstanding segments, it usually recovers all packets lost from that - flight within a single round-trip time. On the contrary, the more - conservative TCP-Reno-with-Eifel is often forced into another - timeout. Thus, we recommend that the Eifel response algorithm always - be operated in combination with [RFC3517] or [RFC3782]. Additional - robustness is achieved with the Limited Transmit and Early Retransmit - algorithms [RFC3042], [AAAB04]. - - Note: The SACK-based scheme we used for our simulations in [GL02] - and [GL03] is different from the SACK-based scheme that later got - standardized [RFC3517]. The key difference is that [RFC3517] is - more robust to multiple losses from the same flight. It is less - conservative in declaring that a packet has left the network, and - is therefore less dependent on timeouts to recover genuine packet - losses. - - If the NewReno algorithm [RFC3782] is used in combination with the - Eifel response algorithm, step (1) of the NewReno algorithm SHOULD be - modified as follows, but only if SpuriousRecovery equals SPUR_TO: - - (1) Three duplicate ACKs: - When the third duplicate ACK is received and the sender is - not already in the Fast Recovery procedure, go to step 1A. - - That is, the entire step 1B of the NewReno algorithm is obsolete - because step (8) of the Eifel response algorithm avoids the case - where three duplicate ACKs result from unnecessary go-back-N - retransmits after a timeout. Step (8) of the Eifel response - algorithm avoids such unnecessary go-back-N retransmits in the first - place. 
However, recall that step (8) is only executed if the - variable SpuriousRecovery equals SPUR_TO, which in turn requires a - - - -Ludwig & Gurtov Standards Track [Page 9] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - detection algorithm, such as the Eifel detection algorithm [RFC3522] - or the F-RTO algorithm [SK04], that detects a spurious retransmit - based upon receiving an ACK for an original transmit (as opposed to - the ACK for the retransmit [RFC3708]). - -5. Security Considerations - - There is a risk that a detection algorithm is fooled by spoofed ACKs - that make genuine retransmits appear to the TCP sender as spurious - retransmits. When such a detection algorithm is run together with - the Eifel response algorithm, this could effectively disable - congestion control at the TCP sender. Should this become a concern, - the Eifel response algorithm SHOULD only be run together with - detection algorithms that are known to be safe against such "ACK - spoofing attacks". - - For example, the safe variant of the Eifel detection algorithm - [RFC3522], is a reliable method to protect against this risk. - -6. Acknowledgements - - Many thanks to Keith Sklower, Randy Katz, Michael Meyer, Stephan - Baucke, Sally Floyd, Vern Paxson, Mark Allman, Ethan Blanton, Pasi - Sarolahti, Alexey Kuznetsov, and Yogesh Swami for many discussions - that contributed to this work. - -7. References - -7.1. Normative References - - [RFC2581] Allman, M., Paxson, V., and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC3390] Allman, M., Floyd, S., and C. Partridge, "Increasing TCP's - Initial Window", RFC 3390, October 2002. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC3782] Floyd, S., Henderson, T., and A. Gurtov, "The NewReno - Modification to TCP's Fast Recovery Algorithm", RFC 3782, - April 2004. - - [RFC2861] Handley, M., Padhye, J., and S. 
Floyd, "TCP Congestion - Window Validation", RFC 2861, June 2000. - - [RFC3522] Ludwig, R. and M. Meyer, "The Eifel Detection Algorithm for - TCP", RFC 3522, April 2003. - - - -Ludwig & Gurtov Standards Track [Page 10] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC3168] Ramakrishnan, K., Floyd, S., and D. Black, "The Addition of - Explicit Congestion Notification (ECN) to IP", RFC 3168, - September 2001. - -7.2. Informative References - - [RFC3042] Allman, M., Balakrishnan, H., and S. Floyd, "Enhancing - TCP's Loss Recovery Using Limited Transmit", RFC 3042, - January 2001. - - [AAAB04] Allman, M., Avrachenkov, K., Ayesta, U., and J. Blanton, - Early Retransmit for TCP and SCTP, Work in Progress, July - 2004. - - [BA02] Blanton, E. and M. Allman, On Making TCP More Robust to - Packet Reordering, ACM Computer Communication Review, Vol. - 32, No. 1, January 2002. - - [RFC3708] Blanton, E. and M. Allman, "Using TCP Duplicate Selective - Acknowledgement (DSACKs) and Stream Control Transmission - Protocol (SCTP) Duplicate Transmission Sequence Numbers - (TSNs) to Detect Spurious Retransmissions", RFC 3708, - February 2004. - - [RFC3517] Blanton, E., Allman, M., Fall, K., and L. Wang, "A - Conservative Selective Acknowledgment (SACK)-based Loss - Recovery Algorithm for TCP", RFC 3517, April 2003. - - [EL04] Ekstrom, H. and R. Ludwig, The Peak-Hopper: A New End-to- - End Retransmission Timer for Reliable Unicast Transport, In - Proceedings of IEEE INFOCOM 04, March 2004. - - [RFC2883] Floyd, S., Mahdavi, J., Mathis, M., and M. Podolsky, "An - Extension to the Selective Acknowledgement (SACK) Option - for TCP", RFC 2883, July 2000. - - [GL02] Gurtov, A. and R. 
Ludwig, Evaluating the Eifel Algorithm - for TCP in a GPRS Network, In Proceedings of the European - Wireless Conference, February 2002. - - [GL03] Gurtov, A. and R. Ludwig, Responding to Spurious Timeouts - in TCP, In Proceedings of IEEE INFOCOM 03, April 2003. - - - -Ludwig & Gurtov Standards Track [Page 11] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - - [Jac88] Jacobson, V., Congestion Avoidance and Control, In - Proceedings of ACM SIGCOMM 88. - - [RFC1323] Jacobson, V., Braden, R., and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [KP87] Karn, P. and C. Partridge, Improving Round-Trip Time - Estimates in Reliable Transport Protocols, In Proceedings - of ACM SIGCOMM 87. - - [LK00] Ludwig, R. and R. H. Katz, The Eifel Algorithm: Making TCP - Robust Against Spurious Retransmissions, ACM Computer - Communication Review, Vol. 30, No. 1, January 2000. - - [SK04] Sarolahti, P. and M. Kojo, F-RTO: An Algorithm for - Detecting Spurious Retransmission Timeouts with TCP and - SCTP, Work in Progress, November 2004. - - [WS95] Wright, G. R. and W. R. Stevens, TCP/IP Illustrated, Volume - 2 (The Implementation), Addison Wesley, January 1995. - - [Zh86] Zhang, L., Why TCP Timers Don't Work Well, In Proceedings - of ACM SIGCOMM 86. - -Authors' Addresses - - Reiner Ludwig - Ericsson Research (EDD) - Ericsson Allee 1 - 52134 Herzogenrath, Germany - - EMail: Reiner.Ludwig@ericsson.com - - - Andrei Gurtov - Helsinki Institute for Information Technology (HIIT) - P.O. Box 9800, FIN-02015 - HUT, Finland - - EMail: andrei.gurtov@cs.helsinki.fi - Homepage: http://www.cs.helsinki.fi/u/gurtov - - - - - - - - - - -Ludwig & Gurtov Standards Track [Page 12] - -RFC 4015 The Eifel Response Algorithm for TCP February 2005 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2005).
- - This document is subject to the rights, licenses and restrictions - contained in BCP 78, and except as set forth therein, the authors - retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS - OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, - INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE - INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; nor does it represent that it has - made any independent effort to identify any such rights. Information - on the IETF's procedures with respect to rights in IETF Documents can - be found in BCP 78 and BCP 79. - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use of - such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository at - http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights that may cover technology that may be required to implement - this standard. Please address the information to the IETF at ietf- - ipr@ietf.org. - - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. 
- - - - - - -Ludwig & Gurtov Standards Track [Page 13] - diff --git a/kernel/picotcp/RFC/rfc4022.txt b/kernel/picotcp/RFC/rfc4022.txt deleted file mode 100644 index a908dc7..0000000 --- a/kernel/picotcp/RFC/rfc4022.txt +++ /dev/null @@ -1,1347 +0,0 @@ - - - - - - -Network Working Group R. Raghunarayan, Ed. -Request for Comments: 4022 Cisco Systems -Obsoletes: 2452, 2012 March 2005 -Category: Standards Track - - - Management Information Base - for the Transmission Control Protocol (TCP) - -Status of This Memo - - This document specifies an Internet standards track protocol for the - Internet community, and requests discussion and suggestions for - improvements. Please refer to the current edition of the "Internet - Official Protocol Standards" (STD 1) for the standardization state - and status of this protocol. Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2005). - -Abstract - - This memo defines a portion of the Management Information Base (MIB) - for use with network management protocols in the Internet community. - In particular, it describes managed objects used for implementations - of the Transmission Control Protocol (TCP) in an IP version - independent manner. This memo obsoletes RFCs 2452 and 2012. - -Table of Contents - - 1. The Internet-Standard Management Framework . . . . . . . . . 2 - 2. Overview. . . . . . . . . . . . . . . . . . . . . . . . . . . 2 - 2.1. Relationship to Other MIBs. . . . . . . . . . . . . . . 2 - 3. Definitions . . . . . . . . . . . . . . . . . . . . . . . . . 4 - 4. Acknowledgements. . . . . . . . . . . . . . . . . . . . . . . 20 - 5. References. . . . . . . . . . . . . . . . . . . . . . . . . . 20 - 5.1. Normative References. . . . . . . . . . . . . . . . . . 20 - 5.2. Informative References. . . . . . . . . . . . . . . . . 21 - 6. Security Considerations . . . . . . . . . . . . . . . . . . . 21 - 7. Contributors. . . . . . . . . . . . . . . . . . . . . . . . . 
23 - Editor's Address. . . . . . . . . . . . . . . . . . . . . . . . . 23 - Full Copyright Statement. . . . . . . . . . . . . . . . . . . . . 24 - - - - - - - - -Raghunarayan Standards Track [Page 1] - -RFC 4022 MIB for TCP March 2005 - - -1. The Internet-Standard Management Framework - - For a detailed overview of the documents that describe the current - Internet-Standard Management Framework, please refer to section 7 of - RFC 3410 [RFC3410]. - - Managed objects are accessed via a virtual information store, termed - the Management Information Base or MIB. MIB objects are generally - accessed through the Simple Network Management Protocol (SNMP). - Objects in the MIB are defined using the mechanisms defined in the - Structure of Management Information (SMI). This memo specifies a MIB - module that is compliant to the SMIv2, which is described in STD 58, - RFC 2578 [RFC2578], STD 58, RFC 2579 [RFC2579] and STD 58, RFC 2580 - [RFC2580]. - -2. Overview - - The current TCP-MIB defined in this memo consists of two tables and a - group of scalars: - - - The tcp group of scalars includes two sets of objects: - - o Parameters of a TCP protocol engine. These include - parameters such as the retransmission algorithm in use - (e.g., vanj [VANJ]) and the retransmission timeout values. - - o Statistics of a TCP protocol engine. These include counters - for the number of active/passive opens, input/output - segments, and errors. Discontinuities in the stats are - identified via the sysUpTime object, defined in - [RFC3418]. - - - The tcpConnectionTable provides access to status information - for all TCP connections handled by a TCP protocol engine. In - addition, the table reports identification of the operating - system level processes that handle the TCP connections. - - - The tcpListenerTable provides access to information about all - TCP listening endpoints known by a TCP protocol engine.
And as - with the connection table, the tcpListenerTable also reports - the identification of the operating system level processes that - handle this listening TCP endpoint. - -2.1. Relationship to Other MIBs - - This section discusses the relationship of this TCP-MIB module to - other MIB modules. - - - - -Raghunarayan Standards Track [Page 2] - -RFC 4022 MIB for TCP March 2005 - - -2.1.1. Relationship to RFC1213-MIB - - TCP related MIB objects were originally defined as part of the - RFC1213-MIB defined in RFC 1213 [RFC1213]. The TCP related objects - of the RFC1213-MIB were later copied into a separate MIB module and - published in RFC 2012 [RFC2012] in SMIv2 format. - - The previous versions of the TCP-MIB both defined the tcpConnTable, - which has been deprecated basically for two reasons: - - (1) The tcpConnTable only supports IPv4. - - The current approach in the IETF is to write IP version neutral - MIBs, based on the InetAddressType and InetAddress constructs - defined in [RFC4001], rather than to have different definitions - for various version of IP. This reduces the amount of overhead - when new objects are introduced, as there is only one place to - add them. Hence, the approach taken in [RFC2452], of having - separate tables, is not continued. - - (2) The tcpConnTable mixes listening endpoints with connections. - - It turns out that connections tend to have a different behaviour - and management access pattern than listening endpoints. - Therefore, splitting the original tcpConnTable into two tables - allows for the addition of specific status and statistics objects - for listening endpoints and connections. - -2.1.2. Relationship to IPV6-TCP-MIB - - The IPV6-TCP-MIB defined in RFC 2452 has been moved to Historic - status because the approach of having separate IP version specific - tables is not followed anymore. Implementation of RFC 2452 is no - longer suggested. - -2.1.3. 
Relationship to HOST-RESOURCES-MIB and SYSAPPL-MIB - - The tcpConnectionTable and the tcpListenerTable report the - identification of the operating system level process that handles a - connection or a listening endpoint. The value is reported as an - Unsigned32, which is expected to be the same as the hrSWRunIndex of - the HOST-RESOURCES-MIB [RFC2790] (if the value is smaller than - 2147483647) or the sysApplElmtRunIndex of the SYSAPPL-MIB [RFC2287]. - This allows management applications to identify the TCP connections - that belong to an operating system level process, which has proven to - be valuable in operational environments. - - - - - -Raghunarayan Standards Track [Page 3] - -RFC 4022 MIB for TCP March 2005 - - -3. Definitions - -TCP-MIB DEFINITIONS ::= BEGIN - -IMPORTS - MODULE-IDENTITY, OBJECT-TYPE, Integer32, Unsigned32, - Gauge32, Counter32, Counter64, IpAddress, mib-2 - FROM SNMPv2-SMI - MODULE-COMPLIANCE, OBJECT-GROUP FROM SNMPv2-CONF - InetAddress, InetAddressType, - InetPortNumber FROM INET-ADDRESS-MIB; - -tcpMIB MODULE-IDENTITY - LAST-UPDATED "200502180000Z" -- 18 February 2005 - ORGANIZATION - "IETF IPv6 MIB Revision Team - http://www.ietf.org/html.charters/ipv6-charter.html" - CONTACT-INFO - "Rajiv Raghunarayan (editor) - - Cisco Systems Inc. - 170 West Tasman Drive - San Jose, CA 95134 - - Phone: +1 408 853 9612 - Email: - - Send comments to " - DESCRIPTION - "The MIB module for managing TCP implementations. - - Copyright (C) The Internet Society (2005). This version - of this MIB module is a part of RFC 4022; see the RFC - itself for full legal notices." - REVISION "200502180000Z" -- 18 February 2005 - DESCRIPTION - "IP version neutral revision, published as RFC 4022." - REVISION "9411010000Z" - DESCRIPTION - "Initial SMIv2 version, published as RFC 2012." - REVISION "9103310000Z" - DESCRIPTION - "The initial revision of this MIB module was part of - MIB-II." 
- ::= { mib-2 49 } - --- the TCP base variables group - - - - -Raghunarayan Standards Track [Page 4] - -RFC 4022 MIB for TCP March 2005 - - -tcp OBJECT IDENTIFIER ::= { mib-2 6 } - --- Scalars - -tcpRtoAlgorithm OBJECT-TYPE - SYNTAX INTEGER { - other(1), -- none of the following - constant(2), -- a constant rto - rsre(3), -- MIL-STD-1778, Appendix B - vanj(4), -- Van Jacobson's algorithm - rfc2988(5) -- RFC 2988 - } - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The algorithm used to determine the timeout value used for - retransmitting unacknowledged octets." - ::= { tcp 1 } - -tcpRtoMin OBJECT-TYPE - SYNTAX Integer32 (0..2147483647) - UNITS "milliseconds" - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The minimum value permitted by a TCP implementation for - the retransmission timeout, measured in milliseconds. - More refined semantics for objects of this type depend - on the algorithm used to determine the retransmission - timeout; in particular, the IETF standard algorithm - rfc2988(5) provides a minimum value." - ::= { tcp 2 } - -tcpRtoMax OBJECT-TYPE - SYNTAX Integer32 (0..2147483647) - UNITS "milliseconds" - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The maximum value permitted by a TCP implementation for - the retransmission timeout, measured in milliseconds. - More refined semantics for objects of this type depend - on the algorithm used to determine the retransmission - timeout; in particular, the IETF standard algorithm - rfc2988(5) provides an upper bound (as part of an - adaptive backoff algorithm)." - ::= { tcp 3 } - - - - -Raghunarayan Standards Track [Page 5] - -RFC 4022 MIB for TCP March 2005 - - -tcpMaxConn OBJECT-TYPE - SYNTAX Integer32 (-1 | 0..2147483647) - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The limit on the total number of TCP connections the entity - can support. In entities where the maximum number of - connections is dynamic, this object should contain the - value -1." 
- ::= { tcp 4 } - -tcpActiveOpens OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times that TCP connections have made a direct - transition to the SYN-SENT state from the CLOSED state. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 5 } - -tcpPassiveOpens OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times TCP connections have made a direct - transition to the SYN-RCVD state from the LISTEN state. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 6 } - -tcpAttemptFails OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times that TCP connections have made a direct - transition to the CLOSED state from either the SYN-SENT - state or the SYN-RCVD state, plus the number of times that - TCP connections have made a direct transition to the - LISTEN state from the SYN-RCVD state. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - - - -Raghunarayan Standards Track [Page 6] - -RFC 4022 MIB for TCP March 2005 - - - ::= { tcp 7 } - -tcpEstabResets OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of times that TCP connections have made a direct - transition to the CLOSED state from either the ESTABLISHED - state or the CLOSE-WAIT state. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 8 } - -tcpCurrEstab OBJECT-TYPE - SYNTAX Gauge32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of TCP connections for which the current state - is either ESTABLISHED or CLOSE-WAIT." 
- ::= { tcp 9 } - -tcpInSegs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments received, including those - received in error. This count includes segments received - on currently established connections. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 10 } - -tcpOutSegs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments sent, including those on - current connections but excluding those containing only - retransmitted octets. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - - - -Raghunarayan Standards Track [Page 7] - -RFC 4022 MIB for TCP March 2005 - - - ::= { tcp 11 } - -tcpRetransSegs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments retransmitted; that is, the - number of TCP segments transmitted containing one or more - previously transmitted octets. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 12 } - -tcpInErrs OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments received in error (e.g., bad - TCP checksums). - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 14 } - -tcpOutRsts OBJECT-TYPE - SYNTAX Counter32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of TCP segments sent containing the RST flag. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 15 } - --- { tcp 16 } was used to represent the ipv6TcpConnTable in RFC 2452, --- which has since been obsoleted. It MUST not be used. 
- -tcpHCInSegs OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments received, including those - received in error. This count includes segments received - - - -Raghunarayan Standards Track [Page 8] - -RFC 4022 MIB for TCP March 2005 - - - on currently established connections. This object is - the 64-bit equivalent of tcpInSegs. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 17 } - -tcpHCOutSegs OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The total number of segments sent, including those on - current connections but excluding those containing only - retransmitted octets. This object is the 64-bit - equivalent of tcpOutSegs. - - Discontinuities in the value of this counter are - indicated via discontinuities in the value of sysUpTime." - ::= { tcp 18 } - - --- The TCP Connection table - -tcpConnectionTable OBJECT-TYPE - SYNTAX SEQUENCE OF TcpConnectionEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table containing information about existing TCP - connections. Note that unlike earlier TCP MIBs, there - is a separate table for connections in the LISTEN state." - ::= { tcp 19 } - -tcpConnectionEntry OBJECT-TYPE - SYNTAX TcpConnectionEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A conceptual row of the tcpConnectionTable containing - information about a particular current TCP connection. - Each row of this table is transient in that it ceases to - exist when (or soon after) the connection makes the - transition to the CLOSED state." 
- INDEX { tcpConnectionLocalAddressType, - tcpConnectionLocalAddress, - tcpConnectionLocalPort, - tcpConnectionRemAddressType, - - - -Raghunarayan Standards Track [Page 9] - -RFC 4022 MIB for TCP March 2005 - - - tcpConnectionRemAddress, - tcpConnectionRemPort } - ::= { tcpConnectionTable 1 } - -TcpConnectionEntry ::= SEQUENCE { - tcpConnectionLocalAddressType InetAddressType, - tcpConnectionLocalAddress InetAddress, - tcpConnectionLocalPort InetPortNumber, - tcpConnectionRemAddressType InetAddressType, - tcpConnectionRemAddress InetAddress, - tcpConnectionRemPort InetPortNumber, - tcpConnectionState INTEGER, - tcpConnectionProcess Unsigned32 - } - -tcpConnectionLocalAddressType OBJECT-TYPE - SYNTAX InetAddressType - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The address type of tcpConnectionLocalAddress." - ::= { tcpConnectionEntry 1 } - -tcpConnectionLocalAddress OBJECT-TYPE - SYNTAX InetAddress - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The local IP address for this TCP connection. The type - of this address is determined by the value of - tcpConnectionLocalAddressType. - - As this object is used in the index for the - tcpConnectionTable, implementors should be - careful not to create entries that would result in OIDs - with more than 128 subidentifiers; otherwise the information - cannot be accessed by using SNMPv1, SNMPv2c, or SNMPv3." - ::= { tcpConnectionEntry 2 } - -tcpConnectionLocalPort OBJECT-TYPE - SYNTAX InetPortNumber - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The local port number for this TCP connection." - ::= { tcpConnectionEntry 3 } - -tcpConnectionRemAddressType OBJECT-TYPE - - - -Raghunarayan Standards Track [Page 10] - -RFC 4022 MIB for TCP March 2005 - - - SYNTAX InetAddressType - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The address type of tcpConnectionRemAddress." 
- ::= { tcpConnectionEntry 4 } - -tcpConnectionRemAddress OBJECT-TYPE - SYNTAX InetAddress - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The remote IP address for this TCP connection. The type - of this address is determined by the value of - tcpConnectionRemAddressType. - - As this object is used in the index for the - tcpConnectionTable, implementors should be - careful not to create entries that would result in OIDs - with more than 128 subidentifiers; otherwise the information - cannot be accessed by using SNMPv1, SNMPv2c, or SNMPv3." - ::= { tcpConnectionEntry 5 } - -tcpConnectionRemPort OBJECT-TYPE - SYNTAX InetPortNumber - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The remote port number for this TCP connection." - ::= { tcpConnectionEntry 6 } - -tcpConnectionState OBJECT-TYPE - SYNTAX INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11), - deleteTCB(12) - } - MAX-ACCESS read-write - STATUS current - - - -Raghunarayan Standards Track [Page 11] - -RFC 4022 MIB for TCP March 2005 - - - DESCRIPTION - "The state of this TCP connection. - - The value listen(2) is included only for parallelism to the - old tcpConnTable and should not be used. A connection in - LISTEN state should be present in the tcpListenerTable. - - The only value that may be set by a management station is - deleteTCB(12). Accordingly, it is appropriate for an agent - to return a `badValue' response if a management station - attempts to set this object to any other value. - - If a management station sets this object to the value - deleteTCB(12), then the TCB (as defined in [RFC793]) of - the corresponding connection on the managed node is - deleted, resulting in immediate termination of the - connection. 
- - As an implementation-specific option, a RST segment may be - sent from the managed node to the other TCP endpoint (note, - however, that RST segments are not sent reliably)." - ::= { tcpConnectionEntry 7 } - -tcpConnectionProcess OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The system's process ID for the process associated with - this connection, or zero if there is no such process. This - value is expected to be the same as HOST-RESOURCES-MIB:: - hrSWRunIndex or SYSAPPL-MIB::sysApplElmtRunIndex for some - row in the appropriate tables." - ::= { tcpConnectionEntry 8 } - --- The TCP Listener table - -tcpListenerTable OBJECT-TYPE - SYNTAX SEQUENCE OF TcpListenerEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table containing information about TCP listeners. A - listening application can be represented in three - possible ways: - - 1. An application that is willing to accept both IPv4 and - IPv6 datagrams is represented by - - - -Raghunarayan Standards Track [Page 12] - -RFC 4022 MIB for TCP March 2005 - - - a tcpListenerLocalAddressType of unknown (0) and - a tcpListenerLocalAddress of ''h (a zero-length - octet-string). - - 2. An application that is willing to accept only IPv4 or - IPv6 datagrams is represented by a - tcpListenerLocalAddressType of the appropriate address - type and a tcpListenerLocalAddress of '0.0.0.0' or '::' - respectively. - - 3. An application that is listening for data destined - only to a specific IP address, but from any remote - system, is represented by a tcpListenerLocalAddressType - of an appropriate address type, with - tcpListenerLocalAddress as the specific local address. - - NOTE: The address type in this table represents the - address type used for the communication, irrespective - of the higher-layer abstraction. 
For example, an - application using IPv6 'sockets' to communicate via - IPv4 between ::ffff:10.0.0.1 and ::ffff:10.0.0.2 would - use InetAddressType ipv4(1))." - ::= { tcp 20 } - -tcpListenerEntry OBJECT-TYPE - SYNTAX TcpListenerEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A conceptual row of the tcpListenerTable containing - information about a particular TCP listener." - INDEX { tcpListenerLocalAddressType, - tcpListenerLocalAddress, - tcpListenerLocalPort } - ::= { tcpListenerTable 1 } - -TcpListenerEntry ::= SEQUENCE { - tcpListenerLocalAddressType InetAddressType, - tcpListenerLocalAddress InetAddress, - tcpListenerLocalPort InetPortNumber, - tcpListenerProcess Unsigned32 - } - -tcpListenerLocalAddressType OBJECT-TYPE - SYNTAX InetAddressType - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - - - -Raghunarayan Standards Track [Page 13] - -RFC 4022 MIB for TCP March 2005 - - - "The address type of tcpListenerLocalAddress. The value - should be unknown (0) if connection initiations to all - local IP addresses are accepted." - ::= { tcpListenerEntry 1 } - -tcpListenerLocalAddress OBJECT-TYPE - SYNTAX InetAddress - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The local IP address for this TCP connection. - - The value of this object can be represented in three - possible ways, depending on the characteristics of the - listening application: - - 1. For an application willing to accept both IPv4 and - IPv6 datagrams, the value of this object must be - ''h (a zero-length octet-string), with the value - of the corresponding tcpListenerLocalAddressType - object being unknown (0). - - 2. For an application willing to accept only IPv4 or - IPv6 datagrams, the value of this object must be - '0.0.0.0' or '::' respectively, with - tcpListenerLocalAddressType representing the - appropriate address type. - - 3. 
For an application which is listening for data - destined only to a specific IP address, the value - of this object is the specific local address, with - tcpListenerLocalAddressType representing the - appropriate address type. - - As this object is used in the index for the - tcpListenerTable, implementors should be - careful not to create entries that would result in OIDs - with more than 128 subidentifiers; otherwise the information - cannot be accessed, using SNMPv1, SNMPv2c, or SNMPv3." - ::= { tcpListenerEntry 2 } - -tcpListenerLocalPort OBJECT-TYPE - SYNTAX InetPortNumber - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "The local port number for this TCP connection." - ::= { tcpListenerEntry 3 } - - - -Raghunarayan Standards Track [Page 14] - -RFC 4022 MIB for TCP March 2005 - - -tcpListenerProcess OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The system's process ID for the process associated with - this listener, or zero if there is no such process. This - value is expected to be the same as HOST-RESOURCES-MIB:: - hrSWRunIndex or SYSAPPL-MIB::sysApplElmtRunIndex for some - row in the appropriate tables." - ::= { tcpListenerEntry 4 } - - --- The deprecated TCP Connection table - -tcpConnTable OBJECT-TYPE - SYNTAX SEQUENCE OF TcpConnEntry - MAX-ACCESS not-accessible - STATUS deprecated - DESCRIPTION - "A table containing information about existing IPv4-specific - TCP connections or listeners. This table has been - deprecated in favor of the version neutral - tcpConnectionTable." - ::= { tcp 13 } - -tcpConnEntry OBJECT-TYPE - SYNTAX TcpConnEntry - MAX-ACCESS not-accessible - STATUS deprecated - DESCRIPTION - "A conceptual row of the tcpConnTable containing information - about a particular current IPv4 TCP connection. Each row - of this table is transient in that it ceases to exist when - (or soon after) the connection makes the transition to the - CLOSED state." 
- INDEX { tcpConnLocalAddress, - tcpConnLocalPort, - tcpConnRemAddress, - tcpConnRemPort } - ::= { tcpConnTable 1 } - -TcpConnEntry ::= SEQUENCE { - tcpConnState INTEGER, - tcpConnLocalAddress IpAddress, - tcpConnLocalPort Integer32, - tcpConnRemAddress IpAddress, - tcpConnRemPort Integer32 - - - -Raghunarayan Standards Track [Page 15] - -RFC 4022 MIB for TCP March 2005 - - - } - -tcpConnState OBJECT-TYPE - SYNTAX INTEGER { - closed(1), - listen(2), - synSent(3), - synReceived(4), - established(5), - finWait1(6), - finWait2(7), - closeWait(8), - lastAck(9), - closing(10), - timeWait(11), - deleteTCB(12) - } - MAX-ACCESS read-write - STATUS deprecated - DESCRIPTION - "The state of this TCP connection. - - The only value that may be set by a management station is - deleteTCB(12). Accordingly, it is appropriate for an agent - to return a `badValue' response if a management station - attempts to set this object to any other value. - - If a management station sets this object to the value - deleteTCB(12), then the TCB (as defined in [RFC793]) of - the corresponding connection on the managed node is - deleted, resulting in immediate termination of the - connection. - - As an implementation-specific option, a RST segment may be - sent from the managed node to the other TCP endpoint (note, - however, that RST segments are not sent reliably)." - ::= { tcpConnEntry 1 } - -tcpConnLocalAddress OBJECT-TYPE - SYNTAX IpAddress - MAX-ACCESS read-only - STATUS deprecated - DESCRIPTION - "The local IP address for this TCP connection. In the case - of a connection in the listen state willing to - accept connections for any IP interface associated with the - node, the value 0.0.0.0 is used." - ::= { tcpConnEntry 2 } - - - -Raghunarayan Standards Track [Page 16] - -RFC 4022 MIB for TCP March 2005 - - -tcpConnLocalPort OBJECT-TYPE - SYNTAX Integer32 (0..65535) - MAX-ACCESS read-only - STATUS deprecated - DESCRIPTION - "The local port number for this TCP connection." 
- ::= { tcpConnEntry 3 } - -tcpConnRemAddress OBJECT-TYPE - SYNTAX IpAddress - MAX-ACCESS read-only - STATUS deprecated - DESCRIPTION - "The remote IP address for this TCP connection." - ::= { tcpConnEntry 4 } - -tcpConnRemPort OBJECT-TYPE - SYNTAX Integer32 (0..65535) - MAX-ACCESS read-only - STATUS deprecated - DESCRIPTION - "The remote port number for this TCP connection." - ::= { tcpConnEntry 5 } - --- conformance information - -tcpMIBConformance OBJECT IDENTIFIER ::= { tcpMIB 2 } - -tcpMIBCompliances OBJECT IDENTIFIER ::= { tcpMIBConformance 1 } -tcpMIBGroups OBJECT IDENTIFIER ::= { tcpMIBConformance 2 } - --- compliance statements - -tcpMIBCompliance2 MODULE-COMPLIANCE - STATUS current - DESCRIPTION - "The compliance statement for systems that implement TCP. - - A number of INDEX objects cannot be - represented in the form of OBJECT clauses in SMIv2 but - have the following compliance requirements, - expressed in OBJECT clause form in this description - clause: - - -- OBJECT tcpConnectionLocalAddressType - -- SYNTAX InetAddressType { ipv4(1), ipv6(2) } - -- DESCRIPTION - -- This MIB requires support for only global IPv4 - - - -Raghunarayan Standards Track [Page 17] - -RFC 4022 MIB for TCP March 2005 - - - -- and IPv6 address types. - -- - -- OBJECT tcpConnectionRemAddressType - -- SYNTAX InetAddressType { ipv4(1), ipv6(2) } - -- DESCRIPTION - -- This MIB requires support for only global IPv4 - -- and IPv6 address types. - -- - -- OBJECT tcpListenerLocalAddressType - -- SYNTAX InetAddressType { unknown(0), ipv4(1), - -- ipv6(2) } - -- DESCRIPTION - -- This MIB requires support for only global IPv4 - -- and IPv6 address types. The type unknown also - -- needs to be supported to identify a special - -- case in the listener table: a listen using - -- both IPv4 and IPv6 addresses on the device. 
- -- - " - MODULE -- this module - MANDATORY-GROUPS { tcpBaseGroup, tcpConnectionGroup, - tcpListenerGroup } - GROUP tcpHCGroup - DESCRIPTION - "This group is mandatory for systems that are capable - of receiving or transmitting more than 1 million TCP - segments per second. 1 million segments per second will - cause a Counter32 to wrap in just over an hour." - OBJECT tcpConnectionState - SYNTAX INTEGER { closed(1), listen(2), synSent(3), - synReceived(4), established(5), - finWait1(6), finWait2(7), closeWait(8), - lastAck(9), closing(10), timeWait(11) } - MIN-ACCESS read-only - DESCRIPTION - "Write access is not required, nor is support for the value - deleteTCB (12)." - ::= { tcpMIBCompliances 2 } - -tcpMIBCompliance MODULE-COMPLIANCE - STATUS deprecated - DESCRIPTION - "The compliance statement for IPv4-only systems that - implement TCP. In order to be IP version independent, this - compliance statement is deprecated in favor of - tcpMIBCompliance2. However, agents are still encouraged - to implement these objects in order to interoperate with - the deployed base of managers." - - - -Raghunarayan Standards Track [Page 18] - -RFC 4022 MIB for TCP March 2005 - - - MODULE -- this module - MANDATORY-GROUPS { tcpGroup } - OBJECT tcpConnState - MIN-ACCESS read-only - DESCRIPTION - "Write access is not required." - ::= { tcpMIBCompliances 1 } - - --- units of conformance - -tcpGroup OBJECT-GROUP - OBJECTS { tcpRtoAlgorithm, tcpRtoMin, tcpRtoMax, - tcpMaxConn, tcpActiveOpens, - tcpPassiveOpens, tcpAttemptFails, - tcpEstabResets, tcpCurrEstab, tcpInSegs, - tcpOutSegs, tcpRetransSegs, tcpConnState, - tcpConnLocalAddress, tcpConnLocalPort, - tcpConnRemAddress, tcpConnRemPort, - tcpInErrs, tcpOutRsts } - STATUS deprecated - DESCRIPTION - "The tcp group of objects providing for management of TCP - entities." 
- ::= { tcpMIBGroups 1 } - -tcpBaseGroup OBJECT-GROUP - OBJECTS { tcpRtoAlgorithm, tcpRtoMin, tcpRtoMax, - tcpMaxConn, tcpActiveOpens, - tcpPassiveOpens, tcpAttemptFails, - tcpEstabResets, tcpCurrEstab, tcpInSegs, - tcpOutSegs, tcpRetransSegs, - tcpInErrs, tcpOutRsts } - STATUS current - DESCRIPTION - "The group of counters common to TCP entities." - ::= { tcpMIBGroups 2 } - -tcpConnectionGroup OBJECT-GROUP - OBJECTS { tcpConnectionState, tcpConnectionProcess } - STATUS current - DESCRIPTION - "The group provides general information about TCP - connections." - ::= { tcpMIBGroups 3 } - -tcpListenerGroup OBJECT-GROUP - OBJECTS { tcpListenerProcess } - - - -Raghunarayan Standards Track [Page 19] - -RFC 4022 MIB for TCP March 2005 - - - STATUS current - DESCRIPTION - "This group has objects providing general information about - TCP listeners." - ::= { tcpMIBGroups 4 } - -tcpHCGroup OBJECT-GROUP - OBJECTS { tcpHCInSegs, tcpHCOutSegs } - STATUS current - DESCRIPTION - "The group of objects providing for counters of high speed - TCP implementations." - ::= { tcpMIBGroups 5 } - -END - -4. Acknowledgements - - This document contains a modified subset of RFC 1213 and updates RFC - 2012 and RFC 2452. Acknowledgements are therefore due to the authors - and editors of these documents for their excellent work. Several - useful comments regarding usability and design were also received - from Kristine Adamson. The authors would like to thank all these - people for their contribution to this effort. - -5. References - -5.1. Normative References - - [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, DARPA, September 1981. - - [RFC2287] Krupczak, C. and J. Saperia, "Definitions of System-Level - Managed Objects for Applications", RFC 2287, February 1998. - - [RFC2578] McCloghrie, K., Perkins, D., and J. Schoenwaelder, - "Structure of Management Information Version 2 (SMIv2)", - STD 58, RFC 2578, April 1999. - - [RFC2579] McCloghrie, K., Perkins, D., and J. 
Schoenwaelder, "Textual - Conventions for SMIv2", STD 58, RFC 2579, April 1999. - - [RFC2580] McCloghrie, K., Perkins, D., and J. Schoenwaelder, - "Conformance Statements for SMIv2", STD 58, RFC 2580, April - 1999. - - [RFC2790] Waldbusser, S. and P. Grillo, "Host Resources MIB", RFC - 2790, March 2000. - - - -Raghunarayan Standards Track [Page 20] - -RFC 4022 MIB for TCP March 2005 - - - [RFC4001] Daniele, M., Haberman, B., Routhier, S., and J. - Schoenwaelder, "Textual Conventions for Internet Network - Addresses", RFC 4001, February 2005. - -5.2. Informative References - - [RFC1213] McCloghrie, K. and M. Rose, "Management Information Base - for Network Management of TCP/IP-based internets", RFC - 1213, March 1991. - - [RFC2012] McCloghrie, K., Ed., "SNMPv2 Management Information Base - for the Transmission Control Protocol using SMIv2", RFC - 2012, November 1996. - - [RFC2452] Daniele, M., "IP Version 6 Management Information Base for - the Transmission Control Protocol", RFC 2452, December - 1998. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [RFC3410] Case, J., Mundy, R., Partain, D., and B. Stewart, - "Introduction and Applicability Statements for Internet- - Standard Management Framework", RFC 3410, December 2002. - - [RFC3418] Presuhn, R., Ed., "Management Information Base (MIB) for - the Simple Network Management Protocol (SNMP)", RFC 3418, - December 2002. - - [VANJ] Jacobson, V., "Congestion Avoidance and Control", SIGCOMM - 1988, Stanford, California. - -6. Security Considerations - - There are a number of management objects defined in this MIB module - with a MAX-ACCESS clause of read-write. Such objects may be - considered sensitive or vulnerable in some network environments. The - support for SET operations in a non-secure environment without proper - protection can have a negative effect on network operations. 
These - are the tables and objects and their sensitivity/vulnerability: - - o The tcpConnectionState and tcpConnState objects have a MAX-ACCESS - clause of read-write, which allows termination of an arbitrary - connection. Unauthorized access could cause a denial of service. - - Some of the readable objects in this MIB module (i.e., objects with a - MAX-ACCESS other than not-accessible) may be considered sensitive or - vulnerable in some network environments. It is thus important to - - - -Raghunarayan Standards Track [Page 21] - -RFC 4022 MIB for TCP March 2005 - - - control even GET and/or NOTIFY access to these objects and possibly - to even encrypt the values of these objects when sending them over - the network via SNMP. These are the tables and objects and their - sensitivity/vulnerability: - - o The tcpConnectionTable and the tcpConnTable contain objects - providing information about the active connections on the device, - the status of these connections, and the associated processes. - This information may be used by an attacker to launch attacks - against known/unknown weakness in certain protocols/applications. - In addition, access to the connection table could also have - privacy implications, as it provides detailed information on - active connections. - - o The tcpListenerTable and the tcpConnTable contain objects - providing information about listeners on an entity. For example, - the tcpListenerLocalPort and tcpConnLocalPort objects can be used - to identify what ports are open on the machine and what attacks - are likely to succeed, without the attacker having to run a port - scanner. - - SNMP versions prior to SNMPv3 did not include adequate security. - Even if the network itself is secure (for example by using IPSec), - even then, there is no control as to who on the secure network is - allowed to access and GET/SET (read/change/create/delete) the objects - in this MIB module. 
- - It is RECOMMENDED that implementers consider the security features as - provided by the SNMPv3 framework (see [RFC3410], section 8), - including full support for the SNMPv3 cryptographic mechanisms (for - authentication and privacy). - - Further, deployment of SNMP versions prior to SNMPv3 is NOT - RECOMMENDED. Instead, it is RECOMMENDED to deploy SNMPv3 and to - enable cryptographic security. It is then a customer/operator - responsibility to ensure that the SNMP entity giving access to an - instance of this MIB module is properly configured to give access to - the objects only to those principals (users) that have legitimate - rights to indeed GET or SET (change/create/delete) them. - - - - - - - - - - - - -Raghunarayan Standards Track [Page 22] - -RFC 4022 MIB for TCP March 2005 - - -7. Contributors - - This document is an output of the IPv6 MIB revision team, and - contributors to earlier versions of this document include: - - Bill Fenner, AT&T Labs -- Research - EMail: fenner@research.att.com - - Brian Haberman - EMail: brian@innovationslab.net - - Shawn A. Routhier, Wind River - EMail: shawn.routhier@windriver.com - - Juergen Schoenwalder, TU Braunschweig - EMail: schoenw@ibr.cs.tu-bs.de - - Dave Thaler, Microsoft - EMail: dthaler@windows.microsoft.com - - This document updates parts of the MIBs from several documents. RFC - 2012 has been the base document for these updates, and RFC 2452 was - the first document to define the managed objects for implementations - of TCP over IPv6. - - RFC 2012: - - Keith McCloghrie, Cisco Systems (Editor) - EMail: kzm@cisco.com - - RFC 2452: - - Mike Daniele, Compaq Computer Corporation - EMail: daniele@zk3.dec.com - -Editor's Address - - Rajiv Raghunarayan - Cisco Systems Inc. - 170 West Tasman Drive - San Jose, CA 95134 - USA - - EMail: raraghun@cisco.com - - - - - - - -Raghunarayan Standards Track [Page 23] - -RFC 4022 MIB for TCP March 2005 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2005). 
- - This document is subject to the rights, licenses and restrictions - contained in BCP 78, and except as set forth therein, the authors - retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS - OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, - INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE - INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; nor does it represent that it has - made any independent effort to identify any such rights. Information - on the procedures with respect to rights in RFC documents can be - found in BCP 78 and BCP 79. - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use of - such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository at - http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights that may cover technology that may be required to implement - this standard. Please address the information to the IETF at ietf- - ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. 
- - - - - - - -Raghunarayan Standards Track [Page 24] - diff --git a/kernel/picotcp/RFC/rfc4138.txt b/kernel/picotcp/RFC/rfc4138.txt deleted file mode 100644 index 4e18add..0000000 --- a/kernel/picotcp/RFC/rfc4138.txt +++ /dev/null @@ -1,1291 +0,0 @@ - - - - - - -Network Working Group P. Sarolahti -Request for Comments: 4138 Nokia Research Center -Category: Experimental M. Kojo - University of Helsinki - August 2005 - - - Forward RTO-Recovery (F-RTO): An Algorithm for Detecting - Spurious Retransmission Timeouts with TCP and the - Stream Control Transmission Protocol (SCTP) - -Status of This Memo - - This memo defines an Experimental Protocol for the Internet - community. It does not specify an Internet standard of any kind. - Discussion and suggestions for improvement are requested. - Distribution of this memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2005). - -Abstract - - Spurious retransmission timeouts cause suboptimal TCP performance - because they often result in unnecessary retransmission of the last - window of data. This document describes the F-RTO detection - algorithm for detecting spurious TCP retransmission timeouts. F-RTO - is a TCP sender-only algorithm that does not require any TCP options - to operate. After retransmitting the first unacknowledged segment - triggered by a timeout, the F-RTO algorithm of the TCP sender - monitors the incoming acknowledgments to determine whether the - timeout was spurious. It then decides whether to send new segments - or retransmit unacknowledged segments. The algorithm effectively - helps to avoid additional unnecessary retransmissions and thereby - improves TCP performance in the case of a spurious timeout. The - F-RTO algorithm can also be applied to the Stream Control - Transmission Protocol (SCTP). - - - - - - - - - - - - - -Sarolahti & Kojo Experimental [Page 1] - -RFC 4138 Forward RTO-Recovery August 2005 - - -Table of Contents - - 1. Introduction . . . . . . . . . . . . . . 
. . . . . . . . 2 - 1.1. Terminology . . . . . . . . . . . . . . . . . . . . 4 - 2. F-RTO Algorithm . . . . . . . . . . . . . . . . . . . . . 4 - 2.1. The Algorithm . . . . . . . . . . . . . . . . . . . 5 - 2.2. Discussion . . . . . . . . . . . . . . . . . . . . 6 - 3. SACK-Enhanced Version of the F-RTO Algorithm . . . . . . 8 - 4. Taking Actions after Detecting Spurious RTO . . . . . . . 10 - 5. SCTP Considerations . . . . . . . . . . . . . . . . . . . 10 - 6. Security Considerations . . . . . . . . . . . . . . . . . 11 - 7. Acknowledgements . . . . . . . . . . . . . . . . . . . . 12 - 8. References . . . . . . . . . . . . . . . . . . . . . . . 12 - 8.1. Normative References. . . . . . . . . . . . . . . . 12 - 8.2. Informative References. . . . . . . . . . . . . . . 13 - Appendix A: Scenarios . . . . . . . . . . . . . . . . . . . . 15 - Appendix B: SACK-Enhanced F-RTO and Fast Recovery . . . . . . 20 - Appendix C: Discussion of Window-Limited Cases . . . . . . . 21 - -1. Introduction - - The Transmission Control Protocol (TCP) [Pos81] has two methods for - triggering retransmissions. First, the TCP sender relies on incoming - duplicate ACKs, which indicate that the receiver is missing some of - the data. After a required number of successive duplicate ACKs have - arrived at the sender, it retransmits the first unacknowledged - segment [APS99] and continues with a loss recovery algorithm such as - NewReno [FHG04] or SACK-based loss recovery [BAFW03]. Second, the - TCP sender maintains a retransmission timer which triggers - retransmission of segments, if they have not been acknowledged before - the retransmission timeout (RTO) expires. When the retransmission - timeout occurs, the TCP sender enters the RTO recovery where the - congestion window is initialized to one segment and unacknowledged - segments are retransmitted using the slow-start algorithm. The - retransmission timer is adjusted dynamically, based on the measured - round-trip times [PA00]. 
- - It has been pointed out that the retransmission timer can expire - spuriously and cause unnecessary retransmissions when no segments - have been lost [LK00, GL02, LM03]. After a spurious retransmission - timeout, the late acknowledgments of the original segments arrive at - the sender, usually triggering unnecessary retransmissions of a whole - window of segments during the RTO recovery. Furthermore, after a - spurious retransmission timeout, a conventional TCP sender increases - the congestion window on each late acknowledgment in slow start. - This injects a large number of data segments into the network within - one round-trip time, thus violating the packet conservation principle - [Jac88]. - - - -Sarolahti & Kojo Experimental [Page 2] - -RFC 4138 Forward RTO-Recovery August 2005 - - - There are a number of potential reasons for spurious retransmission - timeouts. First, some mobile networking technologies involve sudden - delay spikes on transmission because of actions taken during a - hand-off. Second, given a low-bandwidth link or some other change in - available bandwidth, arrival of competing traffic (possibly with - higher priority) can cause a sudden increase of round-trip time. - This may trigger a spurious retransmission timeout. A persistently - reliable link layer can also cause a sudden delay when a data frame - and several retransmissions of it are lost for some reason. This - document does not distinguish between the different causes of such a - delay spike. Rather, it discusses the spurious retransmission - timeouts caused by a delay spike in general. - - This document describes the F-RTO detection algorithm. It is based - on the detection mechanism of the "Forward RTO-Recovery" (F-RTO) - algorithm [SKR03] that is used for detecting spurious retransmission - timeouts and thus avoids unnecessary retransmissions following the - retransmission timeout. 
When the timeout is not spurious, the F-RTO - algorithm reverts back to the conventional RTO recovery algorithm, - and therefore has similar behavior and performance. In contrast to - alternative algorithms proposed for detecting unnecessary - retransmissions (Eifel [LK00], [LM03] and DSACK-based algorithms - [BA04]), F-RTO does not require any TCP options for its operation, - and it can be implemented by modifying only the TCP sender. The - Eifel algorithm uses TCP timestamps [BBJ92] for detecting a spurious - timeout upon arrival of the first acknowledgment after the - retransmission. The DSACK-based algorithms require that the TCP - Selective Acknowledgment Option [MMFR96], with the DSACK extension - [FMMP00], is in use. With DSACK, the TCP receiver can report if it - has received a duplicate segment, enabling the sender to detect - afterwards whether it has retransmitted segments unnecessarily. The - F-RTO algorithm only attempts to detect and avoid unnecessary - retransmissions after an RTO. Eifel and DSACK can also be used for - detecting unnecessary retransmissions caused by other events, such as - packet reordering. - - When an RTO expires, the F-RTO sender retransmits the first - unacknowledged segment as usual [APS99]. Deviating from the normal - operation after a timeout, it then tries to transmit new, previously - unsent data, for the first acknowledgment that arrives after the - timeout, given that the acknowledgment advances the window. If the - second acknowledgment that arrives after the timeout advances the - window (i.e., acknowledges data that was not retransmitted), the F- - RTO sender declares the timeout spurious and exits the RTO recovery. - However, if either of these two acknowledgments is a duplicate ACK, - there will not be sufficient evidence of a spurious timeout. - Therefore, the F-RTO sender retransmits the unacknowledged segments - in slow start similarly to the traditional algorithm. 
With a - - - -Sarolahti & Kojo Experimental [Page 3] - -RFC 4138 Forward RTO-Recovery August 2005 - - - SACK-enhanced version of the F-RTO algorithm, spurious timeouts may - be detected even if duplicate ACKs arrive after an RTO - retransmission. - - The F-RTO algorithm can also be applied to the Stream Control - Transmission Protocol (SCTP) [Ste00], because SCTP has acknowledgment - and packet retransmission concepts similar to TCP. For convenience, - this document mostly refers to TCP, but the algorithms and other - discussion are valid for SCTP as well. - - This document is organized as follows. Section 2 describes the basic - F-RTO algorithm. Section 3 outlines an optional enhancement to the - F-RTO algorithm that takes advantage of the TCP SACK option. Section - 4 discusses the possible actions to be taken after detecting a - spurious RTO. Section 5 gives considerations on applying F-RTO with - SCTP, and Section 6 discusses the security considerations. - -1.1. Terminology - - The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, - SHOULD NOT, RECOMMENDED, MAY, and OPTIONAL, when they appear in this - document, are to be interpreted as described in [RFC2119]. - -2. F-RTO Algorithm - - A timeout is considered spurious if it would have been avoided had - the sender waited longer for an acknowledgment to arrive [LM03]. - F-RTO affects the TCP sender behavior only after a retransmission - timeout. Otherwise, the TCP behavior remains the same. When the RTO - expires, the F-RTO algorithm monitors incoming acknowledgments and if - the TCP sender gets an acknowledgment for a segment that was not - retransmitted due to timeout, the F-RTO algorithm declares a timeout - spurious. The actions taken in response to a spurious timeout are - not specified in this document, but we discuss some alternatives in - Section 4. This section introduces the algorithm and then discusses - the different steps of the algorithm in more detail. 
- - Following the practice used with the Eifel Detection algorithm - [LM03], we use the "SpuriousRecovery" variable to indicate whether - the retransmission is declared spurious by the sender. This variable - can be used as an input for a corresponding response algorithm. With - F-RTO, the value of SpuriousRecovery can be either SPUR_TO - (indicating a spurious retransmission timeout) or FALSE (indicating - that the timeout is not declared spurious), and the TCP sender should - follow the conventional RTO recovery algorithm. - - - - - - -Sarolahti & Kojo Experimental [Page 4] - -RFC 4138 Forward RTO-Recovery August 2005 - - -2.1. The Algorithm - - A TCP sender MAY implement the basic F-RTO algorithm. If it chooses - to apply the algorithm, the following steps MUST be taken after the - retransmission timer expires. If the sender implements some loss - recovery algorithm other than Reno or NewReno [FHG04], the F-RTO - algorithm SHOULD NOT be entered when earlier fast recovery is - underway. - - 1) When RTO expires, retransmit the first unacknowledged segment and - set SpuriousRecovery to FALSE. Also, store the highest sequence - number transmitted so far in variable "recover". - - 2) When the first acknowledgment after the RTO retransmission arrives - at the sender, the sender chooses one of the following actions, - depending on whether the ACK advances the window or whether it is - a duplicate ACK. - - a) If the acknowledgment is a duplicate ACK OR it acknowledges a - sequence number equal to the value of "recover" OR it does not - acknowledge all of the data that was retransmitted in step 1, - revert to the conventional RTO recovery and continue by - retransmitting unacknowledged data in slow start. Do not enter - step 3 of this algorithm. The SpuriousRecovery variable - remains as FALSE. 
- - b) Else, if the acknowledgment advances the window AND it is below - the value of "recover", transmit up to two new (previously - unsent) segments and enter step 3 of this algorithm. If the - TCP sender does not have enough unsent data, it can send only - one segment. In addition, the TCP sender MAY override the - Nagle algorithm [Nag84] and immediately send a segment if - needed. Note that sending two segments in this step is allowed - by TCP congestion control requirements [APS99]: An F-RTO TCP - sender simply chooses different segments to transmit. - - If the TCP sender does not have any new data to send, or the - advertised window prohibits new transmissions, the recommended - action is to skip step 3 of this algorithm and continue with - slow start retransmissions, following the conventional RTO - recovery algorithm. However, alternative ways of handling the - window-limited cases that could result in better performance - are discussed in Appendix C. - - 3) When the second acknowledgment after the RTO retransmission - arrives at the sender, the TCP sender either declares the timeout - spurious, or starts retransmitting the unacknowledged segments. - - - - -Sarolahti & Kojo Experimental [Page 5] - -RFC 4138 Forward RTO-Recovery August 2005 - - - a) If the acknowledgment is a duplicate ACK, set the congestion - window to no more than 3 * MSS, and continue with the slow - start algorithm retransmitting unacknowledged segments. The - congestion window can be set to 3 * MSS, because two round-trip - times have elapsed since the RTO, and a conventional TCP sender - would have increased cwnd to 3 during the same time. Leave - SpuriousRecovery set to FALSE. - - b) If the acknowledgment advances the window (i.e., if it - acknowledges data that was not retransmitted after the - timeout), declare the timeout spurious, set SpuriousRecovery to - SPUR_TO, and set the value of the "recover" variable to SND.UNA - (the oldest unacknowledged sequence number [Pos81]). 
- -2.2. Discussion - - The F-RTO sender takes cautious actions when it receives duplicate - acknowledgments after a retransmission timeout. Because duplicate - ACKs may indicate that segments have been lost, reliably detecting a - spurious timeout is difficult due to the lack of additional - information. Therefore, it is prudent to follow the conventional TCP - recovery in those cases. - - If the first acknowledgment after the RTO retransmission covers the - "recover" point at algorithm step (2a), there is not enough evidence - that a non-retransmitted segment has arrived at the receiver after - the timeout. This is a common case when a fast retransmission is - lost and has been retransmitted again after an RTO, while the rest of - the unacknowledged segments were successfully delivered to the TCP - receiver before the retransmission timeout. Therefore, the timeout - cannot be declared spurious in this case. - - If the first acknowledgment after the RTO retransmission does not - acknowledge all of the data that was retransmitted in step 1, the TCP - sender reverts to the conventional RTO recovery. Otherwise, a - malicious receiver acknowledging partial segments could cause the - sender to declare the timeout spurious in a case where data was lost. - - The TCP sender is allowed to send two new segments in algorithm - branch (2b) because the conventional TCP sender would transmit two - segments when the first new ACK arrives after the RTO retransmission. - If sending new data is not possible in algorithm branch (2b), or if - the receiver window limits the transmission, the TCP sender has to - send something in order to prevent the TCP transfer from stalling. - If no segments were sent, the pipe between sender and receiver might - run out of segments, and no further acknowledgments would arrive. 
- Therefore, in the window-limited case, the recommendation is to - - - - -Sarolahti & Kojo Experimental [Page 6] - -RFC 4138 Forward RTO-Recovery August 2005 - - - revert to the conventional RTO recovery with slow start - retransmissions. Appendix C discusses some alternative solutions for - window-limited situations. - - If the retransmission timeout is declared spurious, the TCP sender - sets the value of the "recover" variable to SND.UNA in order to allow - fast retransmit [FHG04]. The "recover" variable was proposed for - avoiding unnecessary, multiple fast retransmits when RTO expires - during fast recovery with NewReno TCP. Because the sender - retransmits only the segment that triggered the timeout, the problem - of unnecessary multiple fast retransmits [FHG04] cannot occur. - Therefore, if three duplicate ACKs arrive at the sender after the - timeout, they probably indicate a packet loss, and thus fast - retransmit should be used to allow efficient recovery. If there are - not enough duplicate ACKs arriving at the sender after a packet loss, - the retransmission timer expires again and the sender enters step 1 - of this algorithm. - - When the timeout is declared spurious, the TCP sender cannot detect - whether the unnecessary RTO retransmission was lost. In principle, - the loss of the RTO retransmission should be taken as a congestion - signal. Thus, there is a small possibility that the F-RTO sender - will violate the congestion control rules, if it chooses to fully - revert congestion control parameters after detecting a spurious - timeout. The Eifel detection algorithm has a similar property, while - the DSACK option can be used to detect whether the retransmitted - segment was successfully delivered to the receiver. - - The F-RTO algorithm has a side-effect on the TCP round-trip time - measurement. 
Because the TCP sender can avoid most of the - unnecessary retransmissions after detecting a spurious timeout, the - sender is able to take round-trip time samples on the delayed - segments. If the regular RTO recovery was used without TCP - timestamps, this would not be possible due to the retransmission - ambiguity. As a result, the RTO is likely to have more accurate and - larger values with F-RTO than with the regular TCP after a spurious - timeout that was triggered due to delayed segments. We believe this - is an advantage in the networks that are prone to delay spikes. - - There are some situations where the F-RTO algorithm may not avoid - unnecessary retransmissions after a spurious timeout. If packet - reordering or packet duplication occurs on the segment that triggered - the spurious timeout, the F-RTO algorithm may not detect the spurious - timeout due to incoming duplicate ACKs. Additionally, if a spurious - timeout occurs during fast recovery, the F-RTO algorithm often cannot - detect the spurious timeout because the segments that were - transmitted before the fast recovery trigger duplicate ACKs. - However, we consider these cases rare, and note that in cases where - - - -Sarolahti & Kojo Experimental [Page 7] - -RFC 4138 Forward RTO-Recovery August 2005 - - - F-RTO fails to detect the spurious timeout, it retransmits the - unacknowledged segments in slow start, and thus performs similarly to - the regular RTO recovery. - -3. SACK-Enhanced Version of the F-RTO Algorithm - - This section describes an alternative version of the F-RTO algorithm - that uses the TCP Selective Acknowledgment Option [MMFR96]. By using - the SACK option, the TCP sender detects spurious timeouts in most of - the cases when packet reordering or packet duplication is present. - If the SACK blocks acknowledge new data that was not transmitted - after the RTO retransmission, the sender may declare the timeout - spurious, even when duplicate ACKs follow the RTO. 
- - Given that the TCP Selective Acknowledgment Option [MMFR96] is - enabled for a TCP connection, a TCP sender MAY implement the - SACK-enhanced F-RTO algorithm. If the sender applies the - SACK-enhanced F-RTO algorithm, it MUST follow the steps below. This - algorithm SHOULD NOT be applied if the TCP sender is already in SACK - loss recovery when retransmission timeout occurs. However, when - retransmission timeout occurs during existing loss recovery, it - should be possible to apply the principle of F-RTO within certain - limitations. This is a topic for further research. Appendix B - briefly discusses the related issues. - - The steps of the SACK-enhanced version of the F-RTO algorithm are as - follows. - - 1) When the RTO expires, retransmit the first unacknowledged segment - and set SpuriousRecovery to FALSE. Set variable "recover" to - indicate the highest segment transmitted so far. Following the - recommendation in SACK specification [MMFR96], reset the SACK - scoreboard. - - 2) Wait until the acknowledgment of the data retransmitted due to the - timeout arrives at the sender. If duplicate ACKs arrive before - the cumulative acknowledgment for retransmitted data, adjust the - scoreboard according to the incoming SACK information. Stay in - step 2 and wait for the next new acknowledgment. If RTO expires - again, go to step 1 of the algorithm. - - a) if a cumulative ACK acknowledges a sequence number equal to - "recover", revert to the conventional RTO recovery and set the - congestion window to no more than 2 * MSS, like a regular TCP - would do. Do not enter step 3 of this algorithm. - - - - - - -Sarolahti & Kojo Experimental [Page 8] - -RFC 4138 Forward RTO-Recovery August 2005 - - - b) else, if a cumulative ACK acknowledges a sequence number - (smaller than "recover", but larger than SND.UNA) transmit up - to two new (previously unsent) segments and proceed to step 3. 
- If the TCP sender is not able to transmit any previously unsent - data -- either due to receiver window limitation, or because it - does not have any new data to send -- the recommended action is - to refrain from entering step 3 of this algorithm. Rather, - continue with slow start retransmissions following the - conventional RTO recovery algorithm. - - It is also possible to apply some of the alternatives for - handling window-limited cases discussed in Appendix C. In this - case, the TCP sender should follow the recommendations - concerning acknowledgments of retransmitted segments given in - Appendix B. - - 3) The next acknowledgment arrives at the sender. Either a duplicate - ACK or a new cumulative ACK (advancing the window) applies in this - step. - - a) if the ACK acknowledges a sequence number above "recover", - either in SACK blocks or as a cumulative ACK, set the - congestion window to no more than 3 * MSS and proceed with the - conventional RTO recovery, retransmitting unacknowledged - segments. Take this branch also when the acknowledgment is a - duplicate ACK and it does not acknowledge any new, previously - unacknowledged data below "recover" in the SACK blocks. Leave - SpuriousRecovery set to FALSE. - - b) if the ACK does not acknowledge sequence numbers above - "recover" AND it acknowledges data that was not acknowledged - earlier (either with cumulative acknowledgment or using SACK - blocks), declare the timeout spurious and set SpuriousRecovery - to SPUR_TO. The retransmission timeout can be declared - spurious, because the segment acknowledged with this ACK was - transmitted before the timeout. - - If there are unacknowledged holes between the received SACK blocks, - those segments are retransmitted similarly to the conventional SACK - recovery algorithm [BAFW03]. If the algorithm exits with - SpuriousRecovery set to SPUR_TO, "recover" is set to SND.UNA, thus - allowing fast recovery on incoming duplicate acknowledgments. 
- - - - - - - - - -Sarolahti & Kojo Experimental [Page 9] - -RFC 4138 Forward RTO-Recovery August 2005 - - -4. Taking Actions after Detecting Spurious RTO - - Upon retransmission timeout, a conventional TCP sender assumes that - outstanding segments are lost and starts retransmitting the - unacknowledged segments. When the retransmission timeout is detected - to be spurious, the TCP sender should not continue retransmitting - based on the timeout. For example, if the sender was in congestion - avoidance phase transmitting new, previously unsent segments, it - should continue transmitting previously unsent segments after - detecting a spurious RTO. This document does not describe the - response to spurious timeouts, but a response algorithm is described - in RFC 4015 [LG04]. - - Additionally, different response variants to spurious retransmission - timeout have been discussed in various research papers [SKR03, GL03, - Sar03] and IETF documents [SL03]. The different response - alternatives vary in whether the spurious retransmission timeout - should be taken as a congestion signal, thus causing the congestion - window or slow start threshold to be reduced at the sender, or - whether the congestion control state should be fully reverted to the - state valid prior to the retransmission timeout. - -5. SCTP Considerations - - SCTP has similar retransmission algorithms and congestion control to - TCP. The SCTP T3-rtx timer for one destination address is maintained - in the same way as the TCP retransmission timer, and after a T3-rtx - expires, an SCTP sender retransmits unacknowledged data chunks in - slow start like TCP does. Therefore, SCTP is vulnerable to the - negative effects of the spurious retransmission timeouts similarly to - TCP. Due to similar RTO recovery algorithms, F-RTO algorithm logic - can be applied also to SCTP. 
Since SCTP uses selective - acknowledgments, the SACK-based variant of the algorithm is - recommended, although the basic version can also be applied to SCTP. - However, SCTP contains features that are not present with TCP that - need to be discussed when applying the F-RTO algorithm. - - SCTP associations can be multi-homed. The current retransmission - policy states that retransmissions should go to alternative - addresses. If the retransmission was due to spurious timeout caused - by a delay spike, it is possible that the acknowledgment for the - retransmission arrives back at the sender before the acknowledgments - of the original transmissions arrive. If this happens, a possible - loss of the original transmission of the data chunk that was - retransmitted due to the spurious timeout may remain undetected when - applying the F-RTO algorithm. Because the timeout was caused by a - delay spike, and it was spurious in that respect, a suitable response - is to continue by sending new data. However, if the original - - - -Sarolahti & Kojo Experimental [Page 10] - -RFC 4138 Forward RTO-Recovery August 2005 - - - transmission was lost, fully reverting the congestion control - parameters is too aggressive. Therefore, taking conservative actions - on congestion control is recommended, if the SCTP association is - multi-homed and retransmissions go to alternative addresses. The - information in duplicate TSNs can be then used for reverting - congestion control, if desired [BA04]. - - Note that the forward transmissions made in F-RTO algorithm step (2b) - should be destined to the primary address, since they are not - retransmissions. - - When making a retransmission, an SCTP sender can bundle a number of - unacknowledged data chunks and include them in the same packet. This - needs to be considered when implementing F-RTO for SCTP. 
The basic - principle of F-RTO still holds: in order to declare the timeout - spurious, the sender must get an acknowledgment for a data chunk that - was not retransmitted after the retransmission timeout. In other - words, acknowledgments of data chunks that were bundled in RTO - retransmission must not be used for declaring the timeout spurious. - -6. Security Considerations - - The main security threat regarding F-RTO is the possibility that a - receiver could mislead the sender into setting too large a congestion - window after an RTO. There are two possible ways a malicious - receiver could trigger a wrong output from the F-RTO algorithm. - First, the receiver can acknowledge data that it has not received. - Second, it can delay acknowledgment of a segment it has received - earlier, and acknowledge the segment after the TCP sender has been - deluded to enter algorithm step 3. - - If the receiver acknowledges a segment it has not really received, - the sender can be led to declare spurious timeout in the F-RTO - algorithm, step 3. However, because the sender will have an - incorrect state, it cannot retransmit the segment that has never - reached the receiver. Therefore, this attack is unlikely to be - useful for the receiver to maliciously gain a larger congestion - window. - - A common case for a retransmission timeout is that a fast - retransmission of a segment is lost. If all other segments have been - received, the RTO retransmission causes the whole window to be - acknowledged at once. This case is recognized in F-RTO algorithm - branch (2a). However, if the receiver only acknowledges one segment - after receiving the RTO retransmission, and then the rest of the - segments, it could cause the timeout to be declared spurious when it - is not. 
Therefore, it is suggested that, when an RTO expires during - - - - -Sarolahti & Kojo Experimental [Page 11] - -RFC 4138 Forward RTO-Recovery August 2005 - - - fast recovery phase, the sender would not fully revert the congestion - window even if the timeout was declared spurious. Instead, the - sender would reduce the congestion window to 1. - - If there is more than one segment missing at the time of a - retransmission timeout, the receiver does not benefit from misleading - the sender to declare a spurious timeout because the sender would - have to go through another recovery period to retransmit the missing - segments, usually after an RTO has elapsed. - -7. Acknowledgements - - We are grateful to Reiner Ludwig, Andrei Gurtov, Josh Blanton, Mark - Allman, Sally Floyd, Yogesh Swami, Mika Liljeberg, Ivan Arias - Rodriguez, Sourabh Ladha, Martin Duke, Motoharu Miyake, Ted Faber, - Samu Kontinen, and Kostas Pentikousis for the discussion and feedback - contributed to this text. - -8. References - -8.1. Normative References - - [APS99] Allman, M., Paxson, V., and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [BAFW03] Blanton, E., Allman, M., Fall, K., and L. Wang, "A - Conservative Selective Acknowledgment (SACK)-based Loss - Recovery Algorithm for TCP", RFC 3517, April 2003. - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [FHG04] Floyd, S., Henderson, T., and A. Gurtov, "The NewReno - Modification to TCP's Fast Recovery Algorithm", RFC 3782, - April 2004. - - [MMFR96] Mathis, M., Mahdavi, J., Floyd, S., and A. Romanow, "TCP - Selective Acknowledgement Options", RFC 2018, October 1996. - - [PA00] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - - [Pos81] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. 
- - - - - - - -Sarolahti & Kojo Experimental [Page 12] - -RFC 4138 Forward RTO-Recovery August 2005 - - - [Ste00] Stewart, R., Xie, Q., Morneault, K., Sharp, C., - Schwarzbauer, H., Taylor, T., Rytina, I., Kalla, M., Zhang, - L., and V. Paxson, "Stream Control Transmission Protocol", - RFC 2960, October 2000. - -8.2. Informative References - - [ABF01] Allman, M., Balakrishnan, H., and S. Floyd, "Enhancing - TCP's Loss Recovery Using Limited Transmit", RFC 3042, - January 2001. - - [BA04] Blanton, E. and M. Allman, "Using TCP Duplicate Selective - Acknowledgement (DSACKs) and Stream Control Transmission - Protocol (SCTP) Duplicate Transmission Sequence Numbers - (TSNs) to Detect Spurious Retransmissions", RFC 3708, - February 2004. - - [BBJ92] Jacobson, V., Braden, R., and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. - - [FMMP00] Floyd, S., Mahdavi, J., Mathis, M., and M. Podolsky, "An - Extension to the Selective Acknowledgement (SACK) Option - for TCP", RFC 2883, July 2000. - - [GL02] A. Gurtov and R. Ludwig. Evaluating the Eifel Algorithm - for TCP in a GPRS Network. In Proc. of European Wireless, - Florence, Italy, February 2002. - - [GL03] A. Gurtov and R. Ludwig, Responding to Spurious Timeouts in - TCP. In Proceedings of IEEE INFOCOM 03, San Francisco, CA, - USA, March 2003. - - [Jac88] V. Jacobson. Congestion Avoidance and Control. In - Proceedings of ACM SIGCOMM 88. - - [LG04] Ludwig, R. and A. Gurtov, "The Eifel Response Algorithm for - TCP", RFC 4015, February 2005. - - [LK00] R. Ludwig and R.H. Katz. The Eifel Algorithm: Making TCP - Robust Against Spurious Retransmissions. ACM SIGCOMM - Computer Communication Review, 30(1), January 2000. - - [LM03] Ludwig, R. and M. Meyer, "The Eifel Detection Algorithm for - TCP", RFC 3522, April 2003. - - [Nag84] Nagle, J., "Congestion Control in IP/TCP Internetworks", - RFC 896, January 1984. 
- - - - -Sarolahti & Kojo Experimental [Page 13] - -RFC 4138 Forward RTO-Recovery August 2005 - - - [SKR03] P. Sarolahti, M. Kojo, and K. Raatikainen. F-RTO: An - Enhanced Recovery Algorithm for TCP Retransmission - Timeouts. ACM SIGCOMM Computer Communication Review, - 33(2), April 2003. - - [Sar03] P. Sarolahti. Congestion Control on Spurious TCP - Retransmission Timeouts. In Proceedings of IEEE Globecom - 2003, San Francisco, CA, USA. December 2003. - - [SL03] Y. Swami and K. Le, "DCLOR: De-correlated Loss Recovery - using SACK Option for Spurious Timeouts", work in progress, - September 2003. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Sarolahti & Kojo Experimental [Page 14] - -RFC 4138 Forward RTO-Recovery August 2005 - - -Appendix A: Scenarios - - This section discusses different scenarios where RTOs occur and how - the basic F-RTO algorithm performs in those scenarios. The - interesting scenarios are: a sudden delay triggering retransmission - timeout, loss of a retransmitted packet during fast recovery, link - outage causing the loss of several packets, and packet reordering. A - performance evaluation with a more thorough analysis on a real - implementation of F-RTO is given in [SKR03]. - -A.1. Sudden Delay - - The main motivation behind the F-RTO algorithm is to improve TCP - performance when a delay spike triggers a spurious retransmission - timeout. The example below illustrates the segments and - acknowledgments transmitted by the TCP end hosts when a spurious - timeout occurs, but no packets are lost. For simplicity, delayed - acknowledgments are not used in the example. The example below - applies the Eifel Response Algorithm [LG04] after detecting a - spurious timeout. - - ... - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 1. <---------------------------- ACK 5 - 2. SEND 10 ----------------------------> - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 3. <---------------------------- ACK 6 - 4. 
SEND 11 ----------------------------> - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 5. | - [delay] - | - [RTO] - [F-RTO step (1)] - 6. SEND 6 ----------------------------> - (cwnd = 6, ssthresh = 3, FlightSize = 6) - <earlier xmitted SEG 6> ---> - 7. <---------------------------- ACK 7 - [F-RTO step (2b)] - 8. SEND 12 ----------------------------> - 9. SEND 13 ----------------------------> - (cwnd = 7, ssthresh = 3, FlightSize = 7) - <earlier xmitted SEG 7> ---> - 10. <---------------------------- ACK 8 - [F-RTO step (3b)] - [SpuriousRecovery <- SPUR_TO] - (cwnd = 7, ssthresh = 6, FlightSize = 6) - - - - -Sarolahti & Kojo Experimental [Page 15] - -RFC 4138 Forward RTO-Recovery August 2005 - - - 11. SEND 14 ----------------------------> - (cwnd = 7, ssthresh = 6, FlightSize = 7) - 12. <---------------------------- ACK 9 - 13. SEND 15 ----------------------------> - (cwnd = 7, ssthresh = 6, FlightSize = 7) - 14. <---------------------------- ACK 10 - 15. SEND 16 ----------------------------> - (cwnd = 7, ssthresh = 6, FlightSize = 7) - ... - - When a sudden delay (long enough to trigger timeout) occurs at step - 5, the TCP sender retransmits the first unacknowledged segment (step - 6). The next ACK covers the RTO retransmission because the - originally transmitted segment 6 arrived at the receiver, and the TCP - sender continues by sending two new data segments (steps 8, 9). Note - that on F-RTO steps (1) and (2b), congestion window and FlightSize - are not yet reset because in the case of spurious timeout, the - segments sent before the timeout are still in the network. However, - the sender should still be equally aggressive toward conventional - TCP. Because the second acknowledgment arriving after the RTO - retransmission acknowledges data that was not retransmitted due to - timeout (step 10), the TCP sender declares the timeout to be spurious - and continues by sending new data on the next acknowledgments. Also, - the congestion control state is reversed, as required by the Eifel - Response Algorithm. - -A.2.
Loss of a Retransmission - - If a retransmitted segment is lost, the only way to retransmit it is - to wait for the timeout to trigger the retransmission. Once the - segment is successfully received, the receiver usually acknowledges - several segments at once, because other segments in the same window - have been successfully delivered before the retransmission arrives at - the receiver. The example below shows a scenario where - retransmission (of segment 6) is lost, as well as a later segment - (segment 9) in the same window. The limited transmit [ABF01] or SACK - TCP [MMFR96] enhancements are not in use in this example. - - ... - (cwnd = 6, ssthresh < 6, FlightSize = 6) - <segment 6 lost> - <segment 9 lost> - 1. <---------------------------- ACK 5 - 2. SEND 10 ----------------------------> - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 3. <---------------------------- ACK 6 - 4. SEND 11 ----------------------------> - (cwnd = 6, ssthresh < 6, FlightSize = 6) - - - -Sarolahti & Kojo Experimental [Page 16] - -RFC 4138 Forward RTO-Recovery August 2005 - - - 5. <---------------------------- ACK 6 - 6. <---------------------------- ACK 6 - 7. <---------------------------- ACK 6 - 8. SEND 6 --------------X - (cwnd = 6, ssthresh = 3, FlightSize = 6) - - 9. <---------------------------- ACK 6 - 10. SEND 12 ----------------------------> - (cwnd = 7, ssthresh = 3, FlightSize = 7) - 11. <---------------------------- ACK 6 - 12. SEND 13 ----------------------------> - (cwnd = 8, ssthresh = 3, FlightSize = 8) - [RTO] - 13. SEND 6 ----------------------------> - (cwnd = 8, ssthresh = 2, FlightSize = 8) - 14. <---------------------------- ACK 9 - [F-RTO step (2b)] - 15. SEND 14 ----------------------------> - 16. SEND 15 ----------------------------> - (cwnd = 7, ssthresh = 2, FlightSize = 7) - 17. <---------------------------- ACK 9 - [F-RTO step (3a)] - [SpuriousRecovery <- FALSE] - (cwnd = 3, ssthresh = 2, FlightSize = 7) - 18. SEND 9 ----------------------------> - 19.
SEND 10 ----------------------------> - 20. SEND 11 ----------------------------> - ... - - In the example above, segment 6 is lost and the sender retransmits it - after three duplicate ACKs in step 8. However, the retransmission is - also lost, and the sender has to wait for the RTO to expire before - retransmitting it again. Because the first ACK following the RTO - retransmission acknowledges the RTO retransmission (step 14), the - sender transmits two new segments. The second ACK in step 17 does - not acknowledge any previously unacknowledged data. Therefore, the - F-RTO sender enters the slow start and sets cwnd to 3 * MSS. The - congestion window can be set to three segments, because two round- - trips have elapsed after the retransmission timeout. Finally, the - receiver acknowledges all segments transmitted prior to entering - recovery and the sender can continue transmitting new data in - congestion avoidance. - - - - - - - - - -Sarolahti & Kojo Experimental [Page 17] - -RFC 4138 Forward RTO-Recovery August 2005 - - -A.3. Link Outage - - The example below illustrates the F-RTO behavior when 4 consecutive - packets are lost in the network causing the TCP sender to fall back - to RTO recovery. Limited transmit and SACK are not used in this - example. - - ... - (cwnd = 6, ssthresh < 6, FlightSize = 6) - - 1. <---------------------------- ACK 5 - 2. SEND 10 ----------------------------> - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 3. <---------------------------- ACK 6 - 4. SEND 11 ----------------------------> - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 5. <---------------------------- ACK 6 - | - | - [RTO] - 6. SEND 6 ----------------------------> - (cwnd = 6, ssthresh = 3, FlightSize = 6) - 7. <---------------------------- ACK 7 - [F-RTO step (2b)] - 8. SEND 12 ----------------------------> - 9. SEND 13 ----------------------------> - (cwnd = 7, ssthresh = 3, FlightSize = 7) - 10. 
<---------------------------- ACK 7 - [F-RTO step (3a)] - [SpuriousRecovery <- FALSE] - (cwnd = 3, ssthresh = 3, FlightSize = 7) - 11. SEND 7 ----------------------------> - 12. SEND 8 ----------------------------> - 13. SEND 9 ----------------------------> - - Again, F-RTO sender transmits two new segments (steps 8 and 9) after - the RTO retransmission is acknowledged. Because the next ACK does - not acknowledge any data that was not retransmitted after the - retransmission timeout (step 10), the F-RTO sender proceeds with - conventional recovery and slow start retransmissions. - -A.4. Packet Reordering - - Because F-RTO modifies the TCP sender behavior only after a - retransmission timeout and it is intended to avoid unnecessary - retransmissions only after spurious timeout, we limit the discussion - on the effects of packet reordering on F-RTO behavior to the cases - where it occurs immediately after the retransmission timeout. When - - - -Sarolahti & Kojo Experimental [Page 18] - -RFC 4138 Forward RTO-Recovery August 2005 - - - the TCP receiver gets an out-of-order segment, it generates a - duplicate ACK. If the TCP sender implements the basic F-RTO - algorithm, this may prevent the sender from detecting a spurious - timeout. - - However, if the TCP sender applies the SACK-enhanced F-RTO, it is - possible to detect a spurious timeout when packet reordering occurs. - Below, we illustrate the behavior of SACK-enhanced F-RTO when segment - 8 arrives before segments 6 and 7, and segments starting from segment - 6 are delayed in the network. In this example the TCP sender reduces - the congestion window and slow start threshold in response to - spurious timeout. - - ... - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 1. <---------------------------- ACK 5 - 2. SEND 10 ----------------------------> - (cwnd = 6, ssthresh < 6, FlightSize = 6) - 3. <---------------------------- ACK 6 - 4. SEND 11 ----------------------------> - 5. | - [delay] - | - [RTO] - 6. 
SEND 6 ----------------------------> - (cwnd = 6, ssthresh = 3, FlightSize = 6) - ---> - 7. <---------------------------- ACK 6 - [SACK 8] - [SACK F-RTO stays in step 2] - 8. ---> - 9. <---------------------------- ACK 7 - [SACK 8] - [SACK F-RTO step (2b)] - 10. SEND 12 ----------------------------> - 11. SEND 13 ----------------------------> - (cwnd = 7, ssthresh = 3, FlightSize = 7) - 12. ---> - 13. <---------------------------- ACK 9 - [SACK F-RTO step (3b)] - [SpuriousRecovery <- SPUR_TO] - (cwnd = 7, ssthresh = 6, FlightSize = 6) - 14. SEND 14 ----------------------------> - (cwnd = 7, ssthresh = 6, FlightSize = 7) - 15. <---------------------------- ACK 10 - 16. SEND 15 ----------------------------> - ... - - - - -Sarolahti & Kojo Experimental [Page 19] - -RFC 4138 Forward RTO-Recovery August 2005 - - - After RTO expires and the sender retransmits segment 6 (step 6), the - receiver gets segment 8 and generates duplicate ACK with SACK for - segment 8. In response to the acknowledgment, the TCP sender does - not send anything but stays in F-RTO step 2. Because the next - acknowledgment advances the cumulative ACK point (step 9), the sender - can transmit two new segments according to SACK-enhanced F-RTO. The - next segment acknowledges new data between 7 and 11 that was not - acknowledged earlier (segment 7), so the F-RTO sender declares the - timeout spurious. - -Appendix B: SACK-enhanced F-RTO and Fast Recovery - - We believe that a slightly modified, SACK-enhanced F-RTO algorithm - can be used to detect spurious timeouts also when RTO expires while - an earlier loss recovery is underway. However, there are issues that - need to be considered if F-RTO is applied in this case. - - In step 3, the original SACK-based F-RTO algorithm requires that an - ACK acknowledges previously unacknowledged non-retransmitted data - between SND.UNA and send_high. 
If RTO expires during earlier - (SACK-based) loss recovery, the F-RTO sender must use only - acknowledgments for non-retransmitted segments transmitted before the - SACK-based loss recovery started. This means that in order to - declare timeout spurious, the TCP sender must receive an - acknowledgment for non-retransmitted segment between SND.UNA and - RecoveryPoint in algorithm step 3. RecoveryPoint is defined in - conservative SACK-recovery algorithm [BAFW03], and it is set to - indicate the highest segment transmitted so far when SACK-based loss - recovery begins. In other words, if the TCP sender receives - acknowledgment for a segment that was transmitted more than one RTO - ago, it can declare the timeout spurious. Defining an efficient - algorithm for checking these conditions remains a future work item. - - When spurious timeout is detected according to the rules given above, - it may be possible that the response algorithm needs to consider this - case separately, for example, in terms of which segments to - retransmit after RTO expires, and whether it is safe to revert the - congestion control parameters. This is considered a topic for future - research. - - - - - - - - - - - - -Sarolahti & Kojo Experimental [Page 20] - -RFC 4138 Forward RTO-Recovery August 2005 - - -Appendix C: Discussion of Window-Limited Cases - - When the advertised window limits the transmission of two new - previously unsent segments, or there are no new data to send, it is - recommended in F-RTO algorithm step (2b) that the TCP sender continue - with the conventional RTO recovery algorithm. The disadvantage is - that the sender may continue unnecessary retransmissions due to - possible spurious timeout. This section briefly discusses the - options that can potentially improve performance when transmitting - previously unsent data is not possible. 
- - - The TCP sender could reserve an unused space of a size of one or - two segments in the advertised window to ensure the use of - algorithms such as F-RTO or Limited Transmit [ABF01] in window- - limited situations. On the other hand, while doing this, the TCP - sender should ensure that the window of outstanding segments is - large enough for proper utilization of the available pipe. - - - Use additional information if available, e.g., TCP timestamps with - the Eifel Detection algorithm, for detecting a spurious timeout. - However, Eifel detection may yield different results from F-RTO - when ACK losses and an RTO occur within the same round-trip time - [SKR03]. - - - Retransmit data from the tail of the retransmission queue and - continue with step 3 of the F-RTO algorithm. It is possible that - the retransmission will be made unnecessarily. Thus, this option - is not encouraged, except for hosts that are known to operate in an - environment that is prone to spurious timeouts. On the other hand, - with this method it is possible to limit unnecessary - retransmissions due to spurious timeout to one retransmission. - - - Send a zero-sized segment below SND.UNA, similar to TCP Keep-Alive - probe, and continue with step 3 of the F-RTO algorithm. Because - the receiver replies with a duplicate ACK, the sender is able to - detect whether the timeout was spurious from the incoming - acknowledgment. This method does not send data unnecessarily, but - it delays the recovery by one round-trip time in cases where the - timeout was not spurious. Therefore, this method is not - encouraged. - - - In receiver-limited cases, send one octet of new data, regardless - of the advertised window limit, and continue with step 3 of the - F-RTO algorithm. It is possible that the receiver will have free - buffer space to receive the data by the time the segment has - propagated through the network, in which case no harm is done. 
If - the receiver is not capable of receiving the segment, it rejects - the segment and sends a duplicate ACK. - - - -Sarolahti & Kojo Experimental [Page 21] - -RFC 4138 Forward RTO-Recovery August 2005 - - -Authors' Addresses - - Pasi Sarolahti - Nokia Research Center - P.O. Box 407 - FIN-00045 NOKIA GROUP - Finland - - Phone: +358 50 4876607 - EMail: pasi.sarolahti@nokia.com - http://www.cs.helsinki.fi/u/sarolaht/ - - - Markku Kojo - University of Helsinki - Department of Computer Science - P.O. Box 68 - FIN-00014 UNIVERSITY OF HELSINKI - Finland - - Phone: +358 9 191 51305 - EMail: kojo@cs.helsinki.fi - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Sarolahti & Kojo Experimental [Page 22] - -RFC 4138 Forward RTO-Recovery August 2005 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2005). - - This document is subject to the rights, licenses and restrictions - contained in BCP 78, and except as set forth therein, the authors - retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS - OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, - INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE - INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; nor does it represent that it has - made any independent effort to identify any such rights. 
Information - on the procedures with respect to rights in RFC documents can be - found in BCP 78 and BCP 79. - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use of - such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository at - http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights that may cover technology that may be required to implement - this standard. Please address the information to the IETF at ietf- - ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is currently provided by the - Internet Society. - - - - - - - -Sarolahti & Kojo Experimental [Page 23] - diff --git a/kernel/picotcp/RFC/rfc4278.txt b/kernel/picotcp/RFC/rfc4278.txt deleted file mode 100644 index 377314d..0000000 --- a/kernel/picotcp/RFC/rfc4278.txt +++ /dev/null @@ -1,395 +0,0 @@ - - - - - - -Network Working Group S. Bellovin -Request for Comments: 4278 AT&T Labs Research -Category: Informational A. Zinin - Alcatel - January 2006 - - - Standards Maturity Variance Regarding the TCP MD5 Signature Option - (RFC 2385) and the BGP-4 Specification - -Status of This Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2006). - -Abstract - - The IETF Standards Process requires that all normative references for - a document be at the same or higher level of standardization. RFC - 2026 section 9.1 allows the IESG to grant a variance to the standard - practices of the IETF. 
This document explains why the IESG is - considering doing so for the revised version of the BGP-4 - specification, which refers normatively to RFC 2385, "Protection of - BGP Sessions via the TCP MD5 Signature Option". RFC 2385 will remain - at the Proposed Standard level. - -1. Introduction - - The IETF Standards Process [RFC2026] requires that all normative - references for a document be at the same or higher level of - standardization. RFC 2026 section 9.1 allows the IESG to grant a - variance to the standard practices of the IETF. Pursuant to that, it - is considering publishing the updated BGP-4 specification [RFC4271] - as Draft Standard, despite the normative reference to [RFC2385], - "Protection of BGP Sessions via the TCP MD5 Signature Option". RFC - 2385 will remain a Proposed Standard. (Note that although the title - of [RFC2385] includes the word "signature", the technology described - in it is commonly known as a Message Authentication Code or MAC, and - should not be confused with digital signature technologies.) - - [RFC2385], which is widely implemented, is the only transmission - security mechanism defined for BGP-4. Other possible mechanisms, - such as IPsec [RFC2401] and TLS [RFC2246], are rarely, if ever, used - - - -Bellovin & Zinin Informational [Page 1] - -RFC 4278 Standards Maturity Variance: RFC 2385 and BGP-4 January 2006 - - - for this purpose. Given the long-standing requirement for security - features in protocols, it is not possible to advance BGP-4 without a - mandated security mechanism. - - The conflict of maturity levels between specifications would normally - be resolved by advancing the specification being referred to along - the standards track, to the level of maturity that the referring - specification needs to achieve. 
However, in the particular case - considered here, the IESG believes that [RFC2385], though adequate - for BGP deployments at this moment, is not strong enough for general - use, and thus should not be progressed along the standards track. In - this situation, the IESG believes that variance procedure should be - used to allow the updated BGP-4 specification to be published as - Draft Standard. - - The following sections of the document give detailed explanations of - the statements above. - -2. Draft Standard Requirements - - The requirements for Proposed Standards and Draft Standards are given - in [RFC2026]. For Proposed Standards, [RFC2026] warns that: - - Implementors should treat Proposed Standards as immature - specifications. It is desirable to implement them in order to - gain experience and to validate, test, and clarify the - specification. However, since the content of Proposed Standards - may be changed if problems are found or better solutions are - identified, deploying implementations of such standards into a - disruption-sensitive environment is not recommended. - - In other words, it is considered reasonable for flaws to be - discovered in Proposed Standards. - - The requirements for Draft Standards are higher: - - A Draft Standard must be well-understood and known to be quite - stable, both in its semantics and as a basis for developing an - implementation. - - In other words, any document that has known deficiencies should not - be promoted to Draft Standard. - - - - - - - - - -Bellovin & Zinin Informational [Page 2] - -RFC 4278 Standards Maturity Variance: RFC 2385 and BGP-4 January 2006 - - -3. The TCP MD5 Signature Option - - [RFC2385], despite its 1998 publication date, describes a Message - Authentication Code (MAC) that is considerably older. It utilizes a - technique known as a "keyed hash function", using MD5 [RFC1321] as - the hash function. 
When the original code was developed, this was - believed to be a reasonable technique, especially if the key was - appended (rather than prepended) to the data being protected. But - cryptographic hash functions were never intended for use as MACs, and - later cryptanalytic results showed that the construct was not as - strong as originally believed [PV1, PV2]. Worse yet, the underlying - hash function, MD5, has shown signs of weakness [Dobbertin, Wang]. - Accordingly, the IETF community has adopted Hashed Message - Authentication Code (HMAC) [RFC2104], a scheme with provable security - properties, as its standard MAC. - - Beyond that, [RFC2385] does not include any sort of key management - technique. Common practice is to use a password as a shared secret - between pairs of sites, but this is not a good idea [RFC3562]. - - Other problems are documented in [RFC2385] itself, including the lack - of a type code or version number, and the inability of systems using - this scheme to accept certain TCP resets. - - Despite the widespread deployment of [RFC2385] in BGP deployments, - the IESG has thus concluded that it is not appropriate for use in - other contexts. [RFC2385] is not suitable for advancement to Draft - Standard. - -4. Usage Patterns for RFC 2385 - - Given the above analysis, it is reasonable to ask why [RFC2385] is - still used for BGP. The answer lies in the deployment patterns - peculiar to BGP. - - BGP connections inherently tend to travel over short paths. Indeed, - most external BGP links are one hop. Although internal BGP sessions - are usually multi-hop, the links involved are generally inhabited - only by routers rather than general-purpose computers; general- - purpose computers are easier for attackers to use as TCP hijacking - tools [Joncheray]. - - Also, BGP peering associations tend to be long-lived and static. By - contrast, many other security situations are more dynamic. - - This is not to say that such attacks cannot happen. 
(If they - couldn't happen at all, there would be no point to any security - measures.) Attackers could divert links at layers 1 or 2, or they - - - -Bellovin & Zinin Informational [Page 3] - -RFC 4278 Standards Maturity Variance: RFC 2385 and BGP-4 January 2006 - - - could (in some situations) use Address Resolution Protocol (ARP) - spoofing at Ethernet-based exchange points. Still, on balance, BGP - is employed in an environment that is less susceptible to this sort - of attack. - - There is another class of attack against which BGP is extremely - vulnerable: false route advertisements from more than one autonomous - system (AS) hop away. However, neither [RFC2385] nor any other - transmission security mechanism can block such attacks. Rather, a - scheme such as S-BGP [Kent] would be needed. - -5. LDP - - The Label Distribution Protocol (LDP) [RFC3036] also uses [RFC2385]. - Deployment practices for LDP are very similar to those of BGP: LDP - connections are usually confined within a single autonomous system - and most frequently span a single link between two routers. This - makes the LDP threat environment very similar to BGP's. Given this, - and a considerable installed base of LDP in service provider - networks, we are not deprecating [RFC2385] for use with LDP. - -6. Security Considerations - - The IESG believes that the variance described here will not adversely - affect the security of the Internet. - -7. Conclusions - - Given the above analysis, the IESG is persuaded that waiving the - prerequisite requirement is the appropriate thing to do. [RFC2385] - is clearly not suitable for Draft Standard. Other existing - mechanisms, such as IPsec, would do its job better. 
However, given - the current operational practices in service provider networks at the - moment -- and in particular the common use of long-lived standard - keys, [RFC3562] notwithstanding -- the marginal benefit of such - schemes in this situation would be low, and not worth the transition - effort. We would prefer to wait for a security mechanism tailored to - the major threat environment for BGP. - -8. Informative References - - [Dobbertin] H. Dobbertin, "The Status of MD5 After a Recent Attack", - RSA Labs' CryptoBytes, Vol. 2 No. 2, Summer 1996. - - [Joncheray] Joncheray, L. "A Simple Active Attack Against TCP." - Proceedings of the Fifth Usenix Unix Security Symposium, - 1995. - - - - -Bellovin & Zinin Informational [Page 4] - -RFC 4278 Standards Maturity Variance: RFC 2385 and BGP-4 January 2006 - - - [Kent] Kent, S., C. Lynn, and K. Seo. "Secure Border Gateway - Protocol (Secure-BGP)." IEEE Journal on Selected Areas - in Communications, vol. 18, no. 4, April, 2000, pp. - 582-592. - - [RFC3562] Leech, M., "Key Management Considerations for the TCP - MD5 Signature Option", RFC 3562, July 2003. - - [PV1] B. Preneel and P. van Oorschot, "MD-x MAC and building - fast MACs from hash functions," Advances in Cryptology - --- Crypto 95 Proceedings, Lecture Notes in Computer - Science Vol. 963, D. Coppersmith, ed., Springer-Verlag, - 1995. - - [PV2] B. Preneel and P. van Oorschot, "On the security of two - MAC algorithms," Advances in Cryptology --- Eurocrypt 96 - Proceedings, Lecture Notes in Computer Science, U. - Maurer, ed., Springer-Verlag, 1996. - - [RFC1321] Rivest, R., "The MD5 Message-Digest Algorithm ", RFC - 1321, April 1992. - - [RFC2026] Bradner, S., "The Internet Standards Process -- Revision - 3", BCP 9, RFC 2026, October 1996. - - [RFC2104] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: - Keyed-Hashing for Message Authentication", RFC 2104, - February 1997. - - [RFC2246] Dierks, T. and C. 
Allen, "The TLS Protocol Version 1.0", - RFC 2246, January 1999. - - [RFC2385] Heffernan, A., "Protection of BGP Sessions via the TCP - MD5 Signature Option", RFC 2385, August 1998. - - [RFC2401] Kent, S. and R. Atkinson, "Security Architecture for the - Internet Protocol", RFC 2401, November 1998. - - [RFC3036] Andersson, L., Doolan, P., Feldman, N., Fredette, A., - and B. Thomas, "LDP Specification", RFC 3036, January - 2001. - - [RFC4271] Rekhter, Y., Li, T., and S. Hares, Eds., "A Border - Gateway Protocol 4 (BGP-4)", RFC 4271, January 2006. - - [Wang] Wang, X. and H. Yu, "How to Break MD5 and Other Hash - Functions." Proceedings of Eurocrypt '05, 2005. - - - - -Bellovin & Zinin Informational [Page 5] - -RFC 4278 Standards Maturity Variance: RFC 2385 and BGP-4 January 2006 - - -Authors' Addresses - - Steven M. Bellovin - Department of Computer Science - Columbia University - 1214 Amsterdam Avenue, M.C. 0401 - New York, NY 10027-7003 - - Phone: +1 212-939-7149 - EMail: bellovin@acm.org - - - Alex Zinin - Alcatel - 701 E Middlefield Rd - Mountain View, CA 94043 - - EMail: zinin@psg.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bellovin & Zinin Informational [Page 6] - -RFC 4278 Standards Maturity Variance: RFC 2385 and BGP-4 January 2006 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2006). - - This document is subject to the rights, licenses and restrictions - contained in BCP 78, and except as set forth therein, the authors - retain all their rights. 
- - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS - OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, - INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE - INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; nor does it represent that it has - made any independent effort to identify any such rights. Information - on the procedures with respect to rights in RFC documents can be - found in BCP 78 and BCP 79. - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use of - such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository at - http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights that may cover technology that may be required to implement - this standard. Please address the information to the IETF at - ietf-ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is provided by the IETF - Administrative Support Activity (IASA). 
- - - - - - - -Bellovin & Zinin Informational [Page 7] - diff --git a/kernel/picotcp/RFC/rfc4614.txt b/kernel/picotcp/RFC/rfc4614.txt deleted file mode 100644 index 80d08af..0000000 --- a/kernel/picotcp/RFC/rfc4614.txt +++ /dev/null @@ -1,1851 +0,0 @@ - - - - - - -Network Working Group M. Duke -Request for Comments: 4614 Boeing Phantom Works -Category: Informational R. Braden - USC Information Sciences Institute - W. Eddy - Verizon Federal Network Systems - E. Blanton - Purdue University Computer Science - September 2006 - - - A Roadmap for Transmission Control Protocol (TCP) - Specification Documents - -Status of This Memo - - This memo provides information for the Internet community. It does - not specify an Internet standard of any kind. Distribution of this - memo is unlimited. - -Copyright Notice - - Copyright (C) The Internet Society (2006). - -Abstract - - This document contains a "roadmap" to the Requests for Comments (RFC) - documents relating to the Internet's Transmission Control Protocol - (TCP). This roadmap provides a brief summary of the documents - defining TCP and various TCP extensions that have accumulated in the - RFC series. This serves as a guide and quick reference for both TCP - implementers and other parties who desire information contained in - the TCP-related RFCs. - - - - - - - - - - - - - - - - - - -Duke, et al. Informational [Page 1] - -RFC 4614 TCP Roadmap September 2006 - - -Table of Contents - - 1. Introduction ....................................................2 - 2. Basic Functionality .............................................4 - 3. Recommended Enhancements ........................................6 - 3.1. Congestion Control and Loss Recovery Extensions ............7 - 3.2. SACK-Based Loss Recovery and Congestion Control ............8 - 3.3. Dealing with Forged Segments ...............................9 - 4. Experimental Extensions ........................................10 - 5. 
Historic Extensions ............................................13 - 6. Support Documents ..............................................14 - 6.1. Foundational Works ........................................15 - 6.2. Difficult Network Environments ............................16 - 6.3. Implementation Advice .....................................19 - 6.4. Management Information Bases ..............................20 - 6.5. Tools and Tutorials .......................................22 - 6.6. Case Studies ..............................................22 - 7. Undocumented TCP Features ......................................23 - 8. Security Considerations ........................................24 - 9. Acknowledgments ................................................24 - 10. Informative References ........................................25 - 10.1. Basic Functionality ......................................25 - 10.2. Recommended Enhancements .................................25 - 10.3. Experimental Extensions ..................................26 - 10.4. Historic Extensions ......................................27 - 10.5. Support Documents ........................................28 - 10.6. Informative References Outside the RFC Series ............31 - -1. Introduction - - A correct and efficient implementation of the Transmission Control - Protocol (TCP) is a critical part of the software of most Internet - hosts. As TCP has evolved over the years, many distinct documents - have become part of the accepted standard for TCP. At the same time, - a large number of more experimental modifications to TCP have also - been published in the RFC series, along with informational notes, - case studies, and other advice. - - As an introduction to newcomers and an attempt to organize the - plethora of information for old hands, this document contains a - "roadmap" to the TCP-related RFCs. It provides a brief summary of - the RFC documents that define TCP. 
This should provide guidance to - implementers on the relevance and significance of the standards-track - extensions, informational notes, and best current practices that - relate to TCP. - - - - - - -Duke, et al. Informational [Page 2] - -RFC 4614 TCP Roadmap September 2006 - - - This document is not an update of RFC 1122 and is not a rigorous - standard for what needs to be implemented in TCP. This document is - merely an informational roadmap that captures, organizes, and - summarizes most of the RFC documents that a TCP implementer, - experimenter, or student should be aware of. Particular comments or - broad categorizations that this document makes about individual - mechanisms and behaviors are not to be taken as definitive, nor - should the content of this document alone influence implementation - decisions. - - This roadmap includes a brief description of the contents of each - TCP-related RFC. In some cases, we simply supply the abstract or a - key summary sentence from the text as a terse description. In - addition, a letter code after an RFC number indicates its category in - the RFC series (see BCP 9 [RFC2026] for explanation of these - categories): - - S - Standards Track (Proposed Standard, Draft Standard, or - Standard) - - E - Experimental - - B - Best Current Practice - - I - Informational - - Note that the category of an RFC does not necessarily reflect its - current relevance. For instance, RFC 2581 is nearly universally - deployed although it is only a Proposed Standard. Similarly, some - Informational RFCs contain significant technical proposals for - changing TCP. - - This roadmap is divided into four main sections. Section 2 lists the - RFCs that describe absolutely required TCP behaviors for proper - functioning and interoperability. Further RFCs that describe - strongly encouraged, but non-essential, behaviors are listed in - Section 3. 
Experimental extensions that are not yet standard - practices, but that potentially could be in the future, are described - in Section 4. - - The reader will probably notice that these three sections are broadly - equivalent to MUST/SHOULD/MAY specifications (per RFC 2119), and - although the authors support this intuition, this document is merely - descriptive; it does not represent a binding standards-track - position. Individual implementers still need to examine the - standards documents themselves to evaluate specific requirement - levels. - - - - -Duke, et al. Informational [Page 3] - -RFC 4614 TCP Roadmap September 2006 - - - A small number of older experimental extensions that have not been - widely implemented, deployed, and used are noted in Section 5. Many - other supporting documents that are relevant to the development, - implementation, and deployment of TCP are described in Section 6. - Within each section, RFCs are listed in the chronological order of - their publication dates. - - A small number of fairly ubiquitous important implementation - practices that are not currently documented in the RFC series are - listed in Section 7. - -2. Basic Functionality - - A small number of documents compose the core specification of TCP. - These define the required basic functionalities of TCP's header - parsing, state machine, congestion control, and retransmission - timeout computation. These base specifications must be correctly - followed for interoperability. - - RFC 793 S: "Transmission Control Protocol", STD 7 (September 1981) - - This is the fundamental TCP specification document [RFC0793]. - Written by Jon Postel as part of the Internet protocol suite's - core, it describes the TCP packet format, the TCP state machine - and event processing, and TCP's semantics for data transmission, - reliability, flow control, multiplexing, and acknowledgment. 
- - Section 3.6 of RFC 793, describing TCP's handling of the IP - precedence and security compartment, is mostly irrelevant today. - RFC 2873 changed the IP precedence handling, and the security - compartment portion of the API is no longer implemented or used. - In addition, RFC 793 did not describe any congestion control - mechanism. Otherwise, however, the majority of this document - still accurately describes modern TCPs. RFC 793 is the last of a - series of developmental TCP specifications, starting in the - Internet Experimental Notes (IENs) and continuing in the RFC - series. - - RFC 1122 S: "Requirements for Internet Hosts - Communication Layers" - (October 1989) - - This document [RFC1122] updates and clarifies RFC 793, fixing some - specification bugs and oversights. It also explains some features - such as keep-alives and Karn's and Jacobson's RTO estimation - algorithms [KP87][Jac88][JK92]. ICMP interactions are mentioned, - and some tips are given for efficient implementation. RFC 1122 is - an Applicability Statement, listing the various features that - MUST, SHOULD, MAY, SHOULD NOT, and MUST NOT be present in - - - -Duke, et al. Informational [Page 4] - -RFC 4614 TCP Roadmap September 2006 - - - standards-conforming TCP implementations. Unlike a purely - informational "roadmap", this Applicability Statement is a - standards document and gives formal rules for implementation. - - RFC 2460 S: "Internet Protocol, Version 6 (IPv6) Specification" - (December 1998) - - This document [RFC2460] is of relevance to TCP because it defines - how the pseudo-header for TCP's checksum computation is derived - when 128-bit IPv6 addresses are used instead of 32-bit IPv4 - addresses. Additionally, RFC 2675 describes TCP changes required - to support IPv6 jumbograms. - - RFC 2581 S: "TCP Congestion Control" (April 1999) - - Although RFC 793 did not contain any congestion control - mechanisms, today congestion control is a required component of - TCP implementations.
This document [RFC2581] defines the current - versions of Van Jacobson's congestion avoidance and control - mechanisms for TCP, based on his 1988 SIGCOMM paper [Jac88]. RFC - 2001 was a conceptual precursor that was obsoleted by RFC 2581. - - A number of behaviors that together constitute what the community - refers to as "Reno TCP" are described in RFC 2581. The name - "Reno" comes from the Net/2 release of the 4.3 BSD operating - system. This is generally regarded as the least common - denominator among TCP flavors currently found running on Internet - hosts. Reno TCP includes the congestion control features of slow - start, congestion avoidance, fast retransmit, and fast recovery. - - RFC 1122 mandates the implementation of a congestion control - mechanism, and RFC 2581 details the currently accepted mechanism. - RFC 2581 differs slightly from the other documents listed in this - section, as it does not affect the ability of two TCP endpoints to - communicate; however, congestion control remains a critical - component of any widely deployed TCP implementation and is - required for the avoidance of congestion collapse and to ensure - fairness among competing flows. - - RFC 2873 S: "TCP Processing of the IPv4 Precedence Field" (June 2000) - - This document [RFC2873] removes from the TCP specification all - processing of the precedence bits of the TOS byte of the IP - header. This resolves a conflict over the use of these bits - between RFC 793 and Differentiated Services [RFC2474]. - - - - - - -Duke, et al. Informational [Page 5] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 2988 S: "Computing TCP's Retransmission Timer" (November 2000) - - Abstract: "This document defines the standard algorithm that - Transmission Control Protocol (TCP) senders are required to use to - compute and manage their retransmission timer. It expands on the - discussion in section 4.2.3.1 of RFC 1122 and upgrades the - requirement of supporting the algorithm from a SHOULD to a MUST." 
- [RFC2988] - -3. Recommended Enhancements - - This section describes recommended TCP modifications that improve - performance and security. RFCs 1323 and 3168 represent fundamental - changes to the protocol. RFC 1323, based on RFCs 1072 and 1185, - allows better utilization of high bandwidth-delay product paths by - providing some needed mechanisms for high-rate transfers. RFC 3168 - describes a change to the Internet's architecture, whereby routers - signal end-hosts of growing congestion levels and can do so before - packet losses are forced. Section 3.1 lists improvements in the - congestion control and loss recovery mechanisms specified in RFC - 2581. Section 3.2 describes further refinements that make use of - selective acknowledgments. Section 3.3 deals with the problem of - preventing forged segments. - - RFC 1323 S: "TCP Extensions for High Performance" (May 1992) - - This document [RFC1323] defines TCP extensions for window scaling, - timestamps, and protection against wrapped sequence numbers, for - efficient and safe operation over paths with large bandwidth-delay - products. These extensions are commonly found in currently used - systems; however, they may require manual tuning and - configuration. One issue in this specification that is still - under discussion concerns a modification to the algorithm for - estimating the mean RTT when timestamps are used. - - RFC 2675 S: "IPv6 Jumbograms" (August 1999) - - IPv6 supports longer datagrams than were allowed in IPv4. These - are known as Jumbograms, and use with TCP has necessitated changes - to the handling of TCP's MSS and Urgent fields (both 16 bits). - This document [RFC2675] explains those changes. Although it - describes changes to basic header semantics, these changes should - only affect the use of very large segments, such as IPv6 - jumbograms, which are currently rarely used in the general - Internet. 
Supporting the behavior described in this document does - not affect interoperability with other TCP implementations when - IPv4 or non-jumbogram IPv6 is used. This document states that - jumbograms are to only be used when it can be guaranteed that all - - - -Duke, et al. Informational [Page 6] - -RFC 4614 TCP Roadmap September 2006 - - - receiving nodes, including each router in the end-to-end path, - will support jumbograms. If even a single node that does not - support jumbograms is attached to a local network, then no host on - that network may use jumbograms. This explains why jumbogram use - has been rare, and why this document is considered a performance - optimization and not part of TCP over IPv6's basic functionality. - - RFC 3168 S: "The Addition of Explicit Congestion Notification (ECN) - to IP" (September 2001) - - This document [RFC3168] defines a means for end hosts to detect - congestion before congested routers are forced to discard packets. - Although congestion notification takes place at the IP level, ECN - requires support at the transport level (e.g., in TCP) to echo the - bits and adapt the sending rate. This document updates RFC 793 to - define two previously unused flag bits in the TCP header for ECN - support. RFC 3540 provides a supplementary (experimental) means - for more secure use of ECN, and RFC 2884 provides some sample - results from using ECN. - -3.1. Congestion Control and Loss Recovery Extensions - - Two of the most important aspects of TCP are its congestion control - and loss recovery features. TCP traditionally treats lost packets as - indicating congestion-related loss, and cannot distinguish between - congestion-related loss and loss due to transmission errors. Even - when ECN is in use, there is a rather intimate coupling between - congestion control and loss recovery mechanisms. There are several - extensions to both features, and more often than not, a particular - extension applies to both. 
In this sub-section, we group - enhancements to either congestion control, loss recovery, or both, - which can be performed unilaterally; that is, without negotiating - support between endpoints. In the next sub-section, we group the - extensions that specify or rely on the SACK option, which must be - negotiated bilaterally. TCP implementations should include the - enhancements from both sub-sections so that TCP senders can perform - well without regard to the feature sets of other hosts they connect - to. For example, if SACK use is not successfully negotiated, a host - should use the NewReno behavior as a fall back. - - - - - - - - - - - - -Duke, et al. Informational [Page 7] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 3042 S: "Enhancing TCP's Loss Recovery Using Limited Transmit" - (January 2001) - - Abstract: "This document proposes Limited Transmit, a new - Transmission Control Protocol (TCP) mechanism that can be used to - more effectively recover lost segments when a connection's - congestion window is small, or when a large number of segments are - lost in a single transmission window." [RFC3042] Tests from 2004 - showed that Limited Transmit was deployed in roughly one third of - the web servers tested [MAF04]. - - RFC 3390 S: "Increasing TCP's Initial Window" (October 2002) - - This document [RFC3390] updates RFC 2581 to permit an initial TCP - window of three or four segments during the slow-start phase, - depending on the segment size. - - RFC 3782 S: "The NewReno Modification to TCP's Fast Recovery - Algorithm" (April 2004) - - This document [RFC3782] specifies a modification to the standard - Reno fast recovery algorithm, whereby a TCP sender can use partial - acknowledgments to make inferences determining the next segment to - send in situations where SACK would be helpful but isn't - available. 
Although it is only a slight modification, the NewReno - behavior can make a significant difference in performance when - multiple segments are lost from a single window of data. - -3.2. SACK-Based Loss Recovery and Congestion Control - - The base TCP specification in RFC 793 provided only a simple - cumulative acknowledgment mechanism. However, a selective - acknowledgment (SACK) mechanism provides performance improvement in - the presence of multiple packet losses from the same flight, more - than outweighing the modest increase in complexity. A TCP should be - expected to implement SACK; however, SACK is a negotiated option and - is only used if support is advertised by both sides of a connection. - - RFC 2018 S: "TCP Selective Acknowledgment Options" (October 1996) - - This document [RFC2018] defines the basic selective acknowledgment - (SACK) mechanism for TCP. - - RFC 2883 S: "An Extension to the Selective Acknowledgement (SACK) - Option for TCP" (July 2000) - - This document [RFC2883] extends RFC 2018 to cover the case of - acknowledging duplicate segments. - - - -Duke, et al. Informational [Page 8] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 3517 S: "A Conservative Selective Acknowledgment (SACK)-based - Loss Recovery Algorithm for TCP" (April 2003) - - This document [RFC3517] describes a relatively sophisticated - algorithm that a TCP sender can use for loss recovery when SACK - reports more than one segment lost from a single flight of data. - Although support for the exchange of SACK information is widely - implemented, not all implementations use an algorithm as - sophisticated as that described in RFC 3517. - -3.3. Dealing with Forged Segments - - By default, TCP lacks any cryptographic structures to differentiate - legitimate segments and those spoofed from malicious hosts. Spoofing - valid segments requires correctly guessing a number of fields. 
The - documents in this sub-section describe ways to make that guessing - harder, or to prevent it from being able to affect a connection - negatively. - - The TCPM working group is currently in progress towards fully - understanding and defining mechanisms for preventing spoofing attacks - (including both spoofed TCP segments and ICMP datagrams). Some of - the solutions being considered rely on TCP modifications, whereas - others rely on security at lower layers (like IPsec) for protection. - - RFC 1948 I: "Defending Against Sequence Number Attacks" (May 1996) - - This document [RFC1948] describes the TCP vulnerability that - allows an attacker to send forged TCP packets, by guessing the - initial sequence number in the three-way handshake. Simple - defenses against exploitation are then described. Some variation - is implemented in most currently used operating systems. - - RFC 2385 S: "Protection of BGP Sessions via the TCP MD5 Signature - Option" (August 1998) - - From document: "This document describes current existing practice - for securing BGP against certain simple attacks. It is understood - to have security weaknesses against concerted attacks. - - This memo describes a TCP extension to enhance security for BGP. - It defines a new TCP option for carrying an MD5 digest in a TCP - segment. This digest acts like a signature for that segment, - incorporating information known only to the connection end points. - Since BGP uses TCP as its transport, using this option in the way - described in this paper significantly reduces the danger from - certain security attacks on BGP." [RFC2385] - - - - -Duke, et al. Informational [Page 9] - -RFC 4614 TCP Roadmap September 2006 - - - TCP MD5 options are currently only used in very limited contexts, - primarily for defending BGP exchanges between routers. Some - deployment notes for those using TCP MD5 are found in the later - RFC 3562, "Key Management Considerations for the TCP MD5 Signature - Option" [RFC3562]. 
RFC 4278 deprecates the use of TCP MD5 outside - BGP [RFC4278]. - -4. Experimental Extensions - - The RFCs in this section are still experimental, but they may become - proposed standards in the future. At least part of the reason that - they are still experimental is to gain more wide-scale experience - with them before a standards track decision is made. By their - publication as experimental RFCs, it is hoped that the community of - TCP researchers will analyze and test the contents of these RFCs. - Although experimentation is encouraged, there is not yet formal - consensus that these are fully logical and safe behaviors. Wide- - scale deployment of implementations that use these features should be - well thought-out in terms of consequences. - - RFC 2140 I: "TCP Control Block Interdependence" (April 1997) - - This document [RFC2140] suggests how TCP connections between the - same endpoints might share information, such as their congestion - control state. To some degree, this is done in practice by a few - operating systems; for example, Linux currently has a destination - cache. Although this RFC is technically informational, the - concepts it describes are in experimental use, so we include it in - this section. - - A related proposal, the Congestion Manager, is specified in RFC - 3124 [RFC3124]. The idea behind the Congestion Manager, moving - congestion control outside of individual TCP connections, - represents a modification to the core of TCP, which supports - sharing information among TCP connections as well. Although a - Proposed Standard, some pieces of the Congestion Manager support - architecture have not been specified yet, and it has not achieved - use or implementation beyond experimental stacks, so it is not - listed among the standard TCP enhancements in this roadmap. - - RFC 2861 E: "TCP Congestion Window Validation" (June 2000) - - This document [RFC2861] suggests reducing the congestion window - over time when no packets are flowing. 
This behavior is more - aggressive than that specified in RFC 2581, which says that a TCP - sender SHOULD set its congestion window to the initial window - after an idle period of an RTO or greater. - - - - -Duke, et al. Informational [Page 10] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 3465 E: "TCP Congestion Control with Appropriate Byte Counting - (ABC)" (February 2003) - - This document [RFC3465] suggests that congestion control use the - number of bytes acknowledged instead of the number of - acknowledgments received. This has been implemented in Linux. - The ABC mechanism behaves differently from the standard method - when there is not a one-to-one relationship between data segments - and acknowledgments. ABC still operates within the accepted - guidelines, but is more robust to delayed ACKs and ACK-division - [SCWA99][RFC3449]. - - RFC 3522 E: "The Eifel Detection Algorithm for TCP" (April 2003) - - The Eifel detection algorithm [RFC3522] allows a TCP sender to - detect a posteriori whether it has entered loss recovery - unnecessarily. - - RFC 3540 E: "Robust Explicit Congestion Notification (ECN) signaling - with Nonces" (June 2003) - - This document [RFC3540] suggests a modified ECN to address - security concerns and updates RFC 3168. - - RFC 3649 E: "HighSpeed TCP for Large Congestion Windows" (December - 2003) - - This document [RFC3649] suggests a modification to TCP's steady- - state behavior to use very large windows efficiently. - - RFC 3708 E: "Using TCP Duplicate Selective Acknowledgement (DSACKs) - and Stream Control Transmission Protocol (SCTP) Duplicate - Transmission Sequence Numbers (TSNs) to Detect Spurious - Retransmissions" (February 2004) - - Abstract: "TCP and Stream Control Transmission Protocol (SCTP) - provide notification of duplicate segment receipt through - Duplicate Selective Acknowledgement (DSACKs) and Duplicate - Transmission Sequence Number (TSN) notification, respectively. 
- This document presents conservative methods of using this - information to identify unnecessary retransmissions for various - applications." [RFC3708] - - - - - - - - - -Duke, et al. Informational [Page 11] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 3742 E: "Limited Slow-Start for TCP with Large Congestion - Windows" (March 2004) - - This document [RFC3742] describes a more conservative slow-start - behavior to prevent massive packet losses when a connection uses a - very large window. - - RFC 4015 S: "The Eifel Response Algorithm for TCP" (February 2005) - - This document [RFC4015] describes the response portion of the - Eifel algorithm, which can be used in conjunction with one of - several methods of detecting when loss recovery has been - spuriously entered, such as the Eifel detection algorithm in RFC - 3522, the algorithm in RFC 3708, or F-RTO in RFC 4138. - - Abstract: "Based on an appropriate detection algorithm, the Eifel - response algorithm provides a way for a TCP sender to respond to a - detected spurious timeout. It adapts the retransmission timer to - avoid further spurious timeouts, and can avoid - depending on the - detection algorithm - the often unnecessary go-back-N retransmits - that would otherwise be sent. In addition, the Eifel response - algorithm restores the congestion control state in such a way that - packet bursts are avoided." - - RFC 4015 is itself a Proposed Standard. The consensus of the TCPM - working group was to place it in this section of the roadmap - document due to three factors. - - 1. RFC 4015 operates on the output of a detection algorithm, for - which there is currently no available mechanism on the - standards track. - - 2. The working group was not aware of any wide deployment and use - of RFC 4015. - - 3. 
The consensus of the working group, after a discussion of the - known Intellectual Property Rights claims on the techniques - described in RFC 4015, identified this section of the roadmap - as an appropriate location. - - RFC 4138 E: "Forward RTO-Recovery (F-RTO): An Algorithm for Detecting - Spurious Retransmission Timeouts with TCP and the Stream Control - Transmission Protocol" (August 2005) - - The F-RTO detection algorithm [RFC4138] provides another option - for inferring spurious retransmission timeouts. Unlike some - similar detection methods, F-RTO does not rely on the use of any - TCP options. - - - -Duke, et al. Informational [Page 12] - -RFC 4614 TCP Roadmap September 2006 - - -5. Historic Extensions - - The RFCs listed here define extensions that have thus far failed to - arouse substantial interest from implementers, or that were found to - be defective for general use. - - RFC 1106 "TCP Big Window and NAK Options" (June 1989): found - defective - - This RFC [RFC1106] defined an alternative to the Window Scale - option for using large windows and described the "negative - acknowledgement" or NAK option. There is a comparison of NAK and - SACK methods, and early discussion of TCP over satellite issues. - RFC 1110 explains some problems with the approaches described in - RFC 1106. The options described in this document have not been - adopted by the larger community, although NAKs are used in the - SCPS-TP adaptation of TCP for satellite and spacecraft use, - developed by the Consultative Committee for Space Data Systems - (CCSDS). - - RFC 1110 "A Problem with the TCP Big Window Option" (August 1989): - deprecates RFC 1106 - - Abstract: "The TCP Big Window option discussed in RFC 1106 will - not work properly in an Internet environment which has both a high - bandwidth * delay product and the possibility of disordering and - duplicating packets. 
In such networks, the window size must not - be increased without a similar increase in the sequence number - space. Therefore, a different approach to big windows should be - taken in the Internet." [RFC1110] - - RFC 1146 E "TCP Alternate Checksum Options" (March 1990): lack of - interest - - This document [RFC1146] defined more robust TCP checksums than the - 16-bit ones-complement in use today. A typographical error in RFC - 1145 is fixed in RFC 1146; otherwise, the documents are the same. - - RFC 1263 "TCP Extensions Considered Harmful" (October 1991) - lack of - interest - - This document [RFC1263] argues against "backwards compatible" TCP - extensions. Specifically mentioned are several TCP enhancements - that have been successful, including timestamps, window scaling, - PAWS, and SACK. RFC 1263 presents an alternative approach called - "protocol evolution", whereby several evolutionary versions of TCP - would exist on hosts. These distinct TCP versions would represent - upgrades to each other and could be header-incompatible. - - - -Duke, et al. Informational [Page 13] - -RFC 4614 TCP Roadmap September 2006 - - - Interoperability would be provided by having a virtualization - layer select the right TCP version for a particular connection. - This idea did not catch on with the community, although the type - of extensions RFC 1263 specifically targeted as harmful did become - popular. - - RFC 1379 I "Extending TCP for Transactions -- Concepts" (November - 1992): found defective - - See RFC 1644. - - RFC 1644 E "T/TCP -- TCP Extensions for Transactions Functional - Specification" (July 1994): found defective - - The inventors of TCP believed that cached connection state could - have been used to eliminate TCP's 3-way handshake, to support - two-packet request/response exchanges. RFCs 1379 [RFC1379] and - 1644 [RFC1644] show that this is far from simple. Furthermore, - T/TCP floundered on the ease of denial-of-service attacks that can - result. 
One idea pioneered by T/TCP lives on in RFC 2140, in the - sharing of state across connections. - - RFC 1693 E "An Extension to TCP: Partial Order Service" (November - 1994): lack of interest - - This document [RFC1693] defines a TCP extension for applications - that do not care about the order in which application-layer - objects are received. Examples are multimedia and database - applications. In practice, these applications either accept the - possible performance loss because of TCP's strict ordering or use - more specialized transport protocols. - -6. Support Documents - - This section contains several classes of documents that do not - necessarily define current protocol behaviors, but that are - nevertheless of interest to TCP implementers. Section 6.1 describes - several foundational RFCs that give modern readers a better - understanding of the principles underlying TCP's behaviors and - development over the years. The documents listed in Section 6.2 - provide advice on using TCP in various types of network situations - that pose challenges above those of typical wired links. Some - implementation notes can be found in Section 6.3. The TCP Management - Information Bases are described in Section 6.4. RFCs that describe - tools for testing and debugging TCP implementations or that contain - high-level tutorials on the protocol are listed in Section 6.5, and - Section 6.6 lists a number of case studies that have explored TCP - performance. - - - -Duke, et al. Informational [Page 14] - -RFC 4614 TCP Roadmap September 2006 - - -6.1. Foundational Works - - The documents listed in this section contain information that is - largely duplicated by the standards documents previously discussed. - However, some of them contain a greater depth of problem statement - explanation or other context.
Particularly, RFCs 813 - 817 (known as - the "Dave Clark Five") describe some early problems and solutions - (RFC 815 only describes the reassembly of IP fragments and is not - included in this TCP roadmap). - - RFC 813: "Window and Acknowledgement Strategy in TCP" (July 1982) - - This document [RFC0813] contains an early discussion of Silly - Window Syndrome and its avoidance and motivates and describes the - use of delayed acknowledgments. - - RFC 814: "Name, Addresses, Ports, and Routes" (July 1982) - - Suggestions and guidance for the design of tables and algorithms - to keep track of various identifiers within a TCP/IP - implementation are provided by this document [RFC0814]. - - RFC 816: "Fault Isolation and Recovery" (July 1982) - - In this document [RFC0816], TCP's response to indications of - network error conditions such as timeouts or received ICMP - messages is discussed. - - RFC 817: "Modularity and Efficiency in Protocol Implementation" (July - 1982) - - This document [RFC0817] contains implementation suggestions that - are general and not TCP specific. However, they have been used to - develop TCP implementations and to describe some performance - implications of the interactions between various layers in the - Internet stack. - - RFC 872: "TCP-ON-A-LAN" (September 1982) - - Conclusion: "The sometimes-expressed fear that using TCP on a - local net is a bad idea is unfounded." [RFC0872] - - RFC 896: "Congestion Control in IP/TCP Internetworks" (January 1984) - - This document [RFC0896] contains some early experiences with - congestion collapse and some initial thoughts on how to avoid it - using congestion control in TCP. - - - - -Duke, et al. 
Informational [Page 15] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 964: "Some Problems with the Specification of the Military - Standard Transmission Control Protocol" (November 1985) - - This document [RFC0964] points out several specification bugs in - the US Military's MIL-STD-1778 document, which was intended as a - successor to RFC 793. This serves to remind us of the difficulty - in specification writing (even when we work from existing - documents!). - - RFC 1072: "TCP Extensions for Long-Delay Paths" (October 1988) - - This document [RFC1072] contains early explanations of the - mechanisms that were later described by RFCs 1323 and 2018, which - obsolete it. - - RFC 1185: "TCP Extension for High-Speed Paths" (October 1990) - - This document [RFC1185] builds on RFC 1072 to describe more - advanced strategies for dealing with sequence number wrapping and - detecting duplicates from earlier connections. This document was - obsoleted by RFC 1323. - - RFC 2914 B: "Congestion Control Principles" (September 2000) - - This document [RFC2914] motivates the use of end-to-end congestion - control for preventing congestion collapse and providing fairness - to TCP. - -6.2. Difficult Network Environments - - As the internetworking field has explored wireless, satellite, - cellular telephone, and other kinds of link-layer technologies, a - large body of work has built up on enhancing TCP performance for such - links. The RFCs listed in this section describe some of these more - challenging network environments and how TCP interacts with them. - - RFC 2488 B: "Enhancing TCP Over Satellite Channels using Standard - Mechanisms" (January 1999) - - From abstract: "While TCP works over satellite channels there are - several IETF standardized mechanisms that enable TCP to more - effectively utilize the available capacity of the network path. - This document outlines some of these TCP mitigations. 
At this - time, all mitigations discussed in this document are IETF - standards track mechanisms (or are compliant with IETF - standards)." [RFC2488] - - - - - -Duke, et al. Informational [Page 16] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 2757 I: "Long Thin Networks" (January 2000) - - Several methods of improving TCP performance over long thin - networks, such as geosynchronous satellite links, are discussed in - this document [RFC2757]. A particular set of TCP options is - developed that should work well in such environments and be safe - to use in the global Internet. The implications of such - environments have been further discussed in RFC 3150 and RFC 3155, - and these documents should be preferred where there is overlap - between them and RFC 2757. - - RFC 2760 I: "Ongoing TCP Research Related to Satellites" (February - 2000) - - This document [RFC2760] discusses the advantages and disadvantages - of several different experimental means of improving TCP - performance over long-delay or error-prone paths. These include - T/TCP, larger initial windows, byte counting, delayed - acknowledgments, slow start thresholds, NewReno and SACK-based - loss recovery, FACK [MM96], ECN, various corruption-detection - mechanisms, congestion avoidance changes for fairness, use of - multiple parallel flows, pacing, header compression, state - sharing, and ACK congestion control, filtering, and - reconstruction. Although RFC 2488 looks at standard extensions, - this document focuses on more experimental means of performance - enhancement. - - RFC 3135 I: "Performance Enhancing Proxies Intended to Mitigate - Link-Related Degradations" (June 2001) - - From abstract: "This document is a survey of Performance Enhancing - Proxies (PEPs) often employed to improve degraded TCP performance - caused by characteristics of specific link environments, for - example, in satellite, wireless WAN, and wireless LAN - environments. 
Different types of Performance Enhancing Proxies - are described as well as the mechanisms used to improve - performance." [RFC3135] - - - - - - - - - - - - - - -Duke, et al. Informational [Page 17] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 3150 B: "End-to-end Performance Implications of Slow Links" (July - 2001) - - From abstract: "This document makes performance-related - recommendations for users of network paths that traverse "very low - bit-rate" links....This recommendation may be useful in any - network where hosts can saturate available bandwidth, but the - design space for this recommendation explicitly includes - connections that traverse 56 Kb/second modem links or 4.8 Kb/ - second wireless access links - both of which are widely deployed." - [RFC3150] - - RFC 3155 B: "End-to-end Performance Implications of Links with - Errors" (August 2001) - - From abstract: "This document discusses the specific TCP - mechanisms that are problematic in environments with high - uncorrected error rates, and discusses what can be done to - mitigate the problems without introducing intermediate devices - into the connection." [RFC3155] - - RFC 3366 "Advice to link designers on link Automatic Repeat reQuest - (ARQ)" (August 2002) - - From abstract: "This document provides advice to the designers of - digital communication equipment and link-layer protocols employing - link-layer Automatic Repeat reQuest (ARQ) techniques. This - document presumes that the designers wish to support Internet - protocols, but may be unfamiliar with the architecture of the - Internet and with the implications of their design choices for the - performance and efficiency of Internet traffic carried over their - links." [RFC3366] - - RFC 3449 B: "TCP Performance Implications of Network Path Asymmetry" - (December 2002) - - From abstract: "This document describes TCP performance problems - that arise because of asymmetric effects. 
These problems arise in - several access networks, including bandwidth-asymmetric networks - and packet radio subnetworks, for different underlying reasons. - However, the end result on TCP performance is the same in both - cases: performance often degrades significantly because of - imperfection and variability in the ACK feedback from the receiver - to the sender. - - The document details several mitigations to these effects, which - have either been proposed or evaluated in the literature, or are - currently deployed in networks." [RFC3449] - - - -Duke, et al. Informational [Page 18] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 3481 B: "TCP over Second (2.5G) and Third (3G) Generation - Wireless Networks" (February 2003) - - From abstract: "This document describes a profile for optimizing - TCP to adapt so that it handles paths including second (2.5G) and - third (3G) generation wireless networks." [RFC3481] - - RFC 3819 B: "Advice for Internet Subnetwork Designers" (July 2004) - - This document [RFC3819] describes how TCP performance can be - negatively affected by some particular lower-layer behaviors and - provides guidance in designing lower-layer networks and protocols - to be amicable to TCP. - -6.3. Implementation Advice - - RFC 879: "The TCP Maximum Segment Size and Related Topics" (November - 1983) - - Abstract: "This memo discusses the TCP Maximum Segment Size Option - and related topics. The purposes is to clarify some aspects of - TCP and its interaction with IP. This memo is a clarification to - the TCP specification, and contains information that may be - considered as 'advice to implementers'." [RFC0879] - - RFC 1071: "Computing the Internet Checksum" (September 1988) - - This document [RFC1071] lists a number of implementation - techniques for efficiently computing the Internet checksum (used - by TCP). 
- - RFC 1624 I: "Computation of the Internet Checksum via Incremental - Update" (May 1994) - - Incrementally updating the Internet checksum is useful to routers - in updating IP checksums. Some middleboxes that alter TCP headers - may also be able to update the TCP checksum incrementally. This - document [RFC1624] expands upon the explanation of the incremental - update procedure in RFC 1071. - - RFC 1936 I: "Implementing the Internet Checksum in Hardware" (April - 1996) - - This document [RFC1936] describes the motivation for implementing - the Internet checksum in hardware, rather than in software, and - provides an implementation example. - - - - - -Duke, et al. Informational [Page 19] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 2525 I: "Known TCP Implementation Problems" (March 1999) - - From abstract: "This memo catalogs a number of known TCP - implementation problems. The goal in doing so is to improve - conditions in the existing Internet by enhancing the quality of - current TCP/IP implementations." [RFC2525] - - RFC 2923 I: "TCP Problems with Path MTU Discovery" (September 2000) - - From abstract: "This memo catalogs several known Transmission - Control Protocol (TCP) implementation problems dealing with Path - Maximum Transmission Unit Discovery (PMTUD), including the long- - standing black hole problem, stretch acknowlegements (ACKs) due to - confusion between Maximum Segment Size (MSS) and segment size, and - MSS advertisement based on PMTU." [RFC2923] - - RFC 3360 B: "Inappropriate TCP Resets Considered Harmful" (August - 2002) - - This document [RFC3360] is a plea that firewall vendors not send - gratuitous TCP RST (Reset) packets when unassigned TCP header bits - are used. This practice prevents desirable extension and - evolution of the protocol and thus is potentially harmful to the - future of the Internet. 
- - RFC 3493 I: "Basic Socket Interface Extensions for IPv6" (February - 2003) - - This document [RFC3493] describes the de facto standard sockets - API for programming with TCP. This API is implemented nearly - ubiquitously in modern operating systems and programming - languages. - -6.4. Management Information Bases - - The first MIB module defined for use with Simple Network Management - Protocol (SNMP) (in RFC 1066 and its update, RFC 1156) was a single - monolithic MIB module, called MIB-I. This evolved over time to be - MIB-II (RFC 1213). It then became apparent that having a single - monolithic MIB module was not scalable, given the number and breadth - of MIB data definitions that needed to be included. Thus, additional - MIB modules were defined, and those parts of MIB-II that needed to - evolve were split off. Eventually, the remaining parts of MIB-II - were also split off, the TCP-specific part being documented in RFC - 2012. - - - - - - -Duke, et al. Informational [Page 20] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 2012 was obsoleted by RFC 4022, which is the primary TCP MIB - document today. MIB-I, defined in RFC 1156, has been obsoleted by - the MIB-II specification in RFC 1213. For current TCP implementers, - RFC 4022 should be supported. - - RFC 1066: "Management Information Base for Network Management of - TCP/IP-based Internets" (August 1988) - - This document [RFC1066] was the description of the TCP MIB. It - was obsoleted by RFC 1156. - - RFC 1156 S: "Management Information Base for Network Management of - TCP/IP-based Internets" (May 1990) - - This document [RFC1156] describes the required MIB fields for TCP - implementations, with minor corrections and no technical changes - from RFC 1066, which it obsoletes. This is the standards track - document for MIB-I. 
- - RFC 1213 S: "Management Information Base for Network Management of - TCP/IP-based Internets: MIB-II" (March 1991) - - This document [RFC1213] describes the second version of the MIB in - a monolithic form. RFC 2012 updates this document by splitting - out the TCP-specific portions. - - RFC 2012 S: "SNMPv2 Management Information Base for the Transmission - Control Protocol using SMIv2" (November 1996) - - This document [RFC2012] defined the TCP MIB, in an update to RFC - 1213. It is now obsoleted by RFC 4022. - - RFC 2452 S: "IP Version 6 Management Information Base for the - Transmission Control Protocol" (December 1998) - - This document [RFC2452] augments RFC 2012 by adding an IPv6- - specific connection table. The rest of 2012 holds for any IP - version. RFC 2012 is now obsoleted by RFC 4022. - - Although it is a standards track document, RFC 2452 is considered - a historic mistake by the MIB community, as it is based on the - idea of parallel IPv4 and IPv6 structures. Although IPv6 requires - new structures, the community has decided to define a single - generic structure for both IPv4 and IPv6. This will aid in - definition, implementation, and transition between IPv4 and IPv6. - - - - - - -Duke, et al. Informational [Page 21] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 4022 S: "Management Information Base for the Transmission Control - Protocol (TCP)" (March 2005) - - This document [RFC4022] obsoletes RFC 2012 and RFC 2452 and - specifies the current standard for the TCP MIB that should be - deployed. - -6.5. Tools and Tutorials - - RFC 1180 I: "TCP/IP Tutorial" (January 1991) - - This document [RFC1180] is an extremely brief overview of the - TCP/IP protocol suite as a whole. It gives some explanation as to - how and where TCP fits in. 
- - RFC 1470 I: "FYI on a Network Management Tool Catalog: Tools for - Monitoring and Debugging TCP/IP Internets and Interconnected Devices" - (June 1993) - - A few of the tools that this document [RFC1470] describes are - still maintained and in use today; for example, ttcp and tcpdump. - However, many of the tools described do not relate specifically to - TCP and are no longer used or easily available. - - RFC 2398 I: "Some Testing Tools for TCP Implementors" (August 1998) - - This document [RFC2398] describes a number of TCP packet - generation and analysis tools. Although some of these tools are - no longer readily available or widely used, for the most part they - are still relevant and usable. - -6.6. Case Studies - - RFC 1337 I: "TIME-WAIT Assassination Hazards in TCP" (May 1992) - - This document [RFC1337] points out a problem with acting on - received reset segments while one is in the TIME-WAIT state. The - main recommendation is that hosts in TIME-WAIT ignore resets. - This recommendation might not currently be widely implemented. - - RFC 2415 I: "Simulation Studies of Increased Initial TCP Window Size" - (September 1998) - - This document [RFC2415] presents results of some simulations using - TCP initial windows greater than 1 segment. The analysis - indicates that user-perceived performance can be improved by - increasing the initial window to 3 segments. - - - - -Duke, et al. Informational [Page 22] - -RFC 4614 TCP Roadmap September 2006 - - - RFC 2416 I: "When TCP Starts Up With Four Packets Into Only Three - Buffers" (September 1998) - - This document [RFC2416] uses simulation results to clear up some - concerns about using an initial window of 4 segments when the - network path has less provisioning. 
- - RFC 2884 I: "Performance Evaluation of Explicit Congestion - Notification (ECN) in IP Networks" (July 2000) - - This document [RFC2884] describes experimental results that show - some improvements to the performance of both short- and long-lived - connections due to ECN. - -7. Undocumented TCP Features - - There are a few important implementation tactics for the TCP that - have not yet been described in any RFC. Although this roadmap is - primarily concerned with mapping the TCP RFCs, this section is - included because an implementer needs to be aware of these important - issues. - - SYN Cookies - - A mechanism known as "SYN cookies" is widely used to thwart TCP - SYN flooding attacks, in which an attacker sends a flood of SYNs - to a victim but fails to complete the 3-way handshake. The result - is exhaustion of resources at the server. The SYN cookie - mechanism allows the server to return a cleverly chosen initial - sequence number that has all the required state for the secure - completion of the handshake. Then the server can avoid saving - connection state during the 3-way handshake and thus survive a SYN - flooding attack. - - A web search for "SYN cookies" will reveal a number of useful - descriptions of this mechanism, although there is currently no RFC - on the matter. - - Header Prediction - - Header prediction is a trick to speed up the processing of - segments. Van Jacobson and Mike Karels developed the technique in - the late 1980s. The basic idea is that some processing time can - be saved when most of a segment's fields can be predicted from - previous segments. A good description of this was sent to the - TCP-IP mailing list by Van Jacobson on March 9, 1988: - - - - - -Duke, et al. Informational [Page 23] - -RFC 4614 TCP Roadmap September 2006 - - - Quite a bit of the speedup comes from an algorithm that we - ('we' refers to collaborator Mike Karels and myself) are - calling "header prediction". 
The idea is that if you're in the - middle of a bulk data transfer and have just seen a packet, you - know what the next packet is going to look like: It will look - just like the current packet with either the sequence number or - ack number updated (depending on whether you're the sender or - receiver). Combining this with the "Use hints" epigram from - Butler Lampson's classic "Epigrams for System Designers", you - start to think of the tcp state (rcv.nxt, snd.una, etc.) as - "hints" about what the next packet should look like. - - If you arrange those "hints" so they match the layout of a tcp - packet header, it takes a single 14-byte compare to see if your - prediction is correct (3 longword compares to pick up the send - & ack sequence numbers, header length, flags and window, plus a - short compare on the length). If the prediction is correct, - there's a single test on the length to see if you're the sender - or receiver followed by the appropriate processing. E.g., if - the length is non-zero (you're the receiver), checksum and - append the data to the socket buffer then wake any process - that's sleeping on the buffer. Update rcv.nxt by the length of - this packet (this updates your "prediction" of the next - packet). Check if you can handle another packet the same size - as the current one. If not, set one of the unused flag bits in - your header prediction to guarantee that the prediction will - fail on the next packet and force you to go through full - protocol processing. Otherwise, you're done with this packet. - So, the *total* tcp protocol processing, exclusive of - checksumming, is on the order of 6 compares and an add. - -8. Security Considerations - - This document introduces no new security considerations. Each RFC - listed in this document attempts to address the security - considerations of the specification it contains. - -9. 
Acknowledgments - - This document grew out of a discussion on the end2end-interest - mailing list, the public list of the End-to-End Research Group of the - IRTF, and continued development under the IETF's TCP Maintenance and - Minor Extensions (TCPM) working group. We thank Joe Touch, Reiner - Ludwig, Pekka Savola, Gorry Fairhurst, and Sally Floyd for their - contributions, in particular. The chairs of the TCPM working group, - Mark Allman and Ted Faber, have been instrumental in the development - of this document. Keith McCloghrie provided some useful notes and - clarification on the various MIB-related RFCs. - - - -Duke, et al. Informational [Page 24] - -RFC 4614 TCP Roadmap September 2006 - - -10. Informative References - -10.1. Basic Functionality - - [RFC0793] Postel, J., "Transmission Control Protocol", STD 7, RFC - 793, September 1981. - - [RFC1122] Braden, R., "Requirements for Internet Hosts - - Communication Layers", STD 3, RFC 1122, October 1989. - - [RFC2026] Bradner, S., "The Internet Standards Process -- Revision - 3", BCP 9, RFC 2026, October 1996. - - [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 - (IPv6) Specification", RFC 2460, December 1998. - - [RFC2474] Nichols, K., Blake, S., Baker, F., and D. Black, - "Definition of the Differentiated Services Field (DS - Field) in the IPv4 and IPv6 Headers", RFC 2474, December - 1998. - - [RFC2581] Allman, M., Paxson, V., and W. Stevens, "TCP Congestion - Control", RFC 2581, April 1999. - - [RFC2675] Borman, D., Deering, S., and R. Hinden, "IPv6 Jumbograms", - RFC 2675, August 1999. - - [RFC2873] Xiao, X., Hannan, A., Paxson, V., and E. Crabbe, "TCP - Processing of the IPv4 Precedence Field", RFC 2873, June - 2000. - - [RFC2988] Paxson, V. and M. Allman, "Computing TCP's Retransmission - Timer", RFC 2988, November 2000. - -10.2. Recommended Enhancements - - [RFC1323] Jacobson, V., Braden, R., and D. Borman, "TCP Extensions - for High Performance", RFC 1323, May 1992. 
- - [RFC1948] Bellovin, S., "Defending Against Sequence Number Attacks", - RFC 1948, May 1996. - - [RFC2018] Mathis, M., Mahdavi, J., Floyd, S., and A. Romanow, "TCP - Selective Acknowledgment Options", RFC 2018, October 1996. - - [RFC2385] Heffernan, A., "Protection of BGP Sessions via the TCP MD5 - Signature Option", RFC 2385, August 1998. - - - - -Duke, et al. Informational [Page 25] - -RFC 4614 TCP Roadmap September 2006 - - - [RFC2883] Floyd, S., Mahdavi, J., Mathis, M., and M. Podolsky, "An - Extension to the Selective Acknowledgement (SACK) Option - for TCP", RFC 2883, July 2000. - - [RFC3042] Allman, M., Balakrishnan, H., and S. Floyd, "Enhancing - TCP's Loss Recovery Using Limited Transmit", RFC 3042, - January 2001. - - [RFC3168] Ramakrishnan, K., Floyd, S., and D. Black, "The Addition - of Explicit Congestion Notification (ECN) to IP", RFC - 3168, September 2001. - - [RFC3390] Allman, M., Floyd, S., and C. Partridge, "Increasing TCP's - Initial Window", RFC 3390, October 2002. - - [RFC3517] Blanton, E., Allman, M., Fall, K., and L. Wang, "A - Conservative Selective Acknowledgment (SACK)-based Loss - Recovery Algorithm for TCP", RFC 3517, April 2003. - - [RFC3562] Leech, M., "Key Management Considerations for the TCP MD5 - Signature Option", RFC 3562, July 2003. - - [RFC3782] Floyd, S., Henderson, T., and A. Gurtov, "The NewReno - Modification to TCP's Fast Recovery Algorithm", RFC 3782, - April 2004. - - [RFC4015] Ludwig, R. and A. Gurtov, "The Eifel Response Algorithm - for TCP", RFC 4015, February 2005. - - [RFC4278] Bellovin, S. and A. Zinin, "Standards Maturity Variance - Regarding the TCP MD5 Signature Option (RFC 2385) and the - BGP-4 Specification", RFC 4278, January 2006. - -10.3. Experimental Extensions - - [RFC2140] Touch, J., "TCP Control Block Interdependence", RFC 2140, - April 1997. - - [RFC2861] Handley, M., Padhye, J., and S. Floyd, "TCP Congestion - Window Validation", RFC 2861, June 2000. - - [RFC3124] Balakrishnan, H. and S. 
Seshan, "The Congestion Manager", - RFC 3124, June 2001. - - [RFC3465] Allman, M., "TCP Congestion Control with Appropriate Byte - Counting (ABC)", RFC 3465, February 2003. - - - - - -Duke, et al. Informational [Page 26] - -RFC 4614 TCP Roadmap September 2006 - - - [RFC3522] Ludwig, R. and M. Meyer, "The Eifel Detection Algorithm - for TCP", RFC 3522, April 2003. - - [RFC3540] Spring, N., Wetherall, D., and D. Ely, "Robust Explicit - Congestion Notification (ECN) Signaling with Nonces", RFC - 3540, June 2003. - - [RFC3649] Floyd, S., "HighSpeed TCP for Large Congestion Windows", - RFC 3649, December 2003. - - [RFC3708] Blanton, E. and M. Allman, "Using TCP Duplicate Selective - Acknowledgement (DSACKs) and Stream Control Transmission - Protocol (SCTP) Duplicate Transmission Sequence Numbers - (TSNs) to Detect Spurious Retransmissions", RFC 3708, - February 2004. - - [RFC3742] Floyd, S., "Limited Slow-Start for TCP with Large - Congestion Windows", RFC 3742, March 2004. - - [RFC4138] Sarolahti, P. and M. Kojo, "Forward RTO-Recovery (F-RTO): - An Algorithm for Detecting Spurious Retransmission - Timeouts with TCP and the Stream Control Transmission - Protocol (SCTP)", RFC 4138, August 2005. - -10.4. Historic Extensions - - [RFC1106] Fox, R., "TCP big window and NAK options", RFC 1106, June - 1989. - - [RFC1110] McKenzie, A., "Problem with the TCP big window option", - RFC 1110, August 1989. - - [RFC1146] Zweig, J. and C. Partridge, "TCP alternate checksum - options", RFC 1146, March 1990. - - [RFC1263] O'Malley, S. and L. Peterson, "TCP Extensions Considered - Harmful", RFC 1263, October 1991. - - [RFC1379] Braden, R., "Extending TCP for Transactions -- Concepts", - RFC 1379, November 1992. - - [RFC1644] Braden, R., "T/TCP -- TCP Extensions for Transactions - Functional Specification", RFC 1644, July 1994. - - [RFC1693] Connolly, T., Amer, P., and P. Conrad, "An Extension to - TCP : Partial Order Service", RFC 1693, November 1994. - - - - - -Duke, et al. 
Informational [Page 27] - -RFC 4614 TCP Roadmap September 2006 - - -10.5. Support Documents - - [RFC0813] Clark, D., "Window and Acknowledgement Strategy in TCP", - RFC 813, July 1982. - - [RFC0814] Clark, D., "Name, addresses, ports, and routes", RFC 814, - July 1982. - - [RFC0816] Clark, D., "Fault isolation and recovery", RFC 816, July - 1982. - - [RFC0817] Clark, D., "Modularity and efficiency in protocol - implementation", RFC 817, July 1982. - - [RFC0872] Padlipsky, M., "TCP-on-a-LAN", RFC 872, September 1982. - - [RFC0879] Postel, J., "TCP maximum segment size and related topics", - RFC 879, November 1983. - - [RFC0896] Nagle, J., "Congestion control in IP/TCP internetworks", - RFC 896, January 1984. - - [RFC0964] Sidhu, D. and T. Blumer, "Some problems with the - specification of the Military Standard Transmission - Control Protocol", RFC 964, November 1985. - - [RFC1066] McCloghrie, K. and M. Rose, "Management Information Base - for Network Management of TCP/IP-based internets", RFC - 1066, August 1988. - - [RFC1071] Braden, R., Borman, D., and C. Partridge, "Computing the - Internet checksum", RFC 1071, September 1988. - - [RFC1072] Jacobson, V. and R. Braden, "TCP extensions for long-delay - paths", RFC 1072, October 1988. - - [RFC1156] McCloghrie, K. and M. Rose, "Management Information Base - for network management of TCP/IP-based internets", RFC - 1156, May 1990. - - [RFC1180] Socolofsky, T. and C. Kale, "TCP/IP tutorial", RFC 1180, - January 1991. - - [RFC1185] Jacobson, V., Braden, B., and L. Zhang, "TCP Extension for - High-Speed Paths", RFC 1185, October 1990. - - - - - - -Duke, et al. Informational [Page 28] - -RFC 4614 TCP Roadmap September 2006 - - - [RFC1213] McCloghrie, K. and M. Rose, "Management Information Base - for Network Management of TCP/IP-based internets: MIB-II", - STD 17, RFC 1213, March 1991. - - [RFC1337] Braden, R., "TIME-WAIT Assassination Hazards in TCP", RFC - 1337, May 1992. - - [RFC1470] Enger, R. and J. 
Reynolds, "FYI on a Network Management - Tool Catalog: Tools for Monitoring and Debugging TCP/IP - Internets and Interconnected Devices", FYI 2, RFC 1470, - June 1993. - - [RFC1624] Rijsinghani, A., "Computation of the Internet Checksum via - Incremental Update", RFC 1624, May 1994. - - [RFC1936] Touch, J. and B. Parham, "Implementing the Internet - Checksum in Hardware", RFC 1936, April 1996. - - [RFC2012] McCloghrie, K., "SNMPv2 Management Information Base for - the Transmission Control Protocol using SMIv2", RFC 2012, - November 1996. - - [RFC2398] Parker, S. and C. Schmechel, "Some Testing Tools for TCP - Implementors", RFC 2398, August 1998. - - [RFC2415] Poduri, K. and K. Nichols, "Simulation Studies of - Increased Initial TCP Window Size", RFC 2415, September - 1998. - - [RFC2416] Shepard, T. and C. Partridge, "When TCP Starts Up With - Four Packets Into Only Three Buffers", RFC 2416, September - 1998. - - [RFC2452] Daniele, M., "IP Version 6 Management Information Base for - the Transmission Control Protocol", RFC 2452, December - 1998. - - [RFC2488] Allman, M., Glover, D., and L. Sanchez, "Enhancing TCP - Over Satellite Channels using Standard Mechanisms", BCP - 28, RFC 2488, January 1999. - - [RFC2525] Paxson, V., Allman, M., Dawson, S., Fenner, W., Griner, - J., Heavens, I., Lahey, K., Semke, J., and B. Volz, "Known - TCP Implementation Problems", RFC 2525, March 1999. - - [RFC2757] Montenegro, G., Dawkins, S., Kojo, M., Magret, V., and N. - Vaidya, "Long Thin Networks", RFC 2757, January 2000. - - - - -Duke, et al. Informational [Page 29] - -RFC 4614 TCP Roadmap September 2006 - - - [RFC2760] Allman, M., Dawkins, S., Glover, D., Griner, J., Tran, D., - Henderson, T., Heidemann, J., Touch, J., Kruse, H., - Ostermann, S., Scott, K., and J. Semke, "Ongoing TCP - Research Related to Satellites", RFC 2760, February 2000. - - [RFC2884] Hadi Salim, J. and U. 
Ahmed, "Performance Evaluation of - Explicit Congestion Notification (ECN) in IP Networks", - RFC 2884, July 2000. - - [RFC2914] Floyd, S., "Congestion Control Principles", BCP 41, RFC - 2914, September 2000. - - [RFC2923] Lahey, K., "TCP Problems with Path MTU Discovery", RFC - 2923, September 2000. - - [RFC3135] Border, J., Kojo, M., Griner, J., Montenegro, G., and Z. - Shelby, "Performance Enhancing Proxies Intended to - Mitigate Link-Related Degradations", RFC 3135, June 2001. - - [RFC3150] Dawkins, S., Montenegro, G., Kojo, M., and V. Magret, - "End-to-end Performance Implications of Slow Links", BCP - 48, RFC 3150, July 2001. - - [RFC3155] Dawkins, S., Montenegro, G., Kojo, M., Magret, V., and N. - Vaidya, "End-to-end Performance Implications of Links with - Errors", BCP 50, RFC 3155, August 2001. - - [RFC3360] Floyd, S., "Inappropriate TCP Resets Considered Harmful", - BCP 60, RFC 3360, August 2002. - - [RFC3366] Fairhurst, G. and L. Wood, "Advice to link designers on - link Automatic Repeat reQuest (ARQ)", BCP 62, RFC 3366, - August 2002. - - [RFC3449] Balakrishnan, H., Padmanabhan, V., Fairhurst, G., and M. - Sooriyabandara, "TCP Performance Implications of Network - Path Asymmetry", BCP 69, RFC 3449, December 2002. - - [RFC3481] Inamura, H., Montenegro, G., Ludwig, R., Gurtov, A., and - F. Khafizov, "TCP over Second (2.5G) and Third (3G) - Generation Wireless Networks", BCP 71, RFC 3481, February - 2003. - - [RFC3493] Gilligan, R., Thomson, S., Bound, J., McCann, J., and W. - Stevens, "Basic Socket Interface Extensions for IPv6", RFC - 3493, February 2003. - - - - - -Duke, et al. Informational [Page 30] - -RFC 4614 TCP Roadmap September 2006 - - - [RFC3819] Karn, P., Bormann, C., Fairhurst, G., Grossman, D., - Ludwig, R., Mahdavi, J., Montenegro, G., Touch, J., and L. - Wood, "Advice for Internet Subnetwork Designers", BCP 89, - RFC 3819, July 2004. 
- - [RFC4022] Raghunarayan, R., "Management Information Base for the - Transmission Control Protocol (TCP)", RFC 4022, March - 2005. - -10.6. Informative References Outside the RFC Series - - [JK92] Jacobson, V. and M. Karels, "Congestion Avoidance and - Control", This paper is a revised version of [Jac88], that - includes an additional appendix. This paper has not been - traditionally published, but is currently available at - ftp://ftp.ee.lbl.gov/papers/congavoid.ps.Z. 1992. - - [Jac88] Jacobson, V., "Congestion Avoidance and Control", ACM - SIGCOMM 1988 Proceedings, in ACM Computer Communication - Review, 18 (4), pp. 314-329, August 1988. - - [KP87] Karn, P. and C. Partridge, "Round Trip Time Estimation", - ACM SIGCOMM 1987 Proceedings, in ACM Computer - Communication Review, 17 (5), pp. 2-7, August 1987 - - [MAF04] Medina, A., Allman, M., and S. Floyd, "Measuring the - Evolution of Transport Protocols in the Internet", ACM - Computer Communication Review, 35 (2), April 2005. - - [MM96] Mathis, M. and J. Mahdavi, "Forward Acknowledgement: - Refining TCP Congestion Control", ACM SIGCOMM 1996 - Proceedings, in ACM Computer Communication Review 26 (4), - pp. 281-292, October 1996. - - [SCWA99] Savage, S., Cardwell, N., Wetherall, D., and T. Anderson, - "TCP Congestion Control with a Misbehaving Receiver", ACM - Computer Communication Review, 29 (5), pp. 71-78, October - 1999. - - - - - - - - - - - - - -Duke, et al. Informational [Page 31] - -RFC 4614 TCP Roadmap September 2006 - - -Authors' Addresses - - Martin H. Duke - The Boeing Company - PO Box 3707, MC 7L-49 - Seattle, WA 98124-2207 - - Phone: 425-373-2852 - EMail: martin.duke@boeing.com - - - Robert Braden - USC Information Sciences Institute - Marina del Rey, CA 90292-6695 - - Phone: 310-448-9173 - EMail: braden@isi.edu - - - Wesley M. 
Eddy - Verizon Federal Network Systems - 21000 Brookpark Rd, MS 54-5 - Cleveland, OH 44135 - - Phone: 216-433-6682 - EMail: weddy@grc.nasa.gov - - - Ethan Blanton - Purdue University Computer Science - 250 N. University St. - West Lafayette, IN 47907 - - EMail: eblanton@cs.purdue.edu - - - - - - - - - - - - - - - - - -Duke, et al. Informational [Page 32] - -RFC 4614 TCP Roadmap September 2006 - - -Full Copyright Statement - - Copyright (C) The Internet Society (2006). - - This document is subject to the rights, licenses and restrictions - contained in BCP 78, and except as set forth therein, the authors - retain all their rights. - - This document and the information contained herein are provided on an - "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS - OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET - ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, - INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE - INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - -Intellectual Property - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed to - pertain to the implementation or use of the technology described in - this document or the extent to which any license under such rights - might or might not be available; nor does it represent that it has - made any independent effort to identify any such rights. Information - on the procedures with respect to rights in RFC documents can be - found in BCP 78 and BCP 79. 
- - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use of - such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository at - http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights that may cover technology that may be required to implement - this standard. Please address the information to the IETF at - ietf-ipr@ietf.org. - -Acknowledgement - - Funding for the RFC Editor function is provided by the IETF - Administrative Support Activity (IASA). - - - - - - - -Duke, et al. Informational [Page 33] - diff --git a/kernel/picotcp/RFC/rfc6762.txt b/kernel/picotcp/RFC/rfc6762.txt deleted file mode 100644 index 2c44359..0000000 --- a/kernel/picotcp/RFC/rfc6762.txt +++ /dev/null @@ -1,3923 +0,0 @@ - - - - - - -Internet Engineering Task Force (IETF) S. Cheshire -Request for Comments: 6762 M. Krochmal -Category: Standards Track Apple Inc. -ISSN: 2070-1721 February 2013 - - - Multicast DNS - -Abstract - - As networked devices become smaller, more portable, and more - ubiquitous, the ability to operate with less configured - infrastructure is increasingly important. In particular, the ability - to look up DNS resource record data types (including, but not limited - to, host names) in the absence of a conventional managed DNS server - is useful. - - Multicast DNS (mDNS) provides the ability to perform DNS-like - operations on the local link in the absence of any conventional - Unicast DNS server. In addition, Multicast DNS designates a portion - of the DNS namespace to be free for local use, without the need to - pay any annual fee, and without the need to set up delegations or - otherwise configure a conventional DNS server to answer for those - names. 
- - The primary benefits of Multicast DNS names are that (i) they require - little or no administration or configuration to set them up, (ii) - they work when no infrastructure is present, and (iii) they work - during infrastructure failures. - -Status of This Memo - - This is an Internet Standards Track document. - - This document is a product of the Internet Engineering Task Force - (IETF). It represents the consensus of the IETF community. It has - received public review and has been approved for publication by the - Internet Engineering Steering Group (IESG). Further information on - Internet Standards is available in Section 2 of RFC 5741. - - Information about the current status of this document, any errata, - and how to provide feedback on it may be obtained at - http://www.rfc-editor.org/info/rfc6762. - - - - - - - - -Cheshire & Krochmal Standards Track [Page 1] - -RFC 6762 Multicast DNS February 2013 - - -Copyright Notice - - Copyright (c) 2013 IETF Trust and the persons identified as the - document authors. All rights reserved. - - This document is subject to BCP 78 and the IETF Trust's Legal - Provisions Relating to IETF Documents - (http://trustee.ietf.org/license-info) in effect on the date of - publication of this document. Please review these documents - carefully, as they describe your rights and restrictions with respect - to this document. Code Components extracted from this document must - include Simplified BSD License text as described in Section 4.e of - the Trust Legal Provisions and are provided without warranty as - described in the Simplified BSD License. - - This document may contain material from IETF Documents or IETF - Contributions published or made publicly available before November - 10, 2008. The person(s) controlling the copyright in some of this - material may not have granted the IETF Trust the right to allow - modifications of such material outside the IETF Standards Process. 
- Without obtaining an adequate license from the person(s) controlling - the copyright in such materials, this document may not be modified - outside the IETF Standards Process, and derivative works of it may - not be created outside the IETF Standards Process, except to format - it for publication as an RFC or to translate it into languages other - than English. - - - - - - - - - - - - - - - - - - - - - - - - - -Cheshire & Krochmal Standards Track [Page 2] - -RFC 6762 Multicast DNS February 2013 - - -Table of Contents - - 1. Introduction ....................................................4 - 2. Conventions and Terminology Used in This Document ...............4 - 3. Multicast DNS Names .............................................5 - 4. Reverse Address Mapping .........................................7 - 5. Querying ........................................................8 - 6. Responding .....................................................13 - 7. Traffic Reduction ..............................................22 - 8. Probing and Announcing on Startup ..............................25 - 9. Conflict Resolution ............................................31 - 10. Resource Record TTL Values and Cache Coherency ................33 - 11. Source Address Check ..........................................38 - 12. Special Characteristics of Multicast DNS Domains ..............40 - 13. Enabling and Disabling Multicast DNS ..........................41 - 14. Considerations for Multiple Interfaces ........................42 - 15. Considerations for Multiple Responders on the Same Machine ....43 - 16. Multicast DNS Character Set ...................................45 - 17. Multicast DNS Message Size ....................................46 - 18. Multicast DNS Message Format ..................................47 - 19. Summary of Differences between Multicast DNS and Unicast DNS ..51 - 20. IPv6 Considerations ...........................................52 - 21. 
Security Considerations .......................................52 - 22. IANA Considerations ...........................................53 - 23. Acknowledgments ...............................................56 - 24. References ....................................................56 - Appendix A. Design Rationale for Choice of UDP Port Number ........60 - Appendix B. Design Rationale for Not Using Hashed Multicast - Addresses .............................................61 - Appendix C. Design Rationale for Maximum Multicast DNS Name - Length ................................................62 - Appendix D. Benefits of Multicast Responses .......................64 - Appendix E. Design Rationale for Encoding Negative Responses ......65 - Appendix F. Use of UTF-8 ..........................................66 - Appendix G. Private DNS Namespaces ................................67 - Appendix H. Deployment History ....................................67 - - - - - - - - - - - - - - - -Cheshire & Krochmal Standards Track [Page 3] - -RFC 6762 Multicast DNS February 2013 - - -1. Introduction - - Multicast DNS and its companion technology DNS-Based Service - Discovery [RFC6763] were created to provide IP networking with the - ease-of-use and autoconfiguration for which AppleTalk was well-known - [RFC6760]. When reading this document, familiarity with the concepts - of Zero Configuration Networking [Zeroconf] and automatic link-local - addressing [RFC3927] [RFC4862] is helpful. - - Multicast DNS borrows heavily from the existing DNS protocol - [RFC1034] [RFC1035] [RFC6195], using the existing DNS message - structure, name syntax, and resource record types. This document - specifies no new operation codes or response codes. This document - describes how clients send DNS-like queries via IP multicast, and how - a collection of hosts cooperate to collectively answer those queries - in a useful manner. - -2. 
Conventions and Terminology Used in This Document - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in "Key words for use in - RFCs to Indicate Requirement Levels" [RFC2119]. - - When this document uses the term "Multicast DNS", it should be taken - to mean: "Clients performing DNS-like queries for DNS-like resource - records by sending DNS-like UDP query and response messages over IP - Multicast to UDP port 5353". The design rationale for selecting UDP - port 5353 is discussed in Appendix A. - - This document uses the term "host name" in the strict sense to mean a - fully qualified domain name that has an IPv4 or IPv6 address record. - It does not use the term "host name" in the commonly used but - incorrect sense to mean just the first DNS label of a host's fully - qualified domain name. - - A DNS (or mDNS) packet contains an IP Time to Live (TTL) in the IP - header, which is effectively a hop-count limit for the packet, to - guard against routing loops. Each resource record also contains a - TTL, which is the number of seconds for which the resource record may - be cached. This document uses the term "IP TTL" to refer to the IP - header TTL (hop limit), and the term "RR TTL" or just "TTL" to refer - to the resource record TTL (cache lifetime). - - DNS-format messages contain a header, a Question Section, then - Answer, Authority, and Additional Record Sections. The Answer, - Authority, and Additional Record Sections all hold resource records - - - - -Cheshire & Krochmal Standards Track [Page 4] - -RFC 6762 Multicast DNS February 2013 - - - in the same format. Where this document describes issues that apply - equally to all three sections, it uses the term "Resource Record - Sections" to refer collectively to these three sections. 
- - This document uses the terms "shared" and "unique" when referring to - resource record sets [RFC1034]: - - A "shared" resource record set is one where several Multicast DNS - responders may have records with the same name, rrtype, and - rrclass, and several responders may respond to a particular query. - - A "unique" resource record set is one where all the records with - that name, rrtype, and rrclass are conceptually under the control - or ownership of a single responder, and it is expected that at - most one responder should respond to a query for that name, - rrtype, and rrclass. Before claiming ownership of a unique - resource record set, a responder MUST probe to verify that no - other responder already claims ownership of that set, as described - in Section 8.1, "Probing". (For fault-tolerance and other - reasons, sometimes it is permissible to have more than one - responder answering for a particular "unique" resource record set, - but such cooperating responders MUST give answers containing - identical rdata for these records. If they do not give answers - containing identical rdata, then the probing step will reject the - data as being inconsistent with what is already being advertised - on the network for those names.) - - Strictly speaking, the terms "shared" and "unique" apply to resource - record sets, not to individual resource records. However, it is - sometimes convenient to talk of "shared resource records" and "unique - resource records". When used this way, the terms should be - understood to mean a record that is a member of a "shared" or - "unique" resource record set, respectively. - -3. Multicast DNS Names - - A host that belongs to an organization or individual who has control - over some portion of the DNS namespace can be assigned a globally - unique name within that portion of the DNS namespace, such as, - "cheshire.example.com.". For those of us who have this luxury, this - works very well. 
However, the majority of home computer users do not - have easy access to any portion of the global DNS namespace within - which they have the authority to create names. This leaves the - majority of home computers effectively anonymous for practical - purposes. - - - - - - -Cheshire & Krochmal Standards Track [Page 5] - -RFC 6762 Multicast DNS February 2013 - - - To remedy this problem, this document allows any computer user to - elect to give their computers link-local Multicast DNS host names of - the form: "single-dns-label.local.". For example, a laptop computer - may answer to the name "MyComputer.local.". Any computer user is - granted the authority to name their computer this way, provided that - the chosen host name is not already in use on that link. Having - named their computer this way, the user has the authority to continue - utilizing that name until such time as a name conflict occurs on the - link that is not resolved in the user's favor. If this happens, the - computer (or its human user) MUST cease using the name, and SHOULD - attempt to allocate a new unique name for use on that link. These - conflicts are expected to be relatively rare for people who choose - reasonably imaginative names, but it is still important to have a - mechanism in place to handle them when they happen. - - This document specifies that the DNS top-level domain ".local." is a - special domain with special semantics, namely that any fully - qualified name ending in ".local." is link-local, and names within - this domain are meaningful only on the link where they originate. - This is analogous to IPv4 addresses in the 169.254/16 prefix or IPv6 - addresses in the FE80::/10 prefix, which are link-local and - meaningful only on the link where they originate. - - Any DNS query for a name ending with ".local." MUST be sent to the - mDNS IPv4 link-local multicast address 224.0.0.251 (or its IPv6 - equivalent FF02::FB). 
The design rationale for using a fixed - multicast address instead of selecting from a range of multicast - addresses using a hash function is discussed in Appendix B. - Implementers MAY choose to look up such names concurrently via other - mechanisms (e.g., Unicast DNS) and coalesce the results in some - fashion. Implementers choosing to do this should be aware of the - potential for user confusion when a given name can produce different - results depending on external network conditions (such as, but not - limited to, which name lookup mechanism responds faster). - - It is unimportant whether a name ending with ".local." occurred - because the user explicitly typed in a fully qualified domain name - ending in ".local.", or because the user entered an unqualified - domain name and the host software appended the suffix ".local." - because that suffix appears in the user's search list. The ".local." - suffix could appear in the search list because the user manually - configured it, or because it was received via DHCP [RFC2132] or via - any other mechanism for configuring the DNS search list. In this - respect the ".local." suffix is treated no differently from any other - search domain that might appear in the DNS search list. - - - - - - -Cheshire & Krochmal Standards Track [Page 6] - -RFC 6762 Multicast DNS February 2013 - - - DNS queries for names that do not end with ".local." MAY be sent to - the mDNS multicast address, if no other conventional DNS server is - available. This can allow hosts on the same link to continue - communicating using each other's globally unique DNS names during - network outages that disrupt communication with the greater Internet. - When resolving global names via local multicast, it is even more - important to use DNS Security Extensions (DNSSEC) [RFC4033] or other - security mechanisms to ensure that the response is trustworthy. 
- Resolving global names via local multicast is a contentious issue, - and this document does not discuss it further, instead concentrating - on the issue of resolving local names using DNS messages sent to a - multicast address. - - This document recommends a single flat namespace for dot-local host - names, (i.e., the names of DNS "A" and "AAAA" records, which map - names to IPv4 and IPv6 addresses), but other DNS record types (such - as those used by DNS-Based Service Discovery [RFC6763]) may contain - as many labels as appropriate for the desired usage, up to a maximum - of 255 bytes, plus a terminating zero byte at the end. Name length - issues are discussed further in Appendix C. - - Enforcing uniqueness of host names is probably desirable in the - common case, but this document does not mandate that. It is - permissible for a collection of coordinated hosts to agree to - maintain multiple DNS address records with the same name, possibly - for load-balancing or fault-tolerance reasons. This document does - not take a position on whether that is sensible. It is important - that both modes of operation be supported. The Multicast DNS - protocol allows hosts to verify and maintain unique names for - resource records where that behavior is desired, and it also allows - hosts to maintain multiple resource records with a single shared name - where that behavior is desired. This consideration applies to all - resource records, not just address records (host names). In summary: - It is required that the protocol have the ability to detect and - handle name conflicts, but it is not required that this ability be - used for every record. - -4. Reverse Address Mapping - - Like ".local.", the IPv4 and IPv6 reverse mapping domains are also - defined to be link-local: - - Any DNS query for a name ending with "254.169.in-addr.arpa." MUST - be sent to the mDNS IPv4 link-local multicast address 224.0.0.251 - or the mDNS IPv6 multicast address FF02::FB. 
Since names under - this domain correspond to IPv4 link-local addresses, it is logical - that the local link is the best place to find information - pertaining to those names. - - - -Cheshire & Krochmal Standards Track [Page 7] - -RFC 6762 Multicast DNS February 2013 - - - Likewise, any DNS query for a name within the reverse mapping - domains for IPv6 link-local addresses ("8.e.f.ip6.arpa.", - "9.e.f.ip6.arpa.", "a.e.f.ip6.arpa.", and "b.e.f.ip6.arpa.") MUST - be sent to the mDNS IPv6 link-local multicast address FF02::FB or - the mDNS IPv4 link-local multicast address 224.0.0.251. - -5. Querying - - There are two kinds of Multicast DNS queries: one-shot queries of the - kind made by legacy DNS resolvers, and continuous, ongoing Multicast - DNS queries made by fully compliant Multicast DNS queriers, which - support asynchronous operations including DNS-Based Service Discovery - [RFC6763]. - - Except in the rare case of a Multicast DNS responder that is - advertising only shared resource records and no unique records, a - Multicast DNS responder MUST also implement a Multicast DNS querier - so that it can first verify the uniqueness of those records before it - begins answering queries for them. - -5.1. One-Shot Multicast DNS Queries - - The most basic kind of Multicast DNS client may simply send standard - DNS queries blindly to 224.0.0.251:5353, without necessarily even - being aware of what a multicast address is. This change can - typically be implemented with just a few lines of code in an existing - DNS resolver library. If a name being queried falls within one of - the reserved Multicast DNS domains (see Sections 3 and 4), then, - rather than using the configured Unicast DNS server address, the - query is instead sent to 224.0.0.251:5353 (or its IPv6 equivalent - [FF02::FB]:5353). Typically, the timeout would also be shortened to - two or three seconds. It's possible to make a minimal Multicast DNS - resolver with only these simple changes. 
These queries are typically - done using a high-numbered ephemeral UDP source port, but regardless - of whether they are sent from a dynamic port or from a fixed port, - these queries MUST NOT be sent using UDP source port 5353, since - using UDP source port 5353 signals the presence of a fully compliant - Multicast DNS querier, as described below. - - A simple DNS resolver like this will typically just take the first - response it receives. It will not listen for additional UDP - responses, but in many instances this may not be a serious problem. - If a user types "http://MyPrinter.local." into their web browser, and - their simple DNS resolver just takes the first response it receives, - and the user gets to see the status and configuration web page for - their printer, then the protocol has met the user's needs in this - case. - - - - -Cheshire & Krochmal Standards Track [Page 8] - -RFC 6762 Multicast DNS February 2013 - - - While a basic DNS resolver like this may be adequate for simple host - name lookup, it may not get ideal behavior in other cases. - Additional refinements to create a fully compliant Multicast DNS - querier are described below. - -5.2. Continuous Multicast DNS Querying - - In one-shot queries, the underlying assumption is that the - transaction begins when the application issues a query, and ends when - the first response is received. There is another type of query - operation that is more asynchronous, in which having received one - response is not necessarily an indication that there will be no more - relevant responses, and the querying operation continues until no - further responses are required. Determining when no further - responses are required depends on the type of operation being - performed. If the operation is looking up the IPv4 and IPv6 - addresses of another host, then no further responses are required - once a successful connection has been made to one of those IPv4 or - IPv6 addresses. 
If the operation is browsing to present the user - with a list of DNS-SD services found on the network [RFC6763], then - no further responses are required once the user indicates this to the - user-interface software, e.g., by closing the network browsing window - that was displaying the list of discovered services. - - Imagine some hypothetical software that allows users to discover - network printers. The user wishes to discover all printers on the - local network, not only the printer that is quickest to respond. - When the user is actively looking for a network printer to use, they - open a network browsing window that displays the list of discovered - printers. It would be convenient for the user if they could rely on - this list of network printers to stay up to date as network printers - come and go, rather than displaying out-of-date stale information, - and requiring the user explicitly to click a "refresh" button any - time they want to see accurate information (which, from the moment it - is displayed, is itself already beginning to become out-of-date and - stale). If we are to display a continuously updated live list like - this, we need to be able to do it efficiently, without naive constant - polling, which would be an unreasonable burden on the network. It is - not expected that all users will be browsing to discover new printers - all the time, but when a user is browsing to discover service - instances for an extended period, we want to be able to support that - operation efficiently. - - Therefore, when retransmitting Multicast DNS queries to implement - this kind of continuous monitoring, the interval between the first - two queries MUST be at least one second, the intervals between - successive queries MUST increase by at least a factor of two, and the - querier MUST implement Known-Answer Suppression, as described below - - - -Cheshire & Krochmal Standards Track [Page 9] - -RFC 6762 Multicast DNS February 2013 - - - in Section 7.1. 
The Known-Answer Suppression mechanism tells - responders which answers are already known to the querier, thereby - allowing responders to avoid wasting network capacity with pointless - repeated transmission of those answers. A querier retransmits its - question because it wishes to receive answers it may have missed the - first time, not because it wants additional duplicate copies of - answers it already received. Failure to implement Known-Answer - Suppression can result in unacceptable levels of network traffic. - When the interval between queries reaches or exceeds 60 minutes, a - querier MAY cap the interval to a maximum of 60 minutes, and perform - subsequent queries at a steady-state rate of one query per hour. To - avoid accidental synchronization when, for some reason, multiple - clients begin querying at exactly the same moment (e.g., because of - some common external trigger event), a Multicast DNS querier SHOULD - also delay the first query of the series by a randomly chosen amount - in the range 20-120 ms. - - When a Multicast DNS querier receives an answer, the answer contains - a TTL value that indicates for how many seconds this answer is valid. - After this interval has passed, the answer will no longer be valid - and SHOULD be deleted from the cache. Before the record expiry time - is reached, a Multicast DNS querier that has local clients with an - active interest in the state of that record (e.g., a network browsing - window displaying a list of discovered services to the user) SHOULD - reissue its query to determine whether the record is still valid. - - To perform this cache maintenance, a Multicast DNS querier should - plan to retransmit its query after at least 50% of the record - lifetime has elapsed. This document recommends the following - specific strategy. - - The querier should plan to issue a query at 80% of the record - lifetime, and then if no answer is received, at 85%, 90%, and 95%. 
- If an answer is received, then the remaining TTL is reset to the - value given in the answer, and this process repeats for as long as - the Multicast DNS querier has an ongoing interest in the record. If - no answer is received after four queries, the record is deleted when - it reaches 100% of its lifetime. A Multicast DNS querier MUST NOT - perform this cache maintenance for records for which it has no local - clients with an active interest. If the expiry of a particular - record from the cache would result in no net effect to any client - software running on the querier device, and no visible effect to the - human user, then there is no reason for the Multicast DNS querier to - waste network capacity checking whether the record remains valid. - - - - - - - -Cheshire & Krochmal Standards Track [Page 10] - -RFC 6762 Multicast DNS February 2013 - - - To avoid the case where multiple Multicast DNS queriers on a network - all issue their queries simultaneously, a random variation of 2% of - the record TTL should be added, so that queries are scheduled to be - performed at 80-82%, 85-87%, 90-92%, and then 95-97% of the TTL. - - An additional efficiency optimization SHOULD be performed when a - Multicast DNS response is received containing a unique answer (as - indicated by the cache-flush bit being set, described in Section - 10.2, "Announcements to Flush Outdated Cache Entries"). In this - case, there is no need for the querier to continue issuing a stream - of queries with exponentially increasing intervals, since the receipt - of a unique answer is a good indication that no other answers will be - forthcoming. In this case, the Multicast DNS querier SHOULD plan to - issue its next query for this record at 80-82% of the record's TTL, - as described above. 
- - A compliant Multicast DNS querier, which implements the rules - specified in this document, MUST send its Multicast DNS queries from - UDP source port 5353 (the well-known port assigned to mDNS), and MUST - listen for Multicast DNS replies sent to UDP destination port 5353 at - the mDNS link-local multicast address (224.0.0.251 and/or its IPv6 - equivalent FF02::FB). - -5.3. Multiple Questions per Query - - Multicast DNS allows a querier to place multiple questions in the - Question Section of a single Multicast DNS query message. - - The semantics of a Multicast DNS query message containing multiple - questions is identical to a series of individual DNS query messages - containing one question each. Combining multiple questions into a - single message is purely an efficiency optimization and has no other - semantic significance. - -5.4. Questions Requesting Unicast Responses - - Sending Multicast DNS responses via multicast has the benefit that - all the other hosts on the network get to see those responses, - enabling them to keep their caches up to date and detect conflicting - responses. - - However, there are situations where all the other hosts on the - network don't need to see every response. Some examples are a laptop - computer waking from sleep, the Ethernet cable being connected to a - running machine, or a previously inactive interface being activated - through a configuration change. At the instant of wake-up or link - activation, the machine is a brand new participant on a new network. - Its Multicast DNS cache for that interface is empty, and it has no - - - -Cheshire & Krochmal Standards Track [Page 11] - -RFC 6762 Multicast DNS February 2013 - - - knowledge of its peers on that link. It may have a significant - number of questions that it wants answered right away, to discover - information about its new surroundings and present that information - to the user. 
As a new participant on the network, it has no idea - whether the exact same questions may have been asked and answered - just seconds ago. In this case, triggering a large sudden flood of - multicast responses may impose an unreasonable burden on the network. - - To avoid large floods of potentially unnecessary responses in these - cases, Multicast DNS defines the top bit in the class field of a DNS - question as the unicast-response bit. When this bit is set in a - question, it indicates that the querier is willing to accept unicast - replies in response to this specific query, as well as the usual - multicast responses. These questions requesting unicast responses - are referred to as "QU" questions, to distinguish them from the more - usual questions requesting multicast responses ("QM" questions). A - Multicast DNS querier sending its initial batch of questions - immediately on wake from sleep or interface activation SHOULD set the - unicast-response bit in those questions. - - When a question is retransmitted (as described in Section 5.2), the - unicast-response bit SHOULD NOT be set in subsequent retransmissions - of that question. Subsequent retransmissions SHOULD be usual "QM" - questions. After the first question has received its responses, the - querier should have a large Known-Answer list (Section 7.1) so that - subsequent queries should elicit few, if any, further responses. - Reverting to multicast responses as soon as possible is important - because of the benefits that multicast responses provide (see - Appendix D). In addition, the unicast-response bit SHOULD be set - only for questions that are active and ready to be sent the moment of - wake from sleep or interface activation. New questions created by - local clients afterwards should be treated as normal "QM" questions - and SHOULD NOT have the unicast-response bit set on the first - question of the series. 
- - When receiving a question with the unicast-response bit set, a - responder SHOULD usually respond with a unicast packet directed back - to the querier. However, if the responder has not multicast that - record recently (within one quarter of its TTL), then the responder - SHOULD instead multicast the response so as to keep all the peer - caches up to date, and to permit passive conflict detection. In the - case of answering a probe question (Section 8.1) with the unicast- - response bit set, the responder should always generate the requested - unicast response, but it may also send a multicast announcement if - the time since the last multicast announcement of that record is more - than a quarter of its TTL. - - - - - -Cheshire & Krochmal Standards Track [Page 12] - -RFC 6762 Multicast DNS February 2013 - - - Unicast replies are subject to all the same packet generation rules - as multicast replies, including the cache-flush bit (Section 10.2) - and (except when defending a unique name against a probe from another - host) randomized delays to reduce network collisions (Section 6). - -5.5. Direct Unicast Queries to Port 5353 - - In specialized applications there may be rare situations where it - makes sense for a Multicast DNS querier to send its query via unicast - to a specific machine. When a Multicast DNS responder receives a - query via direct unicast, it SHOULD respond as it would for "QU" - questions, as described above in Section 5.4. Since it is possible - for a unicast query to be received from a machine outside the local - link, responders SHOULD check that the source address in the query - packet matches the local subnet for that link (or, in the case of - IPv6, the source address has an on-link prefix) and silently ignore - the packet if not. - - There may be specialized situations, outside the scope of this - document, where it is intended and desirable to create a responder - that does answer queries originating outside the local link. 
Such a - responder would need to ensure that these non-local queries are - always answered via unicast back to the querier, since an answer sent - via link-local multicast would not reach a querier outside the local - link. - -6. Responding - - When a Multicast DNS responder constructs and sends a Multicast DNS - response message, the Resource Record Sections of that message must - contain only records for which that responder is explicitly - authoritative. These answers may be generated because the record - answers a question received in a Multicast DNS query message, or at - certain other times that the responder determines than an unsolicited - announcement is warranted. A Multicast DNS responder MUST NOT place - records from its cache, which have been learned from other responders - on the network, in the Resource Record Sections of outgoing response - messages. Only an authoritative source for a given record is allowed - to issue responses containing that record. - - The determination of whether a given record answers a given question - is made using the standard DNS rules: the record name must match the - question name, the record rrtype must match the question qtype unless - the qtype is "ANY" (255) or the rrtype is "CNAME" (5), and the record - rrclass must match the question qclass unless the qclass is "ANY" - (255). As with Unicast DNS, generally only DNS class 1 ("Internet") - is used, but should client software use classes other than 1, the - matching rules described above MUST be used. - - - -Cheshire & Krochmal Standards Track [Page 13] - -RFC 6762 Multicast DNS February 2013 - - - A Multicast DNS responder MUST only respond when it has a positive, - non-null response to send, or it authoritatively knows that a - particular record does not exist. For unique records, where the host - has already established sole ownership of the name, it MUST return - negative answers to queries for records that it knows not to exist. 
- For example, a host with no IPv6 address, that has claimed sole - ownership of the name "host.local." for all rrtypes, MUST respond to - AAAA queries for "host.local." by sending a negative answer - indicating that no AAAA records exist for that name. See Section - 6.1, "Negative Responses". For shared records, which are owned by no - single host, the nonexistence of a given record is ascertained by the - failure of any machine to respond to the Multicast DNS query, not by - any explicit negative response. For shared records, NXDOMAIN and - other error responses MUST NOT be sent. - - Multicast DNS responses MUST NOT contain any questions in the - Question Section. Any questions in the Question Section of a - received Multicast DNS response MUST be silently ignored. Multicast - DNS queriers receiving Multicast DNS responses do not care what - question elicited the response; they care only that the information - in the response is true and accurate. - - A Multicast DNS responder on Ethernet [IEEE.802.3] and similar shared - multiple access networks SHOULD have the capability of delaying its - responses by up to 500 ms, as described below. - - If a large number of Multicast DNS responders were all to respond - immediately to a particular query, a collision would be virtually - guaranteed. By imposing a small random delay, the number of - collisions is dramatically reduced. On a full-sized Ethernet using - the maximum cable lengths allowed and the maximum number of repeaters - allowed, an Ethernet frame is vulnerable to collisions during the - transmission of its first 256 bits. On 10 Mb/s Ethernet, this - equates to a vulnerable time window of 25.6 microseconds. On higher- - speed variants of Ethernet, the vulnerable time window is shorter. 
- - In the case where a Multicast DNS responder has good reason to - believe that it will be the only responder on the link that will send - a response (i.e., because it is able to answer every question in the - query message, and for all of those answer records it has previously - verified that the name, rrtype, and rrclass are unique on the link), - it SHOULD NOT impose any random delay before responding, and SHOULD - normally generate its response within at most 10 ms. In particular, - this applies to responding to probe queries with the unicast-response - bit set. Since receiving a probe query gives a clear indication that - some other responder is planning to start using this name in the very - near future, answering such probe queries to defend a unique record - is a high priority and needs to be done without delay. A probe query - - - -Cheshire & Krochmal Standards Track [Page 14] - -RFC 6762 Multicast DNS February 2013 - - - can be distinguished from a normal query by the fact that a probe - query contains a proposed record in the Authority Section that - answers the question in the Question Section (for more details, see - Section 8.2, "Simultaneous Probe Tiebreaking"). - - Responding without delay is appropriate for records like the address - record for a particular host name, when the host name has been - previously verified unique. Responding without delay is *not* - appropriate for things like looking up PTR records used for DNS-Based - Service Discovery [RFC6763], where a large number of responses may be - anticipated. - - In any case where there may be multiple responses, such as queries - where the answer is a member of a shared resource record set, each - responder SHOULD delay its response by a random amount of time - selected with uniform random distribution in the range 20-120 ms. 
- The reason for requiring that the delay be at least 20 ms is to - accommodate the situation where two or more query packets are sent - back-to-back, because in that case we want a responder with answers - to more than one of those queries to have the opportunity to - aggregate all of its answers into a single response message. - - In the case where the query has the TC (truncated) bit set, - indicating that subsequent Known-Answer packets will follow, - responders SHOULD delay their responses by a random amount of time - selected with uniform random distribution in the range 400-500 ms, to - allow enough time for all the Known-Answer packets to arrive, as - described in Section 7.2, "Multipacket Known-Answer Suppression". - - The source UDP port in all Multicast DNS responses MUST be 5353 (the - well-known port assigned to mDNS). Multicast DNS implementations - MUST silently ignore any Multicast DNS responses they receive where - the source UDP port is not 5353. - - The destination UDP port in all Multicast DNS responses MUST be 5353, - and the destination address MUST be the mDNS IPv4 link-local - multicast address 224.0.0.251 or its IPv6 equivalent FF02::FB, except - when generating a reply to a query that explicitly requested a - unicast response: - - * via the unicast-response bit, - * by virtue of being a legacy query (Section 6.7), or - * by virtue of being a direct unicast query. - - Except for these three specific cases, responses MUST NOT be sent via - unicast, because then the "Passive Observation of Failures" - mechanisms described in Section 10.5 would not work correctly. Other - - - - -Cheshire & Krochmal Standards Track [Page 15] - -RFC 6762 Multicast DNS February 2013 - - - benefits of sending responses via multicast are discussed in Appendix - D. A Multicast DNS querier MUST only accept unicast responses if - they answer a recently sent query (e.g., sent within the last two - seconds) that explicitly requested unicast responses. 
A Multicast - DNS querier MUST silently ignore all other unicast responses. - - To protect the network against excessive packet flooding due to - software bugs or malicious attack, a Multicast DNS responder MUST NOT - (except in the one special case of answering probe queries) multicast - a record on a given interface until at least one second has elapsed - since the last time that record was multicast on that particular - interface. A legitimate querier on the network should have seen the - previous transmission and cached it. A querier that did not receive - and cache the previous transmission will retry its request and - receive a subsequent response. In the special case of answering - probe queries, because of the limited time before the probing host - will make its decision about whether or not to use the name, a - Multicast DNS responder MUST respond quickly. In this special case - only, when responding via multicast to a probe, a Multicast DNS - responder is only required to delay its transmission as necessary to - ensure an interval of at least 250 ms since the last time the record - was multicast on that interface. - -6.1. Negative Responses - - In the early design of Multicast DNS it was assumed that explicit - negative responses would never be needed. A host can assert the - existence of the set of records that it claims to exist, and the - union of all such sets on a link is the set of Multicast DNS records - that exist on that link. Asserting the nonexistence of every record - in the complement of that set -- i.e., all possible Multicast DNS - records that could exist on this link but do not at this moment -- - was felt to be impractical and unnecessary. The nonexistence of a - record would be ascertained by a querier querying for it and failing - to receive a response from any of the hosts currently attached to the - link. - - However, operational experience showed that explicit negative - responses can sometimes be valuable. 
One such example is when a - querier is querying for a AAAA record, and the host name in question - has no associated IPv6 addresses. In this case, the responding host - knows it currently has exclusive ownership of that name, and it knows - that it currently does not have any IPv6 addresses, so an explicit - negative response is preferable to the querier having to retransmit - its query multiple times, and eventually give up with a timeout, - before it can conclude that a given AAAA record does not exist. - - - - - -Cheshire & Krochmal Standards Track [Page 16] - -RFC 6762 Multicast DNS February 2013 - - - Any time a responder receives a query for a name for which it has - verified exclusive ownership, for a type for which that name has no - records, the responder MUST (except as allowed in (a) below) respond - asserting the nonexistence of that record using a DNS NSEC record - [RFC4034]. In the case of Multicast DNS the NSEC record is not being - used for its usual DNSSEC [RFC4033] security properties, but simply - as a way of expressing which records do or do not exist with a given - name. - - On receipt of a question for a particular name, rrtype, and rrclass, - for which a responder does have one or more unique answers, the - responder MAY also include an NSEC record in the Additional Record - Section indicating the nonexistence of other rrtypes for that name - and rrclass. - - Implementers working with devices with sufficient memory and CPU - resources MAY choose to implement code to handle the full generality - of the DNS NSEC record [RFC4034], including bitmaps up to 65,536 bits - long. To facilitate use by devices with limited memory and CPU - resources, Multicast DNS queriers are only REQUIRED to be able to - parse a restricted form of the DNS NSEC record. 
All compliant - Multicast DNS implementations MUST at least correctly generate and - parse the restricted DNS NSEC record format described below: - - o The 'Next Domain Name' field contains the record's own name. - When used with name compression, this means that the 'Next - Domain Name' field always takes exactly two bytes in the - message. - - o The Type Bit Map block number is 0. - - o The Type Bit Map block length byte is a value in the range 1-32. - - o The Type Bit Map data is 1-32 bytes, as indicated by length - byte. - - Because this restricted form of the DNS NSEC record is limited to - Type Bit Map block number zero, it cannot express the existence of - rrtypes above 255. Consequently, if a Multicast DNS responder were - to have records with rrtypes above 255, it MUST NOT generate these - restricted-form NSEC records for those names, since to do so would - imply that the name has no records with rrtypes above 255, which - would be false. In such cases a Multicast DNS responder MUST either - (a) emit no NSEC record for that name, or (b) emit a full NSEC record - containing the appropriate Type Bit Map block(s) with the correct - bits set for all the record types that exist. In practice this is - not a significant limitation, since rrtypes above 255 are not - currently in widespread use. - - - -Cheshire & Krochmal Standards Track [Page 17] - -RFC 6762 Multicast DNS February 2013 - - - If a Multicast DNS implementation receives an NSEC record where the - 'Next Domain Name' field is not the record's own name, then the - implementation SHOULD ignore the 'Next Domain Name' field and process - the remainder of the NSEC record as usual. In Multicast DNS the - 'Next Domain Name' field is not currently used, but it could be used - in a future version of this protocol, which is why a Multicast DNS - implementation MUST NOT reject or ignore an NSEC record it receives - just because it finds an unexpected value in the 'Next Domain Name' - field. 
- - If a Multicast DNS implementation receives an NSEC record containing - more than one Type Bit Map, or where the Type Bit Map block number is - not zero, or where the block length is not in the range 1-32, then - the Multicast DNS implementation MAY silently ignore the entire NSEC - record. A Multicast DNS implementation MUST NOT ignore an entire - message just because that message contains one or more NSEC record(s) - that the Multicast DNS implementation cannot parse. This provision - is to allow future enhancements to the protocol to be introduced in a - backwards-compatible way that does not break compatibility with older - Multicast DNS implementations. - - To help differentiate these synthesized NSEC records (generated - programmatically on-the-fly) from conventional Unicast DNS NSEC - records (which actually exist in a signed DNS zone), the synthesized - Multicast DNS NSEC records MUST NOT have the NSEC bit set in the Type - Bit Map, whereas conventional Unicast DNS NSEC records do have the - NSEC bit set. - - The TTL of the NSEC record indicates the intended lifetime of the - negative cache entry. In general, the TTL given for an NSEC record - SHOULD be the same as the TTL that the record would have had, had it - existed. For example, the TTL for address records in Multicast DNS - is typically 120 seconds (see Section 10), so the negative cache - lifetime for an address record that does not exist should also be 120 - seconds. - - A responder MUST only generate negative responses to queries for - which it has legitimate ownership of the name, rrtype, and rrclass in - question, and can legitimately assert that no record with that name, - rrtype, and rrclass exists. 
A responder can assert that a specified - rrtype does not exist for one of its names if it knows a priori that - it has exclusive ownership of that name (e.g., names of reverse - address mapping PTR records, which are derived from IP addresses, - which should be unique on the local link) or if it previously claimed - unique ownership of that name using probe queries for rrtype "ANY". - (If it were to use probe queries for a specific rrtype, then it would - only own the name for that rrtype, and could not assert that other - rrtypes do not exist.) - - - -Cheshire & Krochmal Standards Track [Page 18] - -RFC 6762 Multicast DNS February 2013 - - - The design rationale for this mechanism for encoding negative - responses is discussed further in Appendix E. - -6.2. Responding to Address Queries - - When a Multicast DNS responder sends a Multicast DNS response message - containing its own address records, it MUST include all addresses - that are valid on the interface on which it is sending the message, - and MUST NOT include addresses that are not valid on that interface - (such as addresses that may be configured on the host's other - interfaces). For example, if an interface has both an IPv6 link- - local and an IPv6 routable address, both should be included in the - response message so that queriers receive both and can make their own - choice about which to use. This allows a querier that only has an - IPv6 link-local address to connect to the link-local address, and a - different querier that has an IPv6 routable address to connect to the - IPv6 routable address instead. - - When a Multicast DNS responder places an IPv4 or IPv6 address record - (rrtype "A" or "AAAA") into a response message, it SHOULD also place - any records of the other address type with the same name into the - additional section, if there is space in the message. 
This is to - provide fate sharing, so that all a device's addresses are delivered - atomically in a single message, to reduce the risk that packet loss - could cause a querier to receive only the IPv4 addresses and not the - IPv6 addresses, or vice versa. - - In the event that a device has only IPv4 addresses but no IPv6 - addresses, or vice versa, then the appropriate NSEC record SHOULD be - placed into the additional section, so that queriers can know with - certainty that the device has no addresses of that kind. - - Some Multicast DNS responders treat a physical interface with both - IPv4 and IPv6 addresses as a single interface with two addresses. - Other Multicast DNS responders may treat this case as logically two - interfaces (one with one or more IPv4 addresses, and the other with - one or more IPv6 addresses), but responders that operate this way - MUST NOT put the corresponding automatic NSEC records in replies they - send (i.e., a negative IPv4 assertion in their IPv6 responses, and a - negative IPv6 assertion in their IPv4 responses) because this would - cause incorrect operation in responders on the network that work the - former way. - -6.3. Responding to Multiquestion Queries - - Multicast DNS responders MUST correctly handle DNS query messages - containing more than one question, by answering any or all of the - questions to which they have answers. Unlike single-question - - - -Cheshire & Krochmal Standards Track [Page 19] - -RFC 6762 Multicast DNS February 2013 - - - queries, where responding without delay is allowed in appropriate - cases, for query messages containing more than one question, all - (non-defensive) answers SHOULD be randomly delayed in the range - 20-120 ms, or 400-500 ms if the TC (truncated) bit is set.
This is - because when a query message contains more than one question, a - Multicast DNS responder cannot generally be certain that other - responders will not also be simultaneously generating answers to - other questions in that query message. (Answers defending a name, in - response to a probe for that name, are not subject to this delay rule - and are still sent immediately.) - -6.4. Response Aggregation - - When possible, a responder SHOULD, for the sake of network - efficiency, aggregate as many responses as possible into a single - Multicast DNS response message. For example, when a responder has - several responses it plans to send, each delayed by a different - interval, then earlier responses SHOULD be delayed by up to an - additional 500 ms if that will permit them to be aggregated with - other responses scheduled to go out a little later. - -6.5. Wildcard Queries (qtype "ANY" and qclass "ANY") - - When responding to queries using qtype "ANY" (255) and/or qclass - "ANY" (255), a Multicast DNS responder MUST respond with *ALL* of its - records that match the query. This is subtly different from how - qtype "ANY" and qclass "ANY" work in Unicast DNS. - - A common misconception is that a Unicast DNS query for qtype "ANY" - will elicit a response containing all matching records. This is - incorrect. If there are any records that match the query, the - response is required only to contain at least one of them, not - necessarily all of them. - - This somewhat surprising behavior is commonly seen with caching - (i.e., "recursive") name servers. If a caching server receives a - qtype "ANY" query for which it has at least one valid answer, it is - allowed to return only those matching answers it happens to have - already in its cache, and it is not required to reconsult the - authoritative name server to check if there are any more records that - also match the qtype "ANY" query. 
- - For example, one might imagine that a query for qtype "ANY" for name - "host.example.com" would return both the IPv4 (A) and the IPv6 (AAAA) - address records for that host. In reality, what happens is that it - depends on the history of what queries have been previously received - by intervening caching servers. If a caching server has no records - for "host.example.com", then it will consult another server (usually - - - -Cheshire & Krochmal Standards Track [Page 20] - -RFC 6762 Multicast DNS February 2013 - - - the authoritative name server for the name in question), and, in that - case, it will typically return all IPv4 and IPv6 address records. - However, if some other host has recently done a query for qtype "A" - for name "host.example.com", so that the caching server already has - IPv4 address records for "host.example.com" in its cache but no IPv6 - address records, then it will return only the IPv4 address records it - already has cached, and no IPv6 address records. - - Multicast DNS does not share this property that qtype "ANY" and - qclass "ANY" queries return some undefined subset of the matching - records. When responding to queries using qtype "ANY" (255) and/or - qclass "ANY" (255), a Multicast DNS responder MUST respond with *ALL* - of its records that match the query. - -6.6. Cooperating Multicast DNS Responders - - If a Multicast DNS responder ("A") observes some other Multicast DNS - responder ("B") send a Multicast DNS response message containing a - resource record with the same name, rrtype, and rrclass as one of A's - resource records, but *different* rdata, then: - - o If A's resource record is intended to be a shared resource - record, then this is no conflict, and no action is required. - - o If A's resource record is intended to be a member of a unique - resource record set owned solely by that responder, then this is - a conflict and MUST be handled as described in Section 9, - "Conflict Resolution". 
- - If a Multicast DNS responder ("A") observes some other Multicast DNS - responder ("B") send a Multicast DNS response message containing a - resource record with the same name, rrtype, and rrclass as one of A's - resource records, and *identical* rdata, then: - - o If the TTL of B's resource record given in the message is at - least half the true TTL from A's point of view, then no action - is required. - - o If the TTL of B's resource record given in the message is less - than half the true TTL from A's point of view, then A MUST mark - its record to be announced via multicast. Queriers receiving - the record from B would use the TTL given by B and, hence, may - delete the record sooner than A expects. By sending its own - multicast response correcting the TTL, A ensures that the record - will be retained for the desired time. - - - - - - -Cheshire & Krochmal Standards Track [Page 21] - -RFC 6762 Multicast DNS February 2013 - - - These rules allow multiple Multicast DNS responders to offer the same - data on the network (perhaps for fault-tolerance reasons) without - conflicting with each other. - -6.7. Legacy Unicast Responses - - If the source UDP port in a received Multicast DNS query is not port - 5353, this indicates that the querier originating the query is a - simple resolver such as described in Section 5.1, "One-Shot Multicast - DNS Queries", which does not fully implement all of Multicast DNS. - In this case, the Multicast DNS responder MUST send a UDP response - directly back to the querier, via unicast, to the query packet's - source IP address and port. This unicast response MUST be a - conventional unicast response as would be generated by a conventional - Unicast DNS server; for example, it MUST repeat the query ID and the - question given in the query message. In addition, the cache-flush - bit described in Section 10.2, "Announcements to Flush Outdated Cache - Entries", MUST NOT be set in legacy unicast responses. 
- - The resource record TTL given in a legacy unicast response SHOULD NOT - be greater than ten seconds, even if the true TTL of the Multicast - DNS resource record is higher. This is because Multicast DNS - responders that fully participate in the protocol use the cache - coherency mechanisms described in Section 10, "Resource Record TTL - Values and Cache Coherency", to update and invalidate stale data. - Were unicast responses sent to legacy resolvers to use the same high - TTLs, these legacy resolvers, which do not implement these cache - coherency mechanisms, could retain stale cached resource record data - long after it is no longer valid. - -7. Traffic Reduction - - A variety of techniques are used to reduce the amount of traffic on - the network. - -7.1. Known-Answer Suppression - - When a Multicast DNS querier sends a query to which it already knows - some answers, it populates the Answer Section of the DNS query - message with those answers. - - Generally, this applies only to Shared records, not Unique records, - since if a Multicast DNS querier already has at least one Unique - record in its cache then it should not be expecting further different - answers to this question, since the Unique record(s) it already has - comprise the complete answer, so it has no reason to be sending the - query at all. In contrast, having some Shared records in its cache - does not necessarily imply that a Multicast DNS querier will not - - - -Cheshire & Krochmal Standards Track [Page 22] - -RFC 6762 Multicast DNS February 2013 - - - receive further answers to this query, and it is in this case that it - is beneficial to use the Known-Answer list to suppress repeated - sending of redundant answers that the querier already knows. - - A Multicast DNS responder MUST NOT answer a Multicast DNS query if - the answer it would give is already included in the Answer Section - with an RR TTL at least half the correct value. 
If the RR TTL of the - answer as given in the Answer Section is less than half of the true - RR TTL as known by the Multicast DNS responder, the responder MUST - send an answer so as to update the querier's cache before the record - becomes in danger of expiration. - - Because a Multicast DNS responder will respond if the remaining TTL - given in the Known-Answer list is less than half the true TTL, it is - superfluous for the querier to include such records in the Known- - Answer list. Therefore, a Multicast DNS querier SHOULD NOT include - records in the Known-Answer list whose remaining TTL is less than - half of their original TTL. Doing so would simply consume space in - the message without achieving the goal of suppressing responses and - would, therefore, be a pointless waste of network capacity. - - A Multicast DNS querier MUST NOT cache resource records observed in - the Known-Answer Section of other Multicast DNS queries. The Answer - Section of Multicast DNS queries is not authoritative. By placing - information in the Answer Section of a Multicast DNS query, the - querier is stating that it *believes* the information to be true. It - is not asserting that the information *is* true. Some of those - records may have come from other hosts that are no longer on the - network. Propagating that stale information to other Multicast DNS - queriers on the network would not be helpful. - -7.2. Multipacket Known-Answer Suppression - - Sometimes a Multicast DNS querier will already have too many answers - to fit in the Known-Answer Section of its query packets. In this - case, it should issue a Multicast DNS query containing a question and - as many Known-Answer records as will fit. It MUST then set the TC - (Truncated) bit in the header before sending the query. It MUST - immediately follow the packet with another query packet containing no - questions and as many more Known-Answer records as will fit. 
If - there are still too many records remaining to fit in the packet, it - again sets the TC bit and continues until all the Known-Answer - records have been sent. - - A Multicast DNS responder seeing a Multicast DNS query with the TC - bit set defers its response for a time period randomly selected in - the interval 400-500 ms. This gives the Multicast DNS querier time - to send additional Known-Answer packets before the responder - - - -Cheshire & Krochmal Standards Track [Page 23] - -RFC 6762 Multicast DNS February 2013 - - - responds. If the responder sees any of its answers listed in the - Known-Answer lists of subsequent packets from the querying host, it - MUST delete that answer from the list of answers it is planning to - give (provided that no other host on the network has also issued a - query for that record and is waiting to receive an answer). - - If the responder receives additional Known-Answer packets with the TC - bit set, it SHOULD extend the delay as necessary to ensure a pause of - 400-500 ms after the last such packet before it sends its answer. - This opens the potential risk that a continuous stream of Known- - Answer packets could, theoretically, prevent a responder from - answering indefinitely. In practice, answers are never actually - delayed significantly, and should a situation arise where significant - delays did happen, that would be a scenario where the network is so - overloaded that it would be desirable to err on the side of caution. - The consequence of delaying an answer may be that it takes a user - longer than usual to discover all the services on the local network; - in contrast, the consequence of incorrectly answering before all the - Known-Answer packets have been received would be wasted capacity - sending unnecessary answers on an already overloaded network. In - this (rare) situation, sacrificing speed to preserve reliable network - operation is the right trade-off. - -7.3. 
Duplicate Question Suppression - - If a host is planning to transmit (or retransmit) a query, and it - sees another host on the network send a query containing the same - "QM" question, and the Known-Answer Section of that query does not - contain any records that this host would not also put in its own - Known-Answer Section, then this host SHOULD treat its own query as - having been sent. When multiple queriers on the network are querying - for the same resource records, there is no need for them to all be - repeatedly asking the same question. - -7.4. Duplicate Answer Suppression - - If a host is planning to send an answer, and it sees another host on - the network send a response message containing the same answer - record, and the TTL in that record is not less than the TTL this host - would have given, then this host SHOULD treat its own answer as - having been sent, and not also send an identical answer itself. When - multiple responders on the network have the same data, there is no - need for all of them to respond. - - - - - - - - -Cheshire & Krochmal Standards Track [Page 24] - -RFC 6762 Multicast DNS February 2013 - - - The opportunity for duplicate answer suppression occurs when a host - has received a query, and is delaying its response for some pseudo- - random interval up to 500 ms, as described elsewhere in this - document, and then, before the host sends its response, it sees some - other host on the network send a response message containing the same - answer record. - - This feature is particularly useful when Multicast DNS Proxy Servers - are in use, where there could be more than one proxy on the network - giving Multicast DNS answers on behalf of some other host (e.g., - because that other host is currently asleep and is not itself - responding to queries). - -8. Probing and Announcing on Startup - - Typically a Multicast DNS responder should have, at the very least, - address records for all of its active interfaces. 
Creating and - advertising an HINFO record on each interface as well can be useful - to network administrators. - - Whenever a Multicast DNS responder starts up, wakes up from sleep, - receives an indication of a network interface "Link Change" event, or - has any other reason to believe that its network connectivity may - have changed in some relevant way, it MUST perform the two startup - steps below: Probing (Section 8.1) and Announcing (Section 8.3). - -8.1. Probing - - The first startup step is that, for all those resource records that a - Multicast DNS responder desires to be unique on the local link, it - MUST send a Multicast DNS query asking for those resource records, to - see if any of them are already in use. The primary example of this - is a host's address records, which map its unique host name to its - unique IPv4 and/or IPv6 addresses. All probe queries SHOULD be done - using the desired resource record name and class (usually class 1, - "Internet"), and query type "ANY" (255), to elicit answers for all - types of records with that name. This allows a single question to be - used in place of several questions, which is more efficient on the - network. It also allows a host to verify exclusive ownership of a - name for all rrtypes, which is desirable in most cases. It would be - confusing, for example, if one host owned the "A" record for - "myhost.local.", but a different host owned the "AAAA" record for - that name. - - - - - - - - -Cheshire & Krochmal Standards Track [Page 25] - -RFC 6762 Multicast DNS February 2013 - - - The ability to place more than one question in a Multicast DNS query - is useful here, because it can allow a host to use a single message - to probe for all of its resource records instead of needing a - separate message for each. For example, a host can simultaneously - probe for uniqueness of its "A" record and all its SRV records - [RFC6763] in the same query message. 
- - When ready to send its Multicast DNS probe packet(s) the host should - first wait for a short random delay time, uniformly distributed in - the range 0-250 ms. This random delay is to guard against the case - where several devices are powered on simultaneously, or several - devices are connected to an Ethernet hub, which is then powered on, - or some other external event happens that might cause a group of - hosts to all send synchronized probes. - - 250 ms after the first query, the host should send a second; then, - 250 ms after that, a third. If, by 250 ms after the third probe, no - conflicting Multicast DNS responses have been received, the host may - move to the next step, announcing. (Note that probing is the one - exception from the normal rule that there should be at least one - second between repetitions of the same question, and the interval - between subsequent repetitions should at least double.) - - When sending probe queries, a host MUST NOT consult its cache for - potential answers. Only conflicting Multicast DNS responses received - "live" from the network are considered valid for the purposes of - determining whether probing has succeeded or failed. - - In order to allow services to announce their presence without - unreasonable delay, the time window for probing is intentionally set - quite short. As a result of this, from the time the first probe - packet is sent, another device on the network using that name has - just 750 ms to respond to defend its name. On networks that are - slow, or busy, or both, it is possible for round-trip latency to - account for a few hundred milliseconds, and software delays in slow - devices can add additional delay. Hence, it is important that when a - device receives a probe query for a name that it is currently using, - it SHOULD generate its response to defend that name immediately and - send it as quickly as possible. 
The usual rules about random delays - before responding, to avoid sudden bursts of simultaneous answers - from different hosts, do not apply here since normally at most one - host should ever respond to a given probe question. Even when a - single DNS query message contains multiple probe questions, it would - be unusual for that message to elicit a defensive response from more - than one other host. Because of the mDNS multicast rate-limiting - - - - - - -Cheshire & Krochmal Standards Track [Page 26] - -RFC 6762 Multicast DNS February 2013 - - - rules, the probes SHOULD be sent as "QU" questions with the unicast- - response bit set, to allow a defending host to respond immediately - via unicast, instead of potentially having to wait before replying - via multicast. - - During probing, from the time the first probe packet is sent until - 250 ms after the third probe, if any conflicting Multicast DNS - response is received, then the probing host MUST defer to the - existing host, and SHOULD choose new names for some or all of its - resource records as appropriate. Apparently conflicting Multicast - DNS responses received *before* the first probe packet is sent MUST - be silently ignored (see discussion of stale probe packets in Section - 8.2, "Simultaneous Probe Tiebreaking", below). In the case of a host - probing using query type "ANY" as recommended above, any answer - containing a record with that name, of any type, MUST be considered a - conflicting response and handled accordingly. - - If fifteen conflicts occur within any ten-second period, then the - host MUST wait at least five seconds before each successive - additional probe attempt. This is to help ensure that, in the event - of software bugs or other unanticipated problems, errant hosts do not - flood the network with a continuous stream of multicast traffic. 
For - very simple devices, a valid way to comply with this requirement is - to always wait five seconds after any failed probe attempt before - trying again. - - If a responder knows by other means that its unique resource record - set name, rrtype, and rrclass cannot already be in use by any other - responder on the network, then it SHOULD skip the probing step for - that resource record set. For example, when creating the reverse - address mapping PTR records, the host can reasonably assume that no - other host will be trying to create those same PTR records, since - that would imply that the two hosts were trying to use the same IP - address, and if that were the case, the two hosts would be suffering - communication problems beyond the scope of what Multicast DNS is - designed to solve. Similarly, if a responder is acting as a proxy, - taking over from another Multicast DNS responder that has already - verified the uniqueness of the record, then the proxy SHOULD NOT - repeat the probing step for those records. - -8.2. Simultaneous Probe Tiebreaking - - The astute reader will observe that there is a race condition - inherent in the previous description. If two hosts are probing for - the same name simultaneously, neither will receive any response to - the probe, and the hosts could incorrectly conclude that they may - both proceed to use the name. To break this symmetry, each host - populates the query message's Authority Section with the record or - - - -Cheshire & Krochmal Standards Track [Page 27] - -RFC 6762 Multicast DNS February 2013 - - - records with the rdata that it would be proposing to use, should its - probing be successful. The Authority Section is being used here in a - way analogous to the way it is used as the "Update Section" in a DNS - Update message [RFC2136] [RFC3007]. 
- - When a host is probing for a group of related records with the same - name (e.g., the SRV and TXT record describing a DNS-SD service), only - a single question need be placed in the Question Section, since query - type "ANY" (255) is used, which will elicit answers for all records - with that name. However, for tiebreaking to work correctly in all - cases, the Authority Section must contain *all* the records and - proposed rdata being probed for uniqueness. - - When a host that is probing for a record sees another host issue a - query for the same record, it consults the Authority Section of that - query. If it finds any resource record(s) there which answers the - query, then it compares the data of that (those) resource record(s) - with its own tentative data. We consider first the simple case of a - host probing for a single record, receiving a simultaneous probe from - another host also probing for a single record. The two records are - compared and the lexicographically later data wins. This means that - if the host finds that its own data is lexicographically later, it - simply ignores the other host's probe. If the host finds that its - own data is lexicographically earlier, then it defers to the winning - host by waiting one second, and then begins probing for this record - again. The logic for waiting one second and then trying again is to - guard against stale probe packets on the network (possibly even stale - probe packets sent moments ago by this host itself, before some - configuration change, which may be echoed back after a short delay by - some Ethernet switches and some 802.11 base stations). If the - winning simultaneous probe was from a real other host on the network, - then after one second it will have completed its probing, and will - answer subsequent probes. 
If the apparently winning simultaneous - probe was in fact just an old stale packet on the network (maybe from - the host itself), then when it retries its probing in one second, its - probes will go unanswered, and it will successfully claim the name. - - The determination of "lexicographically later" is performed by first - comparing the record class (excluding the cache-flush bit described - in Section 10.2), then the record type, then raw comparison of the - binary content of the rdata without regard for meaning or structure. - If the record classes differ, then the numerically greater class is - considered "lexicographically later". Otherwise, if the record types - differ, then the numerically greater type is considered - "lexicographically later". If the rrtype and rrclass both match, - then the rdata is compared. - - - - - -Cheshire & Krochmal Standards Track [Page 28] - -RFC 6762 Multicast DNS February 2013 - - - In the case of resource records containing rdata that is subject to - name compression [RFC1035], the names MUST be uncompressed before - comparison. (The details of how a particular name is compressed is - an artifact of how and where the record is written into the DNS - message; it is not an intrinsic property of the resource record - itself.) - - The bytes of the raw uncompressed rdata are compared in turn, - interpreting the bytes as eight-bit UNSIGNED values, until a byte is - found whose value is greater than that of its counterpart (in which - case, the rdata whose byte has the greater value is deemed - lexicographically later) or one of the resource records runs out of - rdata (in which case, the resource record which still has remaining - data first is deemed lexicographically later). The following is an - example of a conflict: - - MyPrinter.local. A 169.254.99.200 - MyPrinter.local. 
A 169.254.200.50 - - In this case, 169.254.200.50 is lexicographically later (the third - byte, with value 200, is greater than its counterpart with value 99), - so it is deemed the winner. - - Note that it is vital that the bytes are interpreted as UNSIGNED - values in the range 0-255, or the wrong outcome may result. In the - example above, if the byte with value 200 had been incorrectly - interpreted as a signed eight-bit value, then it would be interpreted - as value -56, and the wrong address record would be deemed the - winner. - -8.2.1. Simultaneous Probe Tiebreaking for Multiple Records - - When a host is probing for a set of records with the same name, or a - message is received containing multiple tiebreaker records answering - a given probe question in the Question Section, the host's records - and the tiebreaker records from the message are each sorted into - order, and then compared pairwise, using the same comparison - technique described above, until a difference is found. - - The records are sorted using the same lexicographical order as - described above, that is, if the record classes differ, the record - with the lower class number comes first. If the classes are the same - but the rrtypes differ, the record with the lower rrtype number comes - first. If the class and rrtype match, then the rdata is compared - bytewise until a difference is found. For example, in the common - case of advertising DNS-SD services with a TXT record and an SRV - record, the TXT record comes first (the rrtype value for TXT is 16) - and the SRV record comes second (the rrtype value for SRV is 33). - - - -Cheshire & Krochmal Standards Track [Page 29] - -RFC 6762 Multicast DNS February 2013 - - - When comparing the records, if the first records match perfectly, - then the second records are compared, and so on. If either list of - records runs out of records before any difference is found, then the - list with records remaining is deemed to have won the tiebreak. 
If - both lists run out of records at the same time without any difference - being found, then this indicates that two devices are advertising - identical sets of records, as is sometimes done for fault tolerance, - and there is, in fact, no conflict. - -8.3. Announcing - - The second startup step is that the Multicast DNS responder MUST send - an unsolicited Multicast DNS response containing, in the Answer - Section, all of its newly registered resource records (both shared - records, and unique records that have completed the probing step). - If there are too many resource records to fit in a single packet, - multiple packets should be used. - - In the case of shared records (e.g., the PTR records used by DNS- - Based Service Discovery [RFC6763]), the records are simply placed as - is into the Answer Section of the DNS response. - - In the case of records that have been verified to be unique in the - previous step, they are placed into the Answer Section of the DNS - response with the most significant bit of the rrclass set to one. - The most significant bit of the rrclass for a record in the Answer - Section of a response message is the Multicast DNS cache-flush bit - and is discussed in more detail below in Section 10.2, "Announcements - to Flush Outdated Cache Entries". - - The Multicast DNS responder MUST send at least two unsolicited - responses, one second apart. To provide increased robustness against - packet loss, a responder MAY send up to eight unsolicited responses, - provided that the interval between unsolicited responses increases by - at least a factor of two with every response sent. - - A Multicast DNS responder MUST NOT send announcements in the absence - of information that its network connectivity may have changed in some - relevant way. In particular, a Multicast DNS responder MUST NOT send - regular periodic announcements as a matter of course. 
- - Whenever a Multicast DNS responder receives any Multicast DNS - response (solicited or otherwise) containing a conflicting resource - record, the conflict MUST be resolved as described in Section 9, - "Conflict Resolution". - - - - - - -Cheshire & Krochmal Standards Track [Page 30] - -RFC 6762 Multicast DNS February 2013 - - -8.4. Updating - - At any time, if the rdata of any of a host's Multicast DNS records - changes, the host MUST repeat the Announcing step described above to - update neighboring caches. For example, if any of a host's IP - addresses change, it MUST re-announce those address records. The - host does not need to repeat the Probing step because it has already - established unique ownership of that name. - - In the case of shared records, a host MUST send a "goodbye" - announcement with RR TTL zero (see Section 10.1, "Goodbye Packets") - for the old rdata, to cause it to be deleted from peer caches, before - announcing the new rdata. In the case of unique records, a host - SHOULD omit the "goodbye" announcement, since the cache-flush bit on - the newly announced records will cause old rdata to be flushed from - peer caches anyway. - - A host may update the contents of any of its records at any time, - though a host SHOULD NOT update records more frequently than ten - times per minute. Frequent rapid updates impose a burden on the - network. If a host has information to disseminate which changes more - frequently than ten times per minute, then it may be more appropriate - to design a protocol for that specific purpose. - -9. Conflict Resolution - - A conflict occurs when a Multicast DNS responder has a unique record - for which it is currently authoritative, and it receives a Multicast - DNS response message containing a record with the same name, rrtype - and rrclass, but inconsistent rdata. 
What may be considered - inconsistent is context sensitive, except that resource records with - identical rdata are never considered inconsistent, even if they - originate from different hosts. This is to permit use of proxies and - other fault-tolerance mechanisms that may cause more than one - responder to be capable of issuing identical answers on the network. - - A common example of a resource record type that is intended to be - unique, not shared between hosts, is the address record that maps a - host's name to its IP address. Should a host witness another host - announce an address record with the same name but a different IP - address, then that is considered inconsistent, and that address - record is considered to be in conflict. - - Whenever a Multicast DNS responder receives any Multicast DNS - response (solicited or otherwise) containing a conflicting resource - record in any of the Resource Record Sections, the Multicast DNS - responder MUST immediately reset its conflicted unique record to - probing state, and go through the startup steps described above in - - - -Cheshire & Krochmal Standards Track [Page 31] - -RFC 6762 Multicast DNS February 2013 - - - Section 8, "Probing and Announcing on Startup". The protocol used in - the Probing phase will determine a winner and a loser, and the loser - MUST cease using the name, and reconfigure. - - It is very important that any host receiving a resource record that - conflicts with one of its own MUST take action as described above. - In the case of two hosts using the same host name, where one has been - configured to require a unique host name and the other has not, the - one that has not been configured to require a unique host name will - not perceive any conflict, and will not take any action. By - reverting to Probing state, the host that desires a unique host name - will go through the necessary steps to ensure that a unique host name - is obtained. 
- - The recommended course of action after probing and failing is as - follows: - - 1. Programmatically change the resource record name in an attempt - to find a new name that is unique. This could be done by - adding some further identifying information (e.g., the model - name of the hardware) if it is not already present in the name, - or appending the digit "2" to the name, or incrementing a - number at the end of the name if one is already present. - - 2. Probe again, and repeat as necessary until a unique name is - found. - - 3. Once an available unique name has been determined, by probing - without receiving any conflicting response, record this newly - chosen name in persistent storage so that the device will use - the same name the next time it is power-cycled. - - 4. Display a message to the user or operator informing them of the - name change. For example: - - The name "Bob's Music" is in use by another music server on - the network. Your music collection has been renamed to - "Bob's Music (2)". If you want to change this name, use - [describe appropriate menu item or preference dialog here]. - - The details of how the user or operator is informed of the new - name depends on context. A desktop computer with a screen - might put up a dialog box. A headless server in the closet may - write a message to a log file, or use whatever mechanism - (email, SNMP trap, etc.) it uses to inform the administrator of - error conditions. On the other hand, a headless server in the - closet may not inform the user at all -- if the user cares, - - - - -Cheshire & Krochmal Standards Track [Page 32] - -RFC 6762 Multicast DNS February 2013 - - - they will notice the name has changed, and connect to the - server in the usual way (e.g., via web browser) to configure a - new name. - - 5. After one minute of probing, if the Multicast DNS responder has - been unable to find any unused name, it should log an error - message to inform the user or operator of this fact. 
This - situation should never occur in normal operation. The only - situations that would cause this to happen would be either a - deliberate denial-of-service attack, or some kind of very - obscure hardware or software bug that acts like a deliberate - denial-of-service attack. - - These considerations apply to address records (i.e., host names) and - to all resource records where uniqueness (or maintenance of some - other defined constraint) is desired. - -10. Resource Record TTL Values and Cache Coherency - - As a general rule, the recommended TTL value for Multicast DNS - resource records with a host name as the resource record's name - (e.g., A, AAAA, HINFO) or a host name contained within the resource - record's rdata (e.g., SRV, reverse mapping PTR record) SHOULD be 120 - seconds. - - The recommended TTL value for other Multicast DNS resource records is - 75 minutes. - - A querier with an active outstanding query will issue a query message - when one or more of the resource records in its cache are 80% of the - way to expiry. If the TTL on those records is 75 minutes, this - ongoing cache maintenance process yields a steady-state query rate of - one query every 60 minutes. - - Any distributed cache needs a cache coherency protocol. If Multicast - DNS resource records follow the recommendation and have a TTL of 75 - minutes, that means that stale data could persist in the system for a - little over an hour. Making the default RR TTL significantly lower - would reduce the lifetime of stale data, but would produce too much - extra traffic on the network. Various techniques are available to - minimize the impact of such stale data, outlined in the five - subsections below. - -10.1. 
Goodbye Packets - - In the case where a host knows that certain resource record data is - about to become invalid (for example, when the host is undergoing a - clean shutdown), the host SHOULD send an unsolicited Multicast DNS - - - -Cheshire & Krochmal Standards Track [Page 33] - -RFC 6762 Multicast DNS February 2013 - - - response packet, giving the same resource record name, rrtype, - rrclass, and rdata, but an RR TTL of zero. This has the effect of - updating the TTL stored in neighboring hosts' cache entries to zero, - causing that cache entry to be promptly deleted. - - Queriers receiving a Multicast DNS response with a TTL of zero SHOULD - NOT immediately delete the record from the cache, but instead record - a TTL of 1 and then delete the record one second later. In the case - of multiple Multicast DNS responders on the network described in - Section 6.6 above, if one of the responders shuts down and - incorrectly sends goodbye packets for its records, it gives the other - cooperating responders one second to send out their own response to - "rescue" the records before they expire and are deleted. - -10.2. Announcements to Flush Outdated Cache Entries - - Whenever a host has a resource record with new data, or with what - might potentially be new data (e.g., after rebooting, waking from - sleep, connecting to a new network link, or changing IP address), the - host needs to inform peers of that new data. In cases where the host - has not been continuously connected and participating on the network - link, it MUST first probe to re-verify uniqueness of its unique - records, as described above in Section 8.1, "Probing". - - Having completed the Probing step, if necessary, the host MUST then - send a series of unsolicited announcements to update cache entries in - its neighbor hosts. In these unsolicited announcements, if the - record is one that has been verified unique, the host sets the most - significant bit of the rrclass field of the resource record. 
This - bit, the cache-flush bit, tells neighboring hosts that this is not a - shared record type. Instead of merging this new record additively - into the cache in addition to any previous records with the same - name, rrtype, and rrclass, all old records with that name, rrtype, - and rrclass that were received more than one second ago are declared - invalid, and marked to expire from the cache in one second. - - The semantics of the cache-flush bit are as follows: normally when a - resource record appears in a Resource Record Section of the DNS - response it means, "This is an assertion that this information is - true". When a resource record appears in a Resource Record Section - of the DNS response with the cache-flush bit set, it means, "This is - an assertion that this information is the truth and the whole truth, - and anything you may have heard more than a second ago regarding - records of this name/rrtype/rrclass is no longer true". - - To accommodate the case where the set of records from one host - constituting a single unique RRSet is too large to fit in a single - packet, only cache records that are more than one second old are - - - -Cheshire & Krochmal Standards Track [Page 34] - -RFC 6762 Multicast DNS February 2013 - - - flushed. This allows the announcing host to generate a quick burst - of packets back-to-back on the wire containing all the members of the - RRSet. When receiving records with the cache-flush bit set, all - records older than one second are marked to be deleted one second in - the future. One second after the end of the little packet burst, any - records not represented within that packet burst will then be expired - from all peer caches. - - Any time a host sends a response packet containing some members of a - unique RRSet, it MUST send the entire RRSet, preferably in a single - packet, or if the entire RRSet will not fit in a single packet, in a - quick burst of packets sent as close together as possible. 
The host - MUST set the cache-flush bit on all members of the unique RRSet. - - Another reason for waiting one second before deleting stale records - from the cache is to accommodate bridged networks. For example, a - host's address record announcement on a wireless interface may be - bridged onto a wired Ethernet and may cause that same host's Ethernet - address records to be flushed from peer caches. The one-second delay - gives the host the chance to see its own announcement arrive on the - wired Ethernet, and immediately re-announce its Ethernet interface's - address records so that both sets remain valid and live in peer - caches. - - These rules, about when to set the cache-flush bit and about sending - the entire rrset, apply regardless of *why* the response message is - being generated. They apply to startup announcements as described in - Section 8.3, "Announcing", and to responses generated as a result of - receiving query messages. - - The cache-flush bit is only set in records in the Resource Record - Sections of Multicast DNS responses sent to UDP port 5353. - - The cache-flush bit MUST NOT be set in any resource records in a - response message sent in legacy unicast responses to UDP ports other - than 5353. - - The cache-flush bit MUST NOT be set in any resource records in the - Known-Answer list of any query message. - - The cache-flush bit MUST NOT ever be set in any shared resource - record. To do so would cause all the other shared versions of this - resource record with different rdata from different responders to be - immediately deleted from all the caches on the network. - - - - - - - -Cheshire & Krochmal Standards Track [Page 35] - -RFC 6762 Multicast DNS February 2013 - - - The cache-flush bit does *not* apply to questions listed in the - Question Section of a Multicast DNS message. The top bit of the - rrclass field in questions is used for an entirely different purpose - (see Section 5.4, "Questions Requesting Unicast Responses"). 
- - Note that the cache-flush bit is NOT part of the resource record - class. The cache-flush bit is the most significant bit of the second - 16-bit word of a resource record in a Resource Record Section of a - Multicast DNS message (the field conventionally referred to as the - rrclass field), and the actual resource record class is the least - significant fifteen bits of this field. There is no Multicast DNS - resource record class 0x8001. The value 0x8001 in the rrclass field - of a resource record in a Multicast DNS response message indicates a - resource record with class 1, with the cache-flush bit set. When - receiving a resource record with the cache-flush bit set, - implementations should take care to mask off that bit before storing - the resource record in memory, or otherwise ensure that it is given - the correct semantic interpretation. - - The reuse of the top bit of the rrclass field only applies to - conventional resource record types that are subject to caching, not - to pseudo-RRs like OPT [RFC2671], TSIG [RFC2845], TKEY [RFC2930], - SIG0 [RFC2931], etc., that pertain only to a particular transport - level message and not to any actual DNS data. Since pseudo-RRs - should never go into the Multicast DNS cache, the concept of a cache- - flush bit for these types is not applicable. In particular, the - rrclass field of an OPT record encodes the sender's UDP payload size, - and should be interpreted as a sixteen-bit length value in the range - 0-65535, not a one-bit flag and a fifteen-bit length. - -10.3. Cache Flush on Topology change - - If the hardware on a given host is able to indicate physical changes - of connectivity, then when the hardware indicates such a change, the - host should take this information into account in its Multicast DNS - cache management strategy. For example, a host may choose to - immediately flush all cache records received on a particular - interface when that cable is disconnected. 
Alternatively, a host may - choose to adjust the remaining TTL on all those records to a few - seconds so that if the cable is not reconnected quickly, those - records will expire from the cache. - - Likewise, when a host reboots, wakes from sleep, or undergoes some - other similar discontinuous state change, the cache management - strategy should take that information into account. - - - - - - -Cheshire & Krochmal Standards Track [Page 36] - -RFC 6762 Multicast DNS February 2013 - - -10.4. Cache Flush on Failure Indication - - Sometimes a cache record can be determined to be stale when a client - attempts to use the rdata it contains, and the client finds that - rdata to be incorrect. - - For example, the rdata in an address record can be determined to be - incorrect if attempts to contact that host fail, either because (for - an IPv4 address on a local subnet) ARP requests for that address go - unanswered, because (for an IPv6 address with an on-link prefix) ND - requests for that address go unanswered, or because (for an address - on a remote network) a router returns an ICMP "Host Unreachable" - error. - - The rdata in an SRV record can be determined to be incorrect if - attempts to communicate with the indicated service at the host and - port number indicated are not successful. - - The rdata in a DNS-SD PTR record can be determined to be incorrect if - attempts to look up the SRV record it references are not successful. - - The software implementing the Multicast DNS resource record cache - should provide a mechanism so that clients detecting stale rdata can - inform the cache. - - When the cache receives this hint that it should reconfirm some - record, it MUST issue two or more queries for the resource record in - dispute. If no response is received within ten seconds, then, even - though its TTL may indicate that it is not yet due to expire, that - record SHOULD be promptly flushed from the cache. 
- - The end result of this is that if a printer suffers a sudden power - failure or other abrupt disconnection from the network, its name may - continue to appear in DNS-SD browser lists displayed on users' - screens. Eventually, that entry will expire from the cache - naturally, but if a user tries to access the printer before that - happens, the failure to successfully contact the printer will trigger - the more hasty demise of its cache entries. This is a sensible - trade-off between good user experience and good network efficiency. - If we were to insist that printers should disappear from the printer - list within 30 seconds of becoming unavailable, for all failure - modes, the only way to achieve this would be for the client to poll - the printer at least every 30 seconds, or for the printer to announce - its presence at least every 30 seconds, both of which would be an - unreasonable burden on most networks. - - - - - - -Cheshire & Krochmal Standards Track [Page 37] - -RFC 6762 Multicast DNS February 2013 - - -10.5. Passive Observation Of Failures (POOF) - - A host observes the multicast queries issued by the other hosts on - the network. One of the major benefits of also sending responses - using multicast is that it allows all hosts to see the responses (or - lack thereof) to those queries. - - If a host sees queries, for which a record in its cache would be - expected to be given as an answer in a multicast response, but no - such answer is seen, then the host may take this as an indication - that the record may no longer be valid. - - After seeing two or more of these queries, and seeing no multicast - response containing the expected answer within ten seconds, then even - though its TTL may indicate that it is not yet due to expire, that - record SHOULD be flushed from the cache. The host SHOULD NOT perform - its own queries to reconfirm that the record is truly gone. 
If every - host on a large network were to do this, it would cause a lot of - unnecessary multicast traffic. If host A sends multicast queries - that remain unanswered, then there is no reason to suppose that host - B or any other host is likely to be any more successful. - - The previous section, "Cache Flush on Failure Indication", describes - a situation where a user trying to print discovers that the printer - is no longer available. By implementing the passive observation - described here, when one user fails to contact the printer, all hosts - on the network observe that failure and update their caches - accordingly. - -11. Source Address Check - - All Multicast DNS responses (including responses sent via unicast) - SHOULD be sent with IP TTL set to 255. This is recommended to - provide backwards-compatibility with older Multicast DNS queriers - (implementing a draft version of this document, posted in February - 2004) that check the IP TTL on reception to determine whether the - packet originated on the local link. These older queriers discard - all packets with TTLs other than 255. - - A host sending Multicast DNS queries to a link-local destination - address (including the 224.0.0.251 and FF02::FB link-local multicast - addresses) MUST only accept responses to that query that originate - from the local link, and silently discard any other response packets. - Without this check, it could be possible for remote rogue hosts to - send spoof answer packets (perhaps unicast to the victim host), which - the receiving machine could misinterpret as having originated on the - local link. 
- - - - -Cheshire & Krochmal Standards Track [Page 38] - -RFC 6762 Multicast DNS February 2013 - - - The test for whether a response originated on the local link is done - in two ways: - - * All responses received with a destination address in the IP - header that is the mDNS IPv4 link-local multicast address - 224.0.0.251 or the mDNS IPv6 link-local multicast address - FF02::FB are necessarily deemed to have originated on the local - link, regardless of source IP address. This is essential to - allow devices to work correctly and reliably in unusual - configurations, such as multiple logical IP subnets overlayed on - a single link, or in cases of severe misconfiguration, where - devices are physically connected to the same link, but are - currently misconfigured with completely unrelated IP addresses - and subnet masks. - - * For responses received with a unicast destination address in the - IP header, the source IP address in the packet is checked to see - if it is an address on a local subnet. An IPv4 source address - is determined to be on a local subnet if, for (one of) the - address(es) configured on the interface receiving the packet, (I - & M) == (P & M), where I and M are the interface address and - subnet mask respectively, P is the source IP address from the - packet, '&' represents the bitwise logical 'and' operation, and - '==' represents a bitwise equality test. An IPv6 source address - is determined to be on the local link if, for any of the on-link - IPv6 prefixes on the interface receiving the packet (learned via - IPv6 router advertisements or otherwise configured on the host), - the first 'n' bits of the IPv6 source address match the first - 'n' bits of the prefix address, where 'n' is the length of the - prefix being considered. - - Since queriers will ignore responses apparently originating outside - the local subnet, a responder SHOULD avoid generating responses that - it can reasonably predict will be ignored. 
This applies particularly - in the case of overlayed subnets. If a responder receives a query - addressed to the mDNS IPv4 link-local multicast address 224.0.0.251, - from a source address not apparently on the same subnet as the - responder (or, in the case of IPv6, from a source IPv6 address for - which the responder does not have any address with the same prefix on - that interface), then even if the query indicates that a unicast - response is preferred (see Section 5.4, "Questions Requesting Unicast - Responses"), the responder SHOULD elect to respond by multicast - anyway, since it can reasonably predict that a unicast response with - an apparently non-local source address will probably be ignored. - - - - - - - -Cheshire & Krochmal Standards Track [Page 39] - -RFC 6762 Multicast DNS February 2013 - - -12. Special Characteristics of Multicast DNS Domains - - Unlike conventional DNS names, names that end in ".local." have only - local significance. The same is true of names within the IPv4 link- - local reverse mapping domain "254.169.in-addr.arpa." and the IPv6 - link-local reverse mapping domains "8.e.f.ip6.arpa.", - "9.e.f.ip6.arpa.", "a.e.f.ip6.arpa.", and "b.e.f.ip6.arpa.". - - These names function primarily as protocol identifiers, rather than - as user-visible identifiers. Even though they may occasionally be - visible to end users, that is not their primary purpose. As such, - these names should be treated as opaque identifiers. In particular, - the string "local" should not be translated or localized into - different languages, much as the name "localhost" is not translated - or localized into different languages. - - Conventional Unicast DNS seeks to provide a single unified namespace, - where a given DNS query yields the same answer no matter where on the - planet it is performed or to which recursive DNS server the query is - sent. In contrast, each IP link has its own private ".local.", - "254.169.in-addr.arpa." 
and IPv6 link-local reverse mapping - namespaces, and the answer to any query for a name within those - domains depends on where that query is asked. (This characteristic - is not unique to Multicast DNS. Although the original concept of DNS - was a single global namespace, in recent years, split views, - firewalls, intranets, DNS geolocation, and the like have increasingly - meant that the answer to a given DNS query has become dependent on - the location of the querier.) - - The IPv4 name server address for a Multicast DNS domain is - 224.0.0.251. The IPv6 name server address for a Multicast DNS domain - is FF02::FB. These are multicast addresses; therefore, they identify - not a single host but a collection of hosts, working in cooperation - to maintain some reasonable facsimile of a competently managed DNS - zone. Conceptually, a Multicast DNS domain is a single DNS zone; - however, its server is implemented as a distributed process running - on a cluster of loosely cooperating CPUs rather than as a single - process running on a single CPU. - - Multicast DNS domains are not delegated from their parent domain via - use of NS (Name Server) records, and there is also no concept of - delegation of subdomains within a Multicast DNS domain. Just because - a particular host on the network may answer queries for a particular - record type with the name "example.local." does not imply anything - about whether that host will answer for the name - "child.example.local.", or indeed for other record types with the - name "example.local.". - - - - -Cheshire & Krochmal Standards Track [Page 40] - -RFC 6762 Multicast DNS February 2013 - - - There are no NS records anywhere in Multicast DNS domains. 
Instead, - the Multicast DNS domains are reserved by IANA, and there is - effectively an implicit delegation of all Multicast DNS domains to - the 224.0.0.251:5353 and [FF02::FB]:5353 multicast groups, by virtue - of client software implementing the protocol rules specified in this - document. - - Multicast DNS zones have no SOA (Start of Authority) record. A - conventional DNS zone's SOA record contains information such as the - email address of the zone administrator and the monotonically - increasing serial number of the last zone modification. There is no - single human administrator for any given Multicast DNS zone, so there - is no email address. Because the hosts managing any given Multicast - DNS zone are only loosely coordinated, there is no readily available - monotonically increasing serial number to determine whether or not - the zone contents have changed. A host holding part of the shared - zone could crash or be disconnected from the network at any time - without informing the other hosts. There is no reliable way to - provide a zone serial number that would, whenever such a crash or - disconnection occurred, immediately change to indicate that the - contents of the shared zone had changed. - - Zone transfers are not possible for any Multicast DNS zone. - -13. Enabling and Disabling Multicast DNS - - The option to fail-over to Multicast DNS for names not ending in - ".local." SHOULD be a user-configured option, and SHOULD be disabled - by default because of the possible security issues related to - unintended local resolution of apparently global names. Enabling - Multicast DNS for names not ending in ".local." may be appropriate on - a secure isolated network, or on some future network where machines - exclusively use DNSSEC for all DNS queries, and have Multicast DNS - responders capable of generating the appropriate cryptographic DNSSEC - signatures, thereby guarding against spoofing.
- - The option to look up unqualified (relative) names by appending - ".local." (or not) is controlled by whether ".local." appears (or - not) in the client's DNS search list. - - No special control is needed for enabling and disabling Multicast DNS - for names explicitly ending with ".local." as entered by the user. - The user doesn't need a way to disable Multicast DNS for names ending - with ".local.", because if the user doesn't want to use Multicast - DNS, they can achieve this by simply not using those names. If a - user *does* enter a name ending in ".local.", then we can safely - assume the user's intention was probably that it should work. Having - user configuration options that can be (intentionally or - - - -Cheshire & Krochmal Standards Track [Page 41] - -RFC 6762 Multicast DNS February 2013 - - - unintentionally) set so that local names don't work is just one more - way of frustrating the user's ability to perform the tasks they want, - perpetuating the view that, "IP networking is too complicated to - configure and too hard to use". - -14. Considerations for Multiple Interfaces - - A host SHOULD defend its dot-local host name on all active interfaces - on which it is answering Multicast DNS queries. - - In the event of a name conflict on *any* interface, a host should - configure a new host name, if it wishes to maintain uniqueness of its - host name. - - A host may choose to use the same name (or set of names) for all of - its address records on all interfaces, or it may choose to manage its - Multicast DNS interfaces independently, potentially answering to a - different name (or set of names) on different interfaces. - - Except in the case of proxying and other similar specialized uses, - addresses in IPv4 or IPv6 address records in Multicast DNS responses - MUST be valid for use on the interface on which the response is being - sent. 
- - Just as the same link-local IP address may validly be in use - simultaneously on different links by different hosts, the same link- - local host name may validly be in use simultaneously on different - links, and this is not an error. A multihomed host with connections - to two different links may be able to communicate with two different - hosts that are validly using the same name. While this kind of name - duplication should be rare, it means that a host that wants to fully - support this case needs network programming APIs that allow - applications to specify on what interface to perform a link-local - Multicast DNS query, and to discover on what interface a Multicast - DNS response was received. - - There is one other special precaution that multihomed hosts need to - take. It's common with today's laptop computers to have an Ethernet - connection and an 802.11 [IEEE.802.11] wireless connection active at - the same time. What the software on the laptop computer can't easily - tell is whether the wireless connection is in fact bridged onto the - same network segment as its Ethernet connection. If the two networks - are bridged together, then packets the host sends on one interface - will arrive on the other interface a few milliseconds later, and care - must be taken to ensure that this bridging does not cause problems: - - - - - - -Cheshire & Krochmal Standards Track [Page 42] - -RFC 6762 Multicast DNS February 2013 - - - When the host announces its host name (i.e., its address records) on - its wireless interface, those announcement records are sent with the - cache-flush bit set, so when they arrive on the Ethernet segment, - they will cause all the peers on the Ethernet to flush the host's - Ethernet address records from their caches. 
The Multicast DNS - protocol has a safeguard to protect against this situation: when - records are received with the cache-flush bit set, other records are - not deleted from peer caches immediately, but are marked for deletion - in one second. When the host sees its own wireless address records - arrive on its Ethernet interface, with the cache-flush bit set, this - one-second grace period gives the host time to respond and re- - announce its Ethernet address records, to reinstate those records in - peer caches before they are deleted. - - As described, this solves one problem, but creates another, because - when those Ethernet announcement records arrive back on the wireless - interface, the host would again respond defensively to reinstate its - wireless records, and this process would continue forever, - continuously flooding the network with traffic. The Multicast DNS - protocol has a second safeguard, to solve this problem: the cache- - flush bit does not apply to records received very recently, within - the last second. This means that when the host sees its own Ethernet - address records arrive on its wireless interface, with the cache- - flush bit set, it knows there's no need to re-announce its wireless - address records again because it already sent them less than a second - ago, and this makes them immune from deletion from peer caches. (See - Section 10.2.) - -15. Considerations for Multiple Responders on the Same Machine - - It is possible to have more than one Multicast DNS responder and/or - querier implementation coexist on the same machine, but there are - some known issues. - -15.1. Receiving Unicast Responses - - In most operating systems, incoming *multicast* packets can be - delivered to *all* open sockets bound to the right port number, - provided that the clients take the appropriate steps to allow this. 
- For this reason, all Multicast DNS implementations SHOULD use the - SO_REUSEPORT and/or SO_REUSEADDR options (or equivalent as - appropriate for the operating system in question) so they will all be - able to bind to UDP port 5353 and receive incoming multicast packets - addressed to that port. However, unlike multicast packets, incoming - unicast UDP packets are typically delivered only to the first socket - to bind to that port. This means that "QU" responses and other - packets sent via unicast will be received only by the first Multicast - DNS responder and/or querier on a system. This limitation can be - - - -Cheshire & Krochmal Standards Track [Page 43] - -RFC 6762 Multicast DNS February 2013 - - - partially mitigated if Multicast DNS implementations detect when they - are not the first to bind to port 5353, and in that case they do not - request "QU" responses. One way to detect if there is another - Multicast DNS implementation already running is to attempt binding to - port 5353 without using SO_REUSEPORT and/or SO_REUSEADDR, and if that - fails it indicates that some other socket is already bound to this - port. - -15.2. Multipacket Known-Answer lists - - When a Multicast DNS querier issues a query with too many Known - Answers to fit into a single packet, it divides the Known-Answer list - into two or more packets. Multicast DNS responders associate the - initial truncated query with its continuation packets by examining - the source IP address in each packet. Since two independent - Multicast DNS queriers running on the same machine will be sending - packets with the same source IP address, from an outside perspective - they appear to be a single entity. If both queriers happened to send - the same multipacket query at the same time, with different Known- - Answer lists, then they could each end up suppressing answers that - the other needs. - -15.3. 
Efficiency - - If different clients on a machine were each to have their own - independent Multicast DNS implementation, they would lose certain - efficiency benefits. Apart from the unnecessary code duplication, - memory usage, and CPU load, the clients wouldn't get the benefit of a - shared system-wide cache, and they would not be able to aggregate - separate queries into single packets to reduce network traffic. - -15.4. Recommendation - - Because of these issues, this document encourages implementers to - design systems with a single Multicast DNS implementation that - provides Multicast DNS services shared by all clients on that - machine, much as most operating systems today have a single TCP - implementation, which is shared between all clients on that machine. - Due to engineering constraints, there may be situations where - embedding a "user-level" Multicast DNS implementation in the client - application software is the most expedient solution, and while this - will usually work in practice, implementers should be aware of the - issues outlined in this section. - - - - - - - - -Cheshire & Krochmal Standards Track [Page 44] - -RFC 6762 Multicast DNS February 2013 - - -16. Multicast DNS Character Set - - Historically, Unicast DNS has been used with a very restricted set of - characters. Indeed, conventional DNS is usually limited to just - twenty-six letters, ten digits and the hyphen character, not even - allowing spaces or other punctuation. Attempts to remedy this for - Unicast DNS have been badly constrained by the perceived need to - accommodate old buggy legacy DNS implementations. In reality, the - DNS specification itself actually imposes no limits on what - characters may be used in names, and good DNS implementations handle - any arbitrary eight-bit data without trouble. 
"Clarifications to the - DNS Specification" [RFC2181] directly discusses the subject of - allowable character set in Section 11 ("Name syntax"), and explicitly - states that DNS names may contain arbitrary eight-bit data. However, - the old rules for ARPANET host names back in the 1980s required host - names to be just letters, digits, and hyphens [RFC1034], and since - the predominant use of DNS is to store host address records, many - have assumed that the DNS protocol itself suffers from the same - limitation. It might be accurate to say that there could be - hypothetical bad implementations that do not handle eight-bit data - correctly, but it would not be accurate to say that the protocol - doesn't allow names containing eight-bit data. - - Multicast DNS is a new protocol and doesn't (yet) have old buggy - legacy implementations to constrain the design choices. Accordingly, - it adopts the simple obvious elegant solution: all names in Multicast - DNS MUST be encoded as precomposed UTF-8 [RFC3629] "Net-Unicode" - [RFC5198] text. - - Some users of 16-bit Unicode have taken to stuffing a "zero-width - nonbreaking space" character (U+FEFF) at the start of each UTF-16 - file, as a hint to identify whether the data is big-endian or little- - endian, and calling it a "Byte Order Mark" (BOM). Since there is - only one possible byte order for UTF-8 data, a BOM is neither - necessary nor permitted. Multicast DNS names MUST NOT contain a - "Byte Order Mark". Any occurrence of the Unicode character U+FEFF at - the start or anywhere else in a Multicast DNS name MUST be - interpreted as being an actual intended part of the name, - representing (just as for any other legal unicode value) an actual - literal instance of that character (in this case a zero-width non- - breaking space character). 
- - For names that are restricted to US-ASCII [RFC0020] letters, digits, - and hyphens, the UTF-8 encoding is identical to the US-ASCII - encoding, so this is entirely compatible with existing host names. - For characters outside the US-ASCII range, UTF-8 encoding is used. - - - - - -Cheshire & Krochmal Standards Track [Page 45] - -RFC 6762 Multicast DNS February 2013 - - - Multicast DNS implementations MUST NOT use any other encodings apart - from precomposed UTF-8 (US-ASCII being considered a compatible subset - of UTF-8). The reasons for selecting UTF-8 instead of Punycode - [RFC3492] are discussed further in Appendix F. - - The simple rules for case-insensitivity in Unicast DNS [RFC1034] - [RFC1035] also apply in Multicast DNS; that is to say, in name - comparisons, the lowercase letters "a" to "z" (0x61 to 0x7A) match - their uppercase equivalents "A" to "Z" (0x41 to 0x5A). Hence, if a - querier issues a query for an address record with the name - "myprinter.local.", then a responder having an address record with - the name "MyPrinter.local." should issue a response. No other - automatic equivalences should be assumed. In particular, all UTF-8 - multibyte characters (codes 0x80 and higher) are compared by simple - binary comparison of the raw byte values. Accented characters are - *not* defined to be automatically equivalent to their unaccented - counterparts. Where automatic equivalences are desired, this may be - achieved through the use of programmatically generated CNAME records. - For example, if a responder has an address record for an accented - name Y, and a querier issues a query for a name X, where X is the - same as Y with all the accents removed, then the responder may issue - a response containing two resource records: a CNAME record "X CNAME - Y", asserting that the requested name X (unaccented) is an alias for - the true (accented) name Y, followed by the address record for Y. - -17. 
Multicast DNS Message Size - - The 1987 DNS specification [RFC1035] restricts DNS messages carried - by UDP to no more than 512 bytes (not counting the IP or UDP - headers). For UDP packets carried over the wide-area Internet in - 1987, this was appropriate. For link-local multicast packets on - today's networks, there is no reason to retain this restriction. - Given that the packets are by definition link-local, there are no - Path MTU issues to consider. - - Multicast DNS messages carried by UDP may be up to the IP MTU of the - physical interface, less the space required for the IP header (20 - bytes for IPv4; 40 bytes for IPv6) and the UDP header (8 bytes). - - In the case of a single Multicast DNS resource record that is too - large to fit in a single MTU-sized multicast response packet, a - Multicast DNS responder SHOULD send the resource record alone, in a - single IP datagram, using multiple IP fragments. Resource records - this large SHOULD be avoided, except in the very rare cases where - they really are the appropriate solution to the problem at hand. - Implementers should be aware that many simple devices do not - reassemble fragmented IP datagrams, so large resource records SHOULD - NOT be used except in specialized cases where the implementer knows - - - -Cheshire & Krochmal Standards Track [Page 46] - -RFC 6762 Multicast DNS February 2013 - - - that all receivers implement reassembly, or where the large resource - record contains optional data which is not essential for correct - operation of the client. - - A Multicast DNS packet larger than the interface MTU, which is sent - using fragments, MUST NOT contain more than one resource record. - - Even when fragmentation is used, a Multicast DNS packet, including IP - and UDP headers, MUST NOT exceed 9000 bytes. - - Note that 9000 bytes is also the maximum payload size of an Ethernet - "Jumbo" packet [Jumbo]. 
However, in practice Ethernet "Jumbo" - packets are not widely used, so it is advantageous to keep packets - under 1500 bytes whenever possible. Even on hosts that normally - handle Ethernet "Jumbo" packets and IP fragment reassembly, it is - becoming more common for these hosts to implement power-saving modes - where the main CPU goes to sleep and hands off packet reception tasks - to a more limited processor in the network interface hardware, which - may not support Ethernet "Jumbo" packets or IP fragment reassembly. - -18. Multicast DNS Message Format - - This section describes specific rules pertaining to the allowable - values for the header fields of a Multicast DNS message, and other - message format considerations. - -18.1. ID (Query Identifier) - - Multicast DNS implementations SHOULD listen for unsolicited responses - issued by hosts booting up (or waking up from sleep or otherwise - joining the network). Since these unsolicited responses may contain - a useful answer to a question for which the querier is currently - awaiting an answer, Multicast DNS implementations SHOULD examine all - received Multicast DNS response messages for useful answers, without - regard to the contents of the ID field or the Question Section. In - Multicast DNS, knowing which particular query message (if any) is - responsible for eliciting a particular response message is less - interesting than knowing whether the response message contains useful - information. - - Multicast DNS implementations MAY cache data from any or all - Multicast DNS response messages they receive, for possible future - use, provided of course that normal TTL aging is performed on these - cached resource records. - - In multicast query messages, the Query Identifier SHOULD be set to - zero on transmission. 
- - - - -Cheshire & Krochmal Standards Track [Page 47] - -RFC 6762 Multicast DNS February 2013 - - - In multicast responses, including unsolicited multicast responses, - the Query Identifier MUST be set to zero on transmission, and MUST be - ignored on reception. - - In legacy unicast response messages generated specifically in - response to a particular (unicast or multicast) query, the Query - Identifier MUST match the ID from the query message. - -18.2. QR (Query/Response) Bit - - In query messages the QR bit MUST be zero. - In response messages the QR bit MUST be one. - -18.3. OPCODE - - In both multicast query and multicast response messages, the OPCODE - MUST be zero on transmission (only standard queries are currently - supported over multicast). Multicast DNS messages received with an - OPCODE other than zero MUST be silently ignored. - -18.4. AA (Authoritative Answer) Bit - - In query messages, the Authoritative Answer bit MUST be zero on - transmission, and MUST be ignored on reception. - - In response messages for Multicast domains, the Authoritative Answer - bit MUST be set to one (not setting this bit would imply there's some - other place where "better" information may be found) and MUST be - ignored on reception. - -18.5. TC (Truncated) Bit - - In query messages, if the TC bit is set, it means that additional - Known-Answer records may be following shortly. A responder SHOULD - record this fact, and wait for those additional Known-Answer records, - before deciding whether to respond. If the TC bit is clear, it means - that the querying host has no additional Known Answers. - - In multicast response messages, the TC bit MUST be zero on - transmission, and MUST be ignored on reception. - - In legacy unicast response messages, the TC bit has the same meaning - as in conventional Unicast DNS: it means that the response was too - large to fit in a single packet, so the querier SHOULD reissue its - query using TCP in order to receive the larger response. 
- - - - - - -Cheshire & Krochmal Standards Track [Page 48] - -RFC 6762 Multicast DNS February 2013 - - -18.6. RD (Recursion Desired) Bit - - In both multicast query and multicast response messages, the - Recursion Desired bit SHOULD be zero on transmission, and MUST be - ignored on reception. - -18.7. RA (Recursion Available) Bit - - In both multicast query and multicast response messages, the - Recursion Available bit MUST be zero on transmission, and MUST be - ignored on reception. - -18.8. Z (Zero) Bit - - In both query and response messages, the Zero bit MUST be zero on - transmission, and MUST be ignored on reception. - -18.9. AD (Authentic Data) Bit - - In both multicast query and multicast response messages, the - Authentic Data bit [RFC2535] MUST be zero on transmission, and MUST - be ignored on reception. - -18.10. CD (Checking Disabled) Bit - - In both multicast query and multicast response messages, the Checking - Disabled bit [RFC2535] MUST be zero on transmission, and MUST be - ignored on reception. - -18.11. RCODE (Response Code) - - In both multicast query and multicast response messages, the Response - Code MUST be zero on transmission. Multicast DNS messages received - with non-zero Response Codes MUST be silently ignored. - -18.12. Repurposing of Top Bit of qclass in Question Section - - In the Question Section of a Multicast DNS query, the top bit of the - qclass field is used to indicate that unicast responses are preferred - for this particular question. (See Section 5.4.) - -18.13. Repurposing of Top Bit of rrclass in Resource Record Sections - - In the Resource Record Sections of a Multicast DNS response, the top - bit of the rrclass field is used to indicate that the record is a - member of a unique RRSet, and the entire RRSet has been sent together - (in the same packet, or in consecutive packets if there are too many - records to fit in a single packet). (See Section 10.2.) 
- - - -Cheshire & Krochmal Standards Track [Page 49] - -RFC 6762 Multicast DNS February 2013 - - -18.14. Name Compression - - When generating Multicast DNS messages, implementations SHOULD use - name compression wherever possible to compress the names of resource - records, by replacing some or all of the resource record name with a - compact two-byte reference to an appearance of that data somewhere - earlier in the message [RFC1035]. - - This applies not only to Multicast DNS responses, but also to - queries. When a query contains more than one question, successive - questions in the same message often contain similar names, and - consequently name compression SHOULD be used, to save bytes. In - addition, queries may also contain Known Answers in the Answer - Section, or probe tiebreaking data in the Authority Section, and - these names SHOULD similarly be compressed for network efficiency. - - In addition to compressing the *names* of resource records, names - that appear within the *rdata* of the following rrtypes SHOULD also - be compressed in all Multicast DNS messages: - - NS, CNAME, PTR, DNAME, SOA, MX, AFSDB, RT, KX, RP, PX, SRV, NSEC - - Until future IETF Standards Action [RFC5226] specifying that names in - the rdata of other types should be compressed, names that appear - within the rdata of any type not listed above MUST NOT be compressed. - - Implementations receiving Multicast DNS messages MUST correctly - decode compressed names appearing in the Question Section, and - compressed names of resource records appearing in other sections. - - In addition, implementations MUST correctly decode compressed names - appearing within the *rdata* of the rrtypes listed above. 
Where - possible, implementations SHOULD also correctly decode compressed - names appearing within the *rdata* of other rrtypes known to the - implementers at the time of implementation, because such forward- - thinking planning helps facilitate the deployment of future - implementations that may have reason to compress those rrtypes. It - is possible that no future IETF Standards Action [RFC5226] will be - created that mandates or permits the compression of rdata in new - types, but having implementations designed such that they are capable - of decompressing all known types helps keep future options open. - - One specific difference between Unicast DNS and Multicast DNS is that - Unicast DNS does not allow name compression for the target host in an - SRV record, because Unicast DNS implementations before the first SRV - specification in 1996 [RFC2052] may not decode these compressed - - - - - -Cheshire & Krochmal Standards Track [Page 50] - -RFC 6762 Multicast DNS February 2013 - - - records properly. Since all Multicast DNS implementations were - created after 1996, all Multicast DNS implementations are REQUIRED to - decode compressed SRV records correctly. - - In legacy unicast responses generated to answer legacy queries, name - compression MUST NOT be performed on SRV records. - -19. Summary of Differences between Multicast DNS and Unicast DNS - - Multicast DNS shares, as much as possible, the familiar APIs, naming - syntax, resource record types, etc., of Unicast DNS. There are, of - course, necessary differences by virtue of it using multicast, and by - virtue of it operating in a community of cooperating peers, rather - than a precisely defined hierarchy controlled by a strict chain of - formal delegations from the root. These differences are summarized - below: - - Multicast DNS... 
- * uses multicast - * uses UDP port 5353 instead of port 53 - * operates in well-defined parts of the DNS namespace - * has no SOA (Start of Authority) records - * uses UTF-8, and only UTF-8, to encode resource record names - * allows names up to 255 bytes plus a terminating zero byte - * allows name compression in rdata for SRV and other record types - * allows larger UDP packets - * allows more than one question in a query message - * defines consistent results for qtype "ANY" and qclass "ANY" queries - * uses the Answer Section of a query to list Known Answers - * uses the TC bit in a query to indicate additional Known Answers - * uses the Authority Section of a query for probe tiebreaking - * ignores the Query ID field (except for generating legacy responses) - * doesn't require the question to be repeated in the response message - * uses unsolicited responses to announce new records - * uses NSEC records to signal nonexistence of records - * defines a unicast-response bit in the rrclass of query questions - * defines a cache-flush bit in the rrclass of response records - * uses DNS RR TTL 0 to indicate that a record has been deleted - * recommends AAAA records in the additional section when responding - to rrtype "A" queries, and vice versa - * monitors queries to perform Duplicate Question Suppression - * monitors responses to perform Duplicate Answer Suppression... - * ... and Ongoing Conflict Detection - * ... and Opportunistic Caching - - - - - - - -Cheshire & Krochmal Standards Track [Page 51] - -RFC 6762 Multicast DNS February 2013 - - -20. IPv6 Considerations - - An IPv4-only host and an IPv6-only host behave as "ships that pass in - the night". Even if they are on the same Ethernet, neither is aware - of the other's traffic. For this reason, each physical link may have - *two* unrelated ".local." zones, one for IPv4 and one for IPv6. 
- Since for practical purposes, a group of IPv4-only hosts and a group - of IPv6-only hosts on the same Ethernet act as if they were on two - entirely separate Ethernet segments, it is unsurprising that their - use of the ".local." zone should occur exactly as it would if they - really were on two entirely separate Ethernet segments. - - A dual-stack (v4/v6) host can participate in both ".local." zones, - and should register its name(s) and perform its lookups both using - IPv4 and IPv6. This enables it to reach, and be reached by, both - IPv4-only and IPv6-only hosts. In effect, this acts like a - multihomed host, with one connection to the logical "IPv4 Ethernet - segment", and a connection to the logical "IPv6 Ethernet segment". - When such a host generates NSEC records, if it is using the same host - name for its IPv4 addresses and its IPv6 addresses on that network - interface, its NSEC records should indicate that the host name has - both A and AAAA records. - -21. Security Considerations - - The algorithm for detecting and resolving name conflicts is, by its - very nature, an algorithm that assumes cooperating participants. Its - purpose is to allow a group of hosts to arrive at a mutually disjoint - set of host names and other DNS resource record names, in the absence - of any central authority to coordinate this or mediate disputes. In - the absence of any higher authority to resolve disputes, the only - alternative is that the participants must work together cooperatively - to arrive at a resolution. - - In an environment where the participants are mutually antagonistic - and unwilling to cooperate, other mechanisms are appropriate, like - manually configured DNS. 
- - In an environment where there is a group of cooperating participants, - but clients cannot be sure that there are no antagonistic hosts on - the same physical link, the cooperating participants need to use - IPsec signatures and/or DNSSEC [RFC4033] signatures so that they can - distinguish Multicast DNS messages from trusted participants (which - they process as usual) from Multicast DNS messages from untrusted - participants (which they silently discard). - - - - - - -Cheshire & Krochmal Standards Track [Page 52] - -RFC 6762 Multicast DNS February 2013 - - - If DNS queries for *global* DNS names are sent to the mDNS multicast - address (during network outages which disrupt communication with the - greater Internet) it is *especially* important to use DNSSEC, because - the user may have the impression that he or she is communicating with - some authentic host, when in fact he or she is really communicating - with some local host that is merely masquerading as that name. This - is less critical for names ending with ".local.", because the user - should be aware that those names have only local significance and no - global authority is implied. - - Most computer users neglect to type the trailing dot at the end of a - fully qualified domain name, making it a relative domain name (e.g., - "www.example.com"). In the event of network outage, attempts to - positively resolve the name as entered will fail, resulting in - application of the search list, including ".local.", if present. A - malicious host could masquerade as "www.example.com." by answering - the resulting Multicast DNS query for "www.example.com.local.". To - avoid this, a host MUST NOT append the search suffix ".local.", if - present, to any relative (partially qualified) host name containing - two or more labels. Appending ".local." to single-label relative - host names is acceptable, since the user should have no expectation - that a single-label host name will resolve as is. 
However, users who - have both "example.com" and "local" in their search lists should be - aware that if they type "www" into their web browser, it may not be - immediately clear to them whether the page that appears is - "www.example.com" or "www.local". - - Multicast DNS uses UDP port 5353. On operating systems where only - privileged processes are allowed to use ports below 1024, no such - privilege is required to use port 5353. - -22. IANA Considerations - - IANA has allocated the UDP port 5353 for the Multicast DNS protocol - described in this document [SN]. - - IANA has allocated the IPv4 link-local multicast address 224.0.0.251 - for the use described in this document [MC4]. - - IANA has allocated the IPv6 multicast address set FF0X::FB (where "X" - indicates any hexadecimal digit from '1' to 'F') for the use - described in this document [MC6]. Only address FF02::FB (link-local - scope) is currently in use by deployed software, but it is possible - that in the future implementers may experiment with Multicast DNS - using larger-scoped addresses, such as FF05::FB (site-local scope) - [RFC4291]. - - - - - -Cheshire & Krochmal Standards Track [Page 53] - -RFC 6762 Multicast DNS February 2013 - - - IANA has implemented the following DNS records: - - MDNS.MCAST.NET. IN A 224.0.0.251 - 251.0.0.224.IN-ADDR.ARPA. IN PTR MDNS.MCAST.NET. - - Entries for the AAAA and corresponding PTR records have not been made - as there is not yet an RFC providing direction for the management of - the IP6.ARPA domain relating to the IPv6 multicast address space. - - The reuse of the top bit of the rrclass field in the Question and - Resource Record Sections means that Multicast DNS can only carry DNS - records with classes in the range 0-32767. Classes in the range - 32768 to 65535 are incompatible with Multicast DNS. 
IANA has noted - this fact, and if IANA receives a request to allocate a DNS class - value above 32767, IANA will make sure the requester is aware of this - implication before proceeding. This does not mean that allocations - of DNS class values above 32767 should be denied, only that they - should not be allowed until the requester has indicated that they are - aware of how this allocation will interact with Multicast DNS. - However, to date, only three DNS classes have been assigned by IANA - (1, 3, and 4), and only one (1, "Internet") is actually in widespread - use, so this issue is likely to remain a purely theoretical one. - - IANA has recorded the list of domains below as being Special-Use - Domain Names [RFC6761]: - - .local. - .254.169.in-addr.arpa. - .8.e.f.ip6.arpa. - .9.e.f.ip6.arpa. - .a.e.f.ip6.arpa. - .b.e.f.ip6.arpa. - -22.1. Domain Name Reservation Considerations - - The six domains listed above, and any names falling within those - domains (e.g., "MyPrinter.local.", "34.12.254.169.in-addr.arpa.", - "Ink-Jet._pdl-datastream._tcp.local.") are special [RFC6761] in the - following ways: - - 1. Users may use these names as they would other DNS names, - entering them anywhere that they would otherwise enter a - conventional DNS name, or a dotted decimal IPv4 address, or a - literal IPv6 address. - - Since there is no central authority responsible for assigning - dot-local names, and all devices on the local network are - equally entitled to claim any dot-local name, users SHOULD be - - - -Cheshire & Krochmal Standards Track [Page 54] - -RFC 6762 Multicast DNS February 2013 - - - aware of this and SHOULD exercise appropriate caution. 
In an - untrusted or unfamiliar network environment, users SHOULD be - aware that using a name like "www.local" may not actually - connect them to the web site they expected, and could easily - connect them to a different web page, or even a fake or spoof - of their intended web site, designed to trick them into - revealing confidential information. As always with networking, - end-to-end cryptographic security can be a useful tool. For - example, when connecting with ssh, the ssh host key - verification process will inform the user if it detects that - the identity of the entity they are communicating with has - changed since the last time they connected to that name. - - 2. Application software may use these names as they would other - similar DNS names, and is not required to recognize the names - and treat them specially. Due to the relative ease of spoofing - dot-local names, end-to-end cryptographic security remains - important when communicating across a local network, just as it - is when communicating across the global Internet. - - 3. Name resolution APIs and libraries SHOULD recognize these names - as special and SHOULD NOT send queries for these names to their - configured (unicast) caching DNS server(s). This is to avoid - unnecessary load on the root name servers and other name - servers, caused by queries for which those name servers do not - have useful non-negative answers to give, and will not ever - have useful non-negative answers to give. - - 4. Caching DNS servers SHOULD recognize these names as special and - SHOULD NOT attempt to look up NS records for them, or otherwise - query authoritative DNS servers in an attempt to resolve these - names. Instead, caching DNS servers SHOULD generate immediate - NXDOMAIN responses for all such queries they may receive (from - misbehaving name resolver libraries). This is to avoid - unnecessary load on the root name servers and other name - servers. - - 5. 
Authoritative DNS servers SHOULD NOT by default be configurable - to answer queries for these names, and, like caching DNS - servers, SHOULD generate immediate NXDOMAIN responses for all - such queries they may receive. DNS server software MAY provide - a configuration option to override this default, for testing - purposes or other specialized uses. - - 6. DNS server operators SHOULD NOT attempt to configure - authoritative DNS servers to act as authoritative for any of - these names. Configuring an authoritative DNS server to act as - authoritative for any of these names may not, in many cases, - - - -Cheshire & Krochmal Standards Track [Page 55] - -RFC 6762 Multicast DNS February 2013 - - - yield the expected result. Since name resolver libraries and - caching DNS servers SHOULD NOT send queries for those names - (see 3 and 4 above), such queries SHOULD be suppressed before - they even reach the authoritative DNS server in question, and - consequently it will not even get an opportunity to answer - them. - - 7. DNS Registrars MUST NOT allow any of these names to be - registered in the normal way to any person or entity. These - names are reserved protocol identifiers with special meaning - and fall outside the set of names available for allocation by - registrars. Attempting to allocate one of these names as if it - were a normal domain name will probably not work as desired, - for reasons 3, 4, and 6 above. - -23. Acknowledgments - - The concepts described in this document have been explored, - developed, and implemented with help from Ran Atkinson, Richard - Brown, Freek Dijkstra, Erik Guttman, Kyle McKay, Pasi Sarolahti, - Pekka Savola, Robby Simpson, Mark Townsley, Paul Vixie, Bill - Woodcock, and others. Special thanks go to Bob Bradley, Josh - Graessley, Scott Herscher, Rory McGuire, Roger Pantos, and Kiren - Sekar for their significant contributions. 
Special thanks also to - Kerry Lynn for converting the document to xml2rfc form in May 2010, - and to Area Director Ralph Droms for shepherding the document through - its final steps. - -24. References - -24.1. Normative References - - [MC4] IANA, "IPv4 Multicast Address Space Registry", - . - - [MC6] IANA, "IPv6 Multicast Address Space Registry", - . - - [RFC0020] Cerf, V., "ASCII format for network interchange", RFC 20, - October 1969. - - [RFC1034] Mockapetris, P., "Domain names - concepts and facilities", - STD 13, RFC 1034, November 1987. - - [RFC1035] Mockapetris, P., "Domain names - implementation and - specification", STD 13, RFC 1035, November 1987. - - - - -Cheshire & Krochmal Standards Track [Page 56] - -RFC 6762 Multicast DNS February 2013 - - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO - 10646", STD 63, RFC 3629, November 2003. - - [RFC4034] Arends, R., Austein, R., Larson, M., Massey, D., and S. - Rose, "Resource Records for the DNS Security Extensions", - RFC 4034, March 2005. - - [RFC5198] Klensin, J. and M. Padlipsky, "Unicode Format for Network - Interchange", RFC 5198, March 2008. - - [RFC6195] Eastlake 3rd, D., "Domain Name System (DNS) IANA - Considerations", BCP 42, RFC 6195, March 2011. - - [RFC6761] Cheshire, S. and M. Krochmal, "Special-Use Domain Names", - RFC 6761, February 2013. - - [SN] IANA, "Service Name and Transport Protocol Port Number - Registry", . - -24.2. Informative References - - [B4W] "Bonjour for Windows", - . - - [BJ] Apple Bonjour Open Source Software, - . 
- - [IEEE.802.3] - "Information technology - Telecommunications and - information exchange between systems - Local and - metropolitan area networks - Specific requirements - Part - 3: Carrier Sense Multiple Access with Collision Detection - (CMSA/CD) Access Method and Physical Layer - Specifications", IEEE Std 802.3-2008, December 2008, - . - - [IEEE.802.11] - "Information technology - Telecommunications and - information exchange between systems - Local and - metropolitan area networks - Specific requirements - Part - 11: Wireless LAN Medium Access Control (MAC) and Physical - Layer (PHY) Specifications", IEEE Std 802.11-2007, June - 2007, . - - - - -Cheshire & Krochmal Standards Track [Page 57] - -RFC 6762 Multicast DNS February 2013 - - - [Jumbo] "Ethernet Jumbo Frames", November 2009, - . - - [NIAS] Cheshire, S. "Discovering Named Instances of Abstract - Services using DNS", Work in Progress, July 2001. - - [NSD] "NsdManager | Android Developer", June 2012, - . - - [RFC2052] Gulbrandsen, A. and P. Vixie, "A DNS RR for specifying the - location of services (DNS SRV)", RFC 2052, October 1996. - - [RFC2132] Alexander, S. and R. Droms, "DHCP Options and BOOTP Vendor - Extensions", RFC 2132, March 1997. - - [RFC2136] Vixie, P., Ed., Thomson, S., Rekhter, Y., and J. Bound, - "Dynamic Updates in the Domain Name System (DNS UPDATE)", - RFC 2136, April 1997. - - [RFC2181] Elz, R. and R. Bush, "Clarifications to the DNS - Specification", RFC 2181, July 1997. - - [RFC2535] Eastlake 3rd, D., "Domain Name System Security - Extensions", RFC 2535, March 1999. - - [RFC2671] Vixie, P., "Extension Mechanisms for DNS (EDNS0)", RFC - 2671, August 1999. - - [RFC2845] Vixie, P., Gudmundsson, O., Eastlake 3rd, D., and B. - Wellington, "Secret Key Transaction Authentication for DNS - (TSIG)", RFC 2845, May 2000. - - [RFC2930] Eastlake 3rd, D., "Secret Key Establishment for DNS (TKEY - RR)", RFC 2930, September 2000. 
- - [RFC2931] Eastlake 3rd, D., "DNS Request and Transaction Signatures - ( SIG(0)s )", RFC 2931, September 2000. - - [RFC3007] Wellington, B., "Secure Domain Name System (DNS) Dynamic - Update", RFC 3007, November 2000. - - [RFC3492] Costello, A., "Punycode: A Bootstring encoding of Unicode - for Internationalized Domain Names in Applications - (IDNA)", RFC 3492, March 2003. - - - - - -Cheshire & Krochmal Standards Track [Page 58] - -RFC 6762 Multicast DNS February 2013 - - - [RFC3927] Cheshire, S., Aboba, B., and E. Guttman, "Dynamic - Configuration of IPv4 Link-Local Addresses", RFC 3927, May - 2005. - - [RFC4033] Arends, R., Austein, R., Larson, M., Massey, D., and S. - Rose, "DNS Security Introduction and Requirements", RFC - 4033, March 2005. - - [RFC4291] Hinden, R. and S. Deering, "IP Version 6 Addressing - Architecture", RFC 4291, February 2006. - - [RFC4795] Aboba, B., Thaler, D., and L. Esibov, "Link-local - Multicast Name Resolution (LLMNR)", RFC 4795, January - 2007. - - [RFC4861] Narten, T., Nordmark, E., Simpson, W., and H. Soliman, - "Neighbor Discovery for IP version 6 (IPv6)", RFC 4861, - September 2007. - - [RFC4862] Thomson, S., Narten, T., and T. Jinmei, "IPv6 Stateless - Address Autoconfiguration", RFC 4862, September 2007. - - [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an - IANA Considerations Section in RFCs", BCP 26, RFC 5226, - May 2008. - - [RFC5890] Klensin, J., "Internationalized Domain Names for - Applications (IDNA): Definitions and Document Framework", - RFC 5890, August 2010. - - [RFC6281] Cheshire, S., Zhu, Z., Wakikawa, R., and L. Zhang, - "Understanding Apple's Back to My Mac (BTMM) Service", RFC - 6281, June 2011. - - [RFC6760] Cheshire, S. and M. Krochmal, "Requirements for a Protocol - to Replace the AppleTalk Name Binding Protocol (NBP)", RFC - 6760, February 2013. - - [RFC6763] Cheshire, S. and M. Krochmal, "DNS-Based Service - Discovery", RFC 6763, February 2013. - - [Zeroconf] Cheshire, S. and D. 
Steinberg, "Zero Configuration - Networking: The Definitive Guide", O'Reilly Media, Inc., - ISBN 0-596-10100-7, December 2005. - - - - - - - -Cheshire & Krochmal Standards Track [Page 59] - -RFC 6762 Multicast DNS February 2013 - - -Appendix A. Design Rationale for Choice of UDP Port Number - - Arguments were made for and against using UDP port 53, the standard - Unicast DNS port. Some of the arguments are given below. The - arguments for using a different port were greater in number and more - compelling, so that option was ultimately selected. The UDP port - "5353" was selected for its mnemonic similarity to "53". - - Arguments for using UDP port 53: - - * This is "just DNS", so it should be the same port. - - * There is less work to be done updating old resolver libraries to do - simple Multicast DNS queries. Only the destination address need be - changed. In some cases, this can be achieved without any code - changes, just by adding the address 224.0.0.251 to a configuration - file. - - Arguments for using a different port (UDP port 5353): - - * This is not "just DNS". This is a DNS-like protocol, but - different. - - * Changing resolver library code to use a different port number is - not hard. In some cases, this can be achieved without any code - changes, just by adding the address 224.0.0.251:5353 to a - configuration file. - - * Using the same port number makes it hard to run a Multicast DNS - responder and a conventional Unicast DNS server on the same - machine. If a conventional Unicast DNS server wishes to implement - Multicast DNS as well, it can still do that, by opening two - sockets. Having two different port numbers allows this - flexibility. - - * Some VPN software hijacks all outgoing traffic to port 53 and - redirects it to a special DNS server set up to serve those VPN - clients while they are connected to the corporate network. 
It is - questionable whether this is the right thing to do, but it is - common, and redirecting link-local multicast DNS packets to a - remote server rarely produces any useful results. It does mean, - for example, that a user of such VPN software becomes unable to - access their local network printer sitting on their desk right next - to their computer. Using a different UDP port helps avoid this - particular problem. - - - - - - -Cheshire & Krochmal Standards Track [Page 60] - -RFC 6762 Multicast DNS February 2013 - - - * On many operating systems, unprivileged software may not send or - receive packets on low-numbered ports. This means that any - software sending or receiving Multicast DNS packets on port 53 - would have to run as "root", which is an undesirable security risk. - Using a higher-numbered UDP port avoids this restriction. - -Appendix B. Design Rationale for Not Using Hashed Multicast Addresses - - Some discovery protocols use a range of multicast addresses, and - determine the address to be used by a hash function of the name being - sought. Queries are sent via multicast to the address as indicated - by the hash function, and responses are returned to the querier via - unicast. Particularly in IPv6, where multicast addresses are - extremely plentiful, this approach is frequently advocated. For - example, IPv6 Neighbor Discovery [RFC4861] sends Neighbor - Solicitation messages to the "solicited-node multicast address", - which is computed as a function of the solicited IPv6 address. - - There are some disadvantages to using hashed multicast addresses like - this in a service discovery protocol: - - * When a host has a large number of records with different names, the - host may have to join a large number of multicast groups. Each - time a host joins or leaves a multicast group, this results in - Internet Group Management Protocol (IGMP) or Multicast Listener - Discovery (MLD) traffic on the network announcing this fact. 
- Joining a large number of multicast groups can place undue burden - on the Ethernet hardware, which typically supports a limited number - of multicast addresses efficiently. When this number is exceeded, - the Ethernet hardware may have to resort to receiving all - multicasts and passing them up to the host networking code for - filtering in software, thereby defeating much of the point of using - a multicast address range in the first place. Finally, many IPv6 - stacks have a fixed limit IPV6_MAX_MEMBERSHIPS, and the code simply - fails with an error if a client attempts to exceed this limit. - Common values for IPV6_MAX_MEMBERSHIPS are 20 or 31. - - * Multiple questions cannot be placed in one packet if they don't all - hash to the same multicast address. - - * Duplicate Question Suppression doesn't work if queriers are not - seeing each other's queries. - - * Duplicate Answer Suppression doesn't work if responders are not - seeing each other's responses. - - * Opportunistic Caching doesn't work. - - - - -Cheshire & Krochmal Standards Track [Page 61] - -RFC 6762 Multicast DNS February 2013 - - - * Ongoing Conflict Detection doesn't work. - -Appendix C. Design Rationale for Maximum Multicast DNS Name Length - - Multicast DNS names may be up to 255 bytes long (in the on-the-wire - message format), not counting the terminating zero byte at the end. - - "Domain Names - Implementation and Specification" [RFC1035] says: - - Various objects and parameters in the DNS have size limits. They - are listed below. Some could be easily changed, others are more - fundamental. - - labels 63 octets or less - - names 255 octets or less - - ... - - the total length of a domain name (i.e., label octets and label - length octets) is restricted to 255 octets or less. - - This text does not state whether this 255-byte limit includes the - terminating zero at the end of every name. 
- - Several factors lead us to conclude that the 255-byte limit does - *not* include the terminating zero: - - o It is common in software engineering to have size limits that are a - power of two, or a multiple of a power of two, for efficiency. For - example, an integer on a modern processor is typically 2, 4, or 8 - bytes, not 3 or 5 bytes. The number 255 is not a power of two, nor - is it to most people a particularly noteworthy number. It is - noteworthy to computer scientists for only one reason -- because it - is exactly one *less* than a power of two. When a size limit is - exactly one less than a power of two, that suggests strongly that - the one extra byte is being reserved for some specific reason -- in - this case reserved, perhaps, to leave room for a terminating zero - at the end. - - o In the case of DNS label lengths, the stated limit is 63 bytes. As - with the total name length, this limit is exactly one less than a - power of two. This label length limit also excludes the label - length byte at the start of every label. Including that extra - byte, a 63-byte label takes 64 bytes of space in memory or in a DNS - message. - - - - - -Cheshire & Krochmal Standards Track [Page 62] - -RFC 6762 Multicast DNS February 2013 - - - o It is common in software engineering for the semantic "length" of - an object to be one less than the number of bytes it takes to store - that object. For example, in C, strlen("foo") is 3, but - sizeof("foo") (which includes the terminating zero byte at the end) - is 4. - - o The text describing the total length of a domain name mentions - explicitly that label length and data octets are included, but does - not mention the terminating zero at the end. The zero byte at the - end of a domain name is not a label length. Indeed, the value zero - is chosen as the terminating marker precisely because it is not a - legal length byte value -- DNS prohibits empty labels. For - example, a name like "bad..name." 
is not a valid domain name - because it contains a zero-length label in the middle, which cannot - be expressed in a DNS message, because software parsing the message - would misinterpret a zero label-length byte as being a zero "end of - name" marker instead. - - Finally, "Clarifications to the DNS Specification" [RFC2181] offers - additional confirmation that, in the context of DNS specifications, - the stated "length" of a domain name does not include the terminating - zero byte at the end. That document refers to the root name, which - is typically written as "." and is represented in a DNS message by a - single lone zero byte (i.e., zero bytes of data plus a terminating - zero), as the "zero length full name": - - The zero length full name is defined as representing the root of - the DNS tree, and is typically written and displayed as ".". - - This wording supports the interpretation that, in a DNS context, when - talking about lengths of names, the terminating zero byte at the end - is not counted. If the root name (".") is considered to be zero - length, then to be consistent, the length (for example) of "org" has - to be 4 and the length of "ietf.org" has to be 9, as shown below: - - ------ - | 0x00 | length = 0 - ------ - - ------------------ ------ - | 0x03 | o | r | g | | 0x00 | length = 4 - ------------------ ------ - - ----------------------------------------- ------ - | 0x04 | i | e | t | f | 0x03 | o | r | g | | 0x00 | length = 9 - ----------------------------------------- ------ - - - - - -Cheshire & Krochmal Standards Track [Page 63] - -RFC 6762 Multicast DNS February 2013 - - - This means that the maximum length of a domain name, as represented - in a Multicast DNS message, up to but not including the final - terminating zero, must not exceed 255 bytes. 
- - However, many Unicast DNS implementers have read these RFCs - differently, and argue that the 255-byte limit does include the - terminating zero, and that the "Clarifications to the DNS - Specification" [RFC2181] statement that "." is the "zero length full - name" was simply a mistake. - - Hence, implementers should be aware that other Unicast DNS - implementations may limit the maximum domain name to 254 bytes plus a - terminating zero, depending on how that implementer interpreted the - DNS specifications. - - Compliant Multicast DNS implementations MUST support names up to 255 - bytes plus a terminating zero, i.e., 256 bytes total. - -Appendix D. Benefits of Multicast Responses - - Some people have argued that sending responses via multicast is - inefficient on the network. In fact, using multicast responses can - result in a net lowering of overall multicast traffic for a variety - of reasons, and provides other benefits too: - - * Opportunistic Caching. One multicast response can update the - caches on all machines on the network. If another machine later - wants to issue the same query, and it already has the answer in its - cache, it may not need to even transmit that multicast query on the - network at all. - - * Duplicate Query Suppression. When more than one machine has the - same ongoing long-lived query running, every machine does not have - to transmit its own independent query. When one machine transmits - a query, all the other hosts see the answers, so they can suppress - their own queries. - - * Passive Observation Of Failures (POOF). When a host sees a - multicast query, but does not see the corresponding multicast - response, it can use this information to promptly delete stale data - from its cache. To achieve the same level of user-interface - quality and responsiveness without multicast responses would - require lower cache lifetimes and more frequent network polling, - resulting in a higher packet rate. - - * Passive Conflict Detection. 
Just because a name has been - previously verified to be unique does not guarantee it will - continue to be so indefinitely. By allowing all Multicast DNS - - - -Cheshire & Krochmal Standards Track [Page 64] - -RFC 6762 Multicast DNS February 2013 - - - responders to constantly monitor their peers' responses, conflicts - arising out of network topology changes can be promptly detected - and resolved. If responses were not sent via multicast, some other - conflict detection mechanism would be needed, imposing its own - additional burden on the network. - - * Use on devices with constrained memory resources: When using - delayed responses to reduce network collisions, responders need to - maintain a list recording to whom each answer should be sent. The - option of multicast responses allows responders with limited - storage, which cannot store an arbitrarily long list of response - addresses, to choose to fail-over to a single multicast response in - place of multiple unicast responses, when appropriate. - - * Overlayed Subnets. In the case of overlayed subnets, multicast - responses allow a receiver to know with certainty that a response - originated on the local link, even when its source address may - apparently suggest otherwise. - - * Robustness in the face of misconfiguration: Link-local multicast - transcends virtually every conceivable network misconfiguration. - Even if you have a collection of devices where every device's IP - address, subnet mask, default gateway, and DNS server address are - all wrong, packets sent by any of those devices addressed to a - link-local multicast destination address will still be delivered to - all peers on the local link. This can be extremely helpful when - diagnosing and rectifying network problems, since it facilitates a - direct communication channel between client and server that works - without reliance on ARP, IP routing tables, etc. 
Being able to - discover what IP address a device has (or thinks it has) is - frequently a very valuable first step in diagnosing why it is - unable to communicate on the local network. - -Appendix E. Design Rationale for Encoding Negative Responses - - Alternative methods of asserting nonexistence were considered, such - as using an NXDOMAIN response, or emitting a resource record with - zero-length rdata. - - Using an NXDOMAIN response does not work well with Multicast DNS. A - Unicast DNS NXDOMAIN response applies to the entire message, but for - efficiency Multicast DNS allows (and encourages) multiple responses - in a single message. If the error code in the header were NXDOMAIN, - it would not be clear to which name(s) that error code applied. - - Asserting nonexistence by emitting a resource record with zero-length - rdata would mean that there would be no way to differentiate between - a record that doesn't exist, and a record that does exist, with zero- - - - -Cheshire & Krochmal Standards Track [Page 65] - -RFC 6762 Multicast DNS February 2013 - - - length rdata. By analogy, most file systems today allow empty files, - so a file that exists with zero bytes of data is not considered - equivalent to a filename that does not exist. - - A benefit of asserting nonexistence through NSEC records instead of - through NXDOMAIN responses is that NSEC records can be added to the - Additional Section of a DNS response to offer additional information - beyond what the querier explicitly requested. For example, in - response to an SRV query, a responder should include A record(s) - giving its IPv4 addresses in the Additional Section, and an NSEC - record indicating which other types it does or does not have for this - name. If the responder is running on a host that does not support - IPv6 (or does support IPv6 but currently has no IPv6 address on that - interface) then this NSEC record in the Additional Section will - indicate this absence of AAAA records. 
In effect, the responder is - saying, "Here's my SRV record, and here are my IPv4 addresses, and - no, I don't have any IPv6 addresses, so don't waste your time - asking". Without this information in the Additional Section, it - would take the querier an additional round-trip to perform an - additional query to ascertain that the target host has no AAAA - records. (Arguably Unicast DNS could also benefit from this ability - to express nonexistence in the Additional Section, but that is - outside the scope of this document.) - -Appendix F. Use of UTF-8 - - After many years of debate, as a result of the perceived need to - accommodate certain DNS implementations that apparently couldn't - handle any character that's not a letter, digit, or hyphen (and - apparently never would be updated to remedy this limitation), the - Unicast DNS community settled on an extremely baroque encoding called - "Punycode" [RFC3492]. Punycode is a remarkably ingenious encoding - solution, but it is complicated, hard to understand, and hard to - implement, using sophisticated techniques including insertion unsort - coding, generalized variable-length integers, and bias adaptation. - The resulting encoding is remarkably compact given the constraints, - but it's still not as good as simple straightforward UTF-8, and it's - hard even to predict whether a given input string will encode to a - Punycode string that fits within DNS's 63-byte limit, except by - simply trying the encoding and seeing whether it fits. Indeed, the - encoded size depends not only on the input characters, but on the - order they appear, so the same set of characters may or may not - encode to a legal Punycode string that fits within DNS's 63-byte - limit, depending on the order the characters appear. This is - extremely hard to present in a user interface that explains to users - why one name is allowed, but another name containing the exact same - characters is not. 
Neither Punycode nor any other of the "ASCII- - Compatible Encodings" [RFC5890] proposed for Unicast DNS may be used - - - -Cheshire & Krochmal Standards Track [Page 66] - -RFC 6762 Multicast DNS February 2013 - - - in Multicast DNS messages. Any text being represented internally in - some other representation must be converted to canonical precomposed - UTF-8 before being placed in any Multicast DNS message. - -Appendix G. Private DNS Namespaces - - The special treatment of names ending in ".local." has been - implemented in Macintosh computers since the days of Mac OS 9, and - continues today in Mac OS X and iOS. There are also implementations - for Microsoft Windows [B4W], Linux, and other platforms. - - Some network operators setting up private internal networks - ("intranets") have used unregistered top-level domains, and some may - have used the ".local" top-level domain. Using ".local" as a private - top-level domain conflicts with Multicast DNS and may cause problems - for users. Clients can be configured to send both Multicast and - Unicast DNS queries in parallel for these names, and this does allow - names to be looked up both ways, but this results in additional - network traffic and additional delays in name resolution, as well as - potentially creating user confusion when it is not clear whether any - given result was received via link-local multicast from a peer on the - same link, or from the configured unicast name server. Because of - this, we recommend against using ".local" as a private Unicast DNS - top-level domain. We do not recommend use of unregistered top-level - domains at all, but should network operators decide to do this, the - following top-level domains have been used on private internal - networks without the problems caused by trying to reuse ".local." for - this purpose: - - .intranet. - .internal. - .private. - .corp. - .home. - .lan. - -Appendix H. 
Deployment History - - In July 1997, in an email to the net-thinkers@thumper.vmeng.com - mailing list, Stuart Cheshire first proposed the idea of running the - AppleTalk Name Binding Protocol [RFC6760] over IP. As a result of - this and related IETF discussions, the IETF Zeroconf working group - was chartered September 1999. After various working group - discussions and other informal IETF discussions, several Internet- - Drafts were written that were loosely related to the general themes - of DNS and multicast, but did not address the service discovery - aspect of NBP. - - - - -Cheshire & Krochmal Standards Track [Page 67] - -RFC 6762 Multicast DNS February 2013 - - - In April 2000, Stuart Cheshire registered IPv4 multicast address - 224.0.0.251 with IANA [MC4] and began writing code to test and - develop the idea of performing NBP-like service discovery using - Multicast DNS, which was documented in a group of three Internet- - Drafts: - - o "Requirements for a Protocol to Replace the AppleTalk Name Binding - Protocol (NBP)" [RFC6760] is an overview explaining the AppleTalk - Name Binding Protocol, because many in the IETF community had - little first-hand experience using AppleTalk, and confusion in the - IETF community about what AppleTalk NBP did was causing confusion - about what would be required in an IP-based replacement. - - o "Discovering Named Instances of Abstract Services using DNS" [NIAS] - proposed a way to perform NBP-like service discovery using DNS- - compatible names and record types. - - o "Multicast DNS" (this document) specifies a way to transport those - DNS-compatible queries and responses using IP multicast, for zero- - configuration environments where no conventional Unicast DNS server - was available. - - In 2001, an update to Mac OS 9 added resolver library support for - host name lookup using Multicast DNS. If the user typed a name such - as "MyPrinter.local." 
into any piece of networking software that used - the standard Mac OS 9 name lookup APIs, then those name lookup APIs - would recognize the name as a dot-local name and query for it by - sending simple one-shot Multicast DNS queries to 224.0.0.251:5353. - This enabled the user to, for example, enter the name - "MyPrinter.local." into their web browser in order to view a - printer's status and configuration web page, or enter the name - "MyPrinter.local." into the printer setup utility to create a print - queue for printing documents on that printer. - - Multicast DNS responder software, with full service discovery, first - began shipping to end users in volume with the launch of Mac OS X - 10.2 "Jaguar" in August 2002, and network printer makers (who had - historically supported AppleTalk in their network printers and were - receptive to IP-based technologies that could offer them similar - ease-of-use) started adopting Multicast DNS shortly thereafter. - - In September 2002, Apple released the source code for the - mDNSResponder daemon as Open Source under Apple's standard Apple - Public Source License (APSL). - - Multicast DNS responder software became available for Microsoft - Windows users in June 2004 with the launch of Apple's "Rendezvous for - Windows" (now "Bonjour for Windows"), both in executable form (a - - - -Cheshire & Krochmal Standards Track [Page 68] - -RFC 6762 Multicast DNS February 2013 - - - downloadable installer for end users) and as Open Source (one of the - supported platforms within Apple's body of cross-platform code in the - publicly accessible mDNSResponder CVS source code repository) [BJ]. - - In August 2006, Apple re-licensed the cross-platform mDNSResponder - source code under the Apache License, Version 2.0. 
- - In addition to desktop and laptop computers running Mac OS X and - Microsoft Windows, Multicast DNS is now implemented in a wide range - of hardware devices, such as Apple's "AirPort" wireless base - stations, iPhone and iPad, and in home gateways from other vendors, - network printers, network cameras, TiVo DVRs, etc. - - The Open Source community has produced many independent - implementations of Multicast DNS, some in C like Apple's - mDNSResponder daemon, and others in a variety of different languages - including Java, Python, Perl, and C#/Mono. - - In January 2007, the IETF published the Informational RFC "Link-Local - Multicast Name Resolution (LLMNR)" [RFC4795], which is substantially - similar to Multicast DNS, but incompatible in some small but - important ways. In particular, the LLMNR design explicitly excluded - support for service discovery, which made it an unsuitable candidate - for a protocol to replace AppleTalk NBP [RFC6760]. - - While the original focus of Multicast DNS and DNS-Based Service - Discovery was for zero-configuration environments without a - conventional Unicast DNS server, DNS-Based Service Discovery also - works using Unicast DNS servers, using DNS Update [RFC2136] [RFC3007] - to create service discovery records and standard DNS queries to query - for them. Apple's Back to My Mac service, launched with Mac OS X - 10.5 "Leopard" in October 2007, uses DNS-Based Service Discovery over - Unicast DNS [RFC6281]. - - In June 2012, Google's Android operating system added native support - for DNS-SD and Multicast DNS with the android.net.nsd.NsdManager - class in Android 4.1 "Jelly Bean" (API Level 16) [NSD]. - - - - - - - - - - - - - - -Cheshire & Krochmal Standards Track [Page 69] - -RFC 6762 Multicast DNS February 2013 - - -Authors' Addresses - - Stuart Cheshire - Apple Inc. - 1 Infinite Loop - Cupertino, CA 95014 - USA - - Phone: +1 408 974 3207 - EMail: cheshire@apple.com - - - Marc Krochmal - Apple Inc. 
- 1 Infinite Loop - Cupertino, CA 95014 - USA - - Phone: +1 408 974 4368 - EMail: marc@apple.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Cheshire & Krochmal Standards Track [Page 70] - diff --git a/kernel/picotcp/docs/user_manual/README.md b/kernel/picotcp/docs/user_manual/README.md deleted file mode 100644 index 3f33e3c..0000000 --- a/kernel/picotcp/docs/user_manual/README.md +++ /dev/null @@ -1,20 +0,0 @@ -Downloading the manual ----------------------- - -With every git commit, we rebuild the documentation and make the [user_doc.pdf](http://162.13.84.104/user_doc.pdf) file (click to download) publicly available. - -If for some reason, you're looking for an older version of the documentation, please check out the wanted commit in git, and compile the manual as described below. - -Compiling the manual --------------------- - -The picoTCP user manual is written in LaTeX, which needs to be compiled to get a readable version. -First and foremost you need the compiler and some packages: -* sudo apt-get install texlive -* sudo apt-get install texlive-latex-extra - -Now, cd into docs/user_manual and do -* ./build.sh - -A user_doc.pdf should be generated in the current directory - diff --git a/kernel/picotcp/docs/user_manual/build.sh b/kernel/picotcp/docs/user_manual/build.sh deleted file mode 100755 index 56eaf31..0000000 --- a/kernel/picotcp/docs/user_manual/build.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -pdflatex user_doc.tex && pdflatex user_doc.tex && pdflatex user_doc.tex diff --git a/kernel/picotcp/docs/user_manual/chap_api_aodv.tex b/kernel/picotcp/docs/user_manual/chap_api_aodv.tex deleted file mode 100644 index 02171ea..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_aodv.tex +++ /dev/null @@ -1,42 +0,0 @@ -\section{Ad-hoc On-Demand Distance Vector Routing (AODV)} - - -AODV is a reactive routing protocol for mobile ad-hoc networks -(MANETs). 
Its best fit are especially ultra-low power radio networks, -or those RF topologies where sporadic traffic between a small specific set -of nodes is foreseen. -In order to create a route, one node must explicitly start the communication -towards a remote node, and the route is created ad-hoc upon the demand -for a specific network path. -AODV guarantees that the traffic generated by each node in order to create -and maintain routes is kept as low as possible. - -\subsection{pico\_aodv\_add} - -\subsubsection*{Description} -This function will add the target device to the AODV mechanism on the machine, -meaning that it will be possible to advertise and collect routing information -using Ad-hoc On-Demand Distance Vector Routing, as described in RFC3561, through the -target device. - -In order to use multiple devices in the AODV system, this function needs to be called -multiple times, once per device. - -\subsubsection*{Function prototype} -\texttt{pico\_aodv\_add(struct pico\_device *dev);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the device is successfully added. - -\subsubsection*{Example} -\begin{verbatim} - -ret = pico_aodv_add(dev); - -\end{verbatim} - diff --git a/kernel/picotcp/docs/user_manual/chap_api_dhcp_c.tex b/kernel/picotcp/docs/user_manual/chap_api_dhcp_c.tex deleted file mode 100644 index 940d157..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_dhcp_c.tex +++ /dev/null @@ -1,170 +0,0 @@ -\section{DHCP client} - -% Short description/overview of module functions -A DHCP client for obtaining a dynamic IP address. DHCP is supported on multiple interfaces. - - -\subsection{pico\_dhcp\_initiate\_negotiation} - -\subsubsection*{Description} -Initiate a DHCP negotiation. The user passes a callback-function, which will be executed on DHCP success or failure. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_dhcp_initiate_negotiation(struct pico_device *device, - void (*callback)(void *cli, int code), uint32_t *xid); -\end{verbatim} - - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{device} - the device on which a negotiation should be started. -\item \texttt{callback} - the function which is executed on success or failure. This function can be called multiple times. F.e.: initially DHCP succeeded, then the DHCP server is removed long enough from the network for the lease to expire, later the server is added again to the network. The callback is called 3 times: first with code \texttt{PICO\_DHCP\_SUCCESS}, then with \texttt{PICO\_DHCP\_RESET} and finally with \texttt{PICO\_DHCP\_SUCCESS}. The callback may be called before \texttt{pico\_dhcp\_initiate\_negotiation} has returned, f.e. in case of failure to open a socket. The callback has two parameters: -\begin{itemize}[noitemsep] -\item \texttt{cli} - the identifier of the negotiation -\item \texttt{code} - the id indicating success or failure, see further -\end{itemize} -\item \texttt{xid} - transaction id of the negotiation. Is set on \texttt{PICO\_DHCP\_SUCCESS}, 0 otherwise. -\end{itemize} - -\subsubsection*{Possible DHCP codes} -\begin{itemize}[noitemsep] -\item \texttt{PICO\_DHCP\_SUCCESS} - DHCP succeeded, the user can start using the assigned address, which can be obtained by calling \texttt{pico\_dhcp\_get\_address}. -\item \texttt{PICO\_DHCP\_ERROR} - an error occurred. DHCP is unable to recover from this error. \texttt{pico$\_$err} is set appropriately. -\item \texttt{PICO\_DHCP\_RESET} - DHCP was unable to renew its lease, and the lease expired. The user must immediately stop using the previously assigned IP, and wait for DHCP to obtain a new lease. DHCP will automatically start negotiations again. -\end{itemize} - -\subsubsection*{Return value} -Returns 0 on success, -1 otherwise. 
- -\subsubsection*{Errors} % ORGANIZE -All errors are reported through the callback-function described above. -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available % pico_socket_sendto -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\item \texttt{PICO$\_$ERR$\_$EPROTONOSUPPORT} - protocol not supported % pico_socket_open -\item \texttt{PICO$\_$ERR$\_$ENETUNREACH} - network unreachable -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument % pico_socket_bind -\item \texttt{PICO$\_$ERR$\_$ENXIO} - no such device or address -\item \texttt{PICO$\_$ERR$\_$EOPNOTSUPP} - operation not supported on socket -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -pico_dhcp_initiate_negotiation(dev, &callback_dhcpclient, &xid); -\end{verbatim} - -\subsection{pico\_dhcp\_client\_abort} - -\subsubsection*{Description} -Cancel the ongoing negotiation. To be used if the operation of obtaining an IP address from a remote DHCP server needs to be aborted, before the callback has been triggered. - -\subsubsection*{Function prototype} -\texttt{struct pico\_ip4 pico\_dhcp\_client\_abort(uint32\_t xid);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] - \item \texttt{xid} - the transaction id returned from the call \texttt{pico\_dhcp\_initiate\_negotiation}. -\end{itemize} - -\subsubsection*{Return value} -Returns 0 on success, -1 otherwise (i.e. the XID could not be found in the list of ongoing transactions). - - -\subsection{pico\_dhcp\_get\_identifier} - -\subsubsection*{Description} -Get the identifier needed to pass to all other \texttt{pico\_dhcp} functions. This function should only be called after a callback occurred with code \texttt{PICO\_DHCP\_SUCCESS}. 
- -\subsubsection*{Function prototype} -\texttt{void *pico\_dhcp\_get\_identifier(uint32\_t xid);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{xid} - transaction id of the negotiation. -\end{itemize} - -\subsubsection*{Return value} -\texttt{void *} - pointer to the identifier. - -%\subsubsection*{Errors} - -\subsubsection*{Example} -\begin{verbatim} -void *cli = pico_dhcp_get_identifier(xid); -\end{verbatim} - - -\subsection{pico\_dhcp\_get\_address} - -\subsubsection*{Description} -Get the address that was assigned through DHCP. This function should only be called after a callback occurred with code \texttt{PICO\_DHCP\_SUCCESS}. - -\subsubsection*{Function prototype} -\texttt{struct pico\_ip4 pico\_dhcp\_get\_address(void *cli);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{cli} - the identifier that was provided by the callback on \texttt{PICO\_DHCP\_SUCCESS}. -\end{itemize} - -\subsubsection*{Return value} -\texttt{struct pico\_ip4} - the address that was assigned. - -%\subsubsection*{Errors} - -\subsubsection*{Example} -\begin{verbatim} -struct pico_ip4 address = pico_dhcp_get_address(cli); -\end{verbatim} - - -\subsection{pico\_dhcp\_get\_gateway} - -\subsubsection*{Description} -Get the address of the gateway that was assigned through DHCP. This function should -only be called after a callback occurred with code \texttt{PICO\_DHCP\_SUCCESS}. - -\subsubsection*{Function prototype} -\texttt{struct pico\_ip4 pico\_dhcp\_get\_gateway(void *cli);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{cli} - the identifier that was provided by the callback on \texttt{PICO\_DHCP\_SUCCESS}. -\end{itemize} - -\subsubsection*{Return value} -\begin{itemize}[noitemsep] -\item \texttt{struct pico\_ip4} - the address of the gateway that should be used. 
-\end{itemize} - -\subsection{pico\_dhcp\_get\_nameserver} - -\subsubsection*{Description} -Get the address of the first or the second nameserver that was assigned through DHCP. -This function should only be called after a callback occurred with code \texttt{PICO\_DHCP\_SUCCESS}. - -\subsubsection*{Function prototype} -\texttt{struct pico\_ip4 pico\_dhcp\_get\_nameserver(void *cli, int index);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{cli} - the identifier that was provided by the callback on \texttt{PICO\_DHCP\_SUCCESS}. -\item \texttt{index} - the index of the domain name server received. Can be either "0" or "1". -\end{itemize} - -\subsubsection*{Return value} -\begin{itemize}[noitemsep] -\item \texttt{struct pico\_ip4} - the address of the nameserver that should be used. On failure, e.g. an invalid index was passed, it returns "255.255.255.255". If the IP address of the DNS has not been set, it may return INADDR\_ANY. -\end{itemize} - - -%\subsubsection*{Errors} - -\subsubsection*{Example} -\begin{verbatim} -struct pico_ip4 nameserver = pico_dhcp_get_nameserver(cli, 0); -\end{verbatim} diff --git a/kernel/picotcp/docs/user_manual/chap_api_dhcp_d.tex b/kernel/picotcp/docs/user_manual/chap_api_dhcp_d.tex deleted file mode 100644 index 671fff3..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_dhcp_d.tex +++ /dev/null @@ -1,71 +0,0 @@ -\section{DHCP server} - -% Short description/overview of module functions - - -\subsection{pico\_dhcp\_server\_initiate} - -\subsubsection*{Description} -This function starts a simple DHCP server.
- -\subsubsection*{Function prototype} -\texttt{int pico\_dhcp\_server\_initiate(struct pico\_dhcpd\_settings *settings);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{settings} - a pointer to a struct \texttt{pico\_dhcpd\_settings}, in which the following members matter to the user : -\begin{itemize}[noitemsep] -\item \texttt{struct pico\_ip4 my\_ip} - the IP address of the device performing DHCP. Only IPs of this network will be served. -\item \texttt{uint32\_t pool\_start} - the lowest host number that may be assigned, defaults to 100 if not provided. -\item \texttt{uint32\_t pool\_end} - the highest host number that may be assigned, defaults to 254 if not provided. -\item \texttt{uint32\_t lease\_time} - the advertised lease time in seconds, defaults to 120 if not provided. -\end{itemize} -\end{itemize} - -\subsubsection*{Return value} -On successful startup of the dhcp server, 0 is returned. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -%everything from : -%pico_socket_open -\item PICO$\_$ERR$\_$EPROTONOSUPPORT - protocol not supported -\item PICO$\_$ERR$\_$ENETUNREACH - network unreachable -%pico_socket_bind -\item PICO$\_$ERR$\_$EINVAL - invalid argument -\item PICO$\_$ERR$\_$ENXIO - no such device or address -\end{itemize} - -\subsection{pico\_dhcp\_server\_destroy} - -\subsubsection*{Description} -This function stops a previously started DHCP server on the given device. - -\subsubsection*{Function prototype} -\texttt{int pico\_dhcp\_server\_destroy(struct pico\_device *dev);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device}, to identify a previously started DHCP server that must be terminated. -\end{itemize} - -\subsubsection*{Return value} -On success, 0 is returned. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item PICO$\_$ERR$\_$ENOENT - there was no DHCP server running on the given device. -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -struct pico_dhcpd_settings s = { }; - -s.my_ip.addr = long_be(0x0a280001); /* 10.40.0.1 */ - -pico_dhcp_server_initiate(&s); -\end{verbatim} - - diff --git a/kernel/picotcp/docs/user_manual/chap_api_dns_c.tex b/kernel/picotcp/docs/user_manual/chap_api_dns_c.tex deleted file mode 100644 index 8d4a51b..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_dns_c.tex +++ /dev/null @@ -1,115 +0,0 @@ -\section{DNS client} - -% Short description/overview of module functions - - -\subsection{pico$\_$dns$\_$client$\_$nameserver} - -\subsubsection*{Description} -Function to add or remove nameservers. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_dns_client_nameserver(struct pico_ip4 *ns, uint8_t flag); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{ns} - Pointer to the address of the name server. -\item \texttt{flag} - Flag to indicate addition or removal (see further). -\end{itemize} - -\subsubsection*{Flags} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$DNS$\_$NS$\_$ADD} - to add a nameserver -\item \texttt{PICO$\_$DNS$\_$NS$\_$DEL} - to remove a nameserver -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the nameserver operation has succeeded. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_dns_client_nameserver(&addr_ns, PICO_DNS_NS_ADD); -ret = pico_dns_client_nameserver(&addr_ns, PICO_DNS_NS_DEL); -\end{verbatim} - - - -\subsection{pico$\_$dns$\_$client$\_$getaddr} - -\subsubsection*{Description} -Function to translate an url text string to an internet host address IP. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_dns_client_getaddr(const char *url, void (*callback)(char *ip, void *arg), - void *arg); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{url} - Pointer to text string containing url text string (e.g. www.google.com). -\item \texttt{callback} - Callback function, returning the internet host address IP and the provided argument. The returned string has to be freed by the user. -\item \texttt{arg} - Pointer to an identifier for the request. The pointer is returned in the callback. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the request is sent. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -int ret = pico_dns_client_getaddr("www.google.com", cb_getaddr, &identifier); -\end{verbatim} - - - -\subsection{pico$\_$dns$\_$client$\_$getname} - -\subsubsection*{Description} -Function to translate an internet host address IP to an url text string. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_dns_client_getname(const char *ip, void (*callback)(char *url, void *arg), - void *arg); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{ip} - Pointer to text string containing an internet host address IP (e.g. 8.8.4.4) -\item \texttt{callback} - Callback function, receiving the url text string. Note: the returned string has to be freed by the user. -\item \texttt{arg} - Pointer to an identifier for the request. The pointer is returned in the callback. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the request is sent. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -int ret = pico_dns_client_getname("8.8.4.4", cb_getname, &identifier); -\end{verbatim} diff --git a/kernel/picotcp/docs/user_manual/chap_api_dns_sd.tex b/kernel/picotcp/docs/user_manual/chap_api_dns_sd.tex deleted file mode 100644 index 871ec2c..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_dns_sd.tex +++ /dev/null @@ -1,85 +0,0 @@ -\section{DNS SD client} - -% Short description/overview of module functions -With this module DNS-SD services can be registered on the network to allow Zero Configuration Networking on the device. This is merely a small layer on top of Multicast DNS. - -\subsection{pico$\_$dns$\_$sd$\_$init} - -\subsubsection*{Description} -Just calls pico$\_$mdns$\_$init in its turn to initialise the mDNS-module. See 'pico$\_$mdns$\_$init' for more information. 
- - -\subsection{pico$\_$dns$\_$sd$\_$register$\_$service} - -\subsubsection*{Description} -Registers the service with a certain name and type on the network via Multicast DNS. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_dns_sd_register_service( const char *name, - const char *type, - uint16_t port, - kv_vector *txt_data, - uint16_t ttl, - void (*callback)(pico_mdns_rtree *,char *,void *), - void *arg); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{name} - Instance-name of the service. Use a descriptive name for it but not longer than 63 characters. -\item \texttt{type} - The type of the service. For all the possible service types see: \url{http://www.dns-sd.org/servicetypes.html} -\item \texttt{port} - The portnumber on which the service runs. -\item \texttt{txt$\_$data} - Pointer to vector with key-value pairs to insert into the TXT record to give additional information about the service. Use the 'PICO$\_$DNS$\_$SD$\_$KV$\_$VECTOR$\_$DECLARE'-macro to declare a vector for key-value-pairs. This vector will be destroyed when the function returns since there's no need in keeping the contents. -\item \texttt{ttl} - TTL of the service on the network before it needs to be reconfirmed. In seconds. -\item \texttt{callback} - Callback function that gets called when the service is successfully registered on the network. -\item \texttt{arg} - Argument for callback supplied by user. This can be used if you want to pass some variable into your callback function. -\end{itemize} - -\subsubsection*{Return value} -Returns 0 when the module successfully started registering the service, something else on failure. \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -PICO_DNS_SD_KV_VECTOR_DECLARE(dictionary); -pico_dns_sd_register_service("Printer 2nd Floor", "_printer._sub._http._tcp", 80, \\ -&dictionary, 240, &reg_cb, NULL); -\end{verbatim} - - -\subsection{pico$\_$dns$\_$sd$\_$kv$\_$vector$\_$add} - -\subsubsection*{Description} -Add a key-value pair to a key-value pair vector. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_dns_sd_kv_vector_add( kv_vector *vector, char *key, char *value ); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{vector} - Pointer to vector to add the pair to. Declare a key-value vector with the 'PICO$\_$DNS$\_$SD$\_$KV$\_$VECTOR$\_$DECLARE'-macro. -\item \texttt{key} - Key of the pair. Cannot be NULL. -\item \texttt{value} - Value of the pair. Can be NULL, empty ("") or filled ("value"). -\end{itemize} - -\subsubsection*{Return value} -Returns 0 when the pair is added successfully, something else on failure. \texttt{pico$\_$err} is set appropriately.
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -PICO_DNS_SD_KV_VECTOR_DECLARE(dictionary); -pico_dns_sd_kv_vector_add(&dictionary, "pass", "1234"); -pico_dns_sd_kv_vector_add(&dictionary, "color", NULL); -\end{verbatim} diff --git a/kernel/picotcp/docs/user_manual/chap_api_igmp.tex b/kernel/picotcp/docs/user_manual/chap_api_igmp.tex deleted file mode 100644 index dba1abd..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_igmp.tex +++ /dev/null @@ -1,42 +0,0 @@ -\section{IGMP} - -% Short description/overview of module functions -This module allows the user to join and leave IPv4 multicast groups. The module is based on the IGMP version 3 protocol and is backwards compatible with version 2. Version 1 is not supported. -The IGMP module is completely driven from socket calls (\ref{socket:setoption}) and none of the IGMP application interface functions should be called by the user. If, however, for any reason, it's necessary for the user to do this, the following function call is provided: - -\subsection{pico\_igmp\_state\_change} - -\subsubsection*{Description} -Change the state of the host to Non-member, Idle member or Delaying member. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_igmp_state_change(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, - uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{mcast\_link} - the link on which that multicast group should be joined. -\item \texttt{mcast\_group} - the address of the multicast group you want to join. -\item \texttt{filter\_mode} - the kind of source filtering, if applied. -\item \texttt{\_MCASTFilter} - list of multicast sources on which source filtering might be applied.
-\item \texttt{state} - the preferred new state. -\end{itemize} - -\subsubsection*{Errors} -In case of failure, -1 is returned, and the value of pico$\_$err -is set as follows: - -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - Invalid argument provided -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - Not enough space -\item \texttt{PICO$\_$ERR$\_$EPROTONOSUPPORT} - Invalid protocol (or protocol version) found on the link -\item \texttt{PICO$\_$ERR$\_$EFAULT} - Internal error -\end{itemize} - -%\subsubsection*{Example} - -%\subsubsection*{Errors} - -%\subsubsection*{Example} diff --git a/kernel/picotcp/docs/user_manual/chap_api_ipfilter.tex b/kernel/picotcp/docs/user_manual/chap_api_ipfilter.tex deleted file mode 100644 index 8faf71e..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_ipfilter.tex +++ /dev/null @@ -1,89 +0,0 @@ -\section{IP Filter} - -% Short description/overview of module functions -This module allows the user to add and remove filters. The user can filter packets based on interface, protocol, outgoing address, outgoing netmask, incoming address, incoming netmask, outgoing port, incoming port, priority and type of service. There are four types of filters: ACCEPT, PRIORITY, REJECT, DROP. When creating a PRIORITY filter, it is necessary to give a priority value in a range between '-10' and '10', with '0' as the default priority. - - -\subsection{pico$\_$ipv4$\_$filter$\_$add} - -\subsubsection*{Description} -Function to add a filter. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_filter_add(struct pico_device *dev, uint8_t proto, - struct pico_ip4 out_addr, struct pico_ip4 out_addr_netmask, - struct pico_ip4 in_addr, struct pico_ip4 in_addr_netmask, uint16_t out_port, - uint16_t in_port, int8_t priority, uint8_t tos, enum filter_action action); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - interface to be filtered -\item \texttt{proto} - protocol to be filtered -\item \texttt{out$\_$addr} - outgoing address to be filtered -\item \texttt{out$\_$addr$\_$netmask} - outgoing address-netmask to be filtered -\item \texttt{in$\_$addr} - incomming address to be filtered -\item \texttt{in$\_$addr$\_$netmask} - incomming address-netmask to be filtered -\item \texttt{out$\_$port} - outgoing port to be filtered -\item \texttt{in$\_$port} - incomming port to be filtered -\item \texttt{priority} - priority to assign on the marked packet -\item \texttt{tos} - type of service to be filtered -\item \texttt{action} - type of action for the filter: ACCEPT, PRIORITY, REJECT and DROP. ACCEPT, filters all packets selected by the filter. PRIORITY is not yet implemented. REJECT drops all packets and send an ICMP message 'Packet Filtered' (Communication Administratively Prohibited). DROP will discard the packet silently. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns the filter$\_$id from the generated filter. This id must be used when deleting the filter. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Example} -\begin{verbatim} -/* block all incoming traffic on port 5555 */ -filter_id = pico_ipv4_filter_add(NULL, 6, NULL, NULL, NULL, NULL, 0, 5555, - 0, 0, FILTER_REJECT); -\end{verbatim} - -\subsubsection*{Errors} - -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - - -\subsection{pico$\_$ipv4$\_$filter$\_$del} - -\subsubsection*{Description} -Function to delete a filter. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_filter_del(int filter_id) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{filter$\_$id} - the id of the filter you want to delete. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} - -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EPERM} - operation not permitted -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_filter_del(filter_id); -\end{verbatim} - - -%\subsubsection*{Parameters} -%\subsubsection*{Return value} -%\subsubsection*{Errors} -%\subsubsection*{Example} - diff --git a/kernel/picotcp/docs/user_manual/chap_api_ipv4.tex b/kernel/picotcp/docs/user_manual/chap_api_ipv4.tex deleted file mode 100644 index 88c64ee..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_ipv4.tex +++ /dev/null @@ -1,561 +0,0 @@ -\section{IPv4 functions} - -% Short description/overview of module functions - -\subsection{pico$\_$ipv4$\_$to$\_$string} - -\subsubsection*{Description} -Convert the internet host address IP to a string in IPv4 dotted-decimal notation. -The result is stored in the char array that ipbuf points to. The given IP address argument must be in network order (i.e. 0xC0A80101 becomes 192.168.1.1). 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_to_string(char *ipbuf, const uint32_t ip); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{ipbuf} - Char array to store the result in. -\item \texttt{ip} - Internet host address in integer notation. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the conversion was successful. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_to_string(buf, ip); -\end{verbatim} - - - -\subsection{pico$\_$string$\_$to$\_$ipv4} - -\subsubsection*{Description} -Convert the IPv4 dotted-decimal notation into binary form. The result is stored in the -\texttt{int} that IP points to. Little endian or big endian is not taken into account. -The address supplied in \texttt{ipstr} can have one of the following -forms: a.b.c.d, a.b.c or a.b. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_string_to_ipv4(const char *ipstr, uint32_t *ip); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{ipstr} - Pointer to the IP string. -\item \texttt{ip} - Int pointer to store the result in. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the conversion was successful. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_string_to_ipv4(buf, *ip); -\end{verbatim} - - -\subsection{pico$\_$ipv4$\_$valid$\_$netmask} - -\subsubsection*{Description} -Check if the provided mask if valid. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_valid_netmask(uint32_t mask); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{mask} - The netmask in integer notation. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns the netmask in CIDR notation. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_valid_netmask(netmask); -\end{verbatim} - - -\subsection{pico$\_$ipv4$\_$is$\_$unicast} - -\subsubsection*{Description} -Check if the provided address is unicast or multicast. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_is_unicast(uint32_t address); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address in integer notation. -\end{itemize} - -\subsubsection*{Return value} -Returns 1 if unicast, 0 if multicast. - -%\subsubsection*{Errors} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_is_unicast(address); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$source$\_$find} - -\subsubsection*{Description} -Find the source IP for the link associated to the specified destination. -This function will use the currently configured routing table to identify the link that would be used to transmit any traffic directed to the given IP address. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_ip4 *pico_ipv4_source_find(struct pico_ip4 *dst); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dst} - Pointer to the destination internet host address as \texttt{struct pico$\_$ip4}. 
-\end{itemize} - -\subsubsection*{Return value} -On success, this call returns the source IP as \texttt{struct pico$\_$ip4}. -If the source can not be found, \texttt{NULL} is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -src = pico_ipv4_source_find(dst); -\end{verbatim} - - - - -\subsection{pico$\_$ipv4$\_$link$\_$add } - -\subsubsection*{Description} -Add a new local device dev inteface, f.e. eth0, with IP address 'address' and netmask 'netmask'. A device may have more than one link configured, i.e. to access multiple networks on the same link. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_link_add(struct pico_device *dev, struct pico_ip4 address, -struct pico_ip4 netmask); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - Local device. -\item \texttt{address} - Pointer to the internet host address as \texttt{struct pico$\_$ip4}. -\item \texttt{netmask} - Netmask of the address. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$ENETUNREACH} - network unreachable -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_link_add(dev, address, netmask); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$link$\_$del} - -\subsubsection*{Description} -Remove the link associated to the local device that was previously configured, corresponding to the IP address 'address'. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_link_del(struct pico_device *dev, struct pico_ip4 address); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - Local device. -\item \texttt{address} - Pointer to the internet host address as \texttt{struct pico$\_$ip4}. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_link_del(dev, address); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$link$\_$find} - -\subsubsection*{Description} -Find the local device associated to the local IP address 'address'. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_device *pico_ipv4_link_find(struct pico_ip4 *address); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the internet host address as \texttt{struct pico$\_$ip4}. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns the local device. -On error, \texttt{NULL} is returned and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENXIO} - no such device or address -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -dev = pico_ipv4_link_find(address); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$nat$\_$enable} - -\subsubsection*{Description} -This function enables NAT functionality on the passed IPv4 link. -Forwarded packets from an internal network will have the public IP address from the passed link -and a translated port number for transmission on the external network. -Usual operation requires at least one additional link for the internal network, -which is used as a gateway for the internal hosts. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_nat_enable(struct pico_ipv4_link *link) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{link} - Pointer to a link \texttt{pico$\_$ipv4$\_$link}. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_nat_enable(&external_link); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$nat$\_$disable} - -\subsubsection*{Description} -Disables the NAT functionality. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_nat_disable(void); -\end{verbatim} - -%\subsubsection*{Parameters} - -\subsubsection*{Return value} -Always returns 0. - -%\subsubsection*{Errors} -%\subsubsection*{Example} - - -\subsection{pico$\_$ipv4$\_$port$\_$forward} - -\subsubsection*{Description} -This function adds or deletes a rule in the IP forwarding table. Internally in the stack, -a one-direction NAT entry will be made. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_port_forward(struct pico_ip4 pub_addr, uint16_t pub_port, -struct pico_ip4 priv_addr, uint16_t priv_port, uint8_t proto, -uint8_t persistant) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{pub$\_$addr} - Public IP address, must be identical to the address of the external link. -\item \texttt{pub$\_$port} - Public port to be translated. -\item \texttt{priv$\_$addr} - Private IP address of the host on the internal network. -\item \texttt{priv$\_$port} - Private port of the host on the internal network. -\item \texttt{proto} - Protocol identifier, see supported list below. -\item \texttt{persistant} - Option for function call: create \texttt{PICO$\_$IPV4$\_$FORWARD$\_$ADD} (= 1) \\ -or delete \texttt{PICO$\_$IPV4$\_$FORWARD$\_$DEL} (= 0). -\end{itemize} - -\subsubsection*{Protocol list} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$PROTO$\_$ICMP4} -\item \texttt{PICO$\_$PROTO$\_$TCP} -\item \texttt{PICO$\_$PROTO$\_$UDP} -\end{itemize} - -\subsubsection*{Return value} -On success, this call 0 after a succesfull entry of the forward rule. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - not succesfull, try again -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_port_forward(ext_link_addr, ext_port, host_addr, -host_port, PICO_PROTO_UDP, 1); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$route$\_$add} - -\subsubsection*{Description} -Add a new route to the destination IP address from the local device link, f.e. eth0. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_route_add(struct pico_ip4 address, struct pico_ip4 netmask, -struct pico_ip4 gateway, int metric, struct pico_ipv4_link *link); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the destination internet host address as \texttt{struct pico$\_$ip4}. -\item \texttt{netmask} - Netmask of the address. If zeroed, the call assumes the meaning of adding a default gateway. -\item \texttt{gateway} - Gateway of the address network. If zeroed, no gateway will be associated to this route, and the traffic towards the destination will be simply forwarded towards the given device. -\item \texttt{metric} - Metric for this route. -\item \texttt{link} - Local device interface. If a valid gateway is specified, this parameter is not mandatory, otherwise \texttt{NULL} can be used. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. -%if the route already exists or no memory could be allocated. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\item \texttt{PICO$\_$ERR$\_$ENETUNREACH} - network unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_route_add(dst, netmask, gateway, metric, link); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$route$\_$del} - -\subsubsection*{Description} -Remove the route to the destination IP address from the local device link, f.e. etho0. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv4_route_del(struct pico_ip4 address, struct pico_ip4 netmask, -struct pico_ip4 gateway, int metric, struct pico_ipv4_link *link); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the destination internet host address as struct \texttt{pico$\_$ip4}. -\item \texttt{netmask} - Netmask of the address. -\item \texttt{gateway} - Gateway of the address network. -\item \texttt{metric} - Metric of the route. -\item \texttt{link} - Local device interface. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the route is found. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv4_route_del(dst, netmask, gateway, metric, link); -\end{verbatim} - - - -\subsection{pico$\_$ipv4$\_$route$\_$get$\_$gateway} - -\subsubsection*{Description} -This function gets the gateway address for the given destination IP address, if set. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_ip4 pico_ipv4_route_get_gateway(struct pico_ip4 *addr) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the destination internet host address as struct \texttt{pico$\_$ip4}. -\end{itemize} - -\subsubsection*{Return value} -On success the gateway address is returned. -On error a \texttt{null} address is returned (\texttt{0.0.0.0}) and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -gateway_addr = pico_ip4 pico_ipv4_route_get_gateway(&dest_addr) -\end{verbatim} - - -\subsection{pico$\_$icmp4$\_$ping} - -\subsubsection*{Description} -This function sends out a number of ping echo requests and checks if the replies are received correctly. -The information from the replies is passed to the callback function after a succesfull reception. -If a timeout expires before a reply is received, the callback is called with the error condition. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_icmp4_ping(char *dst, int count, int interval, int timeout, int size, -void (*cb)(struct pico_icmp4_stats *)); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dst} - Pointer to the destination internet host address as text string -\item \texttt{count} - Number of pings going to be send -\item \texttt{interval} - Time between two transmissions (in ms) -\item \texttt{timeout} - Timeout period untill reply received (in ms) -\item \texttt{size} - Size of data buffer in bytes -\item \texttt{cb} - Callback for ICMP ping -\end{itemize} - -\subsubsection*{Data structure \texttt{struct pico$\_$icmp4$\_$stats}} -\begin{verbatim} -struct pico_icmp4_stats -{ - struct pico_ip4 dst; - unsigned long size; - unsigned long seq; - unsigned long time; - unsigned long ttl; - int err; -}; -\end{verbatim} -With \textbf{err} values: -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$PING$\_$ERR$\_$REPLIED} (value 0) -\item \texttt{PICO$\_$PING$\_$ERR$\_$TIMEOUT} (value 1) -\item \texttt{PICO$\_$PING$\_$ERR$\_$UNREACH} (value 2) -\item \texttt{PICO$\_$PING$\_$ERR$\_$PENDING} (value 0xFFFF) -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns a positive number, which is the 
ID of the ping operation just started. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -id = pico_icmp4_ping(dst_addr, 30, 10, 100, 1000, callback); -\end{verbatim} - - -\subsection{pico$\_$icmp4$\_$ping$\_$abort} - -\subsubsection*{Description} -This function aborts an ongoing ping operation that has previously started using pico$\_$icmp4$\_$ping(). - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_icmp4_ping_abort(int id); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] - \item \texttt{id} - identification number for the ping operation. This has been returned by \texttt{pico$\_$icmp4$\_$ping()} and it is intended to distinguish the operation to be cancelled. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_icmp4_ping_abort(id); -\end{verbatim} - diff --git a/kernel/picotcp/docs/user_manual/chap_api_ipv6.tex b/kernel/picotcp/docs/user_manual/chap_api_ipv6.tex deleted file mode 100644 index 49868b1..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_ipv6.tex +++ /dev/null @@ -1,533 +0,0 @@ -\section{IPv6 functions} - -% Short description/overview of module functions - -\subsection{pico$\_$ipv6$\_$to$\_$string} - -\subsubsection*{Description} -Convert the internet host address IP to a string in IPv6 colon:hex notation. -The result is stored in the char array that ipbuf points to. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_to_string(char *ipbuf, const uint8_t ip[PICO_SIZE_IP6]); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{ipbuf} - Char array to store the result in. -\item \texttt{ip} - Internet host address in unsigned byte array notation of lenght 16. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the conversion was successful. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_to_string(buf, ip); -\end{verbatim} - -\subsection{pico$\_$string$\_$to$\_$ipv6} - -\subsubsection*{Description} -Convert the IPv6 colon:hex notation into binary form. The result is stored in the -\texttt{int} that IP points to. -The address supplied in \texttt{ipstr} can have one of the default forms for IPv6 address -description, including at most one abbreviation skipping zeroed fields using "::" - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_string_to_ipv6(const char *ipstr, uint8_t *ip); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{ipstr} - Pointer to the IP string. -\item \texttt{ip} - Int pointer to store the result in. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the conversion was successful. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_string_to_ipv6("fe80::1", *ip); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$unicast} - -\subsubsection*{Description} -Check if the provided address is unicast or multicast. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_unicast(struct pico_ip6 *a); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} - -\subsubsection*{Return value} -Returns 1 if unicast, 0 if multicast. - -%\subsubsection*{Errors} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_unicast(address); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$multicast} -\subsubsection*{Description} -Check if the provided address is a valid Internet multicast address, i.e. it belongs to the range ff00::/8. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_multicast(struct pico_ip6 *a); -\end{verbatim} -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} -\subsubsection*{Return value} -Returns 1 if a multicast Internet address has been provided. -%\subsubsection*{Errors} -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_multicast(address); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$global} - -\subsubsection*{Description} -Check if the provided address is a valid Internet global address, i.e. it belongs to the range 2000::/3. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_global(struct pico_ip6 *a); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} - -\subsubsection*{Return value} -Returns 1 if a global Internet address has been provided. - -%\subsubsection*{Errors} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_global(address); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$uniquelocal} - -\subsubsection*{Description} -Check if the provided address is a valid Internet uniquelocal address, i.e. it belongs to the range fc00::/7. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_uniquelocal(struct pico_ip6 *a); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} - -\subsubsection*{Return value} -Returns 1 if a uniquelocal Internet address has been provided. - -%\subsubsection*{Errors} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_uniquelocal(address); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$sitelocal} -\subsubsection*{Description} -Check if the provided address is a valid Internet sitelocal address, i.e. it belongs to the range fec0::/10. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_sitelocal(struct pico_ip6 *a); -\end{verbatim} -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} -\subsubsection*{Return value} -Returns 1 if a sitelocal Internet address has been provided. -%\subsubsection*{Errors} -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_sitelocal(address); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$linklocal} -\subsubsection*{Description} -Check if the provided address is a valid Internet linklocal address, i.e. it belongs to the range fe80::/10. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_linklocal(struct pico_ip6 *a); -\end{verbatim} -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} -\subsubsection*{Return value} -Returns 1 if a linklocal Internet address has been provided. -%\subsubsection*{Errors} -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_linklocal(address); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$localhost} -\subsubsection*{Description} -Check if the provided address is a valid Internet localhost address, i.e. it is "::1". 
-\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_localhost(struct pico_ip6 *a); -\end{verbatim} -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} -\subsubsection*{Return value} -Returns 1 if a localhost Internet address has been provided. -%\subsubsection*{Errors} -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_localhost(address); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$is$\_$undefined} -\subsubsection*{Description} -Check if the provided address is a valid Internet undefined address, i.e. it is "::0". -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_is_undefined(struct pico_ip6 *a); -\end{verbatim} -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Internet host address. -\end{itemize} -\subsubsection*{Return value} -Returns 1 if the Internet address provided describes ANY host. -%\subsubsection*{Errors} -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_is_undefined(address); -\end{verbatim} - - -\subsection{pico$\_$ipv6$\_$source$\_$find} - -\subsubsection*{Description} -Find the source IP for the link associated to the specified destination. -This function will use the currently configured routing table to identify the link that would be used to transmit any traffic directed to the given IP address. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_ip6 *pico_ipv6_source_find(struct pico_ip6 *dst); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the destination internet host address as \texttt{struct pico$\_$ip6}. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns the source IP as \texttt{struct pico$\_$ip6}. -If the source can not be found, \texttt{NULL} is returned and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -src = pico_ipv6_source_find(dst); -\end{verbatim} - - - - -\subsection{pico$\_$ipv6$\_$link$\_$add } - -\subsubsection*{Description} -Add a new local device dev inteface, f.e. eth0, with IP address 'address' and netmask 'netmask'. A device may have more than one link configured, i.e. to access multiple networks on the same link. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_link_add(struct pico_device *dev, struct pico_ip6 address, -struct pico_ip6 netmask); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - Local device. -\item \texttt{address} - Pointer to the internet host address as \texttt{struct pico$\_$ip6}. -\item \texttt{netmask} - Netmask of the address. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$ENETUNREACH} - network unreachable -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_link_add(dev, address, netmask); -\end{verbatim} - - - -\subsection{pico$\_$ipv6$\_$link$\_$del} - -\subsubsection*{Description} -Remove the link associated to the local device that was previously configured, corresponding to the IP address 'address'. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_link_del(struct pico_device *dev, struct pico_ip6 address); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - Local device. -\item \texttt{address} - Pointer to the internet host address as \texttt{struct pico$\_$ip6}. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_link_del(dev, address); -\end{verbatim} - - - -\subsection{pico$\_$ipv6$\_$link$\_$find} - -\subsubsection*{Description} -Find the local device associated to the local IP address 'address'. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_device *pico_ipv6_link_find(struct pico_ip6 *address); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the internet host address as \texttt{struct pico$\_$ip6}. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns the local device. -On error, \texttt{NULL} is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENXIO} - no such device or address -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -dev = pico_ipv6_link_find(address); -\end{verbatim} - - - - -\subsection{pico$\_$ipv6$\_$route$\_$add} - -\subsubsection*{Description} -Add a new route to the destination IP address from the local device link, f.e. eth0. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_route_add(struct pico_ip6 address, struct pico_ip6 netmask, -struct pico_ip6 gateway, int metric, struct pico_ipv6_link *link); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the destination internet host address as \texttt{struct pico$\_$ip6}. -\item \texttt{netmask} - Netmask of the address. If zeroed, the call assumes the meaning of adding a default gateway. -\item \texttt{gateway} - Gateway of the address network. If zeroed, no gateway will be associated to this route, and the traffic towards the destination will be simply forwarded towards the given device. -\item \texttt{metric} - Metric for this route. -\item \texttt{link} - Local device interface. If a valid gateway is specified, this parameter is not mandatory, otherwise \texttt{NULL} can be used. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0. On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. -%if the route already exists or no memory could be allocated. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\item \texttt{PICO$\_$ERR$\_$ENETUNREACH} - network unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_route_add(dst, netmask, gateway, metric, link); -\end{verbatim} - - - -\subsection{pico$\_$ipv6$\_$route$\_$del} - -\subsubsection*{Description} -Remove the route to the destination IP address from the local device link, f.e. etho0. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_ipv6_route_del(struct pico_ip6 address, struct pico_ip6 netmask, -struct pico_ip6 gateway, int metric, struct pico_ipv6_link *link); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the destination internet host address as struct \texttt{pico$\_$ip6}. -\item \texttt{netmask} - Netmask of the address. -\item \texttt{gateway} - Gateway of the address network. -\item \texttt{metric} - Metric of the route. -\item \texttt{link} - Local device interface. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the route is found. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_ipv6_route_del(dst, netmask, gateway, metric, link); -\end{verbatim} - - - -\subsection{pico$\_$ipv6$\_$route$\_$get$\_$gateway} - -\subsubsection*{Description} -This function gets the gateway address for the given destination IP address, if set. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_ip6 pico_ipv6_route_get_gateway(struct pico_ip6 *addr) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{address} - Pointer to the destination internet host address as struct \texttt{pico$\_$ip6}. -\end{itemize} - -\subsubsection*{Return value} -On success the gateway address is returned. -On error a \texttt{null} address is returned (\texttt{0.0.0.0}) and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -gateway_addr = pico_ip6 pico_ipv6_route_get_gateway(&dest_addr) -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$dev$\_$routing$\_$enable} - -\subsubsection*{Description} -Enable IPv6 Routing messages through the specified interface. On a picoTCP IPv6 machine, -when routing is enabled, all possible routes to other links are advertised to the target interfaces. -This allows the hosts connected to the target interface to use the picoTCP IPv6 machine as a router -towards public IPv6 addresses configured on other interfaces, or reachable through known gateways. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_ip6 pico_ipv6_dev_routing_enable(struct pico_device *dev) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - Pointer to the target device struct \texttt{pico$\_$device}. -\end{itemize} - -\subsubsection*{Return value} -On success, zero is returned. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Example} -\begin{verbatim} -retval = pico_ipv6_dev_routing_enable(eth1); -\end{verbatim} - -\subsection{pico$\_$ipv6$\_$dev$\_$routing$\_$disable} - -\subsubsection*{Description} -Enable IPv6 Routing messages through the specified interface. On a picoTCP IPv6 machine, -when routing is enabled, all possible routes to other links are advertised to the target interface. -This function will stop advertising reachable routes to public IPv6 addresses configured on other -interfaces, or reachable through known gateways. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_ip6 pico_ipv6_dev_routing_disable(struct pico_device *dev) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - Pointer to the target device struct \texttt{pico$\_$device}. -\end{itemize} - -\subsubsection*{Return value} -On success, zero is returned. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Example} -\begin{verbatim} -retval = pico_ipv6_dev_routing_disable(eth1); -\end{verbatim} diff --git a/kernel/picotcp/docs/user_manual/chap_api_mdns.tex b/kernel/picotcp/docs/user_manual/chap_api_mdns.tex deleted file mode 100644 index 6437831..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_mdns.tex +++ /dev/null @@ -1,235 +0,0 @@ -\section{MDNS client} - -% Short description/overview of module functions -This module can register DNS resource records on the network via Multicast DNS as either \textbf{\emph{shared}} or \textbf{\emph{unique}} records. Unique records are, as the name implies, unique on the network (the record-name and -type combination is unique) and one single host has claimed the ownership of them. Shared records are records that are not unique on the network, which means multiple hosts can register records with the same record-name and -type combination. For more information on shared and unique resource record sets, see RFC6762. - -Unique records are, as it should, defended when somebody else tries to claim the same unique records. When hosts detect such a defense of another host while registering their own records, the conflict will be resolved by choosing another name for the records and another attempt is made to register those new records. - -This module only supplies the mechanisms of record registration and resolving on the network, it doesn't parses the contents of them, that's up to the application. 
- -\subsection{pico$\_$mdns$\_$init} - -\subsubsection*{Description} -Initialises the entire mDNS-module and sets the hostname for this machine. Sets up the global mDNS socket properly and calls callback when succeeded. Only when the module is properly initialised, records can be registered on the network. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_mdns_init( const char *hostname, - struct pico_ip4 address, - void (*callback)(pico_mdns_rtree *, char *, void *), - void *arg ); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{hostname} - Hostname to register for this machine. Should end with \'.local\'. -\item \texttt{address} - IPv4-address of this machines interface to generate a hostname record from. -\item \texttt{cb$\_$initialised} - Callback-function that is called when the initialisation process is done. This will also get called when asynchronous conflicts occur for successfully registered records during run-time. The mDNS-record tree contains the registered records, the char-buffer contains the registered hostname and the void-pointer contains the passed argument. -\item \texttt{arg} - Argument for callback supplied by user. This can be used if you want to pass some variable into your callback function. -\end{itemize} - -\subsubsection*{Return value} -Returns 0 when the module is properly initialised and the host started registering the hostname. Returns something else went the host failed initialising the module or registering the hostname. \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -pico_mdns_init("host.local", address, &mdns_init_callback, NULL); -\end{verbatim} - - -\subsection{pico$\_$mdns$\_$get$\_$hostname} - -\subsubsection*{Description} -Get the current hostname for this machine. - -\subsubsection*{Function prototype} -\begin{verbatim} -const char * pico_mdns_get_hostname( void ); -\end{verbatim} - -\subsubsection*{Return value} -Returns the current hostname for this machine when the module is initialised, returns NULL when the module is not initialised. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -char *url = pico_mdns_get_hostname(); -\end{verbatim} - - -\subsection{pico$\_$mdns$\_$set$\_$hostname} - -\subsubsection*{Description} -Tries to claim a hostname for this machine. Claims automatically a unique A record with the IPv4-address of this host. The hostname won't be set directly when this functions returns, but only if the claiming of the unique record succeeded. Init-callback specified when initialising the module will be called when the hostname-record is successfully registered. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_mdns_tryclaim_hostname( const char *url, void *arg ); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{url} - URL to set the hostname to. Should end with \'.local\'. -\item \texttt{arg} - Argument for init-callback supplied by user. This can be used if you want to pass some variable into your callback function. -\end{itemize} - -\subsubsection*{Return value} -Returns 0 when the host started registering the hostname-record successfully, returns something else when it didn't succeed. 
\texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -int ret = pico_mdns_tryclaim_hostname("device.local", NULL); -\end{verbatim} - - -\subsection{pico$\_$mdns$\_$claim} - -\subsubsection*{Description} -Claims all different mDNS records in a tree in a single API-call. All records in the mDNS record-tree are registered in a single new claim-session. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_mdns_claim( pico_mdns_rtree record_tree, - void (*callback)(pico_mdns_rtree *, char *, void *), - void *arg ); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{record$\_$tree} - mDNS record-tree with records to register on the network via Multicast DNS. Can contain \textbf{\emph{unique records}} as well as \textbf{\emph{shared records}}. Declare a mDNS record-tree with the macro 'PICO$\_$MDNS$\_$RTREE$\_$DECLARE(name)', which is actually just a pico$\_$tree-struct, with a comparing-function already set. Records can be added with the preprocessor macro 'PICO$\_$MDNS$\_$RTREE$\_$ADD(pico$\_$mdns$\_$rtree *, struct pico$\_$mdns$\_$record *)'. To create mDNS records see 'pico$\_$mdns$\_$record$\_$create'. -\item \texttt{callback} - Callback function that gets called when \textbf{\emph{ALL}} records in the tree are successfully registered on the network. Records in the returned tree can differ from records originally registered due to conflict-resolution and such. -\item \texttt{arg} - Argument for callback supplied by user. This can be used if you want to pass some variable into your callback function. -\end{itemize} - -\subsubsection*{Return value} -Returns 0 when the host started registering the record successfully, returns something else when it didn't succeed. 
\texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -... -PICO_MDNS_RTREE_DECLARE(rtree); -PICO_MDNS_RTREE_ADD(&rtree, &record); -int ret = pico_mdns_claim(rtree, &claimed_cb, NULL); -\end{verbatim} - - -\subsection{pico$\_$mdns$\_$getrecord} - -\subsubsection*{Description} -API-call to query a record with a certain URL and type. First checks the cache for this record. If no cache-entry is found, a query will be sent on the wire for this record. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_mdns_getrecord( const char *url, uint16_t type, - void (*callback)(pico_mdns_rtree *, char *, void *), - void *arg ); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{url} - URL of the DNS name to query records for. -\item \texttt{type} - DNS type of the records to query for on the network. -\item \texttt{callback} - Callback to call when records are found or answers to the query are received. This functions can get called multiple times when multiple answers are possible (e.g. with shared records). It's up to the application to aggregate all these received answers, this is possible with a static variable of the type pico$\_$mdns$\_$rtree. -\item \texttt{arg} - Argument for callback supplied by user. This can be used if you want to pass some variable into your callback function. -\end{itemize} - -\subsubsection*{Return value} -Returns 0 when the host started querying for these records successfully or the records are found in the cache. Returns something else when it didn't succeed. \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -int ret = pico_mdns_getrecord("_ipp._tcp.local", PICO_DNS_TYPE_PTR, &query_cb, NULL); -\end{verbatim} - - -\subsection{pico$\_$mdns$\_$record$\_$create} - -\subsubsection*{Description} -Creates a single standalone mDNS resource record with given name, type and data to register on the network. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_mdns_record *pico_mdns_record_create( const char *url, - void *_rdata, - uint16_t datalen, - uint16_t rtype, - uint32_t rttl, - uint8_t flags ); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{url} - DNS resource record name in URL format. Will be converted to DNS name notation format. -\item \texttt{$\_$rdata} - Memory buffer with data to insert in the resource record. If data of record should contain a DNS name, the name in the databuffer needs to be in URL-format. -\item \texttt{datalen} - The exact length in bytes of the $\_$rdata-buffer. If data of record should contain a DNS name (f.e. with PICO$\_$DNS$\_$TYPE$\_$PTR), datalen needs to be pico$\_$dns$\_$strlen($\_$rdata). -\item \texttt{rtype} - DNS type of the resource record to be. -\item \texttt{ttl} - TTL of the resource record to be when registered on the network. In seconds. -\item \texttt{flags} - With this parameter, you can specify a record as either a shared record or a unique record with respectively PICO$\_$MDNS$\_$RECORD$\_$SHARED- or PICO$\_$MDNS$\_$RECORD$\_$UNIQUE-preprocessor defines. Records are by default registered as unique. -\end{itemize} - -\subsubsection*{Return value} -Returns a pointer to the newly created record on success, returns NULL on failure. \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -pico_ip4 ip = 0; -pico_string_to_ipv4("10.10.0.5", &(ip.addr)); -struct pico_mdns_record *record = pico_mdns_record_create("foo.local", - &(ip.addr), - PICO_SIZE_IP4, - PICO_DNS_TYPE_ANY, - 120, - PICO_MDNS_RECORD_UNIQUE); -\end{verbatim} - - -\subsection{IS$\_$HOSTNAME$\_$RECORD} - -\subsubsection*{Description} -The initialisation-callback can get called multiple times during run-time due to \emph{passive conflict detection}. A passive conflict occurs for unique records when a faulty Multicast DNS-responder doesn't apply conflict resolution after an occurred conflict. A passive conflict can also occur when a peer registers a \textbf{\emph{shared}} record with the same name and type combination as a \textbf{\emph{unique}} record that the local host already successfully registered on the network. Because of that, shared records have priority over unique records, so unfortunately the local host has to apply the conflict resolution-mechanism to it's earlier uniquely verified record. To be able to notify the application of an updated unique record, the callback gets called given in the initialisation-function. But since that callback maybe parses the returned records as the hostname-records and this isn't necessarily the case when a passive conflict occurs, a mechanism is needed to differ hostname-records from other records. This preprocessor-macro allows this. - -\subsubsection*{Function prototype} -\begin{verbatim} -IS_HOSTNAME_RECORD(record) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{record} - mDNS resource record -\end{itemize} - -\subsubsection*{Return value} -Returns 1 when this record is a hostname record, returns 0 when it's not or when given pointer is a NULL pointer. 
\ No newline at end of file diff --git a/kernel/picotcp/docs/user_manual/chap_api_mld.tex b/kernel/picotcp/docs/user_manual/chap_api_mld.tex deleted file mode 100644 index b66aca3..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_mld.tex +++ /dev/null @@ -1,42 +0,0 @@ -\section{MLD} - -% Short description/overview of module functions -This module allows the user to join and leave ipv6 multicast groups. The module is based on the MLD version 2 protocol and it's backwards compatible with version 1. -The MLD module is completly driven from socket calls (\ref{socket:setoption}) and non of the MLD application interface functions should be called by the user. If however, by any reason, it's necessary for the user to do this, the following function call is provided: - -\subsection{pico\_mld\_state\_change} - -\subsubsection*{Description} -Change the state of the host to Non-listener, Idle listener or Delaying listener. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_mld_state_change(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, - uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state) -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{mcast\_link} - the link on which that multicast group should be joined. -\item \texttt{mcast\_group} - the address of the multicast group you want to join. -\item \texttt{filter\_mode} - the kind of source filtering, if applied. -\item \texttt{\_MCASTFilter} - list of multicast sources on which source filtering might be applied. -\item \texttt{state} - the prefered new state. 
-\end{itemize} - -\subsubsection*{Errors} -In case of failure, -1 is returned, and the value of pico$\_$err -is set as follows: - -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - Invalid argument provided -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - Not enough space -\item \texttt{PICO$\_$ERR$\_$EPROTONOSUPPORT} - Invalid protocol (or protocol version) found on the link -\item \texttt{PICO$\_$ERR$\_$EFAULT} - Internal error -\end{itemize} - -%\subsubsection*{Example} - -%\subsubsection*{Errors} - -%\subsubsection*{Example} diff --git a/kernel/picotcp/docs/user_manual/chap_api_olsr.tex b/kernel/picotcp/docs/user_manual/chap_api_olsr.tex deleted file mode 100644 index 424fc35..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_olsr.tex +++ /dev/null @@ -1,49 +0,0 @@ -\section{Optimized Link State Routing (OLSR) Module} - - -OLSR is a proactive routing protocol for mobile ad-hoc networks -(MANETs). It is well suited to large and dense mobile -networks, as the optimization achieved using the MPRs works well in -this context. The larger and more dense a network, the more -optimization can be achieved as compared to the classic link state -algorithm. OLSR uses hop-by-hop routing, i.e., each node uses its -local information to route packets. - -OLSR is well suited for networks, where the traffic is random and -sporadic between a larger set of nodes rather than being almost -exclusively between a small specific set of nodes. As a proactive -protocol, OLSR is also suitable for scenarios where the communicating -pairs change over time: no additional control traffic is generated in -this situation since routes are maintained for all known destinations -at all times. -- cfr. 
RFC3626 - - -\subsection{pico\_olsr\_add} - -\subsubsection*{Description} -This function will add the target device to the OLSR mechanism on the machine, -meaning that it will be possible to advertise and collect routing information -using Optimized Link State Routing protocol, as described in RFC3626, through the -target device. - -In order to use multiple devices in the OLSR system, this function needs to be called -multiple times, once per device. - -\subsubsection*{Function prototype} -\texttt{pico\_olsr\_add(struct pico\_device *dev);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the device is successfully added. - -\subsubsection*{Example} -\begin{verbatim} - -ret = pico_olsr_add(dev); - -\end{verbatim} - diff --git a/kernel/picotcp/docs/user_manual/chap_api_ppp.tex b/kernel/picotcp/docs/user_manual/chap_api_ppp.tex deleted file mode 100644 index 112cf47..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_ppp.tex +++ /dev/null @@ -1,257 +0,0 @@ -\section{Point-to-Point Protocol (PPP)} - -PPP consists in a family of data-link protocols, providing link control, -configuration and authentication over a point-to-point link. In a connected -embedded system, it is often used to access dial-up modems over serial lines. - -This module supports GSM modem configuration by implementing part of ETSI TS 127 007. - -From the picoTCP perspective, each PPP capable device may be abstracted into its own instance -that can be created using \texttt{pico\_ppp\_create}. - -Any GSM/GPRS/3G/HSDPA module, exporting a non-blocking serial interface, such as SPI or UART, -can be connected to the ppp device abstraction, using \texttt{pico\_ppp\_set\_serial\_read}, -\texttt{pico\_ppp\_set\_serial\_write}, \texttt{pico\_ppp\_set\_serial\_set\_speed}. 
- -Once the physical interface is attached, the access to the remote access point gateway -can be configured using \texttt{pico\_ppp\_set\_apn}, \texttt{pico\_ppp\_set\_username} and -\texttt{pico\_ppp\_set\_password}. - -When the interface is configured, the connection may be established using -\texttt{pico\_ppp\_connect}. Even if the peer disconnects, the connection will be brought up -again automatically afterwords. - -To interrupt the connection and stop the automatic reconnection, \texttt{pico\_ppp\_disconnect} -can be called. - -\subsection{pico\_ppp\_create} - -\subsubsection*{Description} -This function will create a new device association to be used with the ppp driver. The driver -must then afterwards be associated with lower-level serial functions in order to be used. - -\subsubsection*{Function prototype} -\texttt{struct pico\_device *pico\_ppp\_create(void);} - -\subsubsection*{Return value} -A new pico\_device is allocated and returned if the device is successfully created. - -\subsubsection*{Example} -\begin{verbatim} - -ppp = pico_ppp_create(); - -\end{verbatim} - -\subsection{pico\_ppp\_set\_serial\_read} -\subsubsection*{Description} -This function will associate the read function from an external source (e.g. a UART device API) -to the read functionality of the PPP driver. Setting up a proper read/write interface is necessary -for the PPP driver to work properly. - -The function associated with the read must be non-blocking, no matter the execution model of the system. - -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_set\_serial\_read(struct pico\_device *dev, int (*sread)(struct pico\_device *, void *, int))} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\item \texttt{sread} - a pointer to a function of type \texttt{int fn(struct pico\_device *, void *, int)} - specifying the target serial read function. 
The function prototype will be called with the device pointer, - a buffer to be filled with serial data, and the maximum lenght of the usable buffer. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the serial read function is successfully associated. - -\subsubsection*{Example} -\begin{verbatim} - -static int my_serial_read(struct pico_device *dev, void *buf, int len) -{ - return nonblock_uart_read(buf, len); -} - -pico_ppp_set_serial_read(ppp, my_serial_read); -\end{verbatim} - -\subsection{pico\_ppp\_set\_serial\_write} -\subsubsection*{Description} -This function will associate the write function from an external source (e.g. a UART device API) -to the write functionality of the PPP driver. Setting up a proper read/write interface is necessary -for the PPP driver to work properly. - -The function associated with the write must be non-blocking, no matter the execution model of the system. - -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_set\_serial\_write(struct pico\_device *dev, int (*swrite)(struct pico\_device *, const void *, int))} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\item \texttt{swrite} - a pointer to a function of type \texttt{int fn(struct pico\_device *, const void *, int)} - specifying the target serial write function. The function prototype will be called with the device pointer, - a buffer to be filled with serial data, and the maximum lenght of the usable buffer. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the serial write function is successfully associated. 
- -\subsubsection*{Example} -\begin{verbatim} - -static int my_serial_write(struct pico_device *dev, const void *buf, int len) -{ - return nonblock_uart_write(buf, len); -} - -pico_ppp_set_serial_write(ppp, my_serial_write); -\end{verbatim} - -\subsection{pico\_ppp\_set\_serial\_set\_speed} -\subsubsection*{Description} -This function will associate the set\_speed function from an external source (e.g. a UART device API) -to dynamically set the UART speed for the interface with the PPP driver. - -Calling this function is not mandatory for the PPP UART interface to work. - -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_set\_serial\_set\_speed(struct pico\_device *dev, int (*sset\_speed)(struct pico\_device *, uint32\_t))} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\item \texttt{sset\_speed} - a pointer to a function of type \texttt{int fn(struct pico\_device *, uint32\_t speed)} - specifying the target serial set\_speed function. The function prototype will be called with the device pointer and - the speed at which the UART should be configured by PPP. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the serial set\_speed function is successfully associated. - -\subsubsection*{Example} -\begin{verbatim} - -static int my_serial_set_speed(struct pico_device *dev, uint32_t speed) -{ - return uart_set_speed(speed); -} - -pico_ppp_set_serial_set_speed(ppp, my_serial_set_speed); -\end{verbatim} - - -\subsection{pico\_ppp\_set\_apn} -\subsubsection*{Description} -This function allows the configuration of the APN name in order for PPP to correctly establish the connection -to the remote Access Point gateway. 
- -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_set\_apn(struct pico\_device *dev, const char *apn);} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\item \texttt{apn} - a string containing the Access Point Name. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the APN is correctly configured. - -\subsubsection*{Example} -\begin{verbatim} - -ret = pico_ppp_set_apn(dev, "internet.apn.name"); - -\end{verbatim} -\subsection{pico\_ppp\_set\_username} -\subsubsection*{Description} -This function will set an username for the PAP/CHAP authentication mechanism. - -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_set\_username(struct pico\_device *dev, const char *username); } - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\item \texttt{username} - a string specifying the desired username. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the username is successfully configured. - -\subsubsection*{Example} -\begin{verbatim} - -ret = pico_ppp_set_username(dev, "john"); - -\end{verbatim} -\subsection{pico\_ppp\_set\_password} -\subsubsection*{Description} -This function will set the password for the PAP/CHAP authentication mechanism. - -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_set\_password(struct pico\_device *dev, const char *password); } - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\item \texttt{username} - a string specifying the desired password. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the password is successfully configured. 
- -\subsubsection*{Example} -\begin{verbatim} - -ret = pico_ppp_set_password(dev, "secret"); -\end{verbatim} - -\subsection{pico\_ppp\_connect} -\subsubsection*{Description} -This function will enable the PPP connection, by triggering the startup of the handshakes -required at all levels. If the connection is dropped, the system will try to reconnect by restarting -the handshakes, until \texttt{pico\_ppp\_disconnect} is finally called. - - -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_connect(struct pico\_device *ppp)} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the device is successfully connecting. - -\subsubsection*{Example} -\begin{verbatim} - -ret = pico_ppp_connect(dev); - -\end{verbatim} -\subsection{pico\_ppp\_disconnect} -\subsubsection*{Description} -This function will disable the PPP connection, by triggering a disconnection, and by disabling the -reconnect feature, if enabled. - -\subsubsection*{Function prototype} -\texttt{int pico\_ppp\_disconnect(struct pico\_device *ppp)} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} specifying the target interface. -\end{itemize} - -\subsubsection*{Return value} -0 returned if the device is successfully put in disconnected state. 
- -\subsubsection*{Example} -\begin{verbatim} - -ret = pico_ppp_disconnect(dev); - -\end{verbatim} diff --git a/kernel/picotcp/docs/user_manual/chap_api_slaacv4.tex b/kernel/picotcp/docs/user_manual/chap_api_slaacv4.tex deleted file mode 100644 index 5833a6b..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_slaacv4.tex +++ /dev/null @@ -1,44 +0,0 @@ -\section{SLAACV4 Module} - -% Short description/overview of module functions - - -\subsection{pico\_slaacv4\_claimip} - -\subsubsection*{Description} -This function starts the ip claiming process for a device. It will generate first the local link ip using -as seed the mac address of the device. Then it will start the claim procedure described in RFC3927. -In case of success the IP is registered to the IP layer and returned using the callback function. -In case of error, code SLAACV4\_ERROR is returned. Errors occur when the maximum number of conflicts is reached. -Use the IP returned only if the return code is SLAACV4\_SUCCESS. - -\subsubsection*{Function prototype} -\texttt{pico\_slaacv4\_claimip(struct pico\_device *dev, void (*cb)(struct pico\_ip4 *ip, uint8\_t code));} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{dev} - a pointer to a struct \texttt{pico\_device} -\item \texttt{*cb} - a callback function returning the ip claimed and a return code (SLAACV4\_ERROR | SLAACV4\_SUCCESS) -\end{itemize} - -\subsubsection*{Return value} -0 returned if the claiming has started successfully - -\subsubsection*{Example} -\begin{verbatim} - -dev = pico_get_device(sdev); - -ret = pico_slaacv4_claimip(dev, slaacv4_cb); - -\end{verbatim} - -\subsection{pico\_slaacv4\_unregisterip} - -\subsubsection*{Description} -This function allows to unregister the local link ip in usage. 
The function will remove from the route table -the local link ip and will reset the internal state of the SLAACV4 module - -\subsubsection*{Function prototype} -\texttt{void pico\_slaacv4\_unregisterip(void);} - diff --git a/kernel/picotcp/docs/user_manual/chap_api_sntp_c.tex b/kernel/picotcp/docs/user_manual/chap_api_sntp_c.tex deleted file mode 100644 index 29e31f8..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_sntp_c.tex +++ /dev/null @@ -1,96 +0,0 @@ -\section{SNTP client} - -% Short description/overview of module functions -This module allows you to sync your device to to a specified (s)ntp server. -You can then retreive the time with the pico$\_$sntp$\_$gettimeofday function. - - -\subsection{pico$\_$sntp$\_$sync} - -\subsubsection*{Description} -Function to sync the local time to a given sntp server in string notation. DNS resolution will be done automatically. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_sntp_sync(char *sntp_server, void (*cb_synced)(pico_err_t status)); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{sntp$\_$server} - String with the sntp server to get the time from -\item \texttt{cb$\_$synced} - Callback function that is called when the synchronisation process is done. The status variable indicates wheter the synchronisation was successful or not. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the synchronisation operation has successfully started. When both IPv4 and IPv6 are enabled, success on one is sufficient. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EPROTONOSUPPORT} - compiled without DNS support -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -int ret = pico_sntp_sync("ntp.nasa.gov", &callback); -\end{verbatim} - - -\subsection{pico$\_$sntp$\_$sync$\_$ip} - -\subsubsection*{Description} -Function to sync the local time to a given sntp server in IP address notation. -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_sntp_sync_ip(union pico_address *sntp_addr, void (*cb_synced)(pico_err_t status)); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{sntp$\_$addr} - IP address of the sntp server to get the time from -\item \texttt{cb$\_$synced} - Callback function that is called when the synchronisation process is done. The status variable indicates wheter the synchronisation was successful or not. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the synchronisation operation has successfully started. When both IPv4 and IPv6 are enabled, success on one is sufficient. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -int ret = pico_sntp_sync_ip(&sntp_addr, &callback); -\end{verbatim} - - -\subsection{pico$\_$sntp$\_$gettimeofday} - -\subsubsection*{Description} -Function to get the current time. Be sure to call the pico$\_$sntp$\_$sync function to synchronise BEFORE calling this function. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_sntp_gettimeofday(struct pico_timeval *tv); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{tv} - Pointer to a time$\_$val struct in which the current time will be set. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 if the time is set. -On error, -1 is returned and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Example} -\begin{verbatim} -int ret = pico_sntp_gettimeofday(tv); -\end{verbatim} - - - diff --git a/kernel/picotcp/docs/user_manual/chap_api_sock.tex b/kernel/picotcp/docs/user_manual/chap_api_sock.tex deleted file mode 100644 index acbe2a8..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_sock.tex +++ /dev/null @@ -1,804 +0,0 @@ -\section{Socket calls} - -% Short description/overview of module functions -With the socket calls, the user can open, close, bind, \ldots sockets and do read -or write operations. The provided transport protocols are UDP and TCP. - -\subsection{pico$\_$socket$\_$open} - -\subsubsection*{Description} -This function will be called to open a socket from the application level. The created -socket will be unbound and not connected. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_socket *pico_socket_open(uint16_t net, uint16_t proto, -void (*wakeup)(uint16_t ev, struct pico_socket *s)); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{net} - Network protocol, \texttt{PICO$\_$PROTO$\_$IPV4} = 0, \texttt{PICO$\_$PROTO$\_$IPV6} = 41 -\item \texttt{proto} - Transport protocol, \texttt{PICO$\_$PROTO$\_$TCP} = 6, \texttt{PICO$\_$PROTO$\_$UDP} = 17 -\item \texttt{wakeup} - Callback function that accepts 2 parameters: -\begin{itemize}[noitemsep] -\item \texttt{ev} - Events that apply to that specific socket, see further -\item \texttt{s} - Pointer to a socket of type struct \texttt{pico$\_$socket} -\end{itemize} -\end{itemize} - -\subsubsection*{Possible events for sockets} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$SOCK$\_$EV$\_$RD} - triggered when new data arrives on the socket. A new receive action can be taken by the socket owner because this event indicates there is new data to receive. -\item \texttt{PICO$\_$SOCK$\_$EV$\_$WR} - triggered when ready to write to the socket. Issuing a write/send call will now succeed if the buffer has enough space to allocate new outstanding data. -\item \texttt{PICO$\_$SOCK$\_$EV$\_$CONN} - triggered when connection is established (TCP only). This event is received either after a successful call to \texttt{pico$\_$socket$\_$connect} to indicate that the connection has been established, or on a listening socket, indicating that a call to \texttt{pico$\_$socket$\_$accept} may now be issued in order to accept the incoming connection from a remote host. -\item \texttt{PICO$\_$SOCK$\_$EV$\_$CLOSE} - triggered when a FIN segment is received (TCP only). This event indicates that the other endpont has closed the connection, so the local TCP layer is only allowed to send new data until a local shutdown or close is initiated. 
PicoTCP is able to keep the connection half-open (only for sending) after the FIN packet has been received, allowing new data to be sent in the TCP CLOSE$\_$WAIT state. -\item \texttt{PICO$\_$SOCK$\_$EV$\_$FIN} - triggered when the socket is closed. No further communication is possible from this point on the socket. -\item \texttt{PICO$\_$SOCK$\_$EV$\_$ERR} - triggered when an error occurs. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns a pointer to the declared socket (\texttt{struct pico$\_$socket *}). -On error the socket is not created, \texttt{NULL} is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EPROTONOSUPPORT} - protocol not supported -\item \texttt{PICO$\_$ERR$\_$ENETUNREACH} - network unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -sk_tcp = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, &wakeup); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$read} - -\subsubsection*{Description} -This function will be called to read data from a connected socket. The function checks that the socket is bound and connected before attempting to receive data. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_read(struct pico_socket *s, void *buf, int len); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of the buffer where the received data will be stored -\item \texttt{len} - Length of the buffer (in bytes), represents the maximum amount of bytes that can be read -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes read. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EIO} - input/output error -\item \texttt{PICO$\_$ERR$\_$ESHUTDOWN} - cannot read after transport endpoint shutdown -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -bytesRead = pico_socket_read(sk_tcp, buffer, bufferLength); -\end{verbatim} - - - -\subsection{pico$\_$socket$\_$write} - -\subsubsection*{Description} -This function will be called to write the content of a buffer to a socket that has been previously connected. -This function checks that the socket is bound, connected and that it is allowed to send data, i.e. there hasn't been a local shutdown. -This is the preferred function to use when writing data from the application to a connected stream. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_write(struct pico_socket *s, void *buf, int len); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of a (constant) buffer where the data is stored -\item \texttt{len} - Length of the data buffer \texttt{buf} -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes written to the socket. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EIO} - input/output error -\item \texttt{PICO$\_$ERR$\_$ENOTCONN} - the socket is not connected -\item \texttt{PICO$\_$ERR$\_$ESHUTDOWN} - cannot send after transport endpoint shutdown -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -bytesWritten = pico_socket_write(sk_tcp, buffer, bufLength); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$sendto} - -\subsubsection*{Description} -This function sends data from the local address to the remote address, without checking -whether the remote endpoint is connected. Specifying the destination is particularly useful while sending single datagrams -to different destinations upon consecutive calls. This is the preferred mechanism to send datagrams to a remote destination -using a UDP socket. -Note that the port needs to be passed in network byte order (big-endian), as with all socket calls. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_sendto(struct pico_socket *s, const void *buf, int len, -void *dst, uint16_t remote_port); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of the buffer -\item \texttt{len} - Length of the buffer \texttt{buf} -\item \texttt{dst} - Pointer to the origin of the IPv4/IPv6 frame header -\item \texttt{remote$\_$port} - Portnumber of the receiving socket -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes written to the socket. 
-On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -bytesWritten = pico_socket_sendto(sk_tcp, buf, len, &sk_tcp->remote_addr, -sk_tcp->remote_port); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$recvfrom} - -\subsubsection*{Description} -This function is called to receive data from the specified socket. -It is useful when called in the context of a non-connected socket, to receive -the information regarding the origin of the data, namely the origin address and -the remote port number. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_recvfrom(struct pico_socket *s, void *buf, int len, -void *orig, uint16_t *remote_port); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of the buffer -\item \texttt{len} - Maximum allowed length for the data to be stored in the buffer \texttt{buf} -\item \texttt{orig} - Pointer to the origin of the IPv4/IPv6 frame, (peer IP address), can be NULL -\item \texttt{remote$\_$port} - Pointer to the port number of the sender socket, can be NULL -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes read from the socket. On success, if \texttt{orig} -is not NULL, The address of the remote endpoint is stored in the memory area pointed by \texttt{orig}. 
-In the same way, \texttt{remote$\_$port} will contain the portnumber of the sending socket, unless a NULL is passed -from the caller. - -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ESHUTDOWN} - cannot read after transport endpoint shutdown -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -bytesRcvd = pico_socket_recvfrom(sk_tcp, buf, bufLen, &peer, &port); -\end{verbatim} - -\subsection{Extended Socket operations} -The interface provided by sendto/recvfrom can be extended to include more information about the network communication. -This is especially useful in UDP communication, and whenever extended information is needed about the single datagram and its encapsulation in the networking layer. - -PicoTCP offers an extra structure that can be used to set and retrieve message information while transmitting and receiving datagrams, respectively. The structure \texttt{pico$\_$msginfo} is defined as follows: -\begin{verbatim} -struct pico_msginfo { - struct pico_device *dev; - uint8_t ttl; - uint8_t tos; -}; -\end{verbatim} - - - -\subsection{pico$\_$socket$\_$sendto$\_$extended} - -\subsubsection*{Description} -This function is an extension of the \texttt{pico$\_$socket$\_$sendto} function described above. It's exactly the same but it adds up an additional argument to set TTL and QOS information on the outgoing packet which contains the datagram. -Note that the port needs to be passed in network byte order (big-endian), as with all socket calls. - -The usage of the extended argument makes sense in UDP context only, as the information is set at packet level, and only with UDP there is a 1:1 correspondence between datagrams and IP packets. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_sendto_extended(struct pico_socket *s, const void *buf, int len, -void *dst, uint16_t remote_port, struct pico_msginfo *info); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of the buffer -\item \texttt{len} - Length of the data that is stored in the buffer (in bytes) -\item \texttt{dst} - IPv4 or IPv6 address of the destination peer where th frame is sent -\item \texttt{remote$\_$port} - Port number of the receiving socket at the remote endpoint -\item \texttt{info} - Extended information about the packet containing this datagram. Only the fields "ttl" and "tos" are taken into consideeration, while "dev" is ignored. - -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes written to the socket. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -struct pico_msginfo info = { }; -info.ttl = 5; -bytesWritten = pico_socket_sendto_extended(sk_tcp, buf, len, &sk_tcp->remote_addr, -sk_tcp->remote_port, &info); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$recvfrom$\_$extended} - -\subsubsection*{Description} -This function is an extension to the normal \texttt{pico$\_$socket$\_$recvfrom} function, which allows to retrieve additional information about the networking layer that has been involved in the delivery of the datagram. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_recvfrom_extended(struct pico_socket *s, void *buf, int len, -void *orig, uint16_t *remote_port, struct pico_msginfo *info); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of the buffer -\item \texttt{len} - Maximum allowed length for the data to be stored in the buffer \texttt{buf} -\item \texttt{orig} - Pointer to the origin of the IPv4/IPv6 frame header, can be NULL -\item \texttt{remote$\_$port} - Pointer to the port number of the sender socket, can be NULL -\item \texttt{info} - Extended information about the incoming packet containing this datagram. The device where the packet was received is pointed by info->dev, the maximum TTL for the packet is stored in info->ttl, and finally the field info->tos keeps track of the flags in IP header's QoS. 
-\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes read from the socket. On success, if \texttt{orig} -is not NULL, The address of the remote endpoint is stored in the memory area pointed by \texttt{orig}. -In the same way, \texttt{remote$\_$port} will contain the portnumber of the sending socket, unless a NULL is passed -from the caller. - -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ESHUTDOWN} - cannot read after transport endpoint shutdown -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -struct pico_msginfo info; -bytesRcvd = pico_socket_recvfrom_extended(sk_tcp, buf, bufLen, &peer, &port, &info); -if (info && info->dev) { - printf("Socket received a datagram via device %s, ttl:%d, tos: %08x\n", - info->dev->name, info->ttl, info->tos); -} -\end{verbatim} - - -\subsection{pico$\_$socket$\_$send} - -\subsubsection*{Description} -This function is called to send data to the specified socket. -It checks if the socket is connected and then calls the -\texttt{pico$\_$socket$\_$sendto} function. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_send(struct pico_socket *s, const void *buf, int len); -\end{verbatim} - - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of the buffer -\item \texttt{len} - Length of the buffer \texttt{buf} -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes written to -the socket. On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOTCONN} - the socket is not connected -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -bytesRcvd = pico_socket_send(sk_tcp, buf, bufLen); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$recv} - -\subsubsection*{Description} -This function directly calls the \texttt{pico$\_$socket$\_$recvfrom} function. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_recv(struct pico_socket *s, void *buf, int len); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{buf} - Void pointer to the start of the buffer -\item \texttt{len} - Maximum allowed length for the data to be stored in the buffer \texttt{buf} -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns an integer representing the number of bytes read -from the socket. On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ESHUTDOWN} - cannot read after transport endpoint shutdown -\item \texttt{PICO$\_$ERR$\_$EADDRNOTAVAIL} - address not available -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -bytesRcvd = pico_socket_recv(sk_tcp, buf, bufLen); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$bind} - -\subsubsection*{Description} -This function binds a local IP-address and port to the specified socket. 
-Note that the port needs to be passed in network byte order (big-endian), as with all socket calls. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_bind(struct pico_socket *s, void *local_addr, uint16_t *port); -\end{verbatim} - - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{local$\_$addr} - Void pointer to the local IP-address -\item \texttt{port} - Local portnumber to bind with the socket -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 after a succesfull bind. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$ENOMEM} - not enough space -\item \texttt{PICO$\_$ERR$\_$ENXIO} - no such device or address -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -errMsg = pico_socket_bind(sk_tcp, &sockaddr4->addr, &sockaddr4->port); -\end{verbatim} - -\subsection{pico$\_$socket$\_$getname} - -\subsubsection*{Description} -This function returns the local IP-address and port previously bound to the specified socket. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_getname(struct pico_socket *s, void *local_addr, uint16_t *port, - uint16_t *proto); -\end{verbatim} - - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{local$\_$addr} - Address (IPv4 or IPv6) previously associated to this socket -\item \texttt{port} - Local portnumber associated to the socket -\item \texttt{proto} - Proto of the address returned in the \texttt{local$\_$addr} field. 
Can be either \texttt{PICO$\_$PROTO$\_$IPV4} or \texttt{PICO$\_$PROTO$\_$IPV6} -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 and populates the three fields {local$\_$addr} \texttt{port} and \texttt{proto} accordingly. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument(s) provided -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -errMsg = pico_socket_getname(sk_tcp, address, &port, &proto); -if (errMsg == 0) { - if (proto == PICO_PROTO_IPV4) - addr4 = (struct pico_ip4 *)address; - else - addr6 = (struct pico_ip6 *)address; -} -\end{verbatim} - -\subsection{pico$\_$socket$\_$getpeername} - -\subsubsection*{Description} -This function returns the IP-address of the remote peer connected to the specified socket. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_getpeername(struct pico_socket *s, void *remote_addr, uint16_t *port, - uint16_t *proto); -\end{verbatim} - - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{remote$\_$addr} - Address (IPv4 or IPv6) associated to the socket remote endpoint -\item \texttt{port} - Local portnumber associated to the socket -\item \texttt{proto} - Proto of the address returned in the \texttt{local$\_$addr} field. Can be either \texttt{PICO$\_$PROTO$\_$IPV4} or \texttt{PICO$\_$PROTO$\_$IPV6} -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 and populates the three fields {local$\_$addr} \texttt{port} and \texttt{proto} accordingly. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument(s) provided -\item \texttt{PICO$\_$ERR$\_$ENOTCONN} - the socket is not connected to any peer -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -errMsg = pico_socket_getpeername(sk_tcp, address, &port, &proto); -if (errMsg == 0) { - if (proto == PICO_PROTO_IPV4) - addr4 = (struct pico_ip4 *)address; - else - addr6 = (struct pico_ip6 *)address; -} -\end{verbatim} - - -\subsection{pico$\_$socket$\_$connect} - -\subsubsection*{Description} -This function connects a local socket to a remote socket of a server that is listening, or permanently associate a remote UDP peer as default receiver for any further outgoing traffic through this socket. -Note that the port needs to be passed in network byte order (big-endian), as with all socket calls. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_connect(struct pico_socket *s, void *srv_addr, -uint16_t remote_port); -\end{verbatim} - - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{srv$\_$addr} - Void pointer to the remote IP-address to connect to -\item \texttt{remote$\_$port} - Remote port number on which the socket will be connected to -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 after a succesfull connect. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. 
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EPROTONOSUPPORT} - protocol not supported -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EHOSTUNREACH} - host is unreachable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -errMsg = pico_socket_connect(sk_tcp, &sockaddr4->addr, sockaddr4->port); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$listen} - -\subsubsection*{Description} -A server can use this function when a socket is opened and bound to start listening to it. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_listen(struct pico_socket *s, int backlog); -\end{verbatim} - - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{backlog} - Maximum connection requests -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 after a succesfull listen start. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EISCONN} - socket is connected -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -errMsg = pico_socket_listen(sk_tcp, 3); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$accept} - -\subsubsection*{Description} -When a server is listening on a socket and the client is trying to connect. -The server on its side will wakeup and acknowledge the connection by calling the this function. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_socket *pico_socket_accept(struct pico_socket *s, void *orig, -uint16_t *local_port); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{orig} - Pointer to the origin of the IPv4/IPv6 frame header -\item \texttt{local$\_$port} - Portnumber of the local socket (pointer) -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns the pointer to a \texttt{struct pico$\_$socket} that -represents the client that was just connected. Also \texttt{orig} will contain the requesting -IP-address and \texttt{local$\_$port} will contain the portnumber of the requesting socket. -On error, \texttt{NULL} is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\item \texttt{PICO$\_$ERR$\_$EAGAIN} - resource temporarily unavailable -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -client = pico_socket_accept(sk_tcp, &peer, &port); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$shutdown} - -\subsubsection*{Description} -Used by the \texttt{pico$\_$socket$\_$close} function to shut down read and write mode for -the specified socket. With this function one can close a socket for reading -and/or writing. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_shutdown(struct pico_socket *s, int mode); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{mode} - \texttt{PICO$\_$SHUT$\_$RDWR}, \texttt{PICO$\_$SHUT$\_$WR}, \texttt{PICO$\_$SHUT$\_$RD} -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 after a successful socket shutdown.
-On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -errMsg = pico_socket_shutdown(s, PICO_SHUT_RDWR); -\end{verbatim} - - -\subsection{pico$\_$socket$\_$close} - -\subsubsection*{Description} -Function used on application level to close a socket. Always closes read and write connection. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_close(struct pico_socket *s); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 after a succesfull socket shutdown. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -errMsg = pico_socket_close(sk_tcp); -\end{verbatim} - - - -\subsection{pico$\_$socket$\_$setoption} -\label{socket:setoption} -\subsubsection*{Description} -Function used to set socket options. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_setoption(struct pico_socket *s, int option, void *value); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{option} - Option to be set (see further for all options) -\item \texttt{value} - Value of option (void pointer) -\end{itemize} - -\subsubsection*{Available socket options} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$TCP$\_$NODELAY} - Disables/enables the Nagle algorithm (TCP Only). 
-\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$KEEPCNT} - Set number of probes for TCP keepalive -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$KEEPIDLE} - Set timeout value for TCP keepalive probes (in ms) -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$KEEPINTVL} - Set interval between TCP keepalive retries in case of no reply (in ms) -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$LINGER} - Set linger time for TCP TIME$\_$WAIT state (in ms) -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$RCVBUF} - Set receive buffer size for the socket -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$RCVBUF} - Set receive buffer size for the socket -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$RCVBUF} - Set receive buffer size for the socket -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$SNDBUF} - Set send buffer size for the socket -\item \texttt{PICO$\_$IP$\_$MULTICAST$\_$IF} - (Not supported) Set link multicast datagrams are sent from, default is first added link -\item \texttt{PICO$\_$IP$\_$MULTICAST$\_$TTL} - Set TTL (0-255) of multicast datagrams, default is 1 -\item \texttt{PICO$\_$IP$\_$MULTICAST$\_$LOOP} - Specifies if a copy of an outgoing multicast datagram is looped back as long as it is a member of the multicast group, default is enabled -\item \texttt{PICO$\_$IP$\_$ADD$\_$MEMBERSHIP} - Join the multicast group specified in the \textit{pico\_ip\_mreq} structure passed in the value argument -\item \texttt{PICO$\_$IP$\_$DROP$\_$MEMBERSHIP} - Leave the multicast group specified in the \textit{pico\_ip\_mreq} structure passed in the value argument -\item \texttt{PICO$\_$IP$\_$ADD$\_$SOURCE$\_$MEMBERSHIP} - Join the source-specific multicast group specified in the \textit{pico\_ip\_mreq\_source} structure passed in the value argument -\item \texttt{PICO$\_$IP$\_$DROP$\_$SOURCE$\_$MEMBERSHIP} - Leave the source-specific multicast group specified in the \textit{pico\_ip\_mreq\_source} structure passed in the value argument -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 after a succesfull setting of 
socket option. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_socket_setoption(sk_tcp, PICO_TCP_NODELAY, NULL); - -uint8_t ttl = 2; -ret = pico_socket_setoption(sk_udp, PICO_IP_MULTICAST_TTL, &ttl); - -uint8_t loop = 0; -ret = pico_socket_setoption(sk_udp, PICO_IP_MULTICAST_LOOP, &loop); - -struct pico_ip4 inaddr_dst, inaddr_link; -struct pico_ip_mreq mreq = {{0},{0}}; -pico_string_to_ipv4("224.7.7.7", &inaddr_dst.addr); -pico_string_to_ipv4("192.168.0.2", &inaddr_link.addr); -mreq.mcast_group_addr = inaddr_dst; -mreq.mcast_link_addr = inaddr_link; -ret = pico_socket_setoption(sk_udp, PICO_IP_ADD_MEMBERSHIP, &mreq); -ret = pico_socket_setoption(sk_udp, PICO_IP_DROP_MEMBERSHIP, &mreq) -\end{verbatim} - - -\subsection{pico$\_$socket$\_$getoption} - -\subsubsection*{Description} -Function used to get socket options. 
- -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_socket_getoption(struct pico_socket *s, int option, void *value); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{s} - Pointer to socket of type \texttt{struct pico$\_$socket} -\item \texttt{option} - Option to be read (see further for all options) -\item \texttt{value} - Pointer to the location where the option value is written (void pointer) -\end{itemize} - -\subsubsection*{Available socket options} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$TCP$\_$NODELAY} - Nagle algorithm, \texttt{value} cast to \texttt{(int *)} (0 = disabled, 1 = enabled) -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$RCVBUF} - Read current receive buffer size for the socket -\item \texttt{PICO$\_$SOCKET$\_$OPT$\_$SNDBUF} - Read current send buffer size for the socket -\item \texttt{PICO$\_$IP$\_$MULTICAST$\_$IF} - (Not supported) Link multicast datagrams are sent from -\item \texttt{PICO$\_$IP$\_$MULTICAST$\_$TTL} - TTL (0-255) of multicast datagrams -\item \texttt{PICO$\_$IP$\_$MULTICAST$\_$LOOP} - Loop back a copy of an outgoing multicast datagram, as long as it is a member of the multicast group, or not. -\end{itemize} - -\subsubsection*{Return value} -On success, this call returns 0 after successfully retrieving the socket option. The value of -the option is written to \texttt{value}. -On error, -1 is returned, and \texttt{pico$\_$err} is set appropriately.
- -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$ERR$\_$EINVAL} - invalid argument -\end{itemize} - -\subsubsection*{Example} -\begin{verbatim} -ret = pico_socket_getoption(sk_tcp, PICO_TCP_NODELAY, &stat); - -uint8_t ttl = 0; -ret = pico_socket_getoption(sk_udp, PICO_IP_MULTICAST_TTL, &ttl); - -uint8_t loop = 0; -ret = pico_socket_getoption(sk_udp, PICO_IP_MULTICAST_LOOP, &loop); -\end{verbatim} diff --git a/kernel/picotcp/docs/user_manual/chap_api_tftp.tex b/kernel/picotcp/docs/user_manual/chap_api_tftp.tex deleted file mode 100644 index ab4df9b..0000000 --- a/kernel/picotcp/docs/user_manual/chap_api_tftp.tex +++ /dev/null @@ -1,911 +0,0 @@ -\section{TFTP} - -% Short description/overview of module functions -This module provides support for Trivial File Transfer Protocol (TFTP). -The support includes client and server implementation, both of them can be active at the same time. - -Flows must be split up into TFTP blocks on the sender side, and reassembled from block len -on the receiving side. Please note that a block whose size is less than the block size indicates -the end of the transfer. - -To indicate the end of a transfer where the content is aligned with the block size, an additional -transmission of zero bytes must follow the flow. - -Function pico$\_$tftp$\_$listen must be used to start the server with a proper callback that should be provided by the user. To reject a request received by the server the server callback must call pico$\_$tftp$\_$reject$\_$request. - -In order to start transmission or reception of files a session handler must be obtained with a call to pico$\_$tftp$\_$session$\_$setup. The created session may take advantage of the Extenxed Options of the TFTP protocol invoking pico$\_$tftp$\_$set$\_$option before starting using it. 
- -Real file transaction is started using the functions pico$\_$tftp$\_$start$\_$tx and pico$\_$tftp$\_$start$\_$rx; both require a callback that must be provided by the user to handle single chunks of the transmission. The transmitter callback must use pico$\_$tftp$\_$send function to send each block of data. - -In case of problem the session can be aborted (and an error message is sent to the remote side) using pico$\_$tftp$\_$abort. - -When a transfer is complete the session became invalid and must not be used any more. - -\subsection*{Application driven interface} - -In some use case is preferable to have an application driven behaviour. The API provide 5 specific functions to use TFTP in this scenario. - -The way to obtain a session handler suited for this purpose is an invocation to the function pico$\_$tftp$\_$app$\_$setup. The synchro variable passed to this function will play a key role during the management of the transfer. - -As usual the section can be instructed to use Extended Options using pico$\_$tftp$\_$set$\_$option before starting the file transfer. - -Once the session is created, the application can start receiving a file with a call to the function pico$\_$tftp$\_$app$\_$start$\_$rx or, if needs to transmit, invoking pico$\_$tftp$\_$app$\_$start$\_$tx. - -After the file transfer is started the user is allowed to perform data handling only when the synchro variable associated with the session is not 0. It is set to 0 after calling pico$\_$tftp$\_$app$\_$setup. A value that differ to 0 means that a single chunk is ready to be handled. - -Single chunk of data are received using pico$\_$tftp$\_$get and transmitted with the use of the function pico$\_$tftp$\_$put. - -Once the file transfer ends, both for completion or in case of error, the session is no more valid. - - -\subsection{pico\_tftp\_listen} - -\subsubsection*{Description} -Start up a TFTP server listening for GET/PUT requests on the given port. 
-The function pointer passed as callback in the \texttt{cb} argument will be invoked upon a new -transfer request received from the network, and the call will pass the information about: -\begin{itemize}[noitemsep] -\item The address of the remote peer asking for a transfer -\item The remote port of the peer -\item The type of transfer requested, via the \texttt{opcode} parameter being either \texttt{PICO$\_$TFTP$\_$RRQ} or \texttt{PICO$\_$TFTP$\_$WRQ}, for get or put requests respectively. -\end{itemize} - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_tftp_listen(uint16_t family, void (*cb)(union pico_address *addr, - uint16_t port, uint16_t opcode, char *filename, int32_t len)); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{family} - The chosen socket family. Accepted values are \texttt{PICO$\_$PROTO$\_$IPV4} for IPv4 and \texttt{PICO$\_$PROTO$\_$IPV6} for IPv6. -\item \texttt{cb} - a pointer to the callback function, defined by the user, that will be called upon a new transfer request. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). 
- -\subsubsection*{Example} -\begin{verbatim} -/* Example of a TFTP listening service callback */ - -void tftp_listen_cb(union pico_address *addr, uint16_t port, - uint16_t opcode, char *filename, int32_t len) -{ - struct note_t *note; - struct pico_tftp_session *session; - - printf("TFTP listen callback (BASIC) from remote port %" PRIu16 ".\n", - short_be(port)); - if (opcode == PICO_TFTP_RRQ) { - printf("Received TFTP get request for %s\n", filename); - note = transfer_prepare(&session, 't', filename, addr, family); - start_tx(session, filename, port, cb_tftp_tx, note); - } else if (opcode == PICO_TFTP_WRQ) { - printf("Received TFTP put request for %s\n", filename); - note = transfer_prepare(&session, 'r', filename, addr, family); - start_rx(session, filename, port, cb_tftp_rx, note); - } -} - -// Code fragment to demostrate the use of pico_tftp_listen: -if (!is_server_enabled) { - pico_tftp_listen(PICO_PROTO_IPV4, (commands->operation == 'S') ? - tftp_listen_cb_opt : tftp_listen_cb); - is_server_enabled = 1; -} -\end{verbatim} - - -\subsection{pico\_tftp\_reject\_request} - -\subsection*{Description} -This message is used in listen callback to reject a request with an error message. -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_reject_request(union pico_address *addr, uint16_t port, - uint16_t error_code, const char *error_message); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{addr} - The address of the remote peer; it must match the address where the request came from. -\item \texttt{port} - The port on the remote peer; it must match the address where the request came from. 
-\item \texttt{error$\_$code} - Error reason, possible values are: - -\begin{tabular}{ll} -TFTP$\_$ERR$\_$UNDEF & Not defined, see error message (if any) \\ -TFTP$\_$ERR$\_$ENOENT & File not found \\ -TFTP$\_$ERR$\_$EACC & Access violation \\ -TFTP$\_$ERR$\_$EXCEEDED & Disk full or allocation exceeded \\ -TFTP$\_$ERR$\_$EILL & Illegal TFTP operation \\ -TFTP$\_$ERR$\_$ETID & Unknown transfer ID \\ -TFTP$\_$ERR$\_$EEXIST & File already exists \\ -TFTP$\_$ERR$\_$EUSR & No such user \\ -TFTP$\_$ERR$\_$EOPT & Option negotiation \\ -\end{tabular} -\item \texttt{message} - Text message to attach. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). - -\subsection*{Example} -\begin{verbatim} -void tftp_listen_cb_opt(union pico_address *addr, uint16_t port, - uint16_t opcode, char *filename, int32_t len) -{ - struct note_t *note; - struct pico_tftp_session *session; - int options; - uint8_t timeout; - int32_t filesize; - int ret; - - printf("TFTP listen callback (OPTIONS) from remote port %" PRIu16 ".\n", - short_be(port)); - /* declare the options we want to support */ - ret = pico_tftp_parse_request_args(filename, len, &options, - &timeout, &filesize); - if (ret) - pico_tftp_reject_request(addr, port, TFTP_ERR_EOPT, - "Malformed request"); - - if (opcode == PICO_TFTP_RRQ) { - printf("Received TFTP get request for %s\n", filename); - note = transfer_prepare(&session, 'T', filename, addr, family); - - if (options & PICO_TFTP_OPTION_TIME) - pico_tftp_set_option(session, PICO_TFTP_OPTION_TIME, timeout); - if (options & PICO_TFTP_OPTION_FILE) { - ret = get_filesize(filename); - if (ret < 0) { - pico_tftp_reject_request(addr, port, TFTP_ERR_ENOENT, - "File not found"); - return; - } - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, ret); - } - - start_tx(session, filename, port, cb_tftp_tx_opt, note); - } else { /* opcode == PICO_TFTP_WRQ */ - printf("Received TFTP put request for 
%s\n", filename); - - note = transfer_prepare(&session, 'R', filename, addr, family); - if (options & PICO_TFTP_OPTION_TIME) - pico_tftp_set_option(session, PICO_TFTP_OPTION_TIME, timeout); - if (options & PICO_TFTP_OPTION_FILE) - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, filesize); - - start_rx(session, filename, port, cb_tftp_rx_opt, note); - } -} -\end{verbatim} - - -\subsection{pico\_tftp\_session\_setup} - -\subsubsection*{Description} -Obtain a session handler to use for the next file transfer with a remote location. - -\subsubsection*{Function prototype} -\begin{verbatim} -struct pico_tftp_session * pico_tftp_session_setup(union pico_address *a, - uint16_t family); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{a} - The address of the peer to be contacted. In case of a solicited transfer, it must match the address where the request came from. -\item \texttt{family} - The chosen socket family. Accepted values are \texttt{PICO$\_$PROTO$\_$IPV4} for IPv4 and \texttt{PICO$\_$PROTO$\_$IPV6} for IPv6. -\end{itemize} - -\subsubsection*{Return value} -In case of success a session handler is returned. In case of failure, NULL is returned and pico$\_$err is set accordingly. - -\subsubsection*{Example} -\begin{verbatim} -struct pico_tftp_session * make_session_or_die(union pico_address *addr, - uint16_t family) -{ - struct pico_tftp_session * session; - - session = pico_tftp_session_setup(addr, family); - if (!session) { - fprintf(stderr, "TFTP: Error in session setup\n"); - exit(3); - } - return session; -} -\end{verbatim} - - -\subsection{pico\_tftp\_set\_option} - -\subsection*{Description} -This function is used to require the use of Extended Options for TFTP transfer associate to a session according to RFC 2347 and RFC 2349. It should be used before the invocation of pico$\_$tftp$\_$start$\_$rx or pico$\_$tftp$\_$start$\_$tx unless the setting is related to the timeout. 
-In order to require Transfer size Option PICO$\_$TFTP$\_$OPTION$\_$FILE must be used and its value set to the file size in case of a Write Request or to 0 in case of a Read Request. -To require to adopt a specific fixed value for the timeout PICO$\_$TFTP$\_$OPTION$\_$TIME must be used with a value ranging between 1 and 255. If this option is set to a value of 0 (or not used at all) an adaptive timeout algorithm will take care of the retransmissions. - -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_set_option(struct pico_tftp_session *session, - uint8_t type, int32_t value); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Section handler to use for the file transfer. -\item \texttt{type} - Option to set; accepted values are PICO$\_$TFTP$\_$OPTION$\_$FILE for Transfer size Option or PICO$\_$TFTP$\_$OPTION$\_$TIME for Timeout interval Option. -\item \texttt{value} - Option value to send. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). - -\subsection*{Example} -\begin{verbatim} -filesize = get_filesize(commands->filename); -if (filesize < 0) { - fprintf(stderr, "TFTP: unable to read size of file %s\n", - commands->filename); - exit(3); -} -pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, filesize); -start_tx(session, commands->filename, short_be(PICO_TFTP_PORT), - cb_tftp_tx_opt, note); -\end{verbatim} - - -\subsection{pico\_tftp\_get\_option} - -\subsection*{Description} -This function is used to retrieve the values of Extended Options that has been set to a session according to RFC 2347 and RFC 2349. -In order to ask Transfer size Option value PICO$\_$TFTP$\_$OPTION$\_$FILE must be used; it may be used for example for example in receiver callback for calculation of remaining bytes to be received to complete the current transfer. 
-To query the timeout PICO$\_$TFTP$\_$OPTION$\_$TIME must be used; a value ranging between 1 and 255 will be returned in the value parameter if the fixed interval is in place. If the call returns -1 and pico$\_$err is set to PICO$\_$ERR$\_$ENOENT the adaptive timeout algorithm is running. - -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_get_option(struct pico_tftp_session *session, - uint8_t type, int32_t *value); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Session handler to use for the file transfer. -\item \texttt{type} - Option to query; accepted values are PICO$\_$TFTP$\_$OPTION$\_$FILE for Transfer size Option or PICO$\_$TFTP$\_$OPTION$\_$TIME for Timeout interval Option. -\item \texttt{value} - Pointer to an integer variable in which the value is stored. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 on success or -1 in case of errors (pico$\_$err is set accordingly). - -\subsection*{Example} -\begin{verbatim} -int cb_tftp_tx_opt(struct pico_tftp_session *session, uint16_t event, - uint8_t *block, int32_t len, void *arg) -{ - int ret; - int32_t filesize; - - if (event == PICO_TFTP_EV_OPT) { - ret = pico_tftp_get_option(session, PICO_TFTP_OPTION_FILE, &filesize); - if (ret) - printf("TFTP: Option filesize is not used\n"); - else - printf("TFTP: We expect to transmit %" PRId32 " bytes\n", - filesize); - event = PICO_TFTP_EV_OK; - } - - return cb_tftp_tx(session, event, block, len, arg); -} -\end{verbatim} - - -\subsection{pico\_tftp\_parse\_request\_args} - -\subsection*{Description} -This function is used to extract any Extension Options that may be present in a Read or Write request (in the listen callback) or in Option ACKnowledge messages (in transmitter or receiver callback when event is equal to PICO$\_$TFTP$\_$EV$\_$OPT). -Note that timeout and filesize are modified only if the corresponding option is found in the received message.
- -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_parse_request_args(char *args, int32_t len, int *options, - uint8_t *timeout, int32_t *filesize); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{args} - Pointer to the buffer containing the arguments: filename for listen callback and block for rx or tx callback. -\item \texttt{len} - Length of the buffer containing the arguments; same value of the len parameter in callbacks. -\item \texttt{options} - Pointer to the variable that will contain the set of options found. Presence of single options can be then verified anding it with PICO$\_$TFTP$\_$OPTION$\_$FILE or PICO$\_$TFTP$\_$OPTION$\_$TIME. -\item \texttt{timeout} - Pointer to the variable that will contain the timeout value (if present in the options). -\item \texttt{filesize} - Pointer to the variable that will contain the filesize value (if present in the options).. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). 
- -\subsection*{Example} -\begin{verbatim} -void tftp_listen_cb_opt(union pico_address *addr, uint16_t port, - uint16_t opcode, char *filename, int32_t len) -{ - struct note_t *note; - struct pico_tftp_session *session; - int options; - uint8_t timeout; - int32_t filesize; - int ret; - - printf("TFTP listen callback (OPTIONS) from remote port %" PRIu16 ".\n", - short_be(port)); - /* declare the options we want to support */ - ret = pico_tftp_parse_request_args(filename, len, &options, - &timeout, &filesize); - if (ret) - pico_tftp_reject_request(addr, port, TFTP_ERR_EOPT, - "Malformed request"); - - if (opcode == PICO_TFTP_RRQ) { - printf("Received TFTP get request for %s\n", filename); - note = transfer_prepare(&session, 'T', filename, addr, family); - - if (options & PICO_TFTP_OPTION_TIME) - pico_tftp_set_option(session, PICO_TFTP_OPTION_TIME, timeout); - if (options & PICO_TFTP_OPTION_FILE) { - ret = get_filesize(filename); - if (ret < 0) { - pico_tftp_reject_request(addr, port, TFTP_ERR_ENOENT, - "File not found"); - return; - } - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, ret); - } - - start_tx(session, filename, port, cb_tftp_tx_opt, note); - } else { /* opcode == PICO_TFTP_WRQ */ - printf("Received TFTP put request for %s\n", filename); - - note = transfer_prepare(&session, 'R', filename, addr, family); - if (options & PICO_TFTP_OPTION_TIME) - pico_tftp_set_option(session, PICO_TFTP_OPTION_TIME, timeout); - if (options & PICO_TFTP_OPTION_FILE) - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, filesize); - - start_rx(session, filename, port, cb_tftp_rx_opt, note); - } -} -\end{verbatim} - - -\subsection{pico\_tftp\_start\_tx} - -\subsubsection*{Description} -Start a TFTP transfer. The action can be unsolicited (client PUT operation) or solicited (server responding to a GET request). 
-In either case, the transfer will happen one block at a time, and the callback provided by the user will be called to notify the acknowledgement for the successful of each transfer, transfer of the last block, reception of an option acknowledge message (client mode) or whenever an error occurs. Any error during the TFTP transfer will cancel the transfer itself. -The possible values for the \texttt{event} variable in callback are: -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$TFTP$\_$EV$\_$OK} Time to send another chunk of data. -\item \texttt{PICO$\_$TFTP$\_$EV$\_$OPT} Option acknowledge has been received. -\item \texttt{PICO$\_$TFTP$\_$EV$\_$ERR$\_$PEER} An error has occurred remotely. -\item \texttt{PICO$\_$TFTP$\_$EV$\_$ERR$\_$LOCAL} An internal error has occurred. -\end{itemize} - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_tftp_start_tx(struct pico_tftp_session *session, uint16_t port, - const char *filename, - int (*user_cb)(struct pico_tftp_session *session, uint16_t event, - uint8_t *block, int32_t len, void *arg), - void *arg); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Session handler to use for the file transfer. -\item \texttt{port} - The port on the remote peer. -\item \texttt{filename} - The name of the file to be transferred. In case of solicited transfer, it must match the filename provided during the request. -\item \texttt{user$\_$cb} - The callback provided by the user to be called upon each block transfer, option acknowledge or in case of error. -\item \texttt{arg} - The pointer is sent as argument to the callback. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). 
- -\subsubsection*{Example} -\begin{verbatim} -void start_tx(struct pico_tftp_session *session, - const char *filename, uint16_t port, - int (*tx_callback)(struct pico_tftp_session *session, uint16_t err, - uint8_t *block, int32_t len, void *arg), - struct note_t *note) -{ - if (pico_tftp_start_tx(session, port, filename, tx_callback, note)) { - fprintf(stderr, "TFTP: Error in initialization\n"); - exit(1); - } -} -\end{verbatim} - - -\subsection{pico\_tftp\_send} -\subsubsection*{Description} -Send the next block during an active TFTP transfer. This is ideally called every time the user callback is triggered by the protocol, indicating that the transfer of the last block has been acknowledged. The user should not call this function unless it's solicited by the protocol during an active transmit session. - -\subsubsection*{Function prototype} -\begin{verbatim} -int32_t pico_tftp_send(struct pico_tftp_session *session, - const uint8_t *data, int32_t len); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - the session handler to use for the file transfer. -\item \texttt{data} - the content of the block to be transferred. -\item \texttt{len} - the size of the buffer being transmitted. If $<$ \texttt{BLOCKSIZE}, the transfer is concluded. In order to terminate a transfer where the content is aligned to \texttt{BLOCKSIZE}, a zero-sized \texttt{pico$\_$tftp$\_$send} must be called at the end of the transfer. -\end{itemize} - -\subsubsection*{Return value} -In case of success, the number of bytes transmitted is returned. In case of failure, -1 is returned and pico$\_$err is set accordingly. 
- -\subsubsection*{Example} -\begin{verbatim} -int cb_tftp_tx(struct pico_tftp_session *session, uint16_t event, - uint8_t *block, int32_t len, void *arg) -{ - struct note_t *note = (struct note_t *) arg; - - if (event != PICO_TFTP_EV_OK) { - fprintf(stderr, "TFTP: Error %" PRIu16 ": %s\n", event, block); - exit(1); - } - - len = read(note->fd, tftp_txbuf, PICO_TFTP_PAYLOAD_SIZE); - - if (len >= 0) { - note->filesize += len; - pico_tftp_send(session, tftp_txbuf, len); - if (len < PICO_TFTP_PAYLOAD_SIZE) { - printf("TFTP: file %s (%" PRId32 - " bytes) TX transfer complete!\n", - note->filename, note->filesize); - close(note->fd); - del_note(note); - } - } else { - perror("read"); - fprintf(stderr, - "Filesystem error reading file %s," - " cancelling current transfer\n", note->filename); - pico_tftp_abort(session, TFTP_ERR_EACC, "Error on read"); - del_note(note); - } - - if (!clipboard) - pico_timer_add(3000, deferred_exit, NULL); - - return len; -} -\end{verbatim} - - -\subsection{pico\_tftp\_start\_rx} - -\subsubsection*{Description} -Start a TFTP transfer. The action can be unsolicited (client GET operation) or solicited (server responding to a PUT request). -In either case, the transfer will happen one block at a time, and the callback provided by the user will be called upon successful transfer of a block, whose content can be directly accessed via the \texttt{block} field, reception of an option acknowledge messagge (client mode) or whenever an error occurs. -The possible values for the \texttt{event} variable in callback are: -\begin{itemize}[noitemsep] -\item \texttt{PICO$\_$TFTP$\_$EV$\_$OK} Previously sent block has been acknowledge. -\item \texttt{PICO$\_$TFTP$\_$EV$\_$OPT} Option acknowledge has been received. -\item \texttt{PICO$\_$TFTP$\_$EV$\_$ERR$\_$PEER} An error has occurrend remotely. -\item \texttt{PICO$\_$TFTP$\_$EV$\_$ERR$\_$LOCAL} An internal error has occurred. 
-\end{itemize} - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_tftp_start_rx(struct pico_tftp_session *session, uint16_t port, - const char *filename, - int (*user_cb)(struct pico_tftp_session *session, uint16_t event, - uint8_t *block, int32_t len, void *arg), - void *arg); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - the session handler to use for the file transfer. -\item \texttt{port} - The port on the remote peer. -\item \texttt{filename} - The name of the file to be transfered. In case of solicited transfer, it must match the filename provided during the request. -\item \texttt{user$\_$cb} - The callback provided by the user to be called upon each block transfer, option acknowledge or in case of error. This is the callback where the incoming data is processed. When len is less than the block size, the transfer is over. -\item \texttt{arg} - The pointer sent as argument to the callback. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). - -\subsubsection*{Example} -\begin{verbatim} -void start_rx(struct pico_tftp_session *session, - const char *filename, uint16_t port, - int (*rx_callback)(struct pico_tftp_session *session, uint16_t err, - uint8_t *block, int32_t len, void *arg), - struct note_t *note) -{ - if (pico_tftp_start_rx(session, port, filename, rx_callback, note)) { - fprintf(stderr, "TFTP: Error in initialization\n"); - exit(1); - } -} -\end{verbatim} - - -\subsection{pico\_tftp\_get\_file\_size} - -\subsection*{Description} -This function is used to retrieve the file size (if transmitted by the remote or set as session option). 
It is equivalent to a call to pico$\_$tftp$\_$get$\_$option(session, PICO$\_$TFTP$\_$OPTION$\_$FILE, $\&$file$\_$size); - -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_get_file_size(struct pico_tftp_session *session, - int32_t *file_size); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Section handler to use for the file transfer. -\item \texttt{file$\_$size} - Pointer to an integer variable where to store the value. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). - -\subsection*{Example} -\begin{verbatim} -ret = pico_tftp_get_file_size(session, &file_size); -if (ret) - printf("Information about file size has not been received"\n); -\end{verbatim} - - -\subsection{pico\_tftp\_abort} -\subsubsection*{Description} -When called this function aborts associated ongoing transmission and notifying the other endpoint with a proper error message. After a call to this function the session is closed automatically. - -\subsubsection*{Function prototype} -\begin{verbatim} -int pico_tftp_abort(struct pico_tftp_session *session, - uint16_t error, const char *reason); -\end{verbatim} - -\subsubsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - the session handler related to the session to abort. -\item \texttt{error} - Error reason code, possible values are: - -\begin{tabular}{ll} -TFTP$\_$ERR$\_$UNDEF & Not defined, see error message (if any) \\ -TFTP$\_$ERR$\_$ENOENT & File not found \\ -TFTP$\_$ERR$\_$EACC & Access violation \\ -TFTP$\_$ERR$\_$EXCEEDED & Disk full or allocation exceeded \\ -TFTP$\_$ERR$\_$EILL & Illegal TFTP operation \\ -TFTP$\_$ERR$\_$ETID & Unknown transfer ID \\ -TFTP$\_$ERR$\_$EEXIST & File already exists \\ -TFTP$\_$ERR$\_$EUSR & No such user \\ -TFTP$\_$ERR$\_$EOPT & Option negotiation \\ -\end{tabular} -\item \texttt{reason} - Text message to attach. 
-\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). - -\subsection*{Example} -\begin{verbatim} -int cb_tftp_rx(struct pico_tftp_session *session, uint16_t event, - uint8_t *block, int32_t len, void *arg) -{ - struct note_t *note = (struct note_t *) arg; - int ret; - - if (event != PICO_TFTP_EV_OK) { - fprintf(stderr, "TFTP: Error %" PRIu16 ": %s\n", event, block); - exit(1); - } - - note->filesize += len; - if (write(note->fd, block, len) < 0) { - perror("write"); - fprintf(stderr, "Filesystem error writing file %s," - " cancelling current transfer\n", note->filename); - pico_tftp_abort(session, TFTP_ERR_EACC, "Error on write"); - del_note(note); - } else { - if (len != PICO_TFTP_PAYLOAD_SIZE) { - printf("TFTP: file %s (%" PRId32 - " bytes) RX transfer complete!\n", - note->filename, note->filesize); - close(note->fd); - del_note(note); - } - } - - if (!clipboard) - pico_timer_add(3000, deferred_exit, NULL); - - return len; -} -\end{verbatim} - - -\subsection{pico\_tftp\_close\_server} - -\subsection*{Description} -This function is used to shutdown the TFTP server. - -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_close_server(void); -\end{verbatim} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). - -\subsection*{Example} -\begin{verbatim} -ret = pico_tftp_close_server(); -if (ret) - printf(stderr, "Failure shutting down the server\n"); -\end{verbatim} - - -\subsection{pico\_tftp\_app\_setup} - -\subsection*{Description} -Obtain a session handler to use for the next file transfer with a remote location in application driven mode. 
- -\subsection*{Function prototype} -\begin{verbatim} -struct pico_tftp_session * pico_tftp_app_setup(union pico_address *a, - uint16_t port, uint16_t family, int *synchro); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{a} - The address of the peer to be contacted. In case of a solicited transfer, it must match the address where the request came from. -\item \texttt{port} - The port on the remote peer. -\item \texttt{family} - The chosen socket family. Accepted values are \texttt{PICO$\_$PROTO$\_$IPV4} for IPv4 and \texttt{PICO$\_$PROTO$\_$IPV6} for IPv6. -\item \texttt{synchro} - Variable to handle the synchronization. -\end{itemize} - -\subsubsection*{Return value} -In case of success a session handler is returned. In case of failure, NULL is returned and pico$\_$err is set accordingly. - -\subsection*{Example} -\begin{verbatim} -session = pico_tftp_app_setup(&server_address, short_be(PICO_TFTP_PORT), - PICO_PROTO_IPV4, &synchro); -if (!session) { - fprintf(stderr, "Error in pico_tftp_app_setup\n"); - exit(1); -} -\end{verbatim} - - -\subsection{pico\_tftp\_app\_start\_rx} - -\subsection*{Description} -Application driven function used to request to read a remote file. The transfer will happen one block at a time using pico$\_$tftp$\_$app$\_$get. - -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_app_start_rx(struct pico_tftp_session *session, - const char *filename); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Session handler to use for the file transfer. -\item \texttt{filename} - The name of the file to be received. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). 
- -\subsection*{Example} -\begin{verbatim} -printf("Start receiving file %s with options set to %d\n", filename, options); - -if (options) { - ret = pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, 0); - if (ret) { - fprintf(stderr, "Error in pico_tftp_set_option\n"); - exit(1); - } -} - -ret = pico_tftp_app_start_rx(session, filename); -if (ret) { - fprintf(stderr, "Error in pico_tftp_app_start_rx\n"); - exit(1); -} -\end{verbatim} - - -\subsection{pico\_tftp\_app\_start\_tx} - -\subsection*{Description} -Application driven function used to request to write a remote file. The transfer will happen one block at a time using pico$\_$tftp$\_$app$\_$put. - -\subsection*{Function prototype} -\begin{verbatim} -int pico_tftp_app_start_tx(struct pico_tftp_session *session, - const char *filename); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Session handler to use for the file transfer. -\item \texttt{filename} - The name of the file to be sent. -\end{itemize} - -\subsubsection*{Return value} -This function returns 0 if succeeds or -1 in case of errors (pico$\_$err is set accordingly). - -\subsection*{Example} -\begin{verbatim} -printf("Start sending file %s with options set to %d\n", filename, options); - -if (options) { - ret = get_filesize(filename); - if (ret < 0) { - fprintf(stderr, "Error in get_filesize\n"); - exit(1); - } - - ret = pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, ret); - if (ret) { - fprintf(stderr, "Error in pico_tftp_set_option\n"); - exit(1); - } -} - -ret = pico_tftp_app_start_tx(session, filename); -if (ret) { - fprintf(stderr, "Error in pico_tftp_app_start_rx\n"); - exit(1); -} -\end{verbatim} - - -\subsection{pico\_tftp\_get} - -\subsection*{Description} -Read the next block during an active TFTP transfer. The len field must always be equal to PICO$\_$TFTP$\_$PAYLOAD$\_$SIZE. Once the file has been sent or after an error the session is no more valid. 
- -\subsection*{Function prototype} -\begin{verbatim} -int32_t pico_tftp_get(struct pico_tftp_session *session, - uint8_t *data, int32_t len); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Session handler to use for the file transfer. -\item \texttt{data} - Buffer where to store the acquired payload. -\item \texttt{len} - Length of the buffer size to receive; it is equal to the fixed chunk size. -\end{itemize} - -\subsubsection*{Return value} -This function returns the number of received bytes of payload (0 included) if succeeds. In case of error a negative number is returned. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{-1} At least one of the passed arguments are invalid. -\item \texttt{-PICO$\_$TFTP$\_$EV$\_$ERR$\_$PEER} Remote failure. -\item \texttt{-PICO$\_$TFTP$\_$EV$\_$ERR$\_$LOCAL} Local failure. -\end{itemize} - -\subsection*{Example} -\begin{verbatim} -for(;left; left -= countdown) { - usleep(2000); //PICO_IDLE(); - pico_stack_tick(); - if (countdown) - continue; - - if (*synchro) { - len = pico_tftp_get(session, buf, PICO_TFTP_PAYLOAD_SIZE); - if (len < 0) { - fprintf(stderr, "Failure in pico_tftp_get\n"); - close(fd); - countdown = 1; - continue; - } - ret = write(fd, buf, len); - if (ret < 0) { - fprintf(stderr, "Error in write\n"); - pico_tftp_abort(session, TFTP_ERR_EXCEEDED, "File write error"); - close(fd); - countdown = 1; - continue; - } - printf("Written %" PRId32 " bytes to file (synchro=%d)\n", - len, *synchro); - - if (len != PICO_TFTP_PAYLOAD_SIZE) { - close(fd); - printf("Transfer complete!\n"); - countdown = 1; - } - } -} -\end{verbatim} - - -\subsection{pico\_tftp\_put} - -\subsection*{Description} -Send the next block during an active TFTP transfer. The len field, with the exception of last invocation must always be equal to PICO$\_$TFTP$\_$PAYLOAD$\_$SIZE. Once the file has been sent or after an error the session is no more valid. 
- -\subsection*{Function prototype} -\begin{verbatim} -int32_t pico_tftp_put(struct pico_tftp_session *session, - uint8_t *data, int32_t len); -\end{verbatim} - -\subsection*{Parameters} -\begin{itemize}[noitemsep] -\item \texttt{session} - Session handler to use for the file transfer. -\item \texttt{data} - Pointer to the data to be transmitted. -\item \texttt{len} - Length of the buffer size to transmit; last chunk must be $<$ of the maximum buffer size (0 if file size was a multiple of maximum buffer size). - -\end{itemize} - -\subsubsection*{Return value} -This function returns the number of transmitted payload data (len) if succeeds. In case of error a negative number is returned. - -\subsubsection*{Errors} -\begin{itemize}[noitemsep] -\item \texttt{-1} At least one of the passed arguments are invalid. -\item \texttt{-PICO$\_$TFTP$\_$EV$\_$ERR$\_$PEER} Remote failure. -\item \texttt{-PICO$\_$TFTP$\_$EV$\_$ERR$\_$LOCAL} Local failure. -\end{itemize} - -\subsection*{Example} -\begin{verbatim} -for(;left; left -= countdown) { - usleep(2000); //PICO_IDLE(); - pico_stack_tick(); - if (countdown) - continue; - - if (*synchro) { - ret = read(fd, buf, PICO_TFTP_PAYLOAD_SIZE); - if (ret < 0) { - fprintf(stderr, "Error in read\n"); - pico_tftp_abort(session, TFTP_ERR_EACC, "File read error"); - close(fd); - countdown = 1; - continue; - } - printf("Read %" PRId32 " bytes from file (synchro=%d)\n", - len, *synchro); - - len = pico_tftp_put(session, buf, ret); - if (len < 0) { - fprintf(stderr, "Failure in pico_tftp_put\n"); - close(fd); - countdown = 1; - continue; - } - - if (len != PICO_TFTP_PAYLOAD_SIZE) { - close(fd); - printf("Transfer complete!\n"); - countdown = 1; - } - } -} -\end{verbatim} diff --git a/kernel/picotcp/docs/user_manual/chap_license.tex b/kernel/picotcp/docs/user_manual/chap_license.tex deleted file mode 100644 index a248315..0000000 --- a/kernel/picotcp/docs/user_manual/chap_license.tex +++ /dev/null @@ -1,11 +0,0 @@ -Unless you have received a 
written document by PicoTCP copyright holders stating otherwise, -the software described in this document is distributed under the terms of the GNU General -Public License version 2 only. - -The terms of the license are reported below. - -\begin{center} -{\bf\large GNU General Public license} -{\bf Version 2, June 1991} -\end{center} -\input {gpl-2.0} diff --git a/kernel/picotcp/docs/user_manual/chap_overview.tex b/kernel/picotcp/docs/user_manual/chap_overview.tex deleted file mode 100644 index f7409ac..0000000 --- a/kernel/picotcp/docs/user_manual/chap_overview.tex +++ /dev/null @@ -1,68 +0,0 @@ -PicoTCP is a complete TCP/IP stack, intended for embedded devices and -designed to run on different architectures and networking -hardware. The architecture of the stack allows easy selection of the features -needed for any particular use, taking into account the sizing and -the performance of the platform on which the code is to run. -Even if it is designed -to allow for size and performance constraints, the chosen approach is to -comply with the latest standards in the telecommunications research, including -the latest proposals, in order to achieve the highest standards for -today's inter-networking communications. PicoTCP is distributed as -a library to be integrated with application and form a combination for -any hardware-specific firmware. - - -The main characteristics of the library are the following: -\begin{itemize} -\item \textbf{Modularity} Each component of the stack is deployed in a -separate module, allowing the selection at compile time of the components needed to -be included for any specific platform, depending on the particular use case. -We know that saving memory and resources is often mission-critical for a -project, and therefore PicoTCP is fully focussed on saving -up to the last byte of memory. -\item \textbf{Code Quality} Every component added to the -stack must pass a complete set of validation tests. 
Before new code can be -introduced it is scanned and proof-checked by three separate levels of -quality enforcement. The process related to the validation of the code is -one of the major tasks of the engineering team. In the top-down approach of the design, a -new module has to pass the review of our senior architects, to have it comply -with the general guidelines. The development of the smaller -components is done in a test-driven way, providing a specific unit test for each function call. -Finally, functional non-regression tests are performed -after the feature development is complete, and all the tests are automatically -scheduled to run several times per day to check for functional regressions. -\item \textbf{Adherence to the standards} The protocols -included in the stack are done following stepare designed by following meticulously the guidelines -provided by the International Engineering Task Force (IETF) with regards to -inter-networking communication. A strong adherence to the standards guarantees a -smooth integration with all the existing TCP/IP stacks, when communicating -with both other embedded devices and with the PC/server world. -\item \textbf{Features} A fully-featured protocol implementation including all those non-mandatory -features means better data-transfer performances, coverage of rare/unique -network scenarios and topologies and a better integration with all types of -networking hardware devices. -\item \textbf{Transparency} The availability of the source code to the Free -Software community is an important added value of PicoTCP. -%Our programmers -%are proud of the aestethic of their code, and they show it with no -%hesitation to the attention of the rest of the world. 
-The constant peer reviews and constructive comments on the -design and the development choices that PicoTCP receives from the academic world -and from several hundreds of hobbyists and professionals who read the code, -are an essential element in the quality build-up of the product. -%PicoTCP constantly receives peer-reviews and constructive comments on the -%design and the development choices from the academic world and from several -%hundreds of hobbists and professionals who read the code. -%We strongly -%believe that software is not about keeping things secret: whenever one is -%convinced by the quality of their work, there is absolutely nothing to hide. -\item \textbf{Simplicity} The APIs provided to access the library -facilities, both from the applications as well as from the device drivers, are -small and well documented. This concurs with the goal of the library to facilitate -the integration with the surroundings and minimize the time used to combine -the stack with existing code. The support required to port to a new -architecture is so small it is reduced to a set of macros defined in a -header file specific for the platform. 
-\end{itemize} - - diff --git a/kernel/picotcp/docs/user_manual/chap_rfcs.tex b/kernel/picotcp/docs/user_manual/chap_rfcs.tex deleted file mode 100644 index c5ef6be..0000000 --- a/kernel/picotcp/docs/user_manual/chap_rfcs.tex +++ /dev/null @@ -1,187 +0,0 @@ - -\begin{longtable}{ | l | p{15cm} | } -\hline -{\bf RFC} & -{\bf Description} \\ \hline - -RFC 768 & -User Datagram Protocol (UDP) \\ \hline - -RFC 791 & -Internet Protocol (IP) \\ \hline - -RFC 792 & -Internet Control Message Protocol (ICMP) \\ \hline - -RFC 793 & -Transmission Control Protocol (TCP) \\ \hline - -RFC 816 & -Fault Isolation and Recovery \\ \hline - -RFC 826 & -Address Resolution Protocol (ARP) \\ \hline - -RFC 879 & -The TCP Maximum Segment Size and Related Topics \\ \hline - -RFC 894 & -IP over Ethernet \\ \hline - -RFC 896 & -Congestion Control in IP/TCP Internetworks \\ \hline - -RFC 919 & -Broadcasting Internet Datagrams \\ \hline - -RFC 922 & -Broadcasting Internet Datagrams in the Presence of Subnets \\ \hline - -RFC 950 & -Internet Standard Subnetting Procedure \\ \hline - -RFC 1009 & -Requirements for Internet Gateways \\ \hline - -RFC 1034 & -Domain NamesConcepts and Facilities \\ \hline - -RFC 1035 & -Domain NamesImplementation and Specification \\ \hline - -RFC 1071 & -Computing the Internet Checksum \\ \hline - -RFC 1112 & -Internet Group Management Protocol (IGMP) \\ \hline - -RFC 1122 & -Requirements for Internet HostsCommunication Layers \\ \hline - -RFC 1123 & -Requirements for Internet Hosts - Application and Support (\textsuperscript{1}) \\ \hline - -RFC 1191 & -Path MTU Discovery (\textsuperscript{1})\\ \hline - -RFC 1323 & -TCP Extensions for High Performance \\ \hline - -RFC 1332 & -The PPP Internet Protocol Control Protocol (IPCP) \\ \hline - -RFC 1334 & -PPP Authentication Protocols \\ \hline - -RFC 1337 & -TIME-WAIT Assassination Hazards in TCP \\ \hline - -RFC 1350 & -The TFTP Protocol (Revision 2) \\ \hline - -RFC 1534 & -Interoperation Between DHCP and BOOTP \\ 
\hline - -RFC 1542 & -Clarifications and Extensions for the Bootstrap Protocol \\ \hline - -RFC 1661 & -The Point-to-Point Protocol (PPP) \\ \hline - -RFC 1662 & -PPP in HDLC-like Framing \\ \hline - -RFC 1812 & -Requirements for IP Version 4 Routers \\ \hline - -RFC 1878 & -Variable Length Subnet Table For IPv4 \\ \hline - -RFC 1886 & -DNS Extensions to Support IP Version 6 (\textsuperscript{1}) \\ \hline - -RFC 1994 & -PPP Challenge Handshake Authentication Protocol (CHAP) \\ \hline - -RFC 2018 & -TCP Selective Acknowledgment Options \\ \hline - -RFC 2131 & -Dynamic Host Configuration Protocol (DHCP) \\ \hline - -RFC 2132 & -DHCP Options and BOOTP Vendor Extensions \\ \hline - -RFC 2236 & -Internet Group Management Protocol, Version 2 \\ \hline - -RFC2347 & -TFTP Option Extension \\ \hline - -RFC 2349 & -TFTP Timeout Interval and Transfer Size Options \\ \hline - -RFC 2460 & -Internet Protocol, Version 6 (IPv6) Specification \\ \hline - -RFC 2581 & -TCP Congestion Control \\ \hline - -RFC 2663 & -IP Network Address Translator (NAT) Terminology and Considerations \\ \hline - -RFC 2710 & -Multicast Listener Discovery (MLD) for IPv6 \\ \hline - -RFC 3042 & -Enhancing TCP's Loss Recovery Using Limited Transmit \\ \hline - -RFC 3315 & -Dynamic Host Configuration Protocol for IPv6 (DHCPv6) (\textsuperscript{1}) \\ \hline - -RFC 3376 & -Internet Group Management Protocol, Version 3 \\ \hline - -RFC 3517 & -A Conservative Selective Acknowledgment (SACK)-based Loss Recovery Algorithm for TCP \\ \hline - -RFC 3561 & -Ad-hoc On-Demand Distance Vector (AODV) Routing \\ \hline - -RFC 3626 & -Optimized Link State Routing Protocol (OLSR) \\ \hline - -RFC 3782 & -The NewReno Modification to TCP's Fast Recovery Algorithm \\ \hline - -RFC 3810 & -Multicast Listener Discovery Version 2 (MLDv2) for IPv6 \\ \hline - -RFC 3927 & -Dynamic Configuration of IPv4 Link-Local Addresses \\ \hline - -RFC 4291 & -IP Version 6 Addressing Architecture \\ \hline - -RFC 4443 & -Internet Control 
Message Protocol (ICMPv6) for the Internet Protocol Version 6 (IPv6) Specification \\ \hline - -RFC 4861 & -Neighbor Discovery for IP version 6 (IPv6) \\ \hline - -RFC 4862 & -IPv6 Stateless Address Autoconfiguration \\ \hline - -RFC 6691 & -TCP Options and Maximum Segment Size (MSS) \\ \hline - -RFC 6762 & -Multicast DNS \\ \hline - -RFC 6763 & -DNS-based Service Discovery \\ \hline - -\end{longtable} - -(\textsuperscript{1}) Work in progress -(\textsuperscript{2}) Experimental diff --git a/kernel/picotcp/docs/user_manual/layout.tex b/kernel/picotcp/docs/user_manual/layout.tex deleted file mode 100644 index 00ff75f..0000000 --- a/kernel/picotcp/docs/user_manual/layout.tex +++ /dev/null @@ -1,71 +0,0 @@ -%Thesistemplate LaTeX De Nayer 2008-2009 -%Stefan Bouwens, Dave Geeradyn & Toon Goedem\'e - -%PACKAGES -%\usepackage{times} -%\usepackage[dvips]{graphicx} -\usepackage{url} -\usepackage[english,dutch]{babel} -\usepackage[T1]{fontenc} -\usepackage[all]{xy} -\usepackage{amssymb} -%\usepackage[breaklinks=true, bookmarksopen=true]{hyperref} -\usepackage[small,bf,hang]{caption} -\renewcommand{\captionfont}{\small\itshape} -\usepackage{natbib} -\bibpunct{(}{)}{;}{a}{,}{,} - -\usepackage{listings} -\lstloadlanguages{[visual]c++} -\lstset{ basicstyle=\small, - basicstyle=\ttfamily, - numbers=left, numberstyle=\tiny, stepnumber=1, numbersep=5pt, - tabsize=3, - keywordstyle=\color{black}\bfseries, - commentstyle=\color{gray}\itshape, - frame=single, - breaklines=true - } - - -%LAYOUT -%\bibliographystyle{agsm} - -\setcounter{secnumdepth}{3} -\setcounter{tocdepth}{3} - -\setlength{\textheight}{237mm} -\setlength{\textwidth}{160mm} -%\setlength{\columnsep}{0.3125in} -\setlength{\topmargin}{-0.54cm} -\setlength{\headheight}{15pt} -%\setlength{\headsep}{0in} -%\setlength{\parindent}{1pc} -\setlength{\oddsidemargin}{0.46cm} % Centers text. -\setlength{\evensidemargin}{-.54cm} -% -%Linkermarge 30 mm, rechtermarge 20 mm, bovenmarge 20 mm, ondermarge 20 mm. 
-%2,54 - -\usepackage{fancyhdr} -\pagestyle{fancy} -% with this we ensure that the chapter and section -% headings are in lowercase. -\renewcommand{\chaptermark}[1]{% - \markboth{#1}{}} -\renewcommand{\sectionmark}[1]{% - \markright{\thesection\ #1}} -\fancyhf{} % delete current header and footer -\fancyhead[LE,RO]{\bfseries\thepage} -\fancyhead[LO]{\bfseries\rightmark} -\fancyhead[RE]{\bfseries\leftmark} -\renewcommand{\headrulewidth}{0.5pt} -\renewcommand{\footrulewidth}{0pt} -\addtolength{\headheight}{0.5pt} % space for the rule -\fancypagestyle{plain}{% - \fancyhead{} % get rid of headers on plain pages - \renewcommand{\headrulewidth}{0pt} % and the line -} - -\parskip 6pt % sets spacing between paragraphs -\parindent 0pt % sets leading space for paragraphs \ No newline at end of file diff --git a/kernel/picotcp/docs/user_manual/layout1.tex b/kernel/picotcp/docs/user_manual/layout1.tex deleted file mode 100644 index 946b6ba..0000000 --- a/kernel/picotcp/docs/user_manual/layout1.tex +++ /dev/null @@ -1,27 +0,0 @@ -% Altran Intelligent Systems -% -% LAYOUT TEXT -% =========== - -%\documentclass[11pt, a4paper, openright,oneside]{book} -\documentclass[11pt, a4paper,oneside]{report} - -\usepackage[english]{babel} -\usepackage[latin1]{inputenc} -%\usepackage[T1]{fontenc} -\usepackage{graphicx} -\usepackage{natbib} -%\usepackage{hyperref} -\usepackage[hang,flushmargin]{footmisc} - -\usepackage{fullpage} -\parskip 4pt % sets spacing between paragraphs -\parindent 0pt % sets leading space for paragraphs - -\makeatletter -\renewcommand{\@makechapterhead}[1]{% -%\vspace*{50 pt}% -{\setlength{\parindent}{0pt} \raggedright \normalfont -\bfseries\Huge\thechapter.\ #1 -\par\nobreak\vspace{40 pt}}} -\makeatother diff --git a/kernel/picotcp/docs/user_manual/template_api.tex b/kernel/picotcp/docs/user_manual/template_api.tex deleted file mode 100644 index 140fe35..0000000 --- a/kernel/picotcp/docs/user_manual/template_api.tex +++ /dev/null @@ -1,38 +0,0 @@ -\section{MODULE} - -% 
Short description/overview of module functions - - -\subsection{FUNCTION} - -\subsubsection*{Description} - -\subsubsection*{Function prototype} - -\subsubsection*{Parameters} - -\subsubsection*{Return value} - -\subsubsection*{Errors} - -\subsubsection*{Example} - - -\subsection{FUNCTION} - -\subsubsection*{Description} - -\subsubsection*{Function prototype} - -\subsubsection*{Parameters} - -\subsubsection*{Return value} - -\subsubsection*{Errors} - -\subsubsection*{Example} - - -\subsection{FUNCTION} - -% ... \ No newline at end of file diff --git a/kernel/picotcp/docs/user_manual/user_doc.tex b/kernel/picotcp/docs/user_manual/user_doc.tex deleted file mode 100644 index 7ef47e6..0000000 --- a/kernel/picotcp/docs/user_manual/user_doc.tex +++ /dev/null @@ -1,113 +0,0 @@ -% Altran NV -% -% PicoTCP User Documentation main file -% ==================================== - -%\documentclass[11pt, a4paper, openright]{paper} - - -\input{layout1} - -% TEMPS - -%\usepackage{tikz} -%\usepackage[latin1]{inputenc} -%\usepackage{graphicx} -%\usepackage[hang,flushmargin]{footmisc} -%\usepackage{pdfpages} -%\usepackage{tabularx} -%\usepackage{lscape} -%\usepackage{longtable} -%\usepackage{verbatim} -%\usepackage{moreverb} -%\usepackage{listings} -%\usepackage{draftcopy} -%\usepackage{hyperref} -\usepackage{longtable} - -%% to print watermark -% \usepackage{draftwatermark} -% \SetWatermarkText{Altran ISY Confidential} -% \SetWatermarkScale{3} -% \SetWatermarkLightness{0.9} - -% to adjust the space between titles and text -\usepackage[compact]{titlesec} -\titlespacing{\section}{0pt}{*5}{*2} -\titlespacing{\subsection}{0pt}{*4}{*1} -\titlespacing{\subsubsection}{0pt}{*1}{*0} - -% to minimize space between list items -\usepackage{enumitem} - -% To use hyperlinks -\usepackage{hyperref} -% limit toc depth until sections -\setcounter{tocdepth}{1} - - -\begin{document} - -\title{picoTCP User Documentation} -\author{Copyright \copyright 2017 Altran Belgium NV. 
All right reserved.} -\maketitle -\date{\today} -\maketitle - -\thispagestyle{empty} - -Disclaimer -This document is distributed under the terms of Creative Commons CC BY-ND 3.0. -You are free to share unmodified copies of this document, as long as the copyright -statement is kept. Click \href{http://creativecommons.org/licenses/by-nd/3.0/} {here} to view the full license text. - - -\pagenumbering{arabic} - -\selectlanguage{english} - -\tableofcontents - -%\chapter{Introduction} -%\label{chap:intro} -%\input{chap_intro} - -\chapter{Overview} -\label{chap:overview} -\input{chap_overview} - - -\chapter{API Documentation} -\label{chap:api_doc} -The following sections will describe the API for picoTCP. -\input{chap_api_ipv4} -\input{chap_api_ipv6} -\input{chap_api_sock} -\input{chap_api_dhcp_c} -\input{chap_api_dhcp_d} -\input{chap_api_dns_c} -\input{chap_api_mdns} -\input{chap_api_dns_sd} -\input{chap_api_sntp_c} -\input{chap_api_igmp} -\input{chap_api_mld} -\input{chap_api_ipfilter} -\input{chap_api_slaacv4} -\input{chap_api_tftp} -\input{chap_api_ppp} -\input{chap_api_olsr} -\input{chap_api_aodv} - - -\appendix - -% Do not include license -%\chapter{License} -%\label{chap:license} -%\input{chap_license} - -\chapter{Supported RFCs} -\label{chap:rfcs} -\input{chap_rfcs} - -\end{document} diff --git a/kernel/picotcp/docs/wiki_images/Protocol input.dia b/kernel/picotcp/docs/wiki_images/Protocol input.dia deleted file mode 100644 index 0e49c39..0000000 Binary files a/kernel/picotcp/docs/wiki_images/Protocol input.dia and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/Protocol output.dia b/kernel/picotcp/docs/wiki_images/Protocol output.dia deleted file mode 100644 index 599643c..0000000 Binary files a/kernel/picotcp/docs/wiki_images/Protocol output.dia and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/Protocol_input.png b/kernel/picotcp/docs/wiki_images/Protocol_input.png deleted file mode 100644 index 746cb64..0000000 Binary files 
a/kernel/picotcp/docs/wiki_images/Protocol_input.png and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/Protocol_output.png b/kernel/picotcp/docs/wiki_images/Protocol_output.png deleted file mode 100644 index d8743db..0000000 Binary files a/kernel/picotcp/docs/wiki_images/Protocol_output.png and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/driver_stack_memory.dia b/kernel/picotcp/docs/wiki_images/driver_stack_memory.dia deleted file mode 100644 index 7364481..0000000 Binary files a/kernel/picotcp/docs/wiki_images/driver_stack_memory.dia and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/driver_stack_memory.png b/kernel/picotcp/docs/wiki_images/driver_stack_memory.png deleted file mode 100644 index f8e0d19..0000000 Binary files a/kernel/picotcp/docs/wiki_images/driver_stack_memory.png and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/petri_net.gif b/kernel/picotcp/docs/wiki_images/petri_net.gif deleted file mode 100644 index 4989ae2..0000000 Binary files a/kernel/picotcp/docs/wiki_images/petri_net.gif and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/picoTCP_logo.png b/kernel/picotcp/docs/wiki_images/picoTCP_logo.png deleted file mode 100644 index 577678f..0000000 Binary files a/kernel/picotcp/docs/wiki_images/picoTCP_logo.png and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/pico_frame.png b/kernel/picotcp/docs/wiki_images/pico_frame.png deleted file mode 100644 index d338d80..0000000 Binary files a/kernel/picotcp/docs/wiki_images/pico_frame.png and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/port_to_os.dia b/kernel/picotcp/docs/wiki_images/port_to_os.dia deleted file mode 100644 index 4077019..0000000 Binary files a/kernel/picotcp/docs/wiki_images/port_to_os.dia and /dev/null differ diff --git a/kernel/picotcp/docs/wiki_images/port_to_os.png b/kernel/picotcp/docs/wiki_images/port_to_os.png deleted file mode 100644 index 989f723..0000000 Binary files 
a/kernel/picotcp/docs/wiki_images/port_to_os.png and /dev/null differ diff --git a/kernel/picotcp/include/arch/pico_arm9.h b/kernel/picotcp/include/arch/pico_arm9.h deleted file mode 100644 index ca4a35c..0000000 --- a/kernel/picotcp/include/arch/pico_arm9.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - *********************************************************************/ -#define dbg(...) do {} while(0) - -/******************/ - -/*** MACHINE CONFIGURATION ***/ -/* Temporary (POSIX) stuff. */ -#include -#include - -extern volatile uint32_t __str9_tick; - -#define pico_native_malloc(x) calloc(x, 1) -#define pico_native_free(x) free(x) - -static inline unsigned long PICO_TIME(void) -{ - register uint32_t tick = __str9_tick; - return tick / 1000; -} - -static inline unsigned long PICO_TIME_MS(void) -{ - return __str9_tick; -} - -static inline void PICO_IDLE(void) -{ - unsigned long tick_now = __str9_tick; - while(tick_now == __str9_tick) ; -} - diff --git a/kernel/picotcp/include/arch/pico_atsamd21j18.h b/kernel/picotcp/include/arch/pico_atsamd21j18.h deleted file mode 100644 index 27d272a..0000000 --- a/kernel/picotcp/include/arch/pico_atsamd21j18.h +++ /dev/null @@ -1,61 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - *********************************************************************/ - -/*** MACHINE CONFIGURATION ***/ -/* Temporary (POSIX) stuff. */ -#include -#include - -/* Temporary debugging stuff. 
*/ -#include -#include "halUart.h" -#include - -static void print_uart(char *str) -{ - int i, len; - len = (int)strlen(str); - for (i = 0; i < len; i++) { - HAL_UartWriteByte(str[i]); - if (HAL_UartTxFull()) - HAL_UartFlush(); - } -} - -static inline void sam_dbg(const char *format, ...) -{ - char msg[128] = { 0 }; - va_list args; - va_start(args, format); - vsnprintf(msg, 256, format, args); - va_end(args); - print_uart(msg); -} - -//#define dbg sam_dbg -#define dbg(...) do { } while(0) - -extern volatile uint32_t sam_tick; - -#define pico_zalloc(x) calloc(x, 1) -#define pico_free(x) free(x) - -static inline unsigned long PICO_TIME(void) -{ - register uint32_t tick = sam_tick; - return tick / 1000; -} - -static inline unsigned long PICO_TIME_MS(void) -{ - return sam_tick; -} - -static inline void PICO_IDLE(void) -{ - unsigned long tick_now = sam_tick; - while(tick_now == sam_tick) ; -} - diff --git a/kernel/picotcp/include/arch/pico_avr.h b/kernel/picotcp/include/arch/pico_avr.h deleted file mode 100644 index bd5f720..0000000 --- a/kernel/picotcp/include/arch/pico_avr.h +++ /dev/null @@ -1,39 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#define dbg(...) do {} while(0) -/* #define dbg printf */ - -/*************************/ - -/*** MACHINE CONFIGURATION ***/ -/* Temporary (POSIX) stuff. 
*/ -#include -#include -#include -#include "pico_mm.h" - -extern volatile uint32_t __avr_tick; - -#define pico_zalloc(x) calloc(x, 1) -#define pico_free(x) free(x) - -static inline unsigned long PICO_TIME(void) -{ - register uint32_t tick = __avr_tick; - return tick / 1000; -} - -static inline unsigned long PICO_TIME_MS(void) -{ - return __avr_tick; -} - -static inline void PICO_IDLE(void) -{ - unsigned long tick_now = __avr_tick; - while(tick_now == __avr_tick) ; -} - diff --git a/kernel/picotcp/include/arch/pico_cortex_m.h b/kernel/picotcp/include/arch/pico_cortex_m.h deleted file mode 100644 index de3f475..0000000 --- a/kernel/picotcp/include/arch/pico_cortex_m.h +++ /dev/null @@ -1,12 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef _INCLUDE_PICO_CORTEX_M -#define _INCLUDE_PICO_CORTEX_M - -#include "pico_generic_gcc.h" - -#endif /* PICO_CORTEX_M */ - diff --git a/kernel/picotcp/include/arch/pico_dos.h b/kernel/picotcp/include/arch/pico_dos.h deleted file mode 100644 index 4c12a0a..0000000 --- a/kernel/picotcp/include/arch/pico_dos.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * This is a picoTCP arch file for the DOS 16 bit target using OpenWatcom v1.9 - * Copyright (C) 2015 Mateusz Viste - * - * This code is donated to the picoTCP project, and shares the same licensing, - * that is GNU GPLv2. - * - * See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - */ - -#include /* provides int86() along with the union REGS type */ - -#ifndef PICO_SUPPORT_DOS_WATCOM -#define PICO_SUPPORT_DOS_WATCOM - -#define dbg(...) 
- -#define pico_zalloc(x) calloc(x, 1) -#define pico_free(x) free(x) - -static inline unsigned long PICO_TIME_MS(void) -{ - union REGS regs; - unsigned long ticks; - regs.h.ah = 0; /* get system time (IBM BIOS call) - INT 1A,0 */ - int86(0x1A, ®s, ®s); - ticks = regs.x.cx; /* number of ticks since midnight (high word) */ - ticks <<= 16; - ticks |= regs.x.dx; /* number of ticks since midnight (low word) */ - return (ticks * 55); /* a tick is 55ms because the i8253 PIT runs at 18.2 Hz */ -} - -static inline unsigned long PICO_TIME(void) -{ - return (PICO_TIME_MS() / 1000); -} - -static inline void PICO_IDLE(void) -{ - union REGS regs; - int86(0x28, ®s, ®s); /* DOS 2+ IDLE INTERRUPT */ -} - -#endif /* PICO_SUPPORT_DOS_WATCOM */ diff --git a/kernel/picotcp/include/arch/pico_esp8266.h b/kernel/picotcp/include/arch/pico_esp8266.h deleted file mode 100644 index fecac73..0000000 --- a/kernel/picotcp/include/arch/pico_esp8266.h +++ /dev/null @@ -1,58 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2014-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef _INCLUDE_PICO_ESP8266 -#define _INCLUDE_PICO_ESP8266 - -#include - -#include -#include -#include -#include "pico_constants.h" - -/* -------------- DEBUG ------------- */ - -/* #define dbg(...) 
*/ -#define dbg printf - -/* -------------- MEMORY ------------- */ -extern void *pvPortMalloc( size_t xWantedSize ); -extern void vPortFree( void *pv ); - -#define pico_free vPortFree - -static inline void *pico_zalloc(size_t size) -{ - void *ptr = (void *)pvPortMalloc(size); - - if(ptr) - memset(ptr, 0u, size); - - return ptr; -} - -/* -------------- TIME ------------- */ - -extern volatile uint32_t esp_tick; - -static inline pico_time PICO_TIME_MS(void) -{ - return (pico_time) esp_tick; -} - -static inline pico_time PICO_TIME(void) -{ - return PICO_TIME_MS() / 1000; -} - -static inline void PICO_IDLE(void) -{ - uint32_t now = esp_tick; - while (now == esp_tick) - ; -} - -#endif diff --git a/kernel/picotcp/include/arch/pico_generic_gcc.h b/kernel/picotcp/include/arch/pico_generic_gcc.h deleted file mode 100644 index 79ef41d..0000000 --- a/kernel/picotcp/include/arch/pico_generic_gcc.h +++ /dev/null @@ -1,117 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef _INCLUDE_PICO_GCC -#define _INCLUDE_PICO_GCC - -#include -#include -#include -#include "pico_constants.h" - -/* #define TIME_PRESCALE */ - -/* monotonically increasing tick, - * typically incremented every millisecond in a systick interrupt */ -extern volatile unsigned int pico_ms_tick; - -#define dbg(...) 
- -#ifdef PICO_SUPPORT_PTHREAD - #define PICO_SUPPORT_MUTEX -#endif - -#ifdef PICO_SUPPORT_RTOS - #define PICO_SUPPORT_MUTEX - -extern void *pico_mutex_init(void); -extern void pico_mutex_lock(void*); -extern void pico_mutex_unlock(void*); -extern void *pvPortMalloc( size_t xSize ); -extern void vPortFree( void *pv ); - - #define pico_free(x) vPortFree(x) - #define free(x) vPortFree(x) - -static inline void *pico_zalloc(size_t size) -{ - void *ptr = pvPortMalloc(size); - - if(ptr) - memset(ptr, 0u, size); - - return ptr; -} - -/* time prescaler */ -#ifdef TIME_PRESCALE -extern int32_t prescale_time; -#endif - -static inline pico_time PICO_TIME_MS() -{ - #ifdef TIME_PRESCALE - return pico_ms_tick << prescale_time; - #else - return pico_ms_tick; - #endif -} - -static inline pico_time PICO_TIME() -{ - #ifdef TIME_PRESCALE - return (pico_ms_tick / 1000) << prescale_time; - #else - return (pico_ms_tick / 1000); - #endif -} - -static inline void PICO_IDLE(void) -{ - pico_time now = PICO_TIME_MS(); - while(now == PICO_TIME_MS()) ; -} - -#else /* NO RTOS SUPPORT */ - - #ifdef MEM_MEAS -/* These functions should be implemented elsewhere */ -extern void *memmeas_zalloc(size_t size); -extern void memmeas_free(void *); - #define pico_free(x) memmeas_free(x) - #define pico_zalloc(x) memmeas_zalloc(x) - #else -/* Use plain C-lib malloc and free */ - #define pico_free(x) free(x) -static inline void *pico_zalloc(size_t size) -{ - void *ptr = malloc(size); - if(ptr) - memset(ptr, 0u, size); - - return ptr; -} - #endif - -static inline pico_time PICO_TIME_MS(void) -{ - return (pico_time)pico_ms_tick; -} - -static inline pico_time PICO_TIME(void) -{ - return (pico_time)(PICO_TIME_MS() / 1000); -} - -static inline void PICO_IDLE(void) -{ - unsigned int now = pico_ms_tick; - while(now == pico_ms_tick) ; -} - -#endif /* IFNDEF RTOS */ - -#endif /* PICO_GCC */ - diff --git a/kernel/picotcp/include/arch/pico_linux.h b/kernel/picotcp/include/arch/pico_linux.h deleted file mode 100644 index 
3910c25..0000000 --- a/kernel/picotcp/include/arch/pico_linux.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef PICO_SUPPORT_LINUX -#define PICO_SUPPORT_LINUX - -#include "linux/types.h" -#include "linux/mm.h" -#include "linux/slab.h" -#include "linux/jiffies.h" - -#define dbg printk - -#define pico_zalloc(x) kcalloc(x, 1, GFP_ATOMIC) /* All allocations are GFP_ATOMIC for now */ -#define pico_free(x) kfree(x) - - -static inline unsigned long PICO_TIME(void) -{ - return (unsigned long)(jiffies_to_msecs(jiffies) / 1000); -} - -static inline unsigned long PICO_TIME_MS(void) -{ - return (unsigned long)jiffies_to_msecs(jiffies); -} - -static inline void PICO_IDLE(void) -{ - unsigned long now = jiffies; - while (now == jiffies) { - ; - } -} - -#endif diff --git a/kernel/picotcp/include/arch/pico_mbed.h b/kernel/picotcp/include/arch/pico_mbed.h deleted file mode 100644 index cb140c7..0000000 --- a/kernel/picotcp/include/arch/pico_mbed.h +++ /dev/null @@ -1,185 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - File: pico_mbed.h - Author: Toon Peters - *********************************************************************/ - -#ifndef PICO_SUPPORT_MBED -#define PICO_SUPPORT_MBED -#include -#include -/* #include "mbed.h" */ -/* #include "serial_api.h" */ - -/* #define TIME_PRESCALE */ -/* #define PICO_MEASURE_STACK */ -/* #define MEMORY_MEASURE */ -/* - Debug needs initialization: - * void serial_init (serial_t *obj, PinName tx, PinName rx); - * void serial_baud (serial_t *obj, int baudrate); - * void serial_format (serial_t *obj, int data_bits, SerialParity parity, int stop_bits); - */ - -#define dbg(...) - -/* - #define MEMORY_MEASURE - #define JENKINS_DEBUG - */ - -/* Intended for Mr. 
Jenkins endurance test loggings */ -#ifdef JENKINS_DEBUG -#include "PicoTerm.h" -#define jenkins_dbg ptm_dbg -#endif - -#ifdef PICO_MEASURE_STACK - -extern int freeStack; -#define STACK_TOTAL_WORDS 1000u -#define STACK_PATTERN (0xC0CAC01Au) - -void stack_fill_pattern(void *ptr); -void stack_count_free_words(void *ptr); -int stack_get_free_words(void); -#else -#define stack_fill_pattern(...) do {} while(0) -#define stack_count_free_words(...) do {} while(0) -#define stack_get_free_words() (0) -#endif - -#ifdef MEMORY_MEASURE /* in case, comment out the two defines above me. */ -extern uint32_t max_mem; -extern uint32_t cur_mem; - -struct mem_chunk_stats { -#ifdef MEMORY_MEASURE_ADV - uint32_t signature; - void *mem; -#endif - uint32_t size; -}; - -static inline void *pico_zalloc(int x) -{ - struct mem_chunk_stats *stats; - if ((cur_mem + x) > (10 * 1024)) - return NULL; - - stats = (struct mem_chunk_stats *)calloc(x + sizeof(struct mem_chunk_stats), 1); -#ifdef MEMORY_MEASURE_ADV - stats->signature = 0xdeadbeef; - stats->mem = ((uint8_t *)stats) + sizeof(struct mem_chunk_stats); -#endif - stats->size = x; - - /* Intended for Mr. 
Jenkins endurance test loggings */ - #ifdef JENKINS_DEBUG - if (!stats) { - jenkins_dbg(">> OUT OF MEM\n"); - while(1) ; - ; - } - - #endif - cur_mem += x; - if (cur_mem > max_mem) { - max_mem = cur_mem; - /* printf("max mem: %lu\n", max_mem); */ - } - -#ifdef MEMORY_MEASURE_ADV - return (void*)(stats->mem); -#else - return (void*) (((uint8_t *)stats) + sizeof(struct mem_chunk_stats)); -#endif -} - -static inline void pico_free(void *x) -{ - struct mem_chunk_stats *stats = (struct mem_chunk_stats *) ((uint8_t *)x - sizeof(struct mem_chunk_stats)); - - #ifdef JENKINS_DEBUG - #ifdef MEMORY_MEASURE_ADV - if ((stats->signature != 0xdeadbeef) || (x != stats->mem)) { - jenkins_dbg(">> FREE ERROR: caller is %p\n", __builtin_return_address(0)); - while(1) ; - ; - } - - #endif - - #endif - - cur_mem -= stats->size; - memset(stats, 0, sizeof(struct mem_chunk_stats)); - free(stats); -} -#else - -#define pico_zalloc(x) calloc(x, 1) -#define pico_free(x) free(x) - -#endif - -#define PICO_SUPPORT_MUTEX -extern void *pico_mutex_init(void); -extern void pico_mutex_lock(void*); -extern void pico_mutex_unlock(void*); -extern void pico_mutex_deinit(void*); - -extern uint32_t os_time; -extern pico_time local_time; -extern uint32_t last_os_time; - -#ifdef TIME_PRESCALE -extern int32_t prescale_time; -#endif - -#define UPDATE_LOCAL_TIME() do {local_time = local_time + ((pico_time)os_time - (pico_time)last_os_time);last_os_time = os_time;} while(0) - -static inline pico_time PICO_TIME(void) -{ - UPDATE_LOCAL_TIME(); - #ifdef TIME_PRESCALE - return (prescale_time < 0) ? (pico_time)(local_time / 1000 << (-prescale_time)) : \ - (pico_time)(local_time / 1000 >> prescale_time); - #else - return (pico_time)(local_time / 1000); - #endif -} - -static inline pico_time PICO_TIME_MS(void) -{ - UPDATE_LOCAL_TIME(); - #ifdef TIME_PRESCALE - return (prescale_time < 0) ? 
(pico_time)(local_time << (-prescale_time)) : \ - (pico_time)(local_time >> prescale_time); - #else - return (pico_time)local_time; - #endif -} - -static inline void PICO_IDLE(void) -{ - /* TODO needs implementation */ -} -/* - static inline void PICO_DEBUG(const char * formatter, ... ) - { - char buffer[256]; - char *ptr; - va_list args; - va_start(args, formatter); - vsnprintf(buffer, 256, formatter, args); - ptr = buffer; - while(*ptr != '\0') - serial_putc(serial_t *obj, (int) (*(ptr++))); - va_end(args); - //TODO implement serial_t - }*/ - -#endif diff --git a/kernel/picotcp/include/arch/pico_mop2.h b/kernel/picotcp/include/arch/pico_mop2.h deleted file mode 100644 index 703b46e..0000000 --- a/kernel/picotcp/include/arch/pico_mop2.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef PICO_SUPPORT_MOP2 -#define PICO_SUPPORT_MOP2 - -#include "dlmalloc/malloc.h" -#include "time/time.h" -#include "kprintf.h" - -// proc/proc.c -extern uint64_t uptime_ms; - -#define dbg kprintf -#define pico_zalloc(x) (dlmalloc((x))) -#define pico_free(x) (dlfree((x))) -static inline unsigned long PICO_TIME(void) { - return uptime_ms / 1000; -} - -static inline unsigned long PICO_TIME_MS(void) { - return uptime_ms; -} - -static inline void PICO_IDLE(void) { - -} - -#endif // PICO_SUPPORT_MOP2 diff --git a/kernel/picotcp/include/arch/pico_msp430.h b/kernel/picotcp/include/arch/pico_msp430.h deleted file mode 100644 index c2fc7bd..0000000 --- a/kernel/picotcp/include/arch/pico_msp430.h +++ /dev/null @@ -1,38 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef _INCLUDE_PICO_LPC -#define _INCLUDE_PICO_LPC - -#include -#include -#include -#include "pico_constants.h" - -extern pico_time msp430_time_s(void); -extern pico_time msp430_time_ms(void); -extern void *malloc(size_t); -extern void free(void *); - - -#define PICO_TIME() msp430_time_s() -#define PICO_TIME_MS() msp430_time_ms() -#define PICO_IDLE() do {} while(0) - -#define pico_free(x) free(x) - -static inline void *pico_zalloc(size_t size) -{ - void *ptr = malloc(size); - - if(ptr) - memset(ptr, 0u, size); - - return ptr; -} - -#define dbg(...) - -#endif diff --git a/kernel/picotcp/include/arch/pico_none.h b/kernel/picotcp/include/arch/pico_none.h deleted file mode 100644 index b4a3599..0000000 --- a/kernel/picotcp/include/arch/pico_none.h +++ /dev/null @@ -1,22 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - *********************************************************************/ - -#ifndef PICO_SUPPORT_ARCHNONE -#define PICO_SUPPORT_ARCHNONE - -#include -#include -#include -#include - -#define dbg(...) do {} while(0) -#define pico_zalloc(x) NULL -#define pico_free(x) do {} while(0) -#define PICO_TIME() 666 -#define PICO_TIME_MS() 666000 -#define PICO_IDLE() do {} while(0) - -#endif /* PICO_SUPPORT_ARCHNONE */ - diff --git a/kernel/picotcp/include/arch/pico_pic24.h b/kernel/picotcp/include/arch/pico_pic24.h deleted file mode 100644 index eb3f117..0000000 --- a/kernel/picotcp/include/arch/pico_pic24.h +++ /dev/null @@ -1,100 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- *********************************************************************/ -#ifndef PICO_SUPPORT_PIC24 -#define PICO_SUPPORT_PIC24 -#define dbg printf -/* #define dbg(...) */ - -/*************************/ - -/*** MACHINE CONFIGURATION ***/ -#include -#include - -/* #include "phalox_development_board.h" */ - -#ifndef __PIC24F__ -#define __PIC24F__ -#endif - -/* - #ifndef __PIC24FJ256GA106__ - #define __PIC24FJ256GA106__ - #endif - */ - -#ifndef PICO_MAX_SOCKET_FRAMES -#define PICO_MAX_SOCKET_FRAMES 16 -#endif - -/* Device header file */ - -#if defined(__PIC24E__) -# include -#elif defined(__PIC24F__) -# include -#elif defined(__PIC24H__) -# include -#endif - - -#define TIMBASE_INT_E IEC0bits.T2IE - -#ifdef PICO_SUPPORT_DEBUG_MEMORY -static inline void *pico_zalloc(int len) -{ - /* dbg("%s: Alloc object of len %d, caller: %p\n", __FUNCTION__, len, __builtin_return_address(0)); */ - return calloc(len, 1); -} - -static inline void pico_free(void *tgt) -{ - /* dbg("%s: Discarded object @%p, caller: %p\n", __FUNCTION__, tgt, __builtin_return_address(0)); */ - free(tgt); -} -#else -# define pico_zalloc(x) calloc(x, 1) -# define pico_free(x) free(x) -#endif - -extern void *pvPortMalloc( size_t xWantedSize ); -extern volatile pico_time __pic24_tick; - -static inline unsigned long PICO_TIME(void) -{ - unsigned long tick; - /* Disable timer interrupts */ - TIMBASE_INT_E = 0; - tick = __pic24_tick; - /* Enable timer interrupts */ - TIMBASE_INT_E = 1; - return tick / 1000; -} - -static inline unsigned long PICO_TIME_MS(void) -{ - unsigned long tick; - /* Disable timer interrupts */ - TIMBASE_INT_E = 0; - tick = __pic24_tick; - /* Enable timer interrupts */ - TIMBASE_INT_E = 1; - return tick; -} - -static inline void PICO_IDLE(void) -{ - unsigned long tick_now; - /* Disable timer interrupts */ - TIMBASE_INT_E = 0; - tick_now = (unsigned long)pico_tick; - /* Enable timer interrupts */ - TIMBASE_INT_E = 1; - /* Doesn't matter that this call isn't interrupt safe, */ - /* we just 
check for the value to change */ - while(tick_now == __pic24_tick) ; -} - -#endif diff --git a/kernel/picotcp/include/arch/pico_pic32.h b/kernel/picotcp/include/arch/pico_pic32.h deleted file mode 100644 index ff902e4..0000000 --- a/kernel/picotcp/include/arch/pico_pic32.h +++ /dev/null @@ -1,54 +0,0 @@ - -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef _INCLUDE_PICO_PIC32 -#define _INCLUDE_PICO_PIC32 - -#include -#include -#include -#include "pico_constants.h" - -/* monotonically increasing tick, - * typically incremented every millisecond in a systick interrupt */ -extern volatile unsigned int pico_ms_tick; - -#ifdef PIC32_NO_PRINTF -#define dbg(...) do {} while(0) -#else -#define dbg printf -#endif - -/* Use plain C-lib malloc and free */ -#define pico_free(x) free(x) - -static inline void *pico_zalloc(size_t size) -{ - void *ptr = malloc(size); - if(ptr) - memset(ptr, 0u, size); - - return ptr; -} - -static inline pico_time PICO_TIME_MS(void) -{ - return (pico_time)pico_ms_tick; -} - -static inline pico_time PICO_TIME(void) -{ - return (pico_time)(PICO_TIME_MS() / 1000); -} - -static inline void PICO_IDLE(void) -{ - unsigned int now = pico_ms_tick; - while(now == pico_ms_tick) ; -} - -#endif /* PICO_PIC32 */ - diff --git a/kernel/picotcp/include/arch/pico_posix.h b/kernel/picotcp/include/arch/pico_posix.h deleted file mode 100644 index 427fed8..0000000 --- a/kernel/picotcp/include/arch/pico_posix.h +++ /dev/null @@ -1,137 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- *********************************************************************/ - -#ifndef PICO_SUPPORT_POSIX -#define PICO_SUPPORT_POSIX - -#include -#include -#include -#include - -/* - #define MEMORY_MEASURE - #define TIME_PRESCALE - #define PICO_SUPPORT_THREADING - */ -#define dbg printf - -#define stack_fill_pattern(...) do {} while(0) -#define stack_count_free_words(...) do {} while(0) -#define stack_get_free_words() (0) - -/* measure allocated memory */ -#ifdef MEMORY_MEASURE -extern uint32_t max_mem; -extern uint32_t cur_mem; - -static inline void *pico_zalloc(int x) -{ - uint32_t *ptr; - if ((cur_mem + x) > (10 * 1024)) - return NULL; - - ptr = (uint32_t *)calloc(x + 4, 1); - *ptr = (uint32_t)x; - cur_mem += x; - if (cur_mem > max_mem) { - max_mem = cur_mem; - } - - return (void*)(ptr + 1); -} - -static inline void pico_free(void *x) -{ - uint32_t *ptr = (uint32_t*)(((uint8_t *)x) - 4); - cur_mem -= *ptr; - free(ptr); -} -#else -#define pico_zalloc(x) calloc(x, 1) -#define pico_free(x) free(x) -#endif - -/* time prescaler */ -#ifdef TIME_PRESCALE -extern int32_t prescale_time; -#endif - -#if defined(PICO_SUPPORT_RTOS) || defined (PICO_SUPPORT_PTHREAD) -/* pico_ms_tick must be defined */ -extern volatile uint32_t pico_ms_tick; - -static inline uint32_t PICO_TIME(void) -{ - #ifdef TIME_PRESCALE - return (pico_ms_tick / 1000) << prescale_time; - #else - return (pico_ms_tick / 1000); - #endif -} - -static inline uint32_t PICO_TIME_MS(void) -{ - #ifdef TIME_PRESCALE - return pico_ms_tick << prescale_time; - #else - return pico_ms_tick; - #endif -} - -#else - -static inline uint32_t PICO_TIME(void) -{ - struct timeval t; - gettimeofday(&t, NULL); - #ifdef TIME_PRESCALE - return (prescale_time < 0) ? 
(uint32_t)(t.tv_sec / 1000 << (-prescale_time)) : \ - (uint32_t)(t.tv_sec / 1000 >> prescale_time); - #else - return (uint32_t)t.tv_sec; - #endif -} - -static inline uint32_t PICO_TIME_MS(void) -{ - struct timeval t; - gettimeofday(&t, NULL); - #ifdef TIME_PRESCALER - uint32_t tmp = ((t.tv_sec * 1000) + (t.tv_usec / 1000)); - return (prescale_time < 0) ? (uint32_t)(tmp / 1000 << (-prescale_time)) : \ - (uint32_t)(tmp / 1000 >> prescale_time); - #else - return (uint32_t)((t.tv_sec * 1000) + (t.tv_usec / 1000)); - #endif -} -#endif - -#ifdef PICO_SUPPORT_THREADING -#define PICO_SUPPORT_MUTEX -/* mutex implementations */ -extern void *pico_mutex_init(void); -extern void pico_mutex_lock(void *mux); -extern void pico_mutex_unlock(void *mux); - -/* semaphore implementations (only used in wrapper code) */ -extern void *pico_sem_init(void); -extern void pico_sem_destroy(void *sem); -extern void pico_sem_post(void *sem); -/* returns -1 on timeout (in ms), else returns 0 */ -/* if timeout < 0, the semaphore waits forever */ -extern int pico_sem_wait(void *sem, int timeout); - -/* thread implementations */ -extern void *pico_thread_create(void *(*routine)(void *), void *arg); -#endif /* PICO_SUPPORT_THREADING */ - -static inline void PICO_IDLE(void) -{ - usleep(5000); -} - -#endif /* PICO_SUPPORT_POSIX */ - diff --git a/kernel/picotcp/include/heap.h b/kernel/picotcp/include/heap.h deleted file mode 100644 index 7e42fa4..0000000 --- a/kernel/picotcp/include/heap.h +++ /dev/null @@ -1,107 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#define MAX_BLOCK_SIZE 1600 -#define MAX_BLOCK_COUNT 16 - -#define DECLARE_HEAP(type, orderby) \ - struct heap_ ## type { \ - uint32_t size; \ - uint32_t n; \ - type *top[MAX_BLOCK_COUNT]; \ - }; \ - typedef struct heap_ ## type heap_ ## type; \ - static inline type* heap_get_element(struct heap_ ## type *heap, uint32_t idx) \ - { \ - uint32_t elements_per_block = MAX_BLOCK_SIZE/sizeof(type); \ - return &heap->top[idx/elements_per_block][idx%elements_per_block];\ - } \ - static inline int8_t heap_increase_size(struct heap_ ## type *heap) \ - {\ - type *newTop; \ - uint32_t elements_per_block = MAX_BLOCK_SIZE/sizeof(type); \ - uint32_t elements = (heap->n + 1)%elements_per_block;\ - elements = elements?elements:elements_per_block;\ - if (heap->n+1 > elements_per_block * MAX_BLOCK_COUNT){\ - return -1;\ - }\ - newTop = PICO_ZALLOC(elements*sizeof(type)); \ - if(!newTop) { \ - return -1; \ - } \ - if (heap->top[heap->n/elements_per_block]) { \ - memcpy(newTop, heap->top[heap->n/elements_per_block], (elements - 1) * sizeof(type)); \ - PICO_FREE(heap->top[heap->n/elements_per_block]); \ - } \ - heap->top[heap->n/elements_per_block] = newTop; \ - heap->size++; \ - return 0; \ - }\ - static inline int heap_insert(struct heap_ ## type *heap, type * el) \ - { \ - type *half; \ - uint32_t i; \ - if (++heap->n >= heap->size) { \ - if (heap_increase_size(heap)){ \ - heap->n--; \ - return -1; \ - } \ - } \ - if (heap->n == 1) { \ - memcpy(heap_get_element(heap, 1), el, sizeof(type)); \ - return 0; \ - } \ - i = heap->n; \ - half = heap_get_element(heap, i/2); \ - while ( (i > 1) && (half->orderby > el->orderby) ) { \ - memcpy(heap_get_element(heap, i), heap_get_element(heap, i / 2), sizeof(type)); \ - i /= 2; \ - half = heap_get_element(heap, i/2); \ - } \ - memcpy(heap_get_element(heap, i), el, sizeof(type)); \ - return 0; \ - } \ - static inline int heap_peek(struct heap_ ## type *heap, type * first) 
\ - { \ - type *last; \ - type *left_child; \ - type *right_child; \ - uint32_t i, child; \ - if(heap->n == 0) { \ - return -1; \ - } \ - memcpy(first, heap_get_element(heap, 1), sizeof(type)); \ - last = heap_get_element(heap, heap->n--); \ - for(i = 1; (i * 2u) <= heap->n; i = child) { \ - child = 2u * i; \ - right_child = heap_get_element(heap, child+1); \ - left_child = heap_get_element(heap, child); \ - if ((child != heap->n) && \ - (right_child->orderby \ - < left_child->orderby)) \ - child++; \ - left_child = heap_get_element(heap, child); \ - if (last->orderby > \ - left_child->orderby) \ - memcpy(heap_get_element(heap,i), heap_get_element(heap,child), \ - sizeof(type)); \ - else \ - break; \ - } \ - memcpy(heap_get_element(heap, i), last, sizeof(type)); \ - return 0; \ - } \ - static inline type *heap_first(heap_ ## type * heap) \ - { \ - if (heap->n == 0) \ - return NULL; \ - return heap_get_element(heap, 1); \ - } \ - static inline heap_ ## type *heap_init(void) \ - { \ - heap_ ## type * p = (heap_ ## type *)PICO_ZALLOC(sizeof(heap_ ## type)); \ - return p; \ - } \ - diff --git a/kernel/picotcp/include/pico_addressing.h b/kernel/picotcp/include/pico_addressing.h deleted file mode 100644 index 9264fe2..0000000 --- a/kernel/picotcp/include/pico_addressing.h +++ /dev/null @@ -1,127 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_ADDRESSING -#define INCLUDE_PICO_ADDRESSING - -#include "pico_config.h" -#include "pico_constants.h" - -PACKED_STRUCT_DEF pico_ip4 -{ - uint32_t addr; -}; - -PACKED_STRUCT_DEF pico_ip6 -{ - uint8_t addr[16]; -}; - -union pico_address -{ - struct pico_ip4 ip4; - struct pico_ip6 ip6; -}; - -/****************************************************************************** - * Ethernet Address Definitions - ******************************************************************************/ - -PACKED_STRUCT_DEF pico_eth -{ - uint8_t addr[6]; - uint8_t padding[2]; -}; - -extern const uint8_t PICO_ETHADDR_ALL[]; - -/****************************************************************************** - * Generic 6LoWPAN Address Definitions - ******************************************************************************/ - -/* 6lowpan supports 16-bit short addresses */ -PACKED_STRUCT_DEF pico_6lowpan_short -{ - uint16_t addr; -}; - -/* And also EUI-64 addresses */ -PACKED_STRUCT_DEF pico_6lowpan_ext -{ - uint8_t addr[8]; -}; - -/* Address memory as either a short 16-bit address or a 64-bit address */ -union pico_6lowpan_u -{ - uint8_t data[8]; - struct pico_6lowpan_short _short; - struct pico_6lowpan_ext _ext; -}; - -/* Info data structure to pass to pico_device_init by the device driver */ -struct pico_6lowpan_info -{ - struct pico_6lowpan_short addr_short; - struct pico_6lowpan_ext addr_ext; - struct pico_6lowpan_short pan_id; -}; - -/* Different addressing modes for IEEE802.15.4 addresses */ -#define AM_6LOWPAN_NONE (0u) -#define AM_6LOWPAN_RES (1u) -#define AM_6LOWPAN_SHORT (2u) -#define AM_6LOWPAN_EXT (3u) -#define SIZE_6LOWPAN_SHORT (2u) -#define SIZE_6LOWPAN_EXT (8u) -#define SIZE_6LOWPAN(m) (((m) == 2) ? (2) : (((m) == 3) ? 
(8) : (0))) - -/****************************************************************************** - * Generic 6LoWPAN Address Definitions - ******************************************************************************/ - -/* Storage data structure for IEEE802.15.4 addresses */ -struct pico_802154 -{ - union pico_6lowpan_u addr; - uint8_t mode; -}; - -/****************************************************************************** - * Link Layer addresses - ******************************************************************************/ - -#define IID_16(iid) (0 == (iid)[2] && 0xff == (iid)[3] && 0xfe == (iid)[4] && 0 == (iid)[5]) - -enum pico_ll_mode -{ - LL_MODE_ETHERNET = 0, -#ifdef PICO_SUPPORT_802154 - LL_MODE_IEEE802154, -#endif -}; - -union pico_ll_addr -{ - struct pico_eth eth; - struct pico_802154 pan; -}; - -PACKED_STRUCT_DEF pico_trans -{ - uint16_t sport; - uint16_t dport; -}; - -/* Here are some protocols. */ -#define PICO_PROTO_IPV4 0 -#define PICO_PROTO_ICMP4 1 -#define PICO_PROTO_IGMP 2 -#define PICO_PROTO_TCP 6 -#define PICO_PROTO_UDP 17 -#define PICO_PROTO_IPV6 41 -#define PICO_PROTO_ICMP6 58 - -#endif diff --git a/kernel/picotcp/include/pico_config.h b/kernel/picotcp/include/pico_config.h deleted file mode 100644 index 07c969c..0000000 --- a/kernel/picotcp/include/pico_config.h +++ /dev/null @@ -1,245 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#include "pico_defines.h" -#ifndef INCLUDE_PICO_CONFIG -#define INCLUDE_PICO_CONFIG -#ifndef __KERNEL__ -#include -#include -#include -#include -#else -#include -#endif - -#if defined __IAR_SYSTEMS_ICC__ || defined ATOP -# define PACKED_STRUCT_DEF __packed struct -# define PEDANTIC_STRUCT_DEF __packed struct -# define PACKED_UNION_DEF __packed union -# define PACKED __packed -# define WEAK -#elif defined __WATCOMC__ -# define PACKED_STRUCT_DEF _Packed struct -# define PEDANTIC_STRUCT_DEF struct -# define PACKED_UNION_DEF _Packed union -# define WEAK -#else -# define PACKED_STRUCT_DEF struct __attribute__((packed)) -# define PEDANTIC_STRUCT_DEF struct -# define PACKED_UNION_DEF union /* Sane compilers do not require packed unions */ -# define PACKED __attribute__((packed)) -# define WEAK __attribute__((weak)) -# ifdef __GNUC__ -# define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -# if ((GCC_VERSION >= 40800)) -# define BYTESWAP_GCC -# endif -# endif -#endif - -#ifdef PICO_BIGENDIAN - -# define PICO_IDETH_IPV4 0x0800 -# define PICO_IDETH_ARP 0x0806 -# define PICO_IDETH_IPV6 0x86DD - -# define PICO_ARP_REQUEST 0x0001 -# define PICO_ARP_REPLY 0x0002 -# define PICO_ARP_HTYPE_ETH 0x0001 - -#define short_be(x) (x) -#define long_be(x) (x) -#define long_long_be(x) (x) - -static inline uint16_t short_from(void *_p) -{ - unsigned char *p = (unsigned char *)_p; - uint16_t r, p0, p1; - p0 = p[0]; - p1 = p[1]; - r = (p0 << 8) + p1; - return r; -} - -static inline uint32_t long_from(void *_p) -{ - unsigned char *p = (unsigned char *)_p; - uint32_t r, p0, p1, p2, p3; - p0 = p[0]; - p1 = p[1]; - p2 = p[2]; - p3 = p[3]; - r = (p0 << 24) + (p1 << 16) + (p2 << 8) + p3; - return r; -} - -#else - -static inline uint16_t short_from(void *_p) -{ - unsigned char *p = (unsigned char *)_p; - uint16_t r, _p0, _p1; - _p0 = p[0]; - _p1 = p[1]; - r = (uint16_t)((_p1 << 8u) + _p0); - 
return r; -} - -static inline uint32_t long_from(void *_p) -{ - unsigned char *p = (unsigned char *)_p; - uint32_t r, _p0, _p1, _p2, _p3; - _p0 = p[0]; - _p1 = p[1]; - _p2 = p[2]; - _p3 = p[3]; - r = (_p3 << 24) + (_p2 << 16) + (_p1 << 8) + _p0; - return r; -} - - -# define PICO_IDETH_IPV4 0x0008 -# define PICO_IDETH_ARP 0x0608 -# define PICO_IDETH_IPV6 0xDD86 - -# define PICO_ARP_REQUEST 0x0100 -# define PICO_ARP_REPLY 0x0200 -# define PICO_ARP_HTYPE_ETH 0x0100 - -# ifndef BYTESWAP_GCC -static inline uint16_t short_be(uint16_t le) -{ - return (uint16_t)(((le & 0xFFu) << 8) | ((le >> 8u) & 0xFFu)); -} - -static inline uint32_t long_be(uint32_t le) -{ - uint8_t *b = (uint8_t *)&le; - uint32_t be = 0; - uint32_t b0, b1, b2; - b0 = b[0]; - b1 = b[1]; - b2 = b[2]; - be = b[3] + (b2 << 8) + (b1 << 16) + (b0 << 24); - return be; -} -static inline uint64_t long_long_be(uint64_t le) -{ - uint8_t *b = (uint8_t *)&le; - uint64_t be = 0; - uint64_t b0, b1, b2, b3, b4, b5, b6; - b0 = b[0]; - b1 = b[1]; - b2 = b[2]; - b3 = b[3]; - b4 = b[4]; - b5 = b[5]; - b6 = b[6]; - be = b[7] + (b6 << 8) + (b5 << 16) + (b4 << 24) + (b3 << 32) + (b2 << 40) + (b1 << 48) + (b0 << 56); - return be; -} -# else -/* - extern uint32_t __builtin_bswap32(uint32_t); - extern uint16_t __builtin_bswap16(uint16_t); - extern uint64_t __builtin_bswap64(uint64_t); - */ - -static inline uint32_t long_be(uint32_t le) -{ - return (uint32_t)__builtin_bswap32(le); -} - -static inline uint16_t short_be(uint16_t le) -{ - return (uint16_t)__builtin_bswap16(le); -} - -static inline uint64_t long_long_be(uint64_t le) -{ - return (uint64_t)__builtin_bswap64(le); -} - -# endif /* BYTESWAP_GCC */ -#endif - -/* Mockables */ -#if defined UNIT_TEST -# define MOCKABLE __attribute__((weak)) -#else -# define MOCKABLE -#endif - -#include "pico_constants.h" -#include "pico_mm.h" - -#define IGNORE_PARAMETER(x) ((void)x) - -#define PICO_MEM_DEFAULT_SLAB_SIZE 1600 -#define PICO_MEM_PAGE_SIZE 4096 -#define PICO_MEM_PAGE_LIFETIME 100 
-#define PICO_MIN_HEAP_SIZE 600 -#define PICO_MIN_SLAB_SIZE 1200 -#define PICO_MAX_SLAB_SIZE 1600 -#define PICO_MEM_MINIMUM_OBJECT_SIZE 4 - -/*** *** *** *** *** *** *** - *** PLATFORM SPECIFIC *** - *** *** *** *** *** *** ***/ -#if defined PICO_PORT_CUSTOM -# include "pico_port.h" -#elif defined CORTEX_M4_HARDFLOAT -# include "arch/pico_cortex_m.h" -#elif defined CORTEX_M4_SOFTFLOAT -# include "arch/pico_cortex_m.h" -#elif defined CORTEX_M3 -# include "arch/pico_cortex_m.h" -#elif defined CORTEX_M0 -# include "arch/pico_cortex_m.h" -#elif defined DOS_WATCOM -# include "arch/pico_dos.h" -#elif defined PIC24 -# include "arch/pico_pic24.h" -#elif defined PIC32 -# include "arch/pico_pic32.h" -#elif defined MSP430 -# include "arch/pico_msp430.h" -#elif defined MBED_TEST -# include "arch/pico_mbed.h" -#elif defined AVR -# include "arch/pico_avr.h" -#elif defined ARM9 -# include "arch/pico_arm9.h" -#elif defined ESP8266 -# include "arch/pico_esp8266.h" -#elif defined ATSAMD21J18 -# include "arch/pico_atsamd21j18.h" -#elif defined MT7681 -# include "arch/pico_generic_gcc.h" -#elif defined FAULTY -# include "../test/pico_faulty.h" -#elif defined ARCHNONE -# include "arch/pico_none.h" -#elif defined GENERIC -# include "arch/pico_generic_gcc.h" -#elif defined __KERNEL__ -# include "arch/pico_linux.h" -/* #elif defined ... */ -#elif defined PICO_MOP2 -# include "arch/pico_mop2.h" -#else -# include "arch/pico_posix.h" -#endif - -#ifdef PICO_SUPPORT_MM -#define PICO_ZALLOC(x) pico_mem_zalloc(x) -#define PICO_FREE(x) pico_mem_free(x) -#else -#define PICO_ZALLOC(x) pico_zalloc(x) -#define PICO_FREE(x) pico_free(x) -#endif /* PICO_SUPPORT_MM */ - -#endif diff --git a/kernel/picotcp/include/pico_constants.h b/kernel/picotcp/include/pico_constants.h deleted file mode 100644 index 87e68ce..0000000 --- a/kernel/picotcp/include/pico_constants.h +++ /dev/null @@ -1,58 +0,0 @@ -/********************************************************************* - PicoTCP. 
Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef INCLUDE_PICO_CONST -#define INCLUDE_PICO_CONST -/* Included from pico_config.h */ - -/** Non-endian dependant constants */ -#define PICO_SIZE_IP4 4 -#define PICO_SIZE_IP6 16 -#define PICO_SIZE_ETH 6 -#define PICO_SIZE_TRANS 8 -#define PICO_SIZE_IEEE802154_EXT (8u) -#define PICO_SIZE_IEEE802154_SHORT (2u) - -/** Endian-dependant constants **/ -typedef uint64_t pico_time; -extern volatile uint64_t pico_tick; - - -/*** *** *** *** *** *** *** - *** ARP CONFIG *** - *** *** *** *** *** *** ***/ - -#include "pico_addressing.h" - -/* Maximum amount of accepted ARP requests per burst interval */ -#define PICO_ARP_MAX_RATE 1 -/* Duration of the burst interval in milliseconds */ -#define PICO_ARP_INTERVAL 1000 - -/* Add well-known host numbers here. (bigendian constants only beyond this point) */ -#define PICO_IP4_ANY (0x00000000U) -#define PICO_IP4_BCAST (0xffffffffU) - -#define PICO_IEEE802154_BCAST (0xffffu) - -/* defined in modules/pico_ipv6.c */ -#ifdef PICO_SUPPORT_IPV6 -extern const uint8_t PICO_IPV6_ANY[PICO_SIZE_IP6]; -#endif - -static inline uint32_t pico_hash(const void *buf, uint32_t size) -{ - uint32_t hash = 5381; - uint32_t i; - const uint8_t *ptr = (const uint8_t *)buf; - for(i = 0; i < size; i++) - hash = ((hash << 5) + hash) + ptr[i]; /* hash * 33 + char */ - return hash; -} - -/* Debug */ -/* #define PICO_SUPPORT_DEBUG_MEMORY */ -/* #define PICO_SUPPORT_DEBUG_TOOLS */ -#endif diff --git a/kernel/picotcp/include/pico_device.h b/kernel/picotcp/include/pico_device.h deleted file mode 100644 index 4c6d9a7..0000000 --- a/kernel/picotcp/include/pico_device.h +++ /dev/null @@ -1,55 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. 
- See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef INCLUDE_PICO_DEVICE -#define INCLUDE_PICO_DEVICE -#include "pico_queue.h" -#include "pico_frame.h" -#include "pico_addressing.h" -#include "pico_tree.h" -extern struct pico_tree Device_tree; -#include "pico_ipv6_nd.h" -#define MAX_DEVICE_NAME 16 - - -struct pico_ethdev { - struct pico_eth mac; -}; - -struct pico_device { - char name[MAX_DEVICE_NAME]; - uint32_t hash; - uint32_t overhead; - uint32_t mtu; - struct pico_ethdev *eth; /* Null if non-ethernet */ - enum pico_ll_mode mode; - struct pico_queue *q_in; - struct pico_queue *q_out; - int (*link_state)(struct pico_device *self); - int (*send)(struct pico_device *self, void *buf, int len); /* Send function. Return 0 if busy */ - int (*poll)(struct pico_device *self, int loop_score); - void (*destroy)(struct pico_device *self); - int (*dsr)(struct pico_device *self, int loop_score); - int __serving_interrupt; - /* used to signal the upper layer the number of events arrived since the last processing */ - volatile int eventCnt; - #ifdef PICO_SUPPORT_IPV6 - struct pico_nd_hostvars hostvars; - #endif -}; - - -int pico_device_init(struct pico_device *dev, const char *name, const uint8_t *mac); -void pico_device_destroy(struct pico_device *dev); -int pico_devices_loop(int loop_score, int direction); -struct pico_device*pico_get_device(const char*name); -int32_t pico_device_broadcast(struct pico_frame *f); -int pico_device_link_state(struct pico_device *dev); -int pico_device_ipv6_random_ll(struct pico_device *dev); -#ifdef PICO_SUPPORT_IPV6 -struct pico_ipv6_link *pico_ipv6_link_add_local(struct pico_device *dev, const struct pico_ip6 *prefix); -#endif - -#endif diff --git a/kernel/picotcp/include/pico_eth.h b/kernel/picotcp/include/pico_eth.h deleted file mode 100644 index 99dccb3..0000000 --- a/kernel/picotcp/include/pico_eth.h +++ /dev/null @@ -1,21 +0,0 @@ 
-/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef INCLUDE_PICO_ETH -#define INCLUDE_PICO_ETH -#include "pico_addressing.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" - - -PACKED_STRUCT_DEF pico_eth_hdr { - uint8_t daddr[6]; - uint8_t saddr[6]; - uint16_t proto; -}; - -#define PICO_SIZE_ETHHDR 14 - -#endif diff --git a/kernel/picotcp/include/pico_frame.h b/kernel/picotcp/include/pico_frame.h deleted file mode 100644 index 125785a..0000000 --- a/kernel/picotcp/include/pico_frame.h +++ /dev/null @@ -1,131 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef INCLUDE_PICO_FRAME -#define INCLUDE_PICO_FRAME -#include "pico_config.h" - - -#define PICO_FRAME_FLAG_BCAST (0x01) -#define PICO_FRAME_FLAG_EXT_BUFFER (0x02) -#define PICO_FRAME_FLAG_EXT_USAGE_COUNTER (0x04) -#define PICO_FRAME_FLAG_SACKED (0x80) -#define PICO_FRAME_FLAG_LL_SEC (0x40) -#define PICO_FRAME_FLAG_SLP_FRAG (0x20) -#define IS_BCAST(f) ((f->flags & PICO_FRAME_FLAG_BCAST) == PICO_FRAME_FLAG_BCAST) - - -struct pico_socket; - - -struct pico_frame { - - /* Connector for queues */ - struct pico_frame *next; - - /* Start of the whole buffer, total frame length. */ - unsigned char *buffer; - uint32_t buffer_len; - - /* For outgoing packets: this is the meaningful buffer. 
*/ - unsigned char *start; - uint32_t len; - - /* Pointer to usage counter */ - uint32_t *usage_count; - - /* Pointer to protocol headers */ - uint8_t *datalink_hdr; - - uint8_t *net_hdr; - uint16_t net_len; - uint8_t *transport_hdr; - uint16_t transport_len; - uint8_t *app_hdr; - uint16_t app_len; - - /* Pointer to the phisical device this packet belongs to. - * Should be valid in both routing directions - */ - struct pico_device *dev; - - pico_time timestamp; - - /* Failures due to bad datalink addressing. */ - uint16_t failure_count; - - /* Protocol over IP */ - uint8_t proto; - - /* PICO_FRAME_FLAG_* */ - uint8_t flags; - - /* Pointer to payload */ - unsigned char *payload; - uint16_t payload_len; - -#if defined(PICO_SUPPORT_IPV4FRAG) || defined(PICO_SUPPORT_IPV6FRAG) - /* Payload fragmentation info */ - uint16_t frag; -#endif - -#if defined(PICO_SUPPORT_6LOWPAN) - uint32_t hash; - union pico_ll_addr src; - union pico_ll_addr dst; -#endif - - /* Pointer to socket */ - struct pico_socket *sock; - - /* Pointer to transport info, used to store remote UDP endpoint (IP + port) */ - void *info; - - /*Priority. "best-effort" priority, the default value is 0. 
Priority can be in between -10 and +10*/ - int8_t priority; - uint8_t transport_flags_saved; - - /* Callback to notify listener when the buffer has been discarded */ - void (*notify_free)(uint8_t *); - - uint8_t send_ttl; /* Special TTL/HOPS value, 0 = auto assign */ - uint8_t send_tos; /* Type of service */ -}; - -/** frame alloc/dealloc/copy **/ -void pico_frame_discard(struct pico_frame *f); -struct pico_frame *pico_frame_copy(struct pico_frame *f); -struct pico_frame *pico_frame_deepcopy(struct pico_frame *f); -struct pico_frame *pico_frame_alloc(uint32_t size); -int pico_frame_grow(struct pico_frame *f, uint32_t size); -int pico_frame_grow_head(struct pico_frame *f, uint32_t size); -struct pico_frame *pico_frame_alloc_skeleton(uint32_t size, int ext_buffer); -int pico_frame_skeleton_set_buffer(struct pico_frame *f, void *buf); -uint16_t pico_checksum(void *inbuf, uint32_t len); -uint16_t pico_dualbuffer_checksum(void *b1, uint32_t len1, void *b2, uint32_t len2); - -static inline int pico_is_digit(char c) -{ - if (c < '0' || c > '9') - return 0; - - return 1; -} - -static inline int pico_is_hex(char c) -{ - if (c >= '0' && c <= '9') - return 1; - - if (c >= 'a' && c <= 'f') - return 1; - - if (c >= 'A' && c <= 'F') - return 1; - - return 0; -} - -#endif diff --git a/kernel/picotcp/include/pico_md5.h b/kernel/picotcp/include/pico_md5.h deleted file mode 100644 index 067f5e1..0000000 --- a/kernel/picotcp/include/pico_md5.h +++ /dev/null @@ -1,17 +0,0 @@ -/********************************************************************* - * PicoTCP. Copyright (c) 2015-2017 Altran Intelligent Systems. Some rights reserved. - * See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- * - * Authors: Daniele Lacamera - * *********************************************************************/ - -#ifndef PICO_MD5_INCLUDE -#define PICO_MD5_INCLUDE - -#include -#include - -void pico_md5sum(uint8_t *dst, const uint8_t *src, size_t len); -void pico_register_md5sum(void (*md5)(uint8_t *, const uint8_t *, size_t)); - -#endif /* PICO_MD5_INCLUDE */ diff --git a/kernel/picotcp/include/pico_module_eth.h b/kernel/picotcp/include/pico_module_eth.h deleted file mode 100644 index 99286e8..0000000 --- a/kernel/picotcp/include/pico_module_eth.h +++ /dev/null @@ -1,33 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef PICO_MODULE_IPV4_H -#define PICO_MODULE_IPV4_H - -struct pico_arp_entry { - struct eth dest; -#ifdef PICO_CONFIG_IPV4 - struct ipv4 addr_ipv4; -#endif - RB_ENTRY(pico_arp_entry) node; -}; - -/* Configured device */ -struct pico_eth_link { - struct pico_device *dev; - struct eth address; - struct eth netmask; - RB_ENTRY(pico_eth_link) node; -}; - -#ifndef IS_MODULE_ETH -# define _mod extern -#else -# define _mod -#endif -_mod struct pico_module pico_module_eth; -#undef _mod - -#endif diff --git a/kernel/picotcp/include/pico_protocol.h b/kernel/picotcp/include/pico_protocol.h deleted file mode 100644 index 0d8176f..0000000 --- a/kernel/picotcp/include/pico_protocol.h +++ /dev/null @@ -1,97 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_PROTOCOL -#define INCLUDE_PICO_PROTOCOL -#include "pico_config.h" -#include "pico_queue.h" - -#define PICO_LOOP_DIR_IN 1 -#define PICO_LOOP_DIR_OUT 2 - -enum pico_layer { - PICO_LAYER_DATALINK = 2, /* Ethernet only. */ - PICO_LAYER_NETWORK = 3, /* IPv4, IPv6, ARP. Arp is there because it communicates with L2 */ - PICO_LAYER_TRANSPORT = 4, /* UDP, TCP, ICMP */ - PICO_LAYER_SOCKET = 5 /* Socket management */ -}; - -enum pico_err_e { - PICO_ERR_NOERR = 0, - PICO_ERR_EPERM = 1, - PICO_ERR_ENOENT = 2, - /* ... */ - PICO_ERR_EINTR = 4, - PICO_ERR_EIO = 5, - PICO_ERR_ENXIO = 6, - /* ... */ - PICO_ERR_EAGAIN = 11, - PICO_ERR_ENOMEM = 12, - PICO_ERR_EACCESS = 13, - PICO_ERR_EFAULT = 14, - /* ... */ - PICO_ERR_EBUSY = 16, - PICO_ERR_EEXIST = 17, - /* ... */ - PICO_ERR_EINVAL = 22, - /* ... */ - PICO_ERR_ENONET = 64, - /* ... */ - PICO_ERR_EPROTO = 71, - /* ... */ - PICO_ERR_ENOPROTOOPT = 92, - PICO_ERR_EPROTONOSUPPORT = 93, - /* ... */ - PICO_ERR_EOPNOTSUPP = 95, - PICO_ERR_EADDRINUSE = 98, - PICO_ERR_EADDRNOTAVAIL = 99, - PICO_ERR_ENETDOWN = 100, - PICO_ERR_ENETUNREACH = 101, - /* ... */ - PICO_ERR_ECONNRESET = 104, - /* ... */ - PICO_ERR_EISCONN = 106, - PICO_ERR_ENOTCONN = 107, - PICO_ERR_ESHUTDOWN = 108, - /* ... */ - PICO_ERR_ETIMEDOUT = 110, - PICO_ERR_ECONNREFUSED = 111, - PICO_ERR_EHOSTDOWN = 112, - PICO_ERR_EHOSTUNREACH = 113, - /* ... 
*/ - PICO_ERR_EINPROGRESS = 115, -}; - -typedef enum pico_err_e pico_err_t; -extern volatile pico_err_t pico_err; - -#define IS_IPV6(f) (f && f->net_hdr && ((((uint8_t *)(f->net_hdr))[0] & 0xf0) == 0x60)) -#define IS_IPV4(f) (f && f->net_hdr && ((((uint8_t *)(f->net_hdr))[0] & 0xf0) == 0x40)) - -#define MAX_PROTOCOL_NAME 16 - -struct pico_protocol { - char name[MAX_PROTOCOL_NAME]; - uint32_t hash; - enum pico_layer layer; - uint16_t proto_number; - struct pico_queue *q_in; - struct pico_queue *q_out; - struct pico_frame *(*alloc)(struct pico_protocol *self, struct pico_device *dev, uint16_t size); /* Frame allocation. */ - int (*push)(struct pico_protocol *self, struct pico_frame *p); /* Push function, for active outgoing pkts from above */ - int (*process_out)(struct pico_protocol *self, struct pico_frame *p); /* Send loop. */ - int (*process_in)(struct pico_protocol *self, struct pico_frame *p); /* Recv loop. */ - uint16_t (*get_mtu)(struct pico_protocol *self); -}; - -int pico_protocols_loop(int loop_score); -void pico_protocol_init(struct pico_protocol *p); - -int pico_protocol_datalink_loop(int loop_score, int direction); -int pico_protocol_network_loop(int loop_score, int direction); -int pico_protocol_transport_loop(int loop_score, int direction); -int pico_protocol_socket_loop(int loop_score, int direction); - -#endif diff --git a/kernel/picotcp/include/pico_queue.h b/kernel/picotcp/include/pico_queue.h deleted file mode 100644 index 5b5700e..0000000 --- a/kernel/picotcp/include/pico_queue.h +++ /dev/null @@ -1,166 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_QUEUE -#define INCLUDE_PICO_QUEUE -#include "pico_config.h" -#include "pico_frame.h" - -#define Q_LIMIT 0 - -#ifndef NULL -#define NULL ((void *)0) -#endif - -void *pico_mutex_init(void); -void pico_mutex_deinit(void *mutex); -void pico_mutex_lock(void *mutex); -int pico_mutex_lock_timeout(void *mutex, int timeout); -void pico_mutex_unlock(void *mutex); -void pico_mutex_unlock_ISR(void *mutex); - -struct pico_queue { - uint32_t frames; - uint32_t size; - uint32_t max_frames; - uint32_t max_size; - struct pico_frame *head; - struct pico_frame *tail; -#ifdef PICO_SUPPORT_MUTEX - void *mutex; -#endif - uint8_t shared; - uint16_t overhead; -}; - -#ifdef PICO_SUPPORT_MUTEX -#define PICOTCP_MUTEX_LOCK(x) { \ - if (x == NULL) \ - x = pico_mutex_init(); \ - pico_mutex_lock(x); \ -} -#define PICOTCP_MUTEX_UNLOCK(x) pico_mutex_unlock(x) -#define PICOTCP_MUTEX_DEL(x) pico_mutex_deinit(x) - -#else -#define PICOTCP_MUTEX_LOCK(x) do {} while(0) -#define PICOTCP_MUTEX_UNLOCK(x) do {} while(0) -#define PICOTCP_MUTEX_DEL(x) do {} while(0) -#endif - -#ifdef PICO_SUPPORT_DEBUG_TOOLS -static void debug_q(struct pico_queue *q) -{ - struct pico_frame *p = q->head; - dbg("%d: ", q->frames); - while(p) { - dbg("(%p)-->", p); - p = p->next; - } - dbg("X\n"); -} - -#else - -#define debug_q(x) do {} while(0) -#endif - -static inline int32_t pico_enqueue(struct pico_queue *q, struct pico_frame *p) -{ - if ((q->max_frames) && (q->max_frames <= q->frames)) - return -1; - -#if (Q_LIMIT != 0) - if ((Q_LIMIT < p->buffer_len + q->size)) - return -1; - -#endif - - if ((q->max_size) && (q->max_size < (p->buffer_len + q->size))) - return -1; - - if (q->shared) - PICOTCP_MUTEX_LOCK(q->mutex); - - p->next = NULL; - if (!q->head) { - q->head = p; - q->tail = p; - q->size = 0; - q->frames = 0; - } else { - q->tail->next = p; - q->tail = p; - } - - q->size += p->buffer_len + q->overhead; - q->frames++; - 
debug_q(q); - - if (q->shared) - PICOTCP_MUTEX_UNLOCK(q->mutex); - - return (int32_t)q->size; -} - -static inline struct pico_frame *pico_dequeue(struct pico_queue *q) -{ - struct pico_frame *p = q->head; - if (!p) - return NULL; - - if (q->frames < 1) - return NULL; - - if (q->shared) - PICOTCP_MUTEX_LOCK(q->mutex); - - q->head = p->next; - q->frames--; - q->size -= p->buffer_len - q->overhead; - if (q->head == NULL) - q->tail = NULL; - - debug_q(q); - - p->next = NULL; - if (q->shared) - PICOTCP_MUTEX_UNLOCK(q->mutex); - - return p; -} - -static inline struct pico_frame *pico_queue_peek(struct pico_queue *q) -{ - struct pico_frame *p = q->head; - if (q->frames < 1) - return NULL; - - debug_q(q); - return p; -} - -static inline void pico_queue_deinit(struct pico_queue *q) -{ - if (q->shared) { - PICOTCP_MUTEX_DEL(q->mutex); - } -} - -static inline void pico_queue_empty(struct pico_queue *q) -{ - struct pico_frame *p = pico_dequeue(q); - while(p) { - pico_frame_discard(p); - p = pico_dequeue(q); - } -} - -static inline void pico_queue_protect(struct pico_queue *q) -{ - q->shared = 1; -} - -#endif diff --git a/kernel/picotcp/include/pico_socket.h b/kernel/picotcp/include/pico_socket.h deleted file mode 100644 index f5f50e8..0000000 --- a/kernel/picotcp/include/pico_socket.h +++ /dev/null @@ -1,263 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_SOCKET -#define INCLUDE_PICO_SOCKET -#include "pico_queue.h" -#include "pico_addressing.h" -#include "pico_config.h" -#include "pico_protocol.h" -#include "pico_tree.h" - -#ifdef __linux__ - #define PICO_DEFAULT_SOCKETQ (16 * 1024) /* Linux host, so we want full throttle */ -#else - #define PICO_DEFAULT_SOCKETQ (6 * 1024) /* seems like an acceptable default for small embedded systems */ -#endif - -#define PICO_SHUT_RD 1 -#define PICO_SHUT_WR 2 -#define PICO_SHUT_RDWR 3 - -#ifdef PICO_SUPPORT_IPV4 -# define IS_SOCK_IPV4(s) ((s->net == &pico_proto_ipv4)) -#else -# define IS_SOCK_IPV4(s) (0) -#endif - -#ifdef PICO_SUPPORT_IPV6 -# define IS_SOCK_IPV6(s) ((s->net == &pico_proto_ipv6)) -#else -# define IS_SOCK_IPV6(s) (0) -#endif - - -struct pico_sockport -{ - struct pico_tree socks; /* how you make the connection ? */ - uint16_t number; - uint16_t proto; -}; - - -struct pico_socket { - struct pico_protocol *proto; - struct pico_protocol *net; - - union pico_address local_addr; - union pico_address remote_addr; - - uint16_t local_port; - uint16_t remote_port; - - struct pico_queue q_in; - struct pico_queue q_out; - - void (*wakeup)(uint16_t ev, struct pico_socket *s); - - -#ifdef PICO_SUPPORT_TCP - /* For the TCP backlog queue */ - struct pico_socket *backlog; - struct pico_socket *next; - struct pico_socket *parent; - uint16_t max_backlog; - uint16_t number_of_pending_conn; -#endif -#ifdef PICO_SUPPORT_MCAST - struct pico_tree *MCASTListen; -#ifdef PICO_SUPPORT_IPV6 - struct pico_tree *MCASTListen_ipv6; -#endif -#endif - uint16_t ev_pending; - - struct pico_device *dev; - - /* Private field. 
*/ - int id; - uint16_t state; - uint16_t opt_flags; - pico_time timestamp; - void *priv; -}; - -struct pico_remote_endpoint { - union pico_address remote_addr; - uint16_t remote_port; -}; - - -struct pico_ip_mreq { - union pico_address mcast_group_addr; - union pico_address mcast_link_addr; -}; -struct pico_ip_mreq_source { - union pico_address mcast_group_addr; - union pico_address mcast_source_addr; - union pico_address mcast_link_addr; -}; - - -#define PICO_SOCKET_STATE_UNDEFINED 0x0000u -#define PICO_SOCKET_STATE_SHUT_LOCAL 0x0001u -#define PICO_SOCKET_STATE_SHUT_REMOTE 0x0002u -#define PICO_SOCKET_STATE_BOUND 0x0004u -#define PICO_SOCKET_STATE_CONNECTED 0x0008u -#define PICO_SOCKET_STATE_CLOSING 0x0010u -#define PICO_SOCKET_STATE_CLOSED 0x0020u - -# define PICO_SOCKET_STATE_TCP 0xFF00u -# define PICO_SOCKET_STATE_TCP_UNDEF 0x00FFu -# define PICO_SOCKET_STATE_TCP_CLOSED 0x0100u -# define PICO_SOCKET_STATE_TCP_LISTEN 0x0200u -# define PICO_SOCKET_STATE_TCP_SYN_SENT 0x0300u -# define PICO_SOCKET_STATE_TCP_SYN_RECV 0x0400u -# define PICO_SOCKET_STATE_TCP_ESTABLISHED 0x0500u -# define PICO_SOCKET_STATE_TCP_CLOSE_WAIT 0x0600u -# define PICO_SOCKET_STATE_TCP_LAST_ACK 0x0700u -# define PICO_SOCKET_STATE_TCP_FIN_WAIT1 0x0800u -# define PICO_SOCKET_STATE_TCP_FIN_WAIT2 0x0900u -# define PICO_SOCKET_STATE_TCP_CLOSING 0x0a00u -# define PICO_SOCKET_STATE_TCP_TIME_WAIT 0x0b00u -# define PICO_SOCKET_STATE_TCP_ARRAYSIZ 0x0cu - - -/* Socket options */ -# define PICO_TCP_NODELAY 1 -# define PICO_SOCKET_OPT_TCPNODELAY 0x0000u - -# define PICO_IP_MULTICAST_EXCLUDE 0 -# define PICO_IP_MULTICAST_INCLUDE 1 -# define PICO_IP_MULTICAST_IF 32 -# define PICO_IP_MULTICAST_TTL 33 -# define PICO_IP_MULTICAST_LOOP 34 -# define PICO_IP_ADD_MEMBERSHIP 35 -# define PICO_IP_DROP_MEMBERSHIP 36 -# define PICO_IP_UNBLOCK_SOURCE 37 -# define PICO_IP_BLOCK_SOURCE 38 -# define PICO_IP_ADD_SOURCE_MEMBERSHIP 39 -# define PICO_IP_DROP_SOURCE_MEMBERSHIP 40 - -# define PICO_SOCKET_OPT_MULTICAST_LOOP 1 -# 
define PICO_SOCKET_OPT_KEEPIDLE 4 -# define PICO_SOCKET_OPT_KEEPINTVL 5 -# define PICO_SOCKET_OPT_KEEPCNT 6 - -#define PICO_SOCKET_OPT_LINGER 13 - -# define PICO_SOCKET_OPT_RCVBUF 52 -# define PICO_SOCKET_OPT_SNDBUF 53 - - -/* Constants */ -# define PICO_IP_DEFAULT_MULTICAST_TTL 1 -# define PICO_IP_DEFAULT_MULTICAST_LOOP 1 - -#define PICO_SOCKET_TIMEOUT 5000u /* 5 seconds */ -#define PICO_SOCKET_LINGER_TIMEOUT 3000u /* 3 seconds */ -#define PICO_SOCKET_BOUND_TIMEOUT 30000u /* 30 seconds */ - -#define PICO_SOCKET_SHUTDOWN_WRITE 0x01u -#define PICO_SOCKET_SHUTDOWN_READ 0x02u -#define TCPSTATE(s) ((s)->state & PICO_SOCKET_STATE_TCP) - -#define PICO_SOCK_EV_RD 1u -#define PICO_SOCK_EV_WR 2u -#define PICO_SOCK_EV_CONN 4u -#define PICO_SOCK_EV_CLOSE 8u -#define PICO_SOCK_EV_FIN 0x10u -#define PICO_SOCK_EV_ERR 0x80u - -struct pico_msginfo { - struct pico_device *dev; - uint8_t ttl; - uint8_t tos; -}; - -struct pico_socket *pico_socket_open(uint16_t net, uint16_t proto, void (*wakeup)(uint16_t ev, struct pico_socket *s)); - -int pico_socket_read(struct pico_socket *s, void *buf, int len); -int pico_socket_write(struct pico_socket *s, const void *buf, int len); - -int pico_socket_sendto(struct pico_socket *s, const void *buf, int len, void *dst, uint16_t remote_port); -int pico_socket_sendto_extended(struct pico_socket *s, const void *buf, const int len, - void *dst, uint16_t remote_port, struct pico_msginfo *msginfo); - -int pico_socket_recvfrom(struct pico_socket *s, void *buf, int len, void *orig, uint16_t *local_port); -int pico_socket_recvfrom_extended(struct pico_socket *s, void *buf, int len, void *orig, - uint16_t *remote_port, struct pico_msginfo *msginfo); - -int pico_socket_send(struct pico_socket *s, const void *buf, int len); -int pico_socket_recv(struct pico_socket *s, void *buf, int len); - -int pico_socket_bind(struct pico_socket *s, void *local_addr, uint16_t *port); -int pico_socket_getname(struct pico_socket *s, void *local_addr, uint16_t *port, uint16_t 
*proto); -int pico_socket_getpeername(struct pico_socket *s, void *remote_addr, uint16_t *port, uint16_t *proto); - -int pico_socket_connect(struct pico_socket *s, const void *srv_addr, uint16_t remote_port); -int pico_socket_listen(struct pico_socket *s, const int backlog); -struct pico_socket *pico_socket_accept(struct pico_socket *s, void *orig, uint16_t *port); -int8_t pico_socket_del(struct pico_socket *s); - -int pico_socket_setoption(struct pico_socket *s, int option, void *value); -int pico_socket_getoption(struct pico_socket *s, int option, void *value); - -int pico_socket_shutdown(struct pico_socket *s, int mode); -int pico_socket_close(struct pico_socket *s); - -struct pico_frame *pico_socket_frame_alloc(struct pico_socket *s, struct pico_device *dev, uint16_t len); -struct pico_device *get_sock_dev(struct pico_socket *s); - - -#ifdef PICO_SUPPORT_IPV4 -# define is_sock_ipv4(x) (x->net == &pico_proto_ipv4) -#else -# define is_sock_ipv4(x) (0) -#endif - -#ifdef PICO_SUPPORT_IPV6 -# define is_sock_ipv6(x) (x->net == &pico_proto_ipv6) -#else -# define is_sock_ipv6(x) (0) -#endif - -#ifdef PICO_SUPPORT_UDP -# define is_sock_udp(x) (x->proto == &pico_proto_udp) -#else -# define is_sock_udp(x) (0) -#endif - -#ifdef PICO_SUPPORT_TCP -# define is_sock_tcp(x) (x->proto == &pico_proto_tcp) -#else -# define is_sock_tcp(x) (0) -#endif - -/* Interface towards transport protocol */ -int pico_transport_process_in(struct pico_protocol *self, struct pico_frame *f); -struct pico_socket *pico_socket_clone(struct pico_socket *facsimile); -int8_t pico_socket_add(struct pico_socket *s); -int pico_transport_error(struct pico_frame *f, uint8_t proto, int code); - -/* Socket loop */ -int pico_sockets_loop(int loop_score); -struct pico_socket*pico_sockets_find(uint16_t local, uint16_t remote); -/* Port check */ -int pico_is_port_free(uint16_t proto, uint16_t port, void *addr, void *net); - -struct pico_sockport *pico_get_sockport(uint16_t proto, uint16_t port); - -uint32_t 
pico_socket_get_mss(struct pico_socket *s); -int pico_socket_set_family(struct pico_socket *s, uint16_t family); - -int pico_count_sockets(uint8_t proto); - -#define PICO_SOCKET_SETOPT_EN(socket, index) (socket->opt_flags |= (1 << index)) -#define PICO_SOCKET_SETOPT_DIS(socket, index) (socket->opt_flags &= (uint16_t) ~(1 << index)) -#define PICO_SOCKET_GETOPT(socket, index) ((socket->opt_flags & (1u << index)) != 0) - - -#endif diff --git a/kernel/picotcp/include/pico_socket_multicast.h b/kernel/picotcp/include/pico_socket_multicast.h deleted file mode 100644 index 44c30c5..0000000 --- a/kernel/picotcp/include/pico_socket_multicast.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef PICO_SOCKET_MULTICAST_H -#define PICO_SOCKET_MULTICAST_H -int pico_socket_mcast_filter(struct pico_socket *s, union pico_address *mcast_group, union pico_address *src); -void pico_multicast_delete(struct pico_socket *s); -int pico_setsockopt_mcast(struct pico_socket *s, int option, void *value); -int pico_getsockopt_mcast(struct pico_socket *s, int option, void *value); -int pico_udp_get_mc_ttl(struct pico_socket *s, uint8_t *ttl); -int pico_udp_set_mc_ttl(struct pico_socket *s, void *_ttl); - -#endif diff --git a/kernel/picotcp/include/pico_stack.h b/kernel/picotcp/include/pico_stack.h deleted file mode 100644 index 0595238..0000000 --- a/kernel/picotcp/include/pico_stack.h +++ /dev/null @@ -1,92 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_STACK -#define INCLUDE_PICO_STACK -#include "pico_config.h" -#include "pico_frame.h" -#include "pico_constants.h" - -#define PICO_MAX_TIMERS 20 - -#define PICO_ETH_MRU (1514u) -#define PICO_IP_MRU (1500u) - -/******************************************************************************* - * TRANSPORT LAYER - ******************************************************************************/ - -/* From dev up to socket */ -int32_t pico_transport_receive(struct pico_frame *f, uint8_t proto); - -/******************************************************************************* - * NETWORK LAYER - ******************************************************************************/ - -/* From socket down to dev */ -int32_t pico_network_send(struct pico_frame *f); - -/* From dev up to socket */ -int32_t pico_network_receive(struct pico_frame *f); - -/******************************************************************************* - * DATALINK LAYER - ******************************************************************************/ - -/* From socket down to dev */ -int pico_datalink_send(struct pico_frame *f); - -/* From dev up to socket */ -int pico_datalink_receive(struct pico_frame *f); - -/******************************************************************************* - * PHYSICAL LAYER - ******************************************************************************/ - -/* Enqueues the frame in the device-queue. From socket down to dev */ -int32_t pico_sendto_dev(struct pico_frame *f); - -/* LOWEST LEVEL: interface towards stack from devices */ -/* Device driver will call this function which returns immediately. - * Incoming packet will be processed later on in the dev loop. - * The zerocopy version will associate the current buffer to the newly created frame. 
- * Warning: the buffer used in the zerocopy version MUST have been allocated using PICO_ZALLOC() - */ -int32_t pico_stack_recv(struct pico_device *dev, uint8_t *buffer, uint32_t len); -int32_t pico_stack_recv_zerocopy(struct pico_device *dev, uint8_t *buffer, uint32_t len); -int32_t pico_stack_recv_zerocopy_ext_buffer(struct pico_device *dev, uint8_t *buffer, uint32_t len); -int32_t pico_stack_recv_zerocopy_ext_buffer_notify(struct pico_device *dev, uint8_t *buffer, uint32_t len, void (*notify_free)(uint8_t *buffer)); -struct pico_frame *pico_stack_recv_new_frame(struct pico_device *dev, uint8_t *buffer, uint32_t len); - -/* ----- Initialization ----- */ -int pico_stack_init(void); - -/* ----- Loop Function. ----- */ -void pico_stack_tick(void); -void pico_stack_loop(void); - -/* ---- Notifications for stack errors */ -int pico_notify_socket_unreachable(struct pico_frame *f); -int pico_notify_proto_unreachable(struct pico_frame *f); -int pico_notify_dest_unreachable(struct pico_frame *f); -int pico_notify_ttl_expired(struct pico_frame *f); -int pico_notify_frag_expired(struct pico_frame *f); -int pico_notify_pkt_too_big(struct pico_frame *f); - -/* Various. 
*/ -int pico_source_is_local(struct pico_frame *f); -int pico_frame_dst_is_unicast(struct pico_frame *f); -void pico_store_network_origin(void *src, struct pico_frame *f); -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg); -uint32_t pico_timer_add_hashed(pico_time expire, void (*timer)(pico_time, void *), void *arg, uint32_t hash); -void pico_timer_cancel_hashed(uint32_t hash); -void pico_timer_cancel(uint32_t id); -uint32_t pico_rand(void); -void pico_rand_feed(uint32_t feed); -void pico_to_lowercase(char *str); -int pico_address_compare(union pico_address *a, union pico_address *b, uint16_t proto); -int32_t pico_seq_compare(uint32_t a, uint32_t b); - -#endif diff --git a/kernel/picotcp/include/pico_tree.h b/kernel/picotcp/include/pico_tree.h deleted file mode 100644 index 9d826c0..0000000 --- a/kernel/picotcp/include/pico_tree.h +++ /dev/null @@ -1,93 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Author: Andrei Carp - *********************************************************************/ - -#ifndef PICO_RBTREE_H -#define PICO_RBTREE_H - -#include "pico_config.h" - -/* This is used to declare a new tree, leaf root by default */ -#define PICO_TREE_DECLARE(name, compareFunction) \ - struct pico_tree name = \ - { \ - &LEAF, \ - compareFunction \ - } - -#define USE_PICO_PAGE0_ZALLOC (1) -#define USE_PICO_ZALLOC (2) - -struct pico_tree_node -{ - void*keyValue; /* generic key */ - struct pico_tree_node*parent; - struct pico_tree_node*leftChild; - struct pico_tree_node*rightChild; - uint8_t color; -}; - -struct pico_tree -{ - struct pico_tree_node *root; /* root of the tree */ - - /* this function directly provides the keys as parameters not the nodes. 
*/ - int (*compare)(void*keyA, void*keyB); -}; - -extern struct pico_tree_node LEAF; /* generic leaf node */ - -#ifdef PICO_SUPPORT_MM -void *pico_tree_insert_implementation(struct pico_tree *tree, void *key, uint8_t allocator); -void *pico_tree_delete_implementation(struct pico_tree *tree, void *key, uint8_t allocator); -#endif - - -/* - * Manipulation functions - */ -void *pico_tree_insert(struct pico_tree *tree, void *key); -void *pico_tree_delete(struct pico_tree *tree, void *key); -void *pico_tree_findKey(struct pico_tree *tree, void *key); -void pico_tree_drop(struct pico_tree *tree); -int pico_tree_empty(struct pico_tree *tree); -struct pico_tree_node *pico_tree_findNode(struct pico_tree *tree, void *key); - -void *pico_tree_first(struct pico_tree *tree); -void *pico_tree_last(struct pico_tree *tree); -/* - * Traverse functions - */ -struct pico_tree_node *pico_tree_lastNode(struct pico_tree_node *node); -struct pico_tree_node *pico_tree_firstNode(struct pico_tree_node *node); -struct pico_tree_node *pico_tree_next(struct pico_tree_node *node); -struct pico_tree_node *pico_tree_prev(struct pico_tree_node *node); - -/* - * For each macros - */ - -#define pico_tree_foreach(idx, tree) \ - for ((idx) = pico_tree_firstNode((tree)->root); \ - (idx) != &LEAF; \ - (idx) = pico_tree_next(idx)) - -#define pico_tree_foreach_reverse(idx, tree) \ - for ((idx) = pico_tree_lastNode((tree)->root); \ - (idx) != &LEAF; \ - (idx) = pico_tree_prev(idx)) - -#define pico_tree_foreach_safe(idx, tree, idx2) \ - for ((idx) = pico_tree_firstNode((tree)->root); \ - ((idx) != &LEAF) && ((idx2) = pico_tree_next(idx), 1); \ - (idx) = (idx2)) - -#define pico_tree_foreach_reverse_safe(idx, tree, idx2) \ - for ((idx) = pico_tree_lastNode((tree)->root); \ - ((idx) != &LEAF) && ((idx2) = pico_tree_prev(idx), 1); \ - (idx) = (idx2)) - -#endif diff --git a/kernel/picotcp/mkdeps.sh b/kernel/picotcp/mkdeps.sh deleted file mode 100755 index 6e536bc..0000000 --- a/kernel/picotcp/mkdeps.sh +++ 
/dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -if [ "$#" -eq 0 ]; then - echo "Supply PREFIX for building pico_defines.h" - exit 1 -fi - -PREFIX=$1 -shift -echo "/* PicoTCP - Definition file - DO NOT EDIT */" >$PREFIX/include/pico_defines.h -echo "/* This file is automatically generated at compile time */" >>$PREFIX/include/pico_defines.h -echo "#ifndef PICO_DEFINES_H" >>$PREFIX/include/pico_defines.h -echo "#define PICO_DEFINES_H" >>$PREFIX/include/pico_defines.h -echo >>$PREFIX/include/pico_defines.h - -for i in $@; do - if (echo $i | grep "^-D" |grep PICO_SUPPORT >/dev/null); then - my_def=`echo $i |sed -e "s/-D//g"` - echo "#define $my_def" >> $PREFIX/include/pico_defines.h - fi -done -echo "#endif" >>$PREFIX/include/pico_defines.h diff --git a/kernel/picotcp/modcheck.py b/kernel/picotcp/modcheck.py deleted file mode 100755 index 068e170..0000000 --- a/kernel/picotcp/modcheck.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/python -import os,sys -import subprocess - - -print "Scroll down for summary" -print "" -print "" - -f = open('MODTREE') -mods = {} -commands = [] - -def get_deps(mod): - if not mod in mods.keys(): - return [] - deps = mods[mod] - retlist = [mod] - for i in deps.split(' '): - retlist.append(i) - for j in get_deps(i): - retlist.append(j) - return retlist - - -while(True): - r = f.readline() - if r == '': - break - if r != '\n': - strings = r.split(':') - mod = strings[0] - deps = strings[1].rstrip('\n') - mods[mod] = deps.strip(' ') - -for k,v in mods.iteritems(): - command = 'make dummy ' - deps = get_deps(k) - for i in mods.keys(): - if i in deps: - command += i + "=1 " - else: - command += i + "=0 " - commands.append(command) - -endResult = [] -failed = 0 - -for i in commands: - print 'Checking config:\n\t%s' % i - - subprocess.call(['make','clean']) - sys.stdout.flush() - sys.stderr.flush() - - args = i.split(' ') - - # Remove the last item (which is a blank) - ret = subprocess.call(args[:-1]) - sys.stdout.flush() - sys.stderr.flush() - - if ret 
== 0: - print "**********************************************************" - print "******************* CONFIG PASSED! *******************" - endResult.append({"test": i, "result": "PASS"}) - else: - failed += 1 - print "**********************************************************" - print "******************* CONFIG FAILED! *******************" - endResult.append({"test": i, "result": "FAIL"}) - print "**********************************************************" - -print "" -print "***************************************************************************" -print " Executive Summary" -print "***************************************************************************" -print "" - -for r in endResult: - print "Test:", r["test"] - print "Status:", r["result"] - print "" - -print "***********************" -print "%d out of %d Failed" % (failed, len(endResult)) -print "***********************" - -if failed: - sys.exit(1) -else: - sys.exit(0) diff --git a/kernel/picotcp/modules/pico_6lowpan.c b/kernel/picotcp/modules/pico_6lowpan.c deleted file mode 100644 index c4eb2e0..0000000 --- a/kernel/picotcp/modules/pico_6lowpan.c +++ /dev/null @@ -1,1647 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See LICENSE and COPYING for usage. 
- - Authors: Jelle De Vleeschouwer - *********************************************************************/ - -#include "pico_udp.h" -#include "pico_ipv6.h" -#include "pico_stack.h" -#include "pico_frame.h" -#include "pico_6lowpan.h" -#include "pico_protocol.h" -#include "pico_addressing.h" -#include "pico_6lowpan_ll.h" - -#ifdef PICO_SUPPORT_6LOWPAN - -/******************************************************************************* - * Macros - ******************************************************************************/ - -#ifdef DEBUG_6LOWPAN -#define GRN "\x1b[32m" -#define ORG "\x1b[33m" -#define RST "\x1b[0m" -#define lp_dbg dbg -#else -#define lp_dbg(...) do {} while(0) -#endif - -#define IPV6_MCAST_48(addr) (!addr[8] && !addr[9] && !addr[10] && (addr[11] || addr[12])) -#define IPV6_MCAST_32(addr) (!addr[8] && !addr[9] && !addr[10] && !addr[11] && !addr[12] && (addr[13] || addr[14])) -#define IPV6_MCAST_8(addr) (addr[1] == 0x02 && !addr[14] && addr[15]) -#define PORT_COMP(a, mask, b) (((a) & (mask)) == (b)) - -/******************************************************************************* - * Constants - ******************************************************************************/ - -#define NUM_IPV6_FIELDS (6) -#define NUM_UDP_FIELDS (4) -#define IPV6_DISPATCH (0x41) -#define IPHC_DISPATCH (0x60) -#define UDP_DISPATCH (0xF0) -#define EXT_DISPATCH (0xE0) -#define EXT_HOPBYHOP (0x00) -#define EXT_ROUTING (0x02) -#define EXT_FRAG (0x04) -#define EXT_DSTOPT (0x06) -#define EXT_COMPRESSED_NH (0x01) -#define UDP_COMPRESSED_DST (0x01) -#define UDP_COMPRESSED_SRC (0x02) -#define UDP_COMPRESSED_BOTH (0x03) -#define UDP_COMPRESSED_CHCK (0x04) -#define TF_INLINE (0x00) -#define TF_ELIDED_DSCP (0x08) -#define TF_ELIDED_FL (0x10) -#define TF_ELIDED (0x18) -#define NH_COMPRESSED (0x04) -#define HL_COMPRESSED_1 (0x01) -#define HL_COMPRESSED_64 (0x02) -#define HL_COMPRESSED_255 (0x03) -#define CTX_EXTENSION (0x80) -#define SRC_SHIFT (0x04) -#define SRC_STATEFUL (0x40) 
-#define SRC_COMPRESSED_64 (0x10) -#define SRC_COMPRESSED_16 (0x20) -#define SRC_COMPRESSED (0x30) -#define DST_STATEFUL (0x04) -#define DST_COMPRESSED_64 (0x01) -#define DST_COMPRESSED_16 (0x02) -#define DST_COMPRESSED (0x03) -#define DST_MULTICAST (0x08) -#define DST_MCAST_48 (0x01) -#define DST_MCAST_32 (0x02) -#define DST_MCAST_8 (0x03) -#define COMP_LINKLOCAL (0) -#define COMP_STATELESS (-1) -#define COMP_MULTICAST (-2) -#define COMP_UNSPECIFIED (-3) -#define FRAG1_SIZE (4) -#define FRAGN_SIZE (5) -#define FRAG1_DISPATCH (0xC0) -#define FRAGN_DISPATCH (0xE0) -#define FRAG_TIMEOUT (5) -/******************************************************************************* - * Type definitions - ******************************************************************************/ - -struct hdr_field -{ - int8_t ori_size; - int8_t (* compress)(uint8_t *, uint8_t *, uint8_t *, union pico_ll_addr *, union pico_ll_addr *, struct pico_device *); - int8_t (* decompress)(uint8_t *, uint8_t *, uint8_t *, union pico_ll_addr *, union pico_ll_addr *, struct pico_device *); -}; - -struct frag_ctx { - struct pico_frame *f; - uint16_t dgram_size; - uint16_t dgram_tag; - uint8_t dgram_off; - uint16_t copied; - uint32_t hash; - pico_time timestamp; -}; - -/******************************************************************************* - * Global Variables - ******************************************************************************/ - -static struct pico_queue pico_6lowpan_in = { - 0 -}; -static struct pico_queue pico_6lowpan_out = { - 0 -}; - -static uint16_t dgram_tag = 0; - -/******************************************************************************* - * Private functions - ******************************************************************************/ - -/* Copies two memory buffers but also considers overlapping buffers */ -static void -buf_move(void *dst, const void *src, size_t len) -{ - uint8_t *d = (uint8_t *)dst; - const uint8_t *s = (const uint8_t *)src; - if (!dst || 
!src) { - return; - } else { - if (d < s) { - while (len--) - *d++ = *s++; - } else { - s = s + len - 1; - d = d + len - 1; - while (len--) - *d-- = *s--; - } - } -} - -/******************************************************************************* - * Frags - ******************************************************************************/ - -/* Compares two fragmentation cookies based on the hash */ -static int32_t -frag_ctx_cmp(void *a, void *b) -{ - struct frag_ctx *fa = (struct frag_ctx *)a; - struct frag_ctx *fb = (struct frag_ctx *)b; - return (int32_t)(fa->hash - fb->hash); -} - -/* Compares two fragmentation cookies according to RFC4944 5.3 */ -static int32_t -frag_cmp(void *a, void *b) -{ - struct frag_ctx *fa = (struct frag_ctx *)a; - struct frag_ctx *fb = (struct frag_ctx *)b; - int32_t ret = 0; - if (fa->dgram_size != fb->dgram_size) { - return (int32_t)(fa->dgram_size - fb->dgram_size); - } else if (fa->dgram_tag != fb->dgram_tag) { - return (int32_t)(fa->dgram_tag - fb->dgram_tag); - } else { - if ((ret = pico_6lowpan_lls[fa->f->dev->mode].addr_cmp(&fa->f->src, &fb->f->src))) { - return ret; - } else { - return pico_6lowpan_lls[fa->f->dev->mode].addr_cmp(&fa->f->dst, &fb->f->dst); - } - } -} - -PICO_TREE_DECLARE(FragTree, &frag_ctx_cmp); -PICO_TREE_DECLARE(ReassemblyTree, &frag_cmp); - -/* Find a fragmentation cookie for transmission of subsequent fragments */ -static struct frag_ctx * -frag_ctx_find(uint32_t hash) -{ - struct frag_ctx f = { .hash = hash }; - return pico_tree_findKey(&FragTree, &f); -} - -/* Reassembly timeout function, deletes */ -static void -frag_timeout(pico_time now, void *arg) -{ - struct pico_tree_node *i = NULL, *next = NULL; - struct frag_ctx *key = NULL; - IGNORE_PARAMETER(arg); - pico_tree_foreach_safe(i, &ReassemblyTree, next) { - if ((key = i->keyValue)) { - if ((pico_time)(FRAG_TIMEOUT * 1000) <= (now - key->timestamp)) { - lp_dbg("Timeout for reassembly: %d\n", key->dgram_tag); - pico_tree_delete(&ReassemblyTree, key); 
- pico_frame_discard(key->f); - PICO_FREE(key); - } - } - } - - /* If adding a timer fails, there's not really an easy way to recover, so abort all ongoing - * reassemblies - * TODO: Maybe using a global variable allows recovering from this situation */ - if (0 == pico_timer_add(1000, frag_timeout, NULL)) { - lp_dbg("6LP: Failed to set reassembly timeout! Aborting all ongoing reassemblies...\n"); - pico_tree_foreach_safe(i, &ReassemblyTree, next) { - if ((key = i->keyValue)) { - pico_tree_delete(&ReassemblyTree, key); - pico_frame_discard(key->f); - PICO_FREE(key); - } - } - } -} - -/* Finds a reassembly cookie in the reassembly-tree */ -static struct frag_ctx * -frag_find(uint16_t dgram_size, uint16_t tag, struct pico_frame *frame) -{ - struct frag_ctx f = {.f = frame, .dgram_size = dgram_size, .dgram_tag = tag}; - return pico_tree_findKey(&ReassemblyTree, &f); -} - -/* Stores a fragmentation cookie in either the fragmentetion cookie tree or - * in the reassembly tree */ -static int32_t -frag_store(struct pico_frame *f, uint16_t dgram_size, uint16_t tag, - uint8_t dgram_off, uint16_t copied, struct pico_tree *tree) -{ - struct frag_ctx *fr = PICO_ZALLOC(sizeof(struct frag_ctx)); - if (fr) { - fr->f = f; - fr->dgram_size = dgram_size; - fr->dgram_off = dgram_off; - fr->dgram_tag = tag; - fr->copied = copied; - fr->timestamp = PICO_TIME_MS(); - if (&FragTree == tree) { - fr->hash = pico_hash((void *)fr, sizeof(struct frag_ctx)); - f->hash = fr->hash; // Also set hash in frame so we can identify it - lp_dbg("6LP: START: "ORG"fragmentation"RST" with hash '%X' of %u bytes.\n", fr->hash, f->len); - } else { - lp_dbg("6LP: START: "GRN"reassembly"RST" with tag '%d' of %u bytes.\n", tag, dgram_size); - } - /* Insert the cookie in the appropriate tree (FragTree/ReassemblyTree) */ - if (pico_tree_insert(tree, fr)) { - PICO_FREE(fr); - return -1; - } - } else { - return (-1); - } - return (1); // Succes for 'proto_loop_out' -} - 
-/******************************************************************************* - * IPHC - ******************************************************************************/ - -#ifdef PICO_6LOWPAN_IPHC_ENABLED - -/* Compresses the VTF-field of an IPv6 header */ -static int8_t -compressor_vtf(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr * - llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - uint8_t ecn = 0, dscp = 0; - uint32_t fl = 0; - *ori &= 0x0F; // Clear version field - *iphc &= (uint8_t)0x07; // Clear IPHC field - *iphc |= (uint8_t)IPHC_DISPATCH; - IGNORE_PARAMETER(llsrc); - IGNORE_PARAMETER(lldst); - IGNORE_PARAMETER(dev); - - /* Don't worry... */ - ecn = (uint8_t)((ori[0] << 4) & 0xC0); - dscp = (uint8_t)(((ori[0] << 4) & 0x30) | ((ori[1] & 0xF0) >> 4)); - fl = long_be((uint32_t)(ori[1] & 0x0F) << 16); - fl += long_be((uint32_t)(ori[2] & 0xFF) << 8); - fl += long_be((uint32_t)(ori[3] & 0xFF)); - - if (fl) { - if (!dscp) { // Flow label carried in-line - *iphc |= TF_ELIDED_DSCP; - comp[0] = (uint8_t)(ecn | (ori[1] & 0x0F)); - comp[1] = ori[2]; - comp[2] = ori[3]; - return 3; - } else { // Traffic class and flow label carried in-line - *iphc |= TF_INLINE; - *comp = ecn | dscp; - comp[1] = ori[1] & 0x0F; - comp[2] = ori[2]; - comp[3] = ori[3]; - return 4; - } - } else if (ecn || dscp) { // Traffic class carried in-line - *iphc |= TF_ELIDED_FL; - *comp = ecn | dscp; - return 1; - } else { // Traffic class and flow label elided - *iphc |= TF_ELIDED; - return 0; - } -} - -/* Decompresses the VTF-field of a IPHC-header */ -static int8_t -decompressor_vtf(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr - *llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - uint8_t tf = *iphc & TF_ELIDED; - IGNORE_PARAMETER(llsrc); - IGNORE_PARAMETER(lldst); - IGNORE_PARAMETER(dev); - if (TF_INLINE == tf) { - *ori++ = (0x60 | (*comp >> 4)); - *ori |= (uint8_t)((uint8_t)(*comp++ << 4) & 0xF0); - *ori++ |= *comp++; - *ori++ = 
*comp++; - *ori++ = *comp++; - return 4; - } else if (TF_ELIDED_DSCP == tf) { - *ori++ = (0x60 | (*comp >> 4)) & 0xFC; - *ori++ = *comp++ & 0x0F; - *ori++ = *comp++; - *ori = *comp; - return 3; - } else if (TF_ELIDED_FL == tf) { - *ori++ = (0x60 | (*comp >> 4)); - *ori = (uint8_t)(*comp << 4) & 0xF0; - return 1; - } else { - *ori = 0x60; // Set version field to IPv6 - return 0; - } -} - -/* Checks whether or not next header is compressible according to NHC scheme */ -static int32_t -compressible_nh(uint8_t nh) -{ - switch (nh) { - case PICO_IPV6_EXTHDR_HOPBYHOP: - case PICO_IPV6_EXTHDR_ROUTING: - case PICO_IPV6_EXTHDR_FRAG: - case PICO_IPV6_EXTHDR_DESTOPT: - case PICO_PROTO_UDP: return 1; - default: return 0; - } -} - -/* Checks whether or not the next header can be compressed and sets the IPHC - * bits accordingly, compression of next header itself happens in NHC-compression - */ -static int8_t -compressor_nh(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr * - llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - *iphc &= (uint8_t)~NH_COMPRESSED; - IGNORE_PARAMETER(comp); - IGNORE_PARAMETER(llsrc); - IGNORE_PARAMETER(lldst); - IGNORE_PARAMETER(dev); - if (compressible_nh(*ori)) { - *iphc |= NH_COMPRESSED; - return 0; - } else { - *comp = *ori; - return 1; - } -} - -/* Check whether or no the next header is NHC-compressed, indicates this for the - * general decompressor so it knows that it has to decompress the next header - * and fill in the NH-header field in IPv6 header */ -static int8_t -decompressor_nh(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr - *llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - IGNORE_PARAMETER(llsrc); - IGNORE_PARAMETER(lldst); - IGNORE_PARAMETER(dev); - IGNORE_PARAMETER(comp); - if (*iphc & NH_COMPRESSED) { - *ori = 0; // Indicate that next header needs to be decompressed - return 0; - } else { - *ori = *comp; - return 1; - } -} - -/* Compressed the HL-field if common hop limit 
values are used, like 1, 64 and - * 255 */ -static int8_t -compressor_hl(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr * - llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - IGNORE_PARAMETER(llsrc); - IGNORE_PARAMETER(lldst); - IGNORE_PARAMETER(dev); - *iphc &= (uint8_t)~HL_COMPRESSED_255; - switch (*ori) { - case 1: *iphc |= (uint8_t)HL_COMPRESSED_1; - return 0; - case 64: *iphc |= (uint8_t)HL_COMPRESSED_64; - return 0; - case 255: *iphc |= (uint8_t)HL_COMPRESSED_255; - return 0; - default: *comp = *ori; - return 1; - } -} - -/* Decompresses the HL-field to common hop limit values like 1, 64 and 255 */ -static int8_t -decompressor_hl(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr - *llsrc, union pico_ll_addr *lldst, struct pico_device *dev) - -{ - uint8_t hl = *iphc & HL_COMPRESSED_255; - IGNORE_PARAMETER(llsrc); - IGNORE_PARAMETER(lldst); - IGNORE_PARAMETER(dev); - switch(hl) { - case HL_COMPRESSED_1: *ori = (uint8_t)1; - return 0; - case HL_COMPRESSED_64: *ori = (uint8_t)64; - return 0; - case HL_COMPRESSED_255: *ori = (uint8_t)255; - return 0; - default: *ori = *comp; - return 1; - } -} - -/* Determines if an address can be statefully or statelessly compressed */ -static int8_t -addr_comp_prefix(uint8_t *iphc, struct pico_ip6 *addr, int8_t src) -{ - struct iphc_ctx *ctx = NULL; - uint8_t state = src ? 
SRC_STATEFUL : DST_STATEFUL; - iphc[1] &= (uint8_t)~state; // Clear out compression state for src/dst - - if (pico_ipv6_is_multicast(addr->addr)) { - /* TODO: Support stateful multicast compression with Unicast-Prefix-Based - * IPv6 Multicast Addresses as defined in RFC3956 */ - return COMP_MULTICAST; // AC = 0 - } else if (pico_ipv6_is_linklocal(addr->addr)) { - return COMP_LINKLOCAL; // AC = 0 - } else if ((ctx = ctx_lookup(*addr))) { - if (ctx->flags & PICO_IPHC_CTX_COMPRESS) { - iphc[1] |= state; // AC = 1 - iphc[1] |= CTX_EXTENSION; // SRC or DST is stateful, CID = 1 - return (int8_t)ctx->id; - } - } - return COMP_STATELESS; // AC = 0 -} - -/* Checks whether or not an IPv6 address is derived from a link layer address */ -static int8_t -addr_ll_derived(struct pico_ip6 *addr, union pico_ll_addr *lladdr, struct pico_device *dev) -{ - uint8_t iid[8] = {0}; - if (pico_6lowpan_lls[dev->mode].addr_iid) { - if (!pico_6lowpan_lls[dev->mode].addr_iid(iid, lladdr)) - return (int8_t)(0 == memcmp(iid, &addr->addr[8], 8)); - } - return -1; -} - -/* Sets the compression mode of either the source address or the destination - * address, based on the shift parameter. 
Use SRC_SHIFT for source, 0 for dst */ -static int8_t -addr_comp_mode(uint8_t *iphc, struct pico_ip6 *addr, union pico_ll_addr lladdr, struct pico_device *dev, int8_t shift) -{ - int8_t mac = addr_ll_derived(addr, &lladdr, dev); - iphc[1] &= (uint8_t)((uint8_t)~DST_COMPRESSED << shift); // Clear src/dst mode - - if (mac > 0) { // Address is mac derived - iphc[1] |= (uint8_t)(DST_COMPRESSED << shift); - return 0; - } else if (!mac && IID_16(&addr->addr[8])) { // Address is 16-bit deriveable - iphc[1] |= (uint8_t)(DST_COMPRESSED_16 << shift); - return 2; - } else if (!mac) { // Copy the entire IID - iphc[1] |= (uint8_t)(DST_COMPRESSED_64 << shift); - return 8; - } else { - return -1; // Something went wrong, indicate failure - } -} - -/* Compresses a multicast address statelessly */ -static int8_t -addr_comp_mcast(uint8_t *iphc, uint8_t *comp, struct pico_ip6 *mcast) -{ - iphc[1] &= (uint8_t)~DST_MCAST_8; // Clear out addressing mode - iphc[1] |= (uint8_t)DST_MULTICAST; // Set multicast flag - - if (IPV6_MCAST_48(mcast->addr)) { - comp[0] = mcast->addr[1]; // Copy flags and scope - buf_move(&comp[1], &mcast->addr[11], 5); // Copy group identifier - iphc[1] |= DST_MCAST_48; - return 6; - } else if (IPV6_MCAST_32(mcast->addr)) { - comp[0] = mcast->addr[1]; // Copy flags and scope - buf_move(&comp[1], &mcast->addr[13], 3); // Copy group identifier - iphc[1] |= DST_MCAST_32; - return 4; - } else if (IPV6_MCAST_8(mcast->addr)) { - comp[0] = mcast->addr[15]; // Copy group identifier - iphc[1] |= DST_MCAST_8; // Flags and scope = 0x02 - return 1; - } else { - buf_move(comp, mcast->addr, PICO_SIZE_IP6); // Copy entire address - return PICO_SIZE_IP6; - } -} - -/* Compresses the IID of a IPv6 address into 'comp'. Also has to take link layer - * address into account and whether it's about source or destination address. 
*/ -static int8_t -addr_comp_iid(uint8_t *iphc, uint8_t *comp, int8_t state, struct pico_ip6 *addr, union pico_ll_addr ll, struct pico_device *dev, int8_t shift) -{ - int8_t len = PICO_SIZE_IP6; - switch (state) { - case COMP_UNSPECIFIED: // Set stateful bit - iphc[1] |= SRC_STATEFUL; - case COMP_STATELESS: // Clear compressed flags - iphc[1] &= (uint8_t)~SRC_COMPRESSED; - break; - case COMP_LINKLOCAL: - len = addr_comp_mode(iphc, addr, ll, dev, shift); - break; - case COMP_MULTICAST: // Multicast, compress statelessly - return addr_comp_mcast(iphc, comp, addr); - default: // Context available, extend header, and check for IID - iphc[2] = (uint8_t)((uint8_t)state << shift); - len = addr_comp_mode(iphc, addr, ll, dev, shift); - } - - if (len >= 0) - buf_move(comp, addr->addr + PICO_SIZE_IP6 - len, (size_t)len); - return len; -} - -/* Compresses the SOURCE address of the IPv6 frame */ -static int8_t -compressor_src(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr *llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - struct pico_ip6 src = *(struct pico_ip6 *)ori; - int8_t ret = addr_comp_prefix(iphc, &src, SRC_SHIFT); - IGNORE_PARAMETER(lldst); - - if (pico_ipv6_is_unspecified(src.addr)) - ret = COMP_UNSPECIFIED; - - return addr_comp_iid(iphc, comp, ret, &src, *llsrc, dev, SRC_SHIFT); -} - -/* Copies the appropriate IPv6 prefix in the decompressed address. 
Based on - * context, link local address or multicast address */ -static int8_t -addr_decomp_prefix(uint8_t *prefix, uint8_t *iphc, int8_t shift) -{ - struct pico_ip6 ll = { .addr = {0xfe,0x80,0,0,0,0,0,0,0,0,0,0xff,0xfe,0,0,0}}; - uint8_t addr_state = (uint8_t)(DST_STATEFUL << shift); - struct iphc_ctx *ctx = NULL; - - if (iphc[1] & addr_state) { - if ((ctx = ctx_lookup_id((uint8_t)(iphc[2] >> shift)))) { - buf_move(prefix, ctx->prefix.addr, PICO_SIZE_IP6); - buf_move(&prefix[8], &ll.addr[8], 8); // For 16-bit derived addresses - } else { - /* No context available while stateful compression is used... */ - return -1; - } - } else { - buf_move(prefix, ll.addr, PICO_SIZE_IP6); - } - return 0; -} - -/* Decompresses the IID of the IPv6 address based on addressing mode of the IPHC- - * header */ -static int8_t -addr_decomp_iid(struct pico_ip6 *addr, uint8_t *comp, uint8_t am, union pico_ll_addr lladdr, struct pico_device *dev) -{ - if (addr) { - switch (am) { - case DST_COMPRESSED_64: buf_move(&addr->addr[8], comp, 8); - return 8; - case DST_COMPRESSED_16: buf_move(&addr->addr[14], comp, 2); - return 2; - case DST_COMPRESSED: - if (dev && pico_6lowpan_lls[dev->mode].addr_iid) { - pico_6lowpan_lls[dev->mode].addr_iid(&addr->addr[8], &lladdr); - return 0; - } else { - return -1; - } - default: buf_move(addr->addr, comp, PICO_SIZE_IP6); - return 16; - } - } else { - return -1; - } -} - -/* Decompress the SOURCE address of the 6LoWPAN frame */ -static int8_t -decompressor_src(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr - *llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - struct pico_ip6 *src = (struct pico_ip6 *)ori; - uint8_t sam = (uint8_t)((uint8_t)(iphc[1] & SRC_COMPRESSED) >> 4); - IGNORE_PARAMETER(lldst); - - /* Get the appropriate IPv6 prefix */ - if (addr_decomp_prefix(ori, iphc, SRC_SHIFT)) - return -1; - - return addr_decomp_iid(src, comp, sam, *llsrc, dev); -} - -/* Compresses the DESTINATION address of IPv6 frame */ -static 
int8_t -compressor_dst(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr * - llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - struct pico_ip6 dst = *(struct pico_ip6 *)ori; - int8_t ret = addr_comp_prefix(iphc, &dst, 0); - IGNORE_PARAMETER(llsrc); - return addr_comp_iid(iphc, comp, ret, &dst, *lldst, dev, 0); -} - -/* Decompresses the IPv6 multicast destination address when the IPHC mcast-flag - * is set */ -static int8_t -addr_decomp_mcast(uint8_t *comp, struct pico_ip6 *dst, uint8_t am) -{ - if (dst) { - memset(dst->addr, 0, PICO_SIZE_IP6); - dst->addr[0] = 0xff; - dst->addr[1] = *comp; - switch (am) { - case DST_MCAST_48: - buf_move(dst->addr + 11, comp + 1, 5); - return 6; - case DST_MCAST_32: - buf_move(dst->addr + 13, comp + 1, 3); - return 4; - case DST_MCAST_8: - dst->addr[1] = 0x02; - dst->addr[15] = *comp; - return 1; - default: - buf_move(dst->addr, comp, PICO_SIZE_IP6); - return PICO_SIZE_IP6; - } - } else { - return -1; - } -} - -/* Decompresses the DESTINATION address of a 6LoWPAN frame */ -static int8_t -decompressor_dst(uint8_t *ori, uint8_t *comp, uint8_t *iphc, union pico_ll_addr *llsrc, union pico_ll_addr *lldst, struct pico_device *dev) -{ - struct pico_ip6 *dst = (struct pico_ip6 *)ori; - uint8_t dam = iphc[1] & DST_COMPRESSED; - IGNORE_PARAMETER(llsrc); - - if (addr_decomp_prefix(ori, iphc, SRC_SHIFT)) - return -1; - - if (iphc[1] & DST_MULTICAST) { - return addr_decomp_mcast(comp, dst, dam); - } else { - return addr_decomp_iid(dst, comp, dam, *lldst, dev); - } -} - -static const struct hdr_field ip6_fields[] = { - {4, compressor_vtf, decompressor_vtf}, - {2, NULL, NULL}, - {1, compressor_nh, decompressor_nh}, - {1, compressor_hl, decompressor_hl}, - {16, compressor_src, decompressor_src}, - {16, compressor_dst, decompressor_dst} -}; - -/* Compresses the IPv6 frame according to the IPHC-compression scheme */ -static uint8_t * -compressor_iphc(struct pico_frame *f, int32_t *compressed_len, uint8_t *nh) -{ - uint8_t 
*inline_buf = PICO_ZALLOC(PICO_SIZE_IP6HDR + 3); - uint8_t *comp = inline_buf + 3; - uint8_t *iphc = inline_buf; - uint8_t *ori = f->net_hdr; - int32_t i = 0, ret = 0; - *compressed_len = 0; - *nh = ((struct pico_ipv6_hdr *)f->net_hdr)->nxthdr; - - if (!inline_buf) { - return NULL; - } else { - /* Compress fixed IPv6 fields */ - for (i = 0; i < NUM_IPV6_FIELDS; i++) { - if (ip6_fields[i].compress) { - ret = ip6_fields[i].compress(ori, comp, iphc, &f->src, &f->dst, f->dev); - if (ret < 0) { // Something went wrong ... - PICO_FREE(inline_buf); - return NULL; - } - *compressed_len += ret; // Increase compressed length - comp += ret; // Move forward compressed length - } - ori += ip6_fields[i].ori_size; // Move to next field - } - - /* Rearrange IPHC-header if CTX-extension is included */ - if (iphc[1] & CTX_EXTENSION) { - *compressed_len += 3; - } else { - buf_move(inline_buf + 2, inline_buf + 3, (size_t)*compressed_len); - *compressed_len += 2; - } - } - return inline_buf; -} - -/* Decompresses a frame compressed with the IPHC compression scheme, RFC6282 */ -static uint8_t * -decompressor_iphc(struct pico_frame *f, int32_t *compressed_len) -{ - uint8_t *ipv6_hdr = PICO_ZALLOC(PICO_SIZE_IP6HDR); - uint8_t *iphc = f->net_hdr, *ori = ipv6_hdr, *comp = NULL; - int32_t i = 0, ret = 0, ctx = f->net_hdr[1] & CTX_EXTENSION; - *compressed_len = ctx ? 3 : 2; - comp = f->net_hdr + (ctx ? 3 : 2); - - if (!ipv6_hdr) { - return NULL; - } else { - for (i = 0; i < NUM_IPV6_FIELDS; i++) { - if (ip6_fields[i].decompress) { - ret = ip6_fields[i].decompress(ori, comp, iphc, &f->src, &f->dst, f->dev); - if (ret < 0) { // Something went wrong ... 
- PICO_FREE(ipv6_hdr); - return NULL; - } - *compressed_len += ret; // Increase compressed size - comp += ret; // Move to next compressed chunk - } - ori += ip6_fields[i].ori_size; // Move to next IPv6 field - } - } - return ipv6_hdr; -} - -/* Compresses a UDP header according to the NHC_UDP compression scheme, RFC6282 */ -static uint8_t * -compressor_nhc_udp(struct pico_frame *f, int32_t *compressed_len) -{ - uint8_t *inline_buf = PICO_ZALLOC(PICO_UDPHDR_SIZE); - struct pico_udp_hdr *hdr = (struct pico_udp_hdr *)f->transport_hdr; - uint16_t sport = hdr->trans.sport, dport = hdr->trans.dport; - uint16_t xF0B0 = short_be(0xF0B0), xF000 = short_be(0xF000); - uint16_t xFF00 = short_be(0xFF00), xFFF0 = short_be(0xFFF0); - *compressed_len = 0; - - if (!inline_buf) { - return NULL; - } else { - /* Dispatch header */ - inline_buf[0] = (uint8_t)UDP_DISPATCH; - /* Port compression */ - if (PORT_COMP(sport, xFFF0, xF0B0) && PORT_COMP(dport, xFFF0, xF0B0)) { - inline_buf[0] |= UDP_COMPRESSED_BOTH; - inline_buf[1] = (uint8_t)(short_be(sport) << 4); - dport = (uint8_t)(short_be(dport) & (uint16_t)0x000F); - inline_buf[1] = (uint8_t)(inline_buf[1] | (uint8_t)dport); - *compressed_len = 2; - } else if (PORT_COMP(sport, xFF00, xF000)) { - inline_buf[0] |= UDP_COMPRESSED_SRC; - inline_buf[1] = (uint8_t)short_be(sport); - buf_move(inline_buf + 2, (uint8_t *)hdr + 2, 2); - *compressed_len = 4; - } else if (PORT_COMP(dport, xFF00, xF000)) { - inline_buf[0] |= UDP_COMPRESSED_DST; - inline_buf[3] = (uint8_t)short_be(dport); - buf_move(inline_buf + 1, (uint8_t *)hdr, 2); - *compressed_len = 4; - } else { - inline_buf[0] &= (uint8_t)~UDP_COMPRESSED_BOTH; - buf_move(inline_buf + 1, (uint8_t *)hdr, 4); - *compressed_len = 5; - } - /* Length MUST be compressed checksum carried inline. - * RFC6282: .., a compressor in the source transport endpoint MAY elide - * the UDP checksum if it is autorized by the upper layer. 
The compressor - * MUST NOT set the C bit unless it has received such authorization */ - buf_move(inline_buf + *compressed_len, (uint8_t *)hdr + 6, 2); - *compressed_len += 2; - return inline_buf; - } -} - -/* Decompresses a NHC_UDP header according to the NHC_UDP compression scheme */ -static uint8_t * -decompressor_nhc_udp(struct pico_frame *f, int32_t processed_len, int32_t *compressed_len) -{ - struct pico_udp_hdr *hdr = NULL; - uint8_t *buf = f->transport_hdr; - uint8_t compression = buf[0] & UDP_COMPRESSED_BOTH; - uint16_t xF0B0 = short_be(0xF0B0); - uint16_t xF000 = short_be(0xF000); - int32_t payload_len = 0; - *compressed_len = 0; - - /* Decompress ports */ - hdr = PICO_ZALLOC(PICO_UDPHDR_SIZE); - if (hdr) { - if (UDP_COMPRESSED_BOTH == compression) { - hdr->trans.sport = xF0B0 | short_be((uint16_t)(buf[1] >> 4)); - hdr->trans.dport = xF0B0 | short_be((uint16_t)(buf[1] & 0xff)); - *compressed_len = 2; - } else if (UDP_COMPRESSED_SRC == compression) { - hdr->trans.dport = short_be((uint16_t)(((uint16_t)buf[2] << 8) | (uint16_t)buf[3])); - hdr->trans.sport = xF000 | short_be((uint16_t)buf[1]); - *compressed_len = 4; - } else if (UDP_COMPRESSED_DST == compression) { - hdr->trans.sport = short_be((uint16_t)(((uint16_t)buf[1] << 8) | (uint16_t)buf[2])); - hdr->trans.dport = xF000 | short_be((uint16_t)buf[3]); - *compressed_len = 4; - } else { - buf_move((uint8_t *)&hdr->trans, &buf[1], 4); - *compressed_len = 5; - } - if (!(buf[0] & UDP_COMPRESSED_CHCK)) { // Leave empty room for checksum - buf_move((uint8_t *)&hdr->crc, &buf[*compressed_len],2); - *compressed_len += 2; - } - /* Restore inherently compressed length */ - payload_len = (int32_t)f->len - (processed_len + *compressed_len); - hdr->len = short_be((uint16_t)(payload_len + PICO_UDPHDR_SIZE)); - return (uint8_t *)hdr; - } - return NULL; -} - -/* Get's the length of an IPv6 extension header */ -static int32_t -ext_hdr_len(struct pico_ipv6_exthdr *ext, uint8_t hdr, uint8_t *dispatch) -{ - int32_t len = 0; 
- /* Get length of extension header */ - switch (hdr) { - case PICO_IPV6_EXTHDR_HOPBYHOP: - *dispatch |= (uint8_t)EXT_HOPBYHOP; - len = IPV6_OPTLEN(ext->ext.destopt.len); // Length in bytes - ext->ext.destopt.len = (uint8_t)(len - 2); // Octets after len-field - return (int32_t)len; - case PICO_IPV6_EXTHDR_ROUTING: - *dispatch |= (uint8_t)EXT_ROUTING; - len = IPV6_OPTLEN(ext->ext.destopt.len); // Length in bytes - ext->ext.destopt.len = (uint8_t)(len - 2); // Octets after len-field - return (int32_t)len; - case PICO_IPV6_EXTHDR_DESTOPT: - *dispatch |= (uint8_t)EXT_DSTOPT; - len = IPV6_OPTLEN(ext->ext.destopt.len); // Length in bytes - ext->ext.destopt.len = (uint8_t)(len - 2); // Octets after len-field - return (int32_t)len; - case PICO_IPV6_EXTHDR_FRAG: - *dispatch |= (uint8_t)EXT_FRAG; - return (int32_t)8; - default: // Somethin went wrong, bail out... - return -1; - } -} - -/* Compresses an IPv6 extension header according to the NHC_EXT compression - * scheme */ -static uint8_t * -compressor_nhc_ext(struct pico_frame *f, int32_t *compressed_len, uint8_t *nh) -{ - struct pico_ipv6_exthdr *ext = (struct pico_ipv6_exthdr *)f->net_hdr; - uint8_t dispatch = EXT_DISPATCH; - int32_t len = 0, lead = 0, ret = 0; - uint8_t *buf = NULL; - uint8_t hdr = *nh; - - /* Determine next header */ - *nh = ext->nxthdr; - if (!compressible_nh(*nh)) { - len++; // Dispatch header has to be prepended - lead++; // Copy right after dispatch - } else { - dispatch |= (uint8_t)0x01; // Set NH flag - } - - /* Get length of extension header */ - ret = ext_hdr_len(ext, hdr, &dispatch); - if (ret < 0) { - return NULL; - } else { - /* Provide inline buffer */ - len += ret; - buf = PICO_ZALLOC((size_t)len); - if (!buf) { - return NULL; - } else { - /* Copy extension header */ - buf_move(buf + lead, (uint8_t *)ext, (size_t)(len - lead)); - buf[0] = dispatch; // Set the dispatch header - *compressed_len = len; - f->net_hdr += *compressed_len; // Move to next header - return buf; - } - } -} - -/* 
Retrieves the next header from the immediately following header */ -static uint8_t -ext_nh_retrieve(uint8_t *buf, int32_t len) -{ - uint8_t eid = 0; - buf += len; - if ((buf[0] & 0xF0) == EXT_DISPATCH) { - eid = buf[0] & 0x0E; - switch (eid) { - case EXT_HOPBYHOP: - return (uint8_t)PICO_IPV6_EXTHDR_HOPBYHOP; - case EXT_ROUTING: - return (uint8_t)PICO_IPV6_EXTHDR_ROUTING; - case EXT_FRAG: - return (uint8_t)PICO_IPV6_EXTHDR_FRAG; - case EXT_DSTOPT: - return (uint8_t)PICO_IPV6_EXTHDR_DESTOPT; - default: - return 0; - } - } else if ((buf[0] & 0xF8) == UDP_DISPATCH) { - return PICO_PROTO_UDP; - } - return 0; -} - -/* RFC6282: A decompressor MUST ensure that the - * containing header is padded out to a multiple of 8 octets in length, - * using a Pad1 or PadN option if necessary. */ -static int32_t -ext_align(uint8_t *buf, int32_t alloc, int32_t len) -{ - int32_t padlen = alloc - len; - buf += len; // Move to padding location - if (padlen == 1) { - buf[0] = 0; // Pad1 - } else if (padlen > 1) { - buf[0] = 1; // PadN - buf[1] = (uint8_t)(padlen - 2); - } else { - return -1; - } - return 0; -} - -/* Determines the compressed length (and some other parameters) from NHC_EXT - * compressed extension header */ -static int32_t -ext_compressed_length(uint8_t *buf, uint8_t eid, int32_t *compressed_len, int32_t *head) -{ - int32_t len = 0; - switch (eid) { - case EXT_HOPBYHOP: // Intentional fall-through - case EXT_ROUTING: // Intentional fall-through - case EXT_DSTOPT: // Intentional fall-through - if (!(buf[0] & NH_COMPRESSED)) { // [ DIS | NXT | LEN | ... (len) - len = 2 + buf[2]; - *compressed_len = len + 1; - } else { // [ DIS | LEN | ... (len) - len = 2 + buf[1]; - *compressed_len = len; - *head = 1; - } - return len; - case EXT_FRAG: // [ DIS | FRAG ... - len = 8; - *compressed_len = len; - return len; - default: // Something went wrong, bail out.. 
- return -1; - } -} - -/* Decompresses an extension header pointed to by 'f->net_hdr', according to the - * NHC_EXT compression scheme */ -static uint8_t * -decompressor_nhc_ext(struct pico_frame *f, int32_t *compressed_len, int32_t *decompressed_len) -{ - struct pico_ipv6_exthdr *ext = NULL; - int32_t len = 0, head = 0, alloc = 0; - uint8_t *buf = f->net_hdr; - uint8_t eid = buf[0] & 0x0E; - uint8_t nh = 0; - - if ((buf[0] & 0xF0) == EXT_DISPATCH) { - /* Determine compressed header length */ - len = ext_compressed_length(buf, eid, compressed_len, &head); - if (len >= 0) { - /* Retrieve next header from following header */ - nh = ext_nh_retrieve(buf, *compressed_len); - - /* Make sure options are 8 octet aligned */ - alloc = (len % 8) ? (((len / 8) + 1) * 8) : (len); - ext = (struct pico_ipv6_exthdr *)PICO_ZALLOC((size_t)alloc); - if (ext) { - buf_move((uint8_t *)ext + head, buf + 1, (size_t)(len - head)); - ext->nxthdr = nh; - if (EXT_HOPBYHOP == eid || EXT_DSTOPT == eid || EXT_ROUTING) { - ext->ext.destopt.len = (uint8_t)((alloc / 8) - 1); - ext_align((uint8_t *)ext, alloc, len); - } - } - *decompressed_len = alloc; - return (uint8_t *)ext; - } - } - return NULL; -} - -/* Free's memory of a all assembled chunks for 'n' amount */ -static struct pico_frame * -pico_iphc_bail_out(uint8_t **chunks, int32_t n) -{ - int32_t i = 0; - for (i = 0; i < n; i++) { - PICO_FREE(chunks[i]); - } - return NULL; -} - -/* Performs reassembly after either compression of decompression */ -static struct pico_frame * -pico_iphc_reassemble(struct pico_frame *f, uint8_t **chunks, int32_t *chunks_len, int32_t n, int32_t processed_len, int32_t handled_len) -{ - uint32_t grow = f->buffer_len; - struct pico_frame *new = NULL; - int32_t payload_len = 0; - uint8_t *dst = NULL; - int32_t ret = 0, i = 0; - - /* Calculate buffer size including IPv6 payload */ - payload_len = (int32_t)f->len - handled_len; - processed_len += payload_len; // Length of frame after processing - - /* Reallocate frame 
size if there isn't enough room available */ - if (f->len < (uint16_t)processed_len) { - grow = (uint32_t)(grow + (uint32_t)processed_len - f->len); - ret = pico_frame_grow(f, grow); - if (ret) - return pico_iphc_bail_out(chunks, n); - } - - chunks[n] = f->net_hdr + handled_len; // Start of payload_available - chunks_len[n] = payload_len; // Size of payload - n++; // Payload is another chunk to copy - - /* Provide a new frame */ - if (!(new = pico_frame_deepcopy(f))) - return pico_iphc_bail_out(chunks, n); - - /* Copy each chunk back in the frame starting at the end of the new - * frame-buffer so we don't overwrite overlapping memory regions */ - dst = new->buffer + new->buffer_len; - for (i = n - 1; i >= 0; i--) { - dst -= chunks_len[i]; - buf_move(dst, chunks[i], (size_t)chunks_len[i]); - } - new->net_hdr = dst; // Last destination is net_hdr - new->start = new->net_hdr; // Start of useful data is at net_hdr - new->len = (uint32_t)processed_len; - new->transport_len = 0; - new->payload_len = 0; - new->app_len = 0; - new->transport_hdr = new->net_hdr + new->net_len; - pico_iphc_bail_out(chunks, n - 1); // Success, discard compressed chunk - if (new->start < new->buffer) { - pico_frame_discard(new); - return NULL; - } - return new; -} - -/* Compresses a frame according to the IPHC, NHC_EXT and NHC_UDP compression scheme */ -static struct pico_frame * -pico_iphc_compress(struct pico_frame *f) -{ - int32_t i = 0, compressed_len = 0, loop = 1, uncompressed = f->net_len; - uint8_t *old_nethdr = f->net_hdr; // Save net_hdr temporary ... 
- uint8_t nh = PICO_PROTO_IPV6; - uint8_t *chunks[8] = { NULL }; - int32_t chunks_len[8] = { 0 }; - - do { - switch (nh) { - /* IPV6 HEADER */ - case PICO_PROTO_IPV6: - chunks[i] = compressor_iphc(f, &chunks_len[i], &nh); - f->net_hdr += 40; // Move after IPv6 header - f->net_len = (uint16_t)chunks_len[i]; - break; - /* IPV6 EXTENSION HEADERS */ - case PICO_IPV6_EXTHDR_HOPBYHOP: - case PICO_IPV6_EXTHDR_ROUTING: - case PICO_IPV6_EXTHDR_FRAG: - case PICO_IPV6_EXTHDR_DESTOPT: - chunks[i] = compressor_nhc_ext(f, &chunks_len[i], &nh); - f->net_len = (uint16_t)(f->net_len + chunks_len[i]); - /* f->net_hdr is updated in compresor_nhc_ext with original size */ - break; - /* UDP HEADER */ - case PICO_PROTO_UDP: - chunks[i] = compressor_nhc_udp(f, &chunks_len[i]); - uncompressed += PICO_UDPHDR_SIZE; - f->transport_len = (uint16_t)chunks_len[i]; - default: /* Intentional fall-through */ - loop = 0; - } - /* Check if an error occured */ - if (!chunks[i]) - return pico_iphc_bail_out(chunks, i); - /* Increment total compressed_len and increase iterator */ - compressed_len += chunks_len[i++]; - } while (compressible_nh(nh) && loop && i < 8); - - f->net_hdr = old_nethdr; // ... Restore old net_hdr - return pico_iphc_reassemble(f, chunks, chunks_len, i, compressed_len, uncompressed); -} - -/* Restore some IPv6 header fields like next header and payload length */ -static struct pico_frame * -pico_ipv6_finalize(struct pico_frame *f, uint8_t nh) -{ - struct pico_ipv6_hdr *hdr = NULL; - if (!f) { - return NULL; - } else { - hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (!hdr->nxthdr) - hdr->nxthdr = nh; - hdr->len = short_be((uint16_t)(f->len - PICO_SIZE_IP6HDR)); - return f; - } -} - -/* Decompresses a frame according to the IPHC, NHC_EXT and NHC_UDP compression scheme */ -static struct pico_frame * -pico_iphc_decompress(struct pico_frame *f) -{ - int32_t i = 0, compressed = 0, loop = 1, uncompressed = 0, ret = 0; - uint8_t *old_nethdr = f->net_hdr; // Save net_hdr temporary ... 
- uint8_t dispatch = PICO_PROTO_IPV6; - uint8_t *chunks[8] = { NULL }; - struct pico_frame *n = NULL; - int32_t chunks_len[8] = { 0 }; - uint8_t nh = 0; - - do { - switch (dispatch) { - /* IPV6 HEADER */ - case PICO_PROTO_IPV6: - chunks[i] = decompressor_iphc(f, &ret); - chunks_len[i] = PICO_SIZE_IP6HDR; - f->net_len = PICO_SIZE_IP6HDR; - nh = ext_nh_retrieve(f->net_hdr, ret); - break; - /* IPV6 EXTENSION HEADERS */ - case PICO_IPV6_EXTHDR_HOPBYHOP: - case PICO_IPV6_EXTHDR_ROUTING: - case PICO_IPV6_EXTHDR_FRAG: - case PICO_IPV6_EXTHDR_DESTOPT: - chunks[i] = decompressor_nhc_ext(f, &ret, &chunks_len[i]); - f->net_len = (uint16_t)(f->net_len + chunks_len[i]); - break; - /* UDP HEADER */ - case PICO_PROTO_UDP: - f->transport_hdr = f->net_hdr; // Switch to transport header - chunks[i] = decompressor_nhc_udp(f, compressed, &ret); - chunks_len[i] = PICO_UDPHDR_SIZE; - default: /* Intentional fall-through */ - loop = 0; - } - /* Check if an error occured */ - if (!chunks[i]) - return pico_iphc_bail_out(chunks, i); - - /* Increase compressed and uncompressed length */ - compressed += ret; - uncompressed += chunks_len[i++]; - - /* Get next dispatch header */ - f->net_hdr += ret; - dispatch = ext_nh_retrieve(f->net_hdr, 0); - } while (dispatch && loop && i < 8); - f->net_hdr = old_nethdr; // ... 
Restore old net_hdr - - /* Reassemble gathererd decompressed buffers */ - n = pico_iphc_reassemble(f, chunks, chunks_len, i, uncompressed, compressed); - return pico_ipv6_finalize(n, nh); -} - -#endif - -/* Prepends an uncompressed IPv6 dispatch header */ -static void -pico_iphc_no_comp(struct pico_frame *f) -{ - f->net_hdr--; // Only need one bytes - f->start--; - f->len++; - f->net_len++; - f->net_hdr[0] = IPV6_DISPATCH; -} - -/* Removes an uncompressed IPv6 dispatch header */ -static void -pico_iphc_no_comp_dec(struct pico_frame *f) -{ - f->net_hdr++; - f->start++; - f->len--; - f->net_len--; -} - -/* Updates the fragmentation cookie with how many bytes there are copied and units - * of 8-octets that are transmitted, if bytes copied equals the size of the datagram - * the cookie is removed from the cookie-tree and the datagram is discarded */ -static int32_t -frag_update(struct pico_frame *f, struct frag_ctx *frag, uint8_t units, uint16_t copy) -{ - frag->dgram_off = (uint8_t)(frag->dgram_off + units); - frag->copied = (uint16_t)(frag->copied + copy); - /* Datagram is completely transmitted */ - if (frag->copied >= f->len) { - lp_dbg("6LP: FIN: "ORG"fragmentation"RST" with hash '%X', sent %u of %u bytes\n", frag->hash, frag->copied, f->len); - pico_tree_delete(&FragTree, frag); - PICO_FREE(frag); - pico_frame_discard(f); - } else { - lp_dbg("6LP: UPDATE: "ORG"fragmentation"RST" with hash '%X', sent %u of %u bytes\n", frag->hash, frag->copied, f->len); - return pico_datalink_send(f); - } - return (int32_t)1; // Success -} - -static void -frag_fill(uint8_t *frag, uint8_t dispatch, uint16_t dgram_size, uint16_t tag, uint8_t dgram_off, int32_t offset, uint16_t copy, uint16_t copied, uint8_t *buf) -{ - frag[0] = (uint8_t)(dispatch | ((uint8_t)short_be(dgram_size) & 0x07)); - frag[1] = (uint8_t)(short_be(dgram_size) >> 8); - frag[2] = (uint8_t)(short_be(tag)); - frag[3] = (uint8_t)(short_be(tag) >> 8); - frag[4] = (uint8_t)(dgram_off); - buf_move(frag + offset, buf + 
copied, copy); -} - -/* Looks for a fragmentation cookie and creates an n-th fragment frame that it - * tries to push to the datalink layer, if the entire datagram is transmitted, - * the fragment cookie is removed from the tree and the datagram is free'd */ -static int32_t -frag_nth(struct pico_frame *f) -{ - struct frag_ctx *frag = frag_ctx_find(f->hash); - uint16_t left = 0; - uint16_t copy = 0, alloc = FRAGN_SIZE; - struct pico_frame *n = NULL; - uint8_t units = 0; - int32_t avail = 0, ret = 0; - - if (frag) { - /* Check how many bytes there are available for n-th fragment */ - avail = pico_6lowpan_ll_push(f); - if (avail > 0) { - /* Calculate dgram_off and bytes to copy */ - left = (uint16_t)(f->len - frag->copied); - if (left <= (uint16_t)(avail - FRAGN_SIZE)) { - copy = left; - } else { - units = (uint8_t)((uint16_t)(avail - FRAGN_SIZE) >> 3); - copy = (uint16_t)(units << 3); - } - alloc = (uint16_t)(alloc + copy); - - n = pico_proto_6lowpan_ll.alloc(&pico_proto_6lowpan_ll, f->dev, alloc); - if (n) { - frag_fill(n->net_hdr, FRAGN_DISPATCH, frag->dgram_size, - frag->dgram_tag, frag->dgram_off, 5, copy, - frag->copied, f->net_hdr); - n->net_len = alloc; - n->len = (uint32_t)n->net_len; - n->src = frag->f->src; - n->dst = frag->f->dst; - - /* Try to push fragment to link layer */ - ret = pico_6lowpan_ll_push(n); - if (!ret) { // Update frag cookie - return frag_update(f, frag, units, copy); - } - } - } - } - - pico_frame_discard(f); - return -1; -} - -/* Makes a first fragment from a frame and tries to push it to the datalink layer - * Also enqueues the frame back in the outgoing frame-queue of the 6LOWPAN - * layer for subsequent fragments */ -static int32_t -frag_1st(struct pico_frame *f, uint16_t dgram_size, uint8_t dgram_off, uint16_t copy) -{ - uint16_t alloc = (uint16_t)(copy + FRAG1_SIZE); - struct pico_frame *n = NULL; - int32_t ret = 0; - - n = pico_proto_6lowpan_ll.alloc(&pico_proto_6lowpan_ll, f->dev, alloc); - if (n) { - frag_fill(n->net_hdr, 
FRAG1_DISPATCH, dgram_size, dgram_tag, 0, 4, copy, 0,f->net_hdr); - n->net_len = alloc; - n->len = (uint32_t)n->net_len; - n->src = f->src; - n->dst = f->dst; - - /* Try to push fragment to link layer */ - ret = pico_6lowpan_ll_push(n); - if (ret) { - dgram_tag--; - return -1; - } - - /* Enqueue the frame again for subsequent fragments */ - f->flags |= PICO_FRAME_FLAG_SLP_FRAG; - if (pico_datalink_send(f) <= 0) - return -1; - - /* Everything was a success store a cookie for subsequent fragments */ - return frag_store(f, dgram_size, dgram_tag++, dgram_off, copy, &FragTree); - } else { - pico_err = PICO_ERR_ENOMEM; - return -1; - } -} - -/* Send the first fragment of a uncompressed IPv6 datagram */ -static int32_t -frag_1st_no_comp(struct pico_frame *f, uint16_t dgram_size, int32_t available) -{ - /* Available bytes after inserting FRAG1 dispatch and IPv6 dispatch */ - uint16_t rest_size = (uint16_t)(available - FRAG1_SIZE - 1); - uint8_t dgram_off = (uint8_t)(rest_size >> 3); - uint16_t copy_size = (uint16_t)(rest_size + 1); - return frag_1st(f, dgram_size, dgram_off, copy_size); -} - -#ifdef PICO_6LOWPAN_IPHC_ENABLED -/* Determines the length of the compressed header */ -static uint16_t -frame_comp_hlen(struct pico_frame *f, int32_t udp) -{ - return (uint16_t)(f->net_len + ((udp) ? 
(f->transport_len) : (0))); -} - -/* Send the first fragment of a compressed datagram */ -static int32_t -frag_1st_comp(struct pico_frame *f, uint16_t dgram_size, int32_t available, int32_t udp) -{ - /* Calculate amount of bytes that are elided */ - uint16_t comp_diff = (uint16_t)(dgram_size - f->len); - uint16_t comp_hlen = frame_comp_hlen(f, udp); - /* Decompressed header length */ - uint16_t deco_hlen = (uint16_t)(comp_hlen + comp_diff); - /* Available octects after inserting FRAG1 dispatch and compressed header */ - uint16_t rest_size = (uint16_t)(available - FRAG1_SIZE - comp_hlen); - /* Offset for subsequent fragments in 8-octect units and in octets */ - uint8_t dgram_off = (uint8_t)((uint16_t)(rest_size + deco_hlen) >> 3); - uint16_t copy_size = 0; - /* 8-octet aligned available octets after decompression */ - rest_size = (uint16_t)((uint16_t)(dgram_off << 3) - deco_hlen); - copy_size = (uint16_t)(rest_size + comp_hlen); - return frag_1st(f, dgram_size, dgram_off, copy_size); -} -#endif - -static int32_t -pico_6lowpan_compress(struct pico_frame *f, int32_t avail) -{ - struct pico_ipv6_hdr *ip = (struct pico_ipv6_hdr *)f->net_hdr; - uint16_t dgram_size = (uint16_t)(short_be(ip->len) + PICO_SIZE_IP6HDR); - -#ifdef PICO_6LOWPAN_IPHC_ENABLED - int32_t udp = (PICO_PROTO_UDP == ip->nxthdr); - struct pico_frame *try = pico_iphc_compress(f); - if (try) { - /* Try to push frame to link layer */ - avail = pico_6lowpan_ll_push(try); - if (0 < avail && frame_comp_hlen(try, udp) <= (uint16_t)avail) { - /* RFC6282: any header that cannot fit within the first fragment - * MUST NOT be compressed. 
*/ - pico_frame_discard(f); - return frag_1st_comp(try, dgram_size, avail, udp); - } else if (!avail) { - pico_frame_discard(f); - return (int32_t)try->len; // Success, compression was enough - } else if (0 > avail) { - pico_frame_discard(try); - pico_frame_discard(f); - return -1; // Error pushing compressed frame - } - pico_frame_discard(try); - } -#endif - - pico_iphc_no_comp(f); // Add uncompressed dispatch header again - return frag_1st_no_comp(f, dgram_size, avail); -} - -/* General compression function that first tries to compress the frame and sends - * it through to the link layer, if that doesn't work the frame is fragmented */ -static int32_t -pico_6lowpan_send(struct pico_frame *f) -{ - int32_t avail = 0; - pico_iphc_no_comp(f); // Add uncrompressed dispatch header ... - - /* Try to push frame to link layer */ - avail = pico_6lowpan_ll_push(f); - if (avail > 0) { - pico_iphc_no_comp_dec(f); // ... remove IPv6 Dispatch Header - return pico_6lowpan_compress(f, avail); - } else if (!avail) { // Success - return (int32_t)f->len; - } else { - return -1; - } -} - -static int32_t -pico_6lowpan_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - - /* Check if it's meant for fragmentation */ - if (f->flags & PICO_FRAME_FLAG_SLP_FRAG) { - return frag_nth(f); - } else if ((f->net_hdr[0] & 0xF0) != 0x60) { - lp_dbg("6lowpan - ERROR: not an IPv6 frame\n"); - goto fin; - } else if (!f->dev || LL_MODE_ETHERNET == f->dev->mode) { - lp_dbg("6lowpan - ERROR: link layer mode not supported\n"); - goto fin; - } - - lp_dbg("6LP: ***NEW***, some stats: "); - lp_dbg("len: %d net_len: %d transport_len: %d\n", f->len, f->net_len, f->transport_len); - - /* Retrieve link layer addresses */ - if (pico_6lowpan_lls[f->dev->mode].addr_from_net(&f->src, f, 0) || - pico_6lowpan_lls[f->dev->mode].addr_from_net(&f->dst, f, 1)) { - /* Address mode is unspecified, probably destination ll-address is being resolved */ - return (int32_t)f->len; - } - - 
return pico_6lowpan_send(f); -fin: - pico_frame_discard(f); - return -1; -} - -static struct pico_frame * -pico_6lowpan_decompress(struct pico_frame *f) -{ -#ifdef PICO_6LOWPAN_IPHC_ENABLED - struct pico_frame *dec = NULL; -#endif - - if (0) {} -#ifdef PICO_6LOWPAN_IPHC_ENABLED - else if ((f->net_hdr[0] & 0xE0) == IPHC_DISPATCH) { - dec = pico_iphc_decompress(f); - pico_frame_discard(f); - return dec; - } -#endif - else if (f->net_hdr[0] == IPV6_DISPATCH) { - pico_iphc_no_comp_dec(f); - return f; - } else { - lp_dbg("6LP: RCVD invalid frame\n"); - pico_frame_discard(f); - return NULL; - } -} - -static int32_t -defrag_new(struct pico_frame *f, uint16_t dgram_size, uint16_t tag, uint16_t off) -{ - struct pico_frame *r = pico_proto_6lowpan_ll.alloc(&pico_proto_6lowpan_ll, f->dev, dgram_size); - if (r) { - r->start = r->buffer + (int32_t)(r->buffer_len - (uint32_t)dgram_size); - r->len = dgram_size; - r->net_hdr = r->start; - r->net_len = f->net_len; - r->transport_len = (uint16_t)(r->len - r->net_len); - r->src = f->src; - r->dst = f->dst; - buf_move(r->net_hdr + off, f->start, f->len); - if (frag_store(r, dgram_size, tag, 0, (uint16_t)f->len, &ReassemblyTree) < 0) { - pico_frame_discard(f); - pico_frame_discard(r); - return -1; - } - } - pico_frame_discard(f); - return 1; -} - -static int32_t -defrag_update(struct frag_ctx *frag, uint16_t off, struct pico_frame *f) -{ - struct pico_frame *r = frag->f; - buf_move(r->start + (int32_t)off, f->start, f->len); // Copy at start - frag->copied = (uint16_t)(frag->copied + (uint16_t)f->len); - pico_frame_discard(f); - if (frag->copied >= frag->dgram_size) { // Datagram completely reassembled - lp_dbg("6LP: FIN: "GRN"reassembly"RST" with tag '%u', stats: len: %d net: %d trans: %d\n", frag->dgram_tag, r->len, r->net_len, r->transport_len); - pico_tree_delete(&ReassemblyTree, frag); - PICO_FREE(frag); -#ifdef PICO_6LOWPAN_IPHC_ENABLED - r = pico_ipv6_finalize(r, 0); -#endif - return pico_network_receive(r); - } else { - 
lp_dbg("6LP: UPDATE: "GRN"reassembly"RST" with tag '%u', %u of %u bytes received\n", frag->dgram_tag, frag->copied, frag->dgram_size); - } - return (int32_t)r->len; -} - -static struct frag_ctx * -defrag_remove_header(struct pico_frame *f, uint16_t *dgram_size, uint16_t *tag, uint16_t *off, int32_t size) -{ - *dgram_size = (uint16_t)(((uint16_t)(f->net_hdr[0] & 0x07) << 8) | (uint16_t)f->net_hdr[1]); - *tag = (uint16_t)(((uint16_t)f->net_hdr[2] << 8) | (uint16_t)f->net_hdr[3]); - *off = (uint16_t)((uint16_t)f->net_hdr[4] << 3); - f->net_len = (uint16_t)(f->net_len - (uint16_t)size); - f->len = (uint32_t)(f->len - (uint32_t)size); - f->net_hdr += size; - f->start = f->net_hdr; - return frag_find(*dgram_size, *tag, f); -} - -static int32_t -defrag(struct pico_frame *f) -{ - uint16_t size = 0, tag = 0, off = 0; - struct frag_ctx *frag = NULL; - - if ((f->net_hdr[0] & 0xF8) == FRAG1_DISPATCH) { - frag = defrag_remove_header(f, &size, &tag, &off, FRAG1_SIZE); - if (!(f = pico_6lowpan_decompress(f))) - return -1; - off = 0; - } else if ((f->net_hdr[0] & 0xF8) == FRAGN_DISPATCH) { - frag = defrag_remove_header(f, &size, &tag, &off, FRAGN_SIZE); - } else { - lp_dbg("6LP: RCVD invalid frame\n"); - pico_frame_discard(f); - return -1; - } - - if (frag) { - return defrag_update(frag, off, f); - } else { - return defrag_new(f, size, tag, off); - } -} - -static int32_t -pico_6lowpan_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - - if (f->net_hdr[0] & 0x80) { - return defrag(f); - } else { - f = pico_6lowpan_decompress(f); - if (f) { - lp_dbg("6LP: Decompression finished, stats: len: %d net: %d trans: %d\n", f->len, f->net_len, f->transport_len); - return pico_network_receive(f); - } - return -1; - } -} - -int32_t -pico_6lowpan_pull(struct pico_frame *f) -{ - if (pico_enqueue(pico_proto_6lowpan.q_in, f) > 0) { - return (int32_t)f->len; // Success - } - - pico_frame_discard(f); - return -1; -} - -struct pico_protocol pico_proto_6lowpan 
= { - .name = "6lowpan", - .layer = PICO_LAYER_DATALINK, - .process_in = pico_6lowpan_process_in, - .process_out = pico_6lowpan_process_out, - .q_in = &pico_6lowpan_in, - .q_out = &pico_6lowpan_out -}; - -int pico_6lowpan_init(void) -{ - pico_6lowpan_ll_init(); - if (0 == pico_timer_add(1000, frag_timeout, NULL)) { - return -1; /* We care if timer fails, results in memory leak if frames don't get reassembled */ - } - return 0; -} - -#endif /* PICO_SUPPORT_6LOWPAN */ diff --git a/kernel/picotcp/modules/pico_6lowpan.h b/kernel/picotcp/modules/pico_6lowpan.h deleted file mode 100644 index c54e03f..0000000 --- a/kernel/picotcp/modules/pico_6lowpan.h +++ /dev/null @@ -1,40 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights - reserved. See LICENSE and COPYING for usage. - - Authors: Jelle De Vleeschouwer - *********************************************************************/ - -#ifndef INCLUDE_PICO_6LOWPAN -#define INCLUDE_PICO_6LOWPAN - -#include "pico_protocol.h" -#include "pico_device.h" -#include "pico_config.h" -#include "pico_frame.h" - -#define PICO_6LP_FLAG_LOWPAN (0x01) -#define PICO_6LP_FLAG_NOMAC (0x02) - -#ifdef PICO_SUPPORT_6LOWPAN -#define PICO_DEV_IS_6LOWPAN(dev) ((dev) && ((dev)->hostvars.lowpan_flags & PICO_6LP_FLAG_LOWPAN)) -#define PICO_DEV_IS_NOMAC(dev) ((dev) && ((dev)->hostvars.lowpan_flags & PICO_6LP_FLAG_NOMAC)) -#else -#define PICO_DEV_IS_6LOWPAN(dev) (0) -#define PICO_DEV_IS_NOMAC(dev) (0) -#endif - -/****************************************************************************** - * Public variables - ******************************************************************************/ - -extern struct pico_protocol pico_proto_6lowpan; - -/****************************************************************************** - * Public functions - ******************************************************************************/ - -int32_t 
pico_6lowpan_pull(struct pico_frame *f); -int pico_6lowpan_init(void); - -#endif /* INCLUDE_PICO_6LOWPAN */ diff --git a/kernel/picotcp/modules/pico_6lowpan_ll.c b/kernel/picotcp/modules/pico_6lowpan_ll.c deleted file mode 100644 index b099c1f..0000000 --- a/kernel/picotcp/modules/pico_6lowpan_ll.c +++ /dev/null @@ -1,454 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See LICENSE and COPYING for usage. - - Authors: Jelle De Vleeschouwer - *********************************************************************/ - -#include "pico_ipv6.h" -#include "pico_stack.h" -#include "pico_frame.h" -#include "pico_802154.h" -#include "pico_6lowpan.h" -#include "pico_protocol.h" -#include "pico_addressing.h" -#include "pico_6lowpan_ll.h" - -#ifdef PICO_SUPPORT_6LOWPAN - -/******************************************************************************* - * Macros - ******************************************************************************/ - -#ifdef DEBUG_6LOWPAN -#define ll_dbg dbg -#else -#define ll_dbg(...) 
do {} while(0) -#endif - -/******************************************************************************* - * Constants - ******************************************************************************/ - -/* Lifetime check interval */ -#define ONE_MINUTE ((pico_time)(1000 * 60)) - -/* Number of extensions */ -#define NUM_LL_EXTENSIONS (2) - -/******************************************************************************* - * Type definitions - ******************************************************************************/ - -struct extension { - int32_t (*estimate)(struct pico_frame *f); - int32_t (*out)(struct pico_frame *f); - int32_t (*in)(struct pico_frame *f); -}; - -/******************************************************************************* - * Global Variables - ******************************************************************************/ - -static const struct pico_6lowpan_ll_protocol pico_6lowpan_ll_none = { - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL -}; - -/* Declare a global lookup-table for distribution of link layer specific tasks */ -struct pico_6lowpan_ll_protocol pico_6lowpan_lls[PICO_6LOWPAN_LLS + 1]; - -static struct pico_queue pico_6lowpan_ll_in = { - 0 -}; -static struct pico_queue pico_6lowpan_ll_out = { - 0 -}; - -/******************************************************************************* - * CTX - ******************************************************************************/ - -#ifdef PICO_6LOWPAN_IPHC_ENABLED - -/* Compares if the IPv6 prefix of two IPv6 addresses match */ -static int32_t compare_prefix(uint8_t *a, uint8_t *b, int32_t len) -{ - uint8_t bitmask = (uint8_t)(0xff << (8 - (len % 8))); - size_t bytes = (size_t)len / 8; - int32_t ret = 0; - if ((ret = memcmp(a, b, bytes))) - return ret; - return (int32_t)((a[bytes] & bitmask) - (b[bytes] & bitmask)); -} - -/* Compares 2 IPHC context entries */ -static int32_t compare_ctx(void *a, void *b) -{ - struct iphc_ctx *ca = (struct iphc_ctx *)a; - struct iphc_ctx *cb = 
(struct iphc_ctx *)b; - return compare_prefix(ca->prefix.addr, cb->prefix.addr, ca->size); -} - -PICO_TREE_DECLARE(CTXtree, compare_ctx); - -/* Searches in the context tree if there's a context entry available with the - * prefix of the IPv6 address */ -struct iphc_ctx * ctx_lookup(struct pico_ip6 addr) -{ - struct iphc_ctx test = { NULL, addr, 0, 0, 0, 0 }; - return pico_tree_findKey(&CTXtree, &test); -} - -/* Looks up the context by ID, for decompression */ -struct iphc_ctx * ctx_lookup_id(uint8_t id) -{ - struct iphc_ctx *key = NULL; - struct pico_tree_node *i = NULL; - - pico_tree_foreach(i, &CTXtree) { - key = i->keyValue; - if (key && id ==key->id) - return key; - } - return NULL; -} - -/* Tries to insert a new IPHC-context into the Context-tree */ -static int32_t ctx_insert(struct pico_ip6 addr, uint8_t id, uint8_t size, pico_time lifetime, uint8_t flags, struct pico_device *dev) -{ - struct iphc_ctx *new = PICO_ZALLOC(sizeof(struct iphc_ctx)); - if (new) { - new->lifetime = lifetime; - new->prefix = addr; - new->flags = flags; - new->size = size; - new->dev = dev; - new->id = id; - if (pico_tree_insert(&CTXtree, new)) { - PICO_FREE(new); - return -1; - } - } else { - return -1; - } - return 0; -} - -/* Function to update context table from 6LoWPAN Neighbor Discovery */ -void ctx_update(struct pico_ip6 addr, uint8_t id, uint8_t size, pico_time lifetime, uint8_t flags, struct pico_device *dev) -{ - struct iphc_ctx *entry = ctx_lookup_id(id); - if (entry && dev == entry->dev) { - if (!lifetime) { - pico_tree_delete(&CTXtree, entry); - PICO_FREE(entry); - return; - } - entry->prefix = addr; - entry->size = size; - entry->lifetime = lifetime; - entry->flags = flags; - } else { - /* We don't care if it failed */ - (void)ctx_insert(addr, id, size, lifetime, flags, dev); - } -} - -/* Check whether or not particular contexts are expired and remove them if so. 
Contexts - * are reconfirmed before their lifetime expires */ -static void ctx_lifetime_check(pico_time now, void *arg) -{ - struct pico_tree_node *i = NULL, *next = NULL; - struct pico_ipv6_route *gw = NULL; - struct iphc_ctx *key = NULL; - IGNORE_PARAMETER(now); - IGNORE_PARAMETER(arg); - - pico_tree_foreach_safe(i, &CTXtree, next) { - if (i && i->keyValue) { - key = i->keyValue; - key->lifetime--; - if (!key->lifetime) { - pico_tree_delete(&CTXtree, key); - PICO_FREE(key); - } else if (key->lifetime == 5) { - /* RFC6775: The host SHOULD unicast one or more RSs to the router well before the - * shortest of the, Router Lifetime, PIO lifetimes and the lifetime of the 6COs. */ - gw = pico_ipv6_gateway_by_dev(key->dev); - while (gw) { - pico_6lp_nd_start_soliciting(pico_ipv6_linklocal_get(key->dev), gw); - gw = pico_ipv6_gateway_by_dev_next(key->dev, gw); - } - } - } - } - - (void)pico_timer_add(ONE_MINUTE, ctx_lifetime_check, NULL); -} - -#endif - -/******************************************************************************* - * MESH-UNDER ROUTING LAYER - ******************************************************************************/ - -/* XXX: Extensible processing function for outgoing frames. Here, the mesh header - * for a Mesh-Under topology can be prepended and the link layer source and - * destination addresses can be updated */ -static int32_t -ll_mesh_header_process_in(struct pico_frame *f) -{ - IGNORE_PARAMETER(f); - return 0; -} - -/* XXX: Extensible processing function for outgoing frames. 
Here, the mesh header - * for a Mesh-Under topology can be prepended and the link layer source and - * destination addresses can be updated */ -static int32_t -ll_mesh_header_process_out(struct pico_frame *f) -{ - IGNORE_PARAMETER(f); - return 0; -} - -/* XXX: Extensible function that estimates the size of the mesh header to be - * prepended based on the frame, the source and destination link layer address */ -static int32_t -ll_mesh_header_estimator(struct pico_frame *f) -{ - IGNORE_PARAMETER(f); - return 0; -} - -/******************************************************************************* - * GENERIC 6LOWPAN LINK LAYER - ******************************************************************************/ - -static int32_t -ll_mac_header_process_in(struct pico_frame *f) -{ - if (f && f->dev && pico_6lowpan_lls[f->dev->mode].process_in) { - return (int32_t)pico_6lowpan_lls[f->dev->mode].process_in(f); - } else { - return -1; - } -} - -static int32_t -ll_mac_header_process_out(struct pico_frame *f) -{ - if (f && f->dev && pico_6lowpan_lls[f->dev->mode].process_out) { - return (int32_t)pico_6lowpan_lls[f->dev->mode].process_out(f); - } else { - return -1; - } -} - -static int32_t -ll_mac_header_estimator(struct pico_frame *f) -{ - if (f && f->dev && pico_6lowpan_lls[f->dev->mode].estimate) { - return (int32_t)pico_6lowpan_lls[f->dev->mode].estimate(f); - } else { - return -1; - } -} - -/* Alloc's a frame with device's overhead and maximum IEEE802.15.4 header size */ -static struct pico_frame * -pico_6lowpan_frame_alloc(struct pico_protocol *self, struct pico_device *dev, uint16_t size) -{ - IGNORE_PARAMETER(self); - if (dev && pico_6lowpan_lls[dev->mode].alloc) { - return pico_6lowpan_lls[dev->mode].alloc(dev, size); - } else { - return NULL; - } -} - -/******************************************************************************* - * 6LOWPAN LINK LAYER PROTOCOL - ******************************************************************************/ - -const struct 
extension exts[] = { - {ll_mesh_header_estimator, ll_mesh_header_process_out, ll_mesh_header_process_in}, - {ll_mac_header_estimator, ll_mac_header_process_out, ll_mac_header_process_in}, -}; - -static int32_t -pico_6lowpan_ll_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - uint32_t datalink_len = 0; - int32_t ret = 0, i = 0; - IGNORE_PARAMETER(self); - - /* Every link layer extension updates the datalink pointer of the frame a little bit. */ - f->datalink_hdr = f->net_hdr; - - /* Call each of the outgoing processing functions */ - for (i = 0; i < NUM_LL_EXTENSIONS; i++) { - ret = exts[i].out(f); - if (ret < 0) /* Processing failed, no way to recover, discard frame */ - goto fin; - datalink_len = (uint32_t)(datalink_len + (uint32_t)ret); - if ((f->net_hdr - datalink_len) < f->buffer) /* Before buffer bound check */ - goto fin; - } - - /* Frame is ready for sending to the device driver */ - f->start = f->datalink_hdr; - f->len = (uint32_t)(f->len + datalink_len); - return (int32_t)(pico_sendto_dev(f) <= 0); -fin: - pico_frame_discard(f); - return -1; -} - -static int32_t -pico_6lowpan_ll_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - int32_t i = 0, ret = 0; - uint32_t len = 0; - IGNORE_PARAMETER(self); - - /* net_hdr is the pointer that is dynamically updated by the incoming - * processing functions to always point to right after a particular - * header, whether it's MAC, MESH, LL_SEC, ... eventually net_hdr will - * point to 6LoWPAN header which is exactly what we want */ - f->net_hdr = f->buffer; - - for (i = NUM_LL_EXTENSIONS - 1; i >= 0; i--) { - ret = exts[i].in(f); - switch (ret) { - case FRAME_6LOWPAN_LL_RELEASE: - /* Success, frame is somewhere else now.. 
*/ - break; - case FRAME_6LOWPAN_LL_DISCARD: - /* Something went wrong, discard the frame */ - pico_frame_discard(f); - break; - default: - /* Success, update link layer header length */ - len = (uint32_t)(len + (uint32_t)ret); - } - } - - /* Determine size at network layer */ - f->net_len = (uint16_t)(f->len - len); - f->len = (uint32_t)(f->len - len); - return pico_6lowpan_pull(f); -} - -/* Entry point for incoming 6LoWPAN frames, proxy for pico_stack_recv. This allows passing the link - * layer source and destination address as well */ -int32_t pico_6lowpan_stack_recv(struct pico_device *dev, uint8_t *buffer, uint32_t len, union pico_ll_addr *src, union pico_ll_addr *dst) -{ - int32_t ret = 0; - ll_dbg("6LoWPAN - Stack recv called!\n"); - if (PICO_DEV_IS_NOMAC(dev)) { - struct pico_frame *f = pico_stack_recv_new_frame(dev, buffer, len); - if (f) { - f->src = *src; - f->dst = *dst; - ret = pico_enqueue(dev->q_in, f); - if (0 >= ret) - pico_frame_discard(f); - return ret; - } - } else { - return pico_stack_recv(dev, buffer, len); - } - return -1; // return ERROR -} - -/* Proxy for pico_devloop_sendto_dev, 6LoWPAN-devices have a different interface with pico. 
This - * allows passing the link layer source and destination address as well */ -int32_t pico_6lowpan_ll_sendto_dev(struct pico_device *dev, struct pico_frame *f) -{ - /* FINAL OUTGOING POINT OF 6LOWPAN STACK */ - return ((struct pico_dev_6lowpan *)dev)->send(dev, f->start, (int32_t)f->len, f->src, f->dst); -} - -/* Initialisation routine for 6LoWPAN specific devices */ -int pico_dev_6lowpan_init(struct pico_dev_6lowpan *dev, const char *name, uint8_t *mac, enum pico_ll_mode ll_mode, uint16_t mtu, uint8_t nomac, - int (* send)(struct pico_device *dev, void *_buf, int len, union pico_ll_addr src, union pico_ll_addr dst), - int (* poll)(struct pico_device *dev, int loop_score)) -{ - struct pico_device *picodev = (struct pico_device *)dev; - if (!dev || !send || !poll) { - return -1; - } - - picodev->mode = ll_mode; - picodev->hostvars.lowpan_flags = PICO_6LP_FLAG_LOWPAN; - if (nomac) { - picodev->hostvars.lowpan_flags |= PICO_6LP_FLAG_NOMAC; - } - picodev->mtu = mtu; - picodev->poll = poll; - picodev->send = NULL; - dev->send = send; - - return pico_device_init(picodev, name, mac); -} - - -/* Push function for 6LoWPAN to call when it wants to try to send te frame to the device-driver */ -int32_t -pico_6lowpan_ll_push(struct pico_frame *f) -{ - uint16_t frame_size, pl_available = 0; - int32_t i = 0; - - if (!f || !f->dev) - return -1; - frame_size = (uint16_t)(f->len); - - /* Restrict frames to be as large as the device's MTU. 
*/ - pl_available = (uint16_t)f->dev->mtu; - - /* Call each of the estimator functions of the additional headers to - * determine if the frame fits inside a single 802.15.4 frame, if it doesn't - * in the end, return the available bytes */ - for (i = 0; i < NUM_LL_EXTENSIONS; i++) { - pl_available = (uint16_t)(pl_available - exts[i].estimate(f)); - } - if (frame_size > pl_available) - return pl_available; - - /* Make sure these addresses are retrievable from the frame on processing */ - if (pico_enqueue(pico_proto_6lowpan_ll.q_out,f) > 0) { - return 0; // Frame enqueued for later processing - } - return -1; // Return ERROR -} - -struct pico_protocol pico_proto_6lowpan_ll = { - .name = "6lowpan_ll", - .layer = PICO_LAYER_DATALINK, - .alloc = pico_6lowpan_frame_alloc, - .process_in = pico_6lowpan_ll_process_in, - .process_out = pico_6lowpan_ll_process_out, - .q_in = &pico_6lowpan_ll_in, - .q_out = &pico_6lowpan_ll_out -}; - -void pico_6lowpan_ll_init(void) -{ - int32_t i = 0; - -#ifdef PICO_6LOWPAN_IPHC_ENABLED - /* We don't care about failure */ - (void)pico_timer_add(60000, ctx_lifetime_check, NULL); -#endif - - /* Initialize interface with 6LoWPAN link layer protocols */ - pico_6lowpan_lls[i++] = pico_6lowpan_ll_none; - -#ifdef PICO_SUPPORT_802154 - pico_6lowpan_lls[i++] = pico_6lowpan_ll_802154; -#endif -} - -#endif /* PICO_SUPPORT_6LOWPAN */ diff --git a/kernel/picotcp/modules/pico_6lowpan_ll.h b/kernel/picotcp/modules/pico_6lowpan_ll.h deleted file mode 100644 index 06cde23..0000000 --- a/kernel/picotcp/modules/pico_6lowpan_ll.h +++ /dev/null @@ -1,122 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights - reserved. See LICENSE and COPYING for usage. 
- - Authors: Jelle De Vleeschouwer - *********************************************************************/ - -#ifndef INCLUDE_PICO_6LOWPAN_LL -#define INCLUDE_PICO_6LOWPAN_LL - -#include "pico_addressing.h" -#include "pico_protocol.h" -#include "pico_6lowpan.h" -#include "pico_device.h" -#include "pico_config.h" -#include "pico_frame.h" -#include "pico_ipv6.h" - -/* Possible actions to perform on a received frame */ -#define FRAME_6LOWPAN_LL_RELEASE (-1) -#define FRAME_6LOWPAN_LL_DISCARD (-2) - -/******************************************************************************* - * CTX - ******************************************************************************/ - -#ifdef PICO_6LOWPAN_IPHC_ENABLED - -#define PICO_IPHC_CTX_COMPRESS (0x01u) - -struct iphc_ctx -{ - struct pico_device *dev; - struct pico_ip6 prefix; - uint8_t id; - uint8_t size; - uint8_t flags; - pico_time lifetime; -}; - -/* - * Looks up a context entry for a particular IPv6-address contained in 'addr' and returns it. - * Returns NULL if no entry is found. (See RFC4944) - */ -struct iphc_ctx * ctx_lookup(struct pico_ip6 addr); - -/* - * Looks up a context entry that belongs to a certain context identifier. - * Returns NULL if no belonging entry is found. (See RFC4944) - */ -struct iphc_ctx * ctx_lookup_id(uint8_t id); - -/* - * Creates a new, or updates and existing, context entry for a certain IPv6 address. 
(See RFC4944) - */ -void ctx_update(struct pico_ip6 addr, uint8_t id, uint8_t size, pico_time lifetime, uint8_t flags, struct pico_device *dev); - -#endif - -/****************************************************************************** - * Interface with device drivers - ******************************************************************************/ - -struct pico_dev_6lowpan -{ - /* Interface with picoTCP */ - struct pico_device dev; - - /* Transmit-function: - * - * @param dev The device who's send-function got called - * @param _buf Buffer containing the frame to be send over the network - * @param len Length of _buf - * @param src Link Layer source address of the device (IETF-endianness) - * @param dst Link layer destination address of the device (IETF-endianness) - * - * @return length of the frame that is transmitted on success, -1 on failure - */ - int (* send)(struct pico_device *dev, void *_buf, int len, union pico_ll_addr src, union pico_ll_addr dst); -}; - -/* Initialisation routine for 6LoWPAN specific devices */ -int pico_dev_6lowpan_init(struct pico_dev_6lowpan *dev, const char *name, uint8_t *mac, enum pico_ll_mode ll_mode, uint16_t mtu, uint8_t nomac, - int (* send)(struct pico_device *dev, void *_buf, int len, union pico_ll_addr src, union pico_ll_addr dst), - int (* poll)(struct pico_device *dev, int loop_score)); - -/****************************************************************************** - * Interface with link layer - ******************************************************************************/ - -struct pico_6lowpan_ll_protocol -{ - int32_t (* process_in)(struct pico_frame *f); - int32_t (* process_out)(struct pico_frame *f); - int32_t (* estimate)(struct pico_frame *f); - int32_t (* addr_from_buf)(union pico_ll_addr *addr, uint8_t *buf); - int32_t (* addr_from_net)(union pico_ll_addr *addr, struct pico_frame *f, int32_t dest); - int32_t (* addr_len)(union pico_ll_addr *addr); - int32_t (* addr_cmp)(union pico_ll_addr *a, union 
pico_ll_addr *b); - int32_t (* addr_iid)(uint8_t *iid, union pico_ll_addr *addr); - struct pico_frame * (*alloc)(struct pico_device *dev, uint16_t size); -}; - -/****************************************************************************** - * Public variables - ******************************************************************************/ - -extern struct pico_6lowpan_ll_protocol pico_6lowpan_lls[]; -extern struct pico_protocol pico_proto_6lowpan_ll; - -/****************************************************************************** - * Public functions - ******************************************************************************/ - -void pico_6lowpan_ll_init(void); -int32_t pico_6lowpan_ll_push(struct pico_frame *f); -int32_t pico_6lowpan_ll_pull(struct pico_frame *f); -int32_t frame_6lowpan_ll_store_addr(struct pico_frame *f); -int32_t pico_6lowpan_ll_sendto_dev(struct pico_device *dev, struct pico_frame *f); -int32_t pico_6lowpan_stack_recv(struct pico_device *dev, uint8_t *buffer, uint32_t len, union pico_ll_addr *src, union pico_ll_addr *dst); - -#endif /* INCLUDE_PICO_6LOWPAN_LL */ diff --git a/kernel/picotcp/modules/pico_802154.c b/kernel/picotcp/modules/pico_802154.c deleted file mode 100644 index 3647059..0000000 --- a/kernel/picotcp/modules/pico_802154.c +++ /dev/null @@ -1,456 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights - reserved. See LICENSE and COPYING for usage. 
- - Authors: Jelle De Vleeschouwer - *********************************************************************/ - -#include "pico_stack.h" -#include "pico_frame.h" -#include "pico_802154.h" -#include "pico_6lowpan.h" -#include "pico_protocol.h" -#include "pico_addressing.h" -#include "pico_6lowpan_ll.h" - -#ifdef PICO_SUPPORT_802154 - -/******************************************************************************* - * Macros - ******************************************************************************/ - -#define PICO_802154_VALID(am) ((am) == 2 || (am) == 3 ? 1 : 0) - -/******************************************************************************* - * Constants - ******************************************************************************/ - -/* Frame type definitions */ -#define FCF_TYPE_BEACON (short_be(0x0000u)) -#define FCF_TYPE_DATA (short_be(0x0001u)) -#define FCF_TYPE_ACK (short_be(0x0002u)) -#define FCF_TYPE_CMD (short_be(0x0003u)) - -/* Frame version definitions */ -#define FCF_VER_2003 (short_be(0x0000u)) -#define FCF_VER_2006 (short_be(0x1000u)) -#define FCF_SEC (short_be(0x0008u)) -#define FCF_NO_SEC (short_be(0x0000u)) -#define FCF_PENDING (short_be(0x0010u)) -#define FCF_NO_PENDING (short_be(0x0000u)) -#define FCF_ACK_REQ (short_be(0x0020u)) -#define FCF_NO_ACK_REQ (short_be(0x0000u)) -#define FCF_INTRA_PAN (short_be(0x0040u)) -#define FCF_INTER_PAN (short_be(0x0000u)) - -/* Commonly used addresses */ -#define ADDR_802154_BCAST (short_be(0xFFFFu)) -#define ADDR_802154_UNSPEC (short_be(0xFFFEu)) - -#ifndef PICO_6LOWPAN_NOMAC - -/******************************************************************************* - * ENDIANNESS - ******************************************************************************/ - -/* Swaps the two 8-bit values, the pointer A and B point at */ -static void pico_swap(uint8_t *a, uint8_t *b) -{ - *a = *a ^ *b; - *b = *a ^ *b; - *a = *a ^ *b; -} - -/* Converts an IEEE802.15.4 address, which is little endian by standard, to - * 
IETF-endianness, which is big endian. */ -static void -addr_802154_to_ietf(struct pico_802154 *addr) -{ - int32_t i = 0; - int32_t end = SIZE_6LOWPAN(addr->mode) - 1; - for (i = 0; i < (int32_t)((uint8_t)SIZE_6LOWPAN(addr->mode) >> 1); i++) { - pico_swap(&addr->addr.data[i], &addr->addr.data[end - i]); - } -} - -/* Converts an IEE802.15.4 address in IETF format, which is used to form the IID - * of the host's IPv6 addresses, back to IEEE-endianess, which is little - * endian. */ -static void -addr_802154_to_ieee(struct pico_802154 *addr) -{ - addr_802154_to_ietf(addr); -} - -/******************************************************************************* - * FRAME - ******************************************************************************/ - -/* Retrieves the addressing mode of the destination address from the MHR's frame - * control field. */ -static uint8_t -dst_am(struct pico_802154_hdr *hdr) -{ - return (uint8_t)((hdr->fcf >> 10) & 0x3); -} - -/* Retrieves the addressing mode of the source address from the MHR's frame - * control field */ -static uint8_t -src_am(struct pico_802154_hdr *hdr) -{ - return (uint8_t)((hdr->fcf >> 14) & 0x3); -} - -/* Determines the size of an IEEE802.15.4-header, based on the addressing - * modes */ -static uint8_t -frame_802154_hdr_len(struct pico_802154_hdr *hdr) -{ - return (uint8_t)(SIZE_802154_MHR_MIN + SIZE_6LOWPAN(src_am(hdr)) + SIZE_6LOWPAN(dst_am(hdr))); -} - -/* Gets the source address out of a mapped IEEE802.15.4-frame, converts it - * to host endianess */ -static struct pico_802154 -frame_802154_src(struct pico_802154_hdr *hdr) -{ - struct pico_802154 src = { .addr.data = { 0 }, .mode = src_am(hdr) }; - uint8_t *addresses = (uint8_t *)hdr + sizeof(struct pico_802154_hdr); - uint16_t len = SIZE_6LOWPAN(src.mode); - memcpy(src.addr.data, addresses + SIZE_6LOWPAN(dst_am(hdr)), len); - addr_802154_to_ietf(&src); - return src; -} - -/* Gets the destination address out of a mapped IEEE802.15.4-frame, converts - * it to 
host endianess */ -static struct pico_802154 -frame_802154_dst(struct pico_802154_hdr *hdr) -{ - struct pico_802154 dst = { .addr.data = { 0 }, .mode = dst_am(hdr) }; - uint8_t *addresses = (uint8_t *)hdr + sizeof(struct pico_802154_hdr); - uint16_t len = SIZE_6LOWPAN(dst.mode); - memcpy(dst.addr.data, addresses, len); - addr_802154_to_ietf(&dst); - return dst; -} - -/* Maps a 802.15.4 frame structure onto a flat buffer, fills in the entire - * header and set the payload pointer right after the MHR. */ -static void -frame_802154_format(uint8_t *buf, uint8_t seq, uint16_t intra_pan, uint16_t ack, - uint16_t sec, struct pico_6lowpan_short pan, struct pico_802154 src, - struct pico_802154 dst) -{ - uint8_t *addresses = (uint8_t *)(buf + sizeof(struct pico_802154_hdr)); - struct pico_802154_hdr *hdr = (struct pico_802154_hdr *)buf; - uint16_t sam = 0, dam = 0; - - hdr->fcf = 0; /* Clear out control field */ - intra_pan = (uint16_t)(intra_pan & FCF_INTRA_PAN); - ack = (uint16_t)(ack & FCF_ACK_REQ); - sec = (uint16_t)(sec & FCF_SEC); - dam = short_be((uint16_t)(dst.mode << 10)); - sam = short_be((uint16_t)(src.mode << 14)); - - /* Fill in frame control field */ - hdr->fcf |= (uint16_t)(FCF_TYPE_DATA | sec ); - hdr->fcf |= (uint16_t)(FCF_NO_PENDING | ack); - hdr->fcf |= (uint16_t)(intra_pan | dam | FCF_VER_2003); - hdr->fcf |= (uint16_t)(sam); - hdr->fcf = short_be(hdr->fcf); // Convert to IEEE endianness - - hdr->seq = seq; // Sequence number - - /* Convert addresses to IEEE-endianness */ - pan.addr = short_be(pan.addr); - addr_802154_to_ieee(&src); - addr_802154_to_ieee(&dst); - - /* Fill in the addresses */ - memcpy(&hdr->pan_id, &pan.addr, SIZE_6LOWPAN_SHORT); - memcpy(addresses, dst.addr.data, SIZE_6LOWPAN(dst.mode)); - memcpy(addresses + SIZE_6LOWPAN(dst.mode), src.addr.data,SIZE_6LOWPAN(src.mode)); -} - -#endif /* PICO_6LOWPAN_NOMAC */ - -/* Removes the IEEE802.15.4 MAC header before the frame */ -static int32_t -pico_802154_process_in(struct pico_frame *f) -{ 
-#ifndef PICO_6LOWPAN_NOMAC - struct pico_802154_hdr *hdr = (struct pico_802154_hdr *)f->net_hdr; - uint16_t fcf = short_be(hdr->fcf); - uint8_t hlen = 0; - f->src.pan = frame_802154_src(hdr); - f->dst.pan = frame_802154_dst(hdr); - - /* I claim the datalink header */ - f->datalink_hdr = f->net_hdr; - - if (fcf & FCF_SEC) { - f->flags |= PICO_FRAME_FLAG_LL_SEC; - } - - hlen = frame_802154_hdr_len(hdr); - - /* XXX: Generic procedure to move forward in incoming processing function - * is updating the net_hdr-pointer */ - f->net_hdr = f->datalink_hdr + (int32_t)hlen; - - return (int32_t)hlen; -#else - IGNORE_PARAMETER(f); - return 0; -#endif -} - -/* Prepends the IEEE802.15.4 MAC header before the frame */ -static int32_t -pico_802154_process_out(struct pico_frame *f) -{ -#ifndef PICO_6LOWPAN_NOMAC - int32_t len = (int32_t)(SIZE_802154_MHR_MIN + SIZE_6LOWPAN(f->dst.pan.mode) + SIZE_6LOWPAN(f->src.pan.mode)); - uint8_t sec = (uint8_t)((f->flags & PICO_FRAME_FLAG_LL_SEC) ? (FCF_SEC) : (FCF_NO_SEC)); - struct pico_6lowpan_info *info = (struct pico_6lowpan_info *)f->dev->eth; - uint16_t headroom = (uint16_t)(f->net_hdr - f->buffer); - static uint8_t seq = 0; - uint32_t grow = 0; - int32_t ret = 0; - - if (headroom < (uint16_t)len) { /* Check if there's enough headroom to prepend 802.15.4 header */ - grow = (uint32_t)(len - headroom); - ret = pico_frame_grow_head(f, (uint32_t)(f->buffer_len + grow)); - if (ret) { - pico_frame_discard(f); - return -1; - } - } - - /* XXX: General procedure to seek backward in an outgoing processing function - * is to update the datalink_hdr */ - f->datalink_hdr = f->datalink_hdr - len; - - /* Format the IEEE802.15.4 header */ - frame_802154_format(f->datalink_hdr, seq++, FCF_INTRA_PAN, FCF_NO_ACK_REQ, sec, info->pan_id, f->src.pan, f->dst.pan); - return len; -#else - IGNORE_PARAMETER(f); - return 0; -#endif -} - -/* Get the EUI-64 of the device in a structured form */ -static struct pico_802154 -addr_802154_ext_dev(struct pico_6lowpan_info 
*info) -{ - struct pico_802154 addr; - memcpy(addr.addr.data, info->addr_ext.addr, SIZE_6LOWPAN_EXT); - addr.mode = AM_6LOWPAN_EXT; - return addr; -} - -/* Get the short address of the device in a structured form */ -static struct pico_802154 -addr_802154_short_dev(struct pico_6lowpan_info *info) -{ - struct pico_802154 addr; - memcpy(addr.addr.data, (uint8_t *)&(info->addr_short.addr), SIZE_6LOWPAN_SHORT); - addr.mode = AM_6LOWPAN_SHORT; - return addr; -} - -/* Based on the source IPv6-address, this function derives the link layer source - * address */ -static struct pico_802154 -addr_802154_ll_src(struct pico_frame *f) -{ - struct pico_ip6 src = ((struct pico_ipv6_hdr *)f->net_hdr)->src; - if (IID_16(&src.addr[8])) { - /* IPv6 source is derived from the device's short address, use that - * short address so decompressor can derive the IPv6 source from - * the encapsulating header */ - return addr_802154_short_dev((struct pico_6lowpan_info *)f->dev->eth); - } else { - /* IPv6 source is derived from the device's extended address, use - * the device's extended address so */ - return addr_802154_ext_dev((struct pico_6lowpan_info *)f->dev->eth); - } -} - -/* Based on the destination IPv6-address, this function derives the link layer - * destination address */ -static struct pico_802154 -addr_802154_ll_dst(struct pico_frame *f) -{ - struct pico_ip6 dst = ((struct pico_ipv6_hdr *)f->net_hdr)->dst; - struct pico_802154 addr = { .addr.data = { 0 }, .mode = 0 }; - addr.mode = AM_6LOWPAN_NONE; - - /* If the address is multicast use 802.15.4 BCAST address 0xFFFF */ - if (pico_ipv6_is_multicast(dst.addr)) { - addr.addr._short.addr = short_be(ADDR_802154_BCAST); - addr.mode = AM_6LOWPAN_SHORT; - } - /* If the address is link local derive the link layer address from the IID */ - else { // if (pico_ipv6_is_linklocal(dst.addr)) { - if (IID_16(&dst.addr[8])) { - addr.addr.data[0] = dst.addr[14]; - addr.addr.data[1] = dst.addr[15]; - addr.mode = AM_6LOWPAN_SHORT; - } else { - 
memcpy(addr.addr.data, &dst.addr[8], SIZE_6LOWPAN_EXT); - addr.addr.data[0] = (uint8_t)(addr.addr.data[0] ^ 0x02); - addr.mode = AM_6LOWPAN_EXT; - } - } -/* - else { - struct pico_802154 *n = (struct pico_802154 *)pico_ipv6_get_neighbor(f); - if (n) { - memcpy(addr.addr.data, n->addr.data, SIZE_6LOWPAN(n->mode)); - addr.mode = n->mode; - } else { - pico_ipv6_nd_postpone(f); - } - } -*/ - return addr; -} - -/* Estimates the size the MAC header would be based on the source and destination - * link layer address */ -static int32_t -pico_802154_estimator(struct pico_frame *f) -{ - return (int32_t)(SIZE_802154_MHR_MIN + SIZE_6LOWPAN(f->src.pan.mode) + SIZE_6LOWPAN(f->dst.pan.mode) + f->dev->overhead); -} - -/* Retrieve address from temporarily flat buffer */ -static int32_t -addr_802154_from_buf(union pico_ll_addr *addr, uint8_t *buf) -{ - uint8_t len = (uint8_t)*buf++; - - if (len > 8) // OOB check - return -1; - - memcpy(addr->pan.addr.data, buf, len); - if (SIZE_6LOWPAN_EXT == len) - addr->pan.mode = AM_6LOWPAN_EXT; - else if (SIZE_6LOWPAN_SHORT == len) - addr->pan.mode = AM_6LOWPAN_SHORT; - else - addr->pan.mode = AM_6LOWPAN_NONE; - - return 0; -} - -/* If 'dest' is not set, this function will get the link layer address for a - * certain source IPv6 address, if 'dest' is set it will get it for the a - * destination address */ -static int32_t -addr_802154_from_net(union pico_ll_addr *addr, struct pico_frame *f, int32_t dest) -{ - if (dest) { - addr->pan = addr_802154_ll_dst(f); - } else { - addr->pan = addr_802154_ll_src(f); - } - return 0; -} - -/* Determines the length of an IEEE802.15.4 address */ -static int32_t -addr_802154_len(union pico_ll_addr *addr) -{ - return SIZE_6LOWPAN(addr->pan.mode); -} - -/* Compares 2 IEE802.15.4 addresses */ -static int32_t -addr_802154_cmp(union pico_ll_addr *a, union pico_ll_addr *b) -{ - if (a->pan.mode != b->pan.mode) { - return (int32_t)((int32_t)a->pan.mode - (int32_t)b->pan.mode); - } else { - return memcmp(a->pan.addr.data, 
b->pan.addr.data, SIZE_6LOWPAN(b->pan.mode)); - } -} - -/* Derive an IPv6 IID from an IEEE802.15.4 address */ -static int32_t -addr_802154_iid(uint8_t iid[8], union pico_ll_addr *addr) -{ - uint8_t buf[8] = {0,0,0,0xff,0xfe,0,0,0}; - struct pico_802154 pan = addr->pan; - - if (AM_6LOWPAN_SHORT == pan.mode) { - buf[6] = (uint8_t)(pan.addr._short.addr); - buf[7] = (uint8_t)(pan.addr._short.addr >> 8); - } else if (AM_6LOWPAN_EXT == pan.mode) { - memcpy(buf, pan.addr.data, SIZE_6LOWPAN_EXT); - buf[0] ^= (uint8_t)0x02; - } else { - return -1; - } - - memcpy(iid, buf, 8); - return 0; -} - -/* - * Allocates a pico_frame but makes sure the network-buffer starts on an 4-byte aligned address, - * this is required by upper layer of the stack. IEEE802.15.4's header isn't necessarily 4/8-byte - * aligned since the minimum size of an IEEE802.15.4 header is '5'. The datalink header therefore - * might not (and most probably isn't) aligned on an aligned address. The datalink header will of - * the size passed in 'headroom' - * - * @param size Size of the actual frame provided for network-layer and above - * @param headroom Size of the headroom for datalink-buffer - * @param overhead Size of the overhead to keep for the device driver - * - * @return struct pico_frame *, returns the allocated frame upon success, 'NULL' otherwise. 
- */ -static struct pico_frame * -pico_frame_alloc_with_headroom(uint16_t size, uint16_t headroom, uint16_t overhead) -{ - int network_offset = (((headroom + overhead) >> 2) + 1) << 2; // Sufficient headroom for alignment - struct pico_frame *f = pico_frame_alloc((uint32_t)(size + network_offset)); - - if (!f) - return NULL; - - f->net_hdr = f->buffer + network_offset; - f->datalink_hdr = f->net_hdr - headroom; - return f; -} - -/* Allocates a frame with the maximum MAC header size + device's overhead-parameter since this is - * the lowest level of the frame allocation chain */ -static struct pico_frame * -pico_802154_frame_alloc(struct pico_device *dev, uint16_t size) -{ - struct pico_frame *f = pico_frame_alloc_with_headroom(size, SIZE_802154_MHR_MAX, (uint16_t)dev->overhead); - if (!f) - return NULL; - - f->dev = dev; - return f; -} - -const struct pico_6lowpan_ll_protocol pico_6lowpan_ll_802154 = { - .process_in = pico_802154_process_in, - .process_out = pico_802154_process_out, - .estimate = pico_802154_estimator, - .addr_from_buf = addr_802154_from_buf, - .addr_from_net = addr_802154_from_net, - .addr_len = addr_802154_len, - .addr_cmp = addr_802154_cmp, - .addr_iid = addr_802154_iid, - .alloc = pico_802154_frame_alloc, -}; - -#endif /* PICO_SUPPORT_802154 */ diff --git a/kernel/picotcp/modules/pico_802154.h b/kernel/picotcp/modules/pico_802154.h deleted file mode 100644 index c2bcd7f..0000000 --- a/kernel/picotcp/modules/pico_802154.h +++ /dev/null @@ -1,40 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights - reserved. See LICENSE and COPYING for usage. 
- - Authors: Jelle De Vleeschouwer - *********************************************************************/ -#ifndef INCLUDE_PICO_802154 -#define INCLUDE_PICO_802154 - -#include "pico_device.h" -#include "pico_config.h" -#include "pico_6lowpan_ll.h" - -/******************************************************************************* - * Size definitions - ******************************************************************************/ - -#define MTU_802154_PHY (128u) -#define MTU_802154_MAC (125u) // 127 - Frame Check Sequence - -#define SIZE_802154_MHR_MIN (5u) -#define SIZE_802154_MHR_MAX (23u) -#define SIZE_802154_FCS (2u) -#define SIZE_802154_LEN (1u) -#define SIZE_802154_PAN (2u) - -/******************************************************************************* - * Structure definitions - ******************************************************************************/ - -PACKED_STRUCT_DEF pico_802154_hdr -{ - uint16_t fcf; - uint8_t seq; - uint16_t pan_id; -}; - -extern const struct pico_6lowpan_ll_protocol pico_6lowpan_ll_802154; - -#endif /* INCLUDE_PICO_802154 */ diff --git a/kernel/picotcp/modules/pico_aodv.c b/kernel/picotcp/modules/pico_aodv.c deleted file mode 100644 index b9726c7..0000000 --- a/kernel/picotcp/modules/pico_aodv.c +++ /dev/null @@ -1,696 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2015-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Author: Daniele Lacamera - *********************************************************************/ -#include -#include -#include -#include -#include - -#include -#ifdef PICO_SUPPORT_IPV4 - -#ifdef DEBUG_AODV - #define pico_aodv_dbg dbg -#else - #define pico_aodv_dbg(...) 
do {} while(0) -#endif - -#define AODV_MAX_PKT (64) -static const struct pico_ip4 HOST_NETMASK = { - 0xffffffff -}; -static struct pico_ip4 all_bcast = { - .addr = 0xFFFFFFFFu -}; - -static const struct pico_ip4 ANY_HOST = { - 0x0 -}; - -static uint32_t pico_aodv_local_id = 0; -static int aodv_node_compare(void *ka, void *kb) -{ - struct pico_aodv_node *a = ka, *b = kb; - if (a->dest.ip4.addr < b->dest.ip4.addr) - return -1; - - if (b->dest.ip4.addr < a->dest.ip4.addr) - return 1; - - return 0; -} - -static int aodv_dev_cmp(void *ka, void *kb) -{ - struct pico_device *a = ka, *b = kb; - if (a->hash < b->hash) - return -1; - - if (a->hash > b->hash) - return 1; - - return 0; -} - -static PICO_TREE_DECLARE(aodv_nodes, aodv_node_compare); -static PICO_TREE_DECLARE(aodv_devices, aodv_dev_cmp); - -static struct pico_socket *aodv_socket = NULL; - -static struct pico_aodv_node *get_node_by_addr(const union pico_address *addr) -{ - struct pico_aodv_node search; - memcpy(&search.dest, addr, sizeof(union pico_address)); - return pico_tree_findKey(&aodv_nodes, &search); - -} - -static void pico_aodv_set_dev(struct pico_device *dev) -{ - pico_ipv4_route_set_bcast_link(pico_ipv4_link_by_dev(dev)); -} - - -static int aodv_peer_refresh(struct pico_aodv_node *node, uint32_t seq) -{ - if ((0 == (node->flags & PICO_AODV_NODE_SYNC)) || (pico_seq_compare(seq, node->dseq) > 0)) { - node->dseq = seq; - node->flags |= PICO_AODV_NODE_SYNC; - node->last_seen = PICO_TIME_MS(); - return 0; - } - - return -1; -} - -static void aodv_elect_route(struct pico_aodv_node *node, union pico_address *gw, uint8_t metric, struct pico_device *dev) -{ - metric++; - if (!(PICO_AODV_ACTIVE(node)) || metric < node->metric) { - pico_ipv4_route_del(node->dest.ip4, HOST_NETMASK, node->metric); - if (!gw) { - pico_ipv4_route_add(node->dest.ip4, HOST_NETMASK, ANY_HOST, 1, pico_ipv4_link_by_dev(dev)); - node->metric = 1; - } else { - node->metric = metric; - pico_ipv4_route_add(node->dest.ip4, HOST_NETMASK, 
gw->ip4, metric, NULL); - } - } -} - -static struct pico_aodv_node *aodv_peer_new(const union pico_address *addr) -{ - struct pico_aodv_node *node = PICO_ZALLOC(sizeof(struct pico_aodv_node)); - if (!node) - return NULL; - - memcpy(&node->dest, addr, sizeof(union pico_address)); - - if (pico_tree_insert(&aodv_nodes, node)) { - PICO_FREE(node); - return NULL; - } - - return node; -} - - -static struct pico_aodv_node *aodv_peer_eval(union pico_address *addr, uint32_t seq, int valid_seq) -{ - struct pico_aodv_node *node = NULL; - node = get_node_by_addr(addr); - if (!node) { - node = aodv_peer_new(addr); - } - - if (!valid_seq) - return node; - - if (node && (aodv_peer_refresh(node, long_be(seq)) == 0)) - return node; - - return NULL; -} - -static void aodv_forward(void *pkt, struct pico_msginfo *info, int reply) -{ - struct pico_aodv_node *orig; - union pico_address orig_addr; - struct pico_tree_node *index; - struct pico_device *dev; - pico_time now; - int size; - - pico_aodv_dbg("Forwarding %s packet\n", reply ? "REPLY" : "REQUEST"); - - if (reply) { - struct pico_aodv_rrep *rep = (struct pico_aodv_rrep *)pkt; - orig_addr.ip4.addr = rep->dest; - rep->hop_count++; - pico_aodv_dbg("RREP hop count: %d\n", rep->hop_count); - size = sizeof(struct pico_aodv_rrep); - } else { - struct pico_aodv_rreq *req = (struct pico_aodv_rreq *)pkt; - orig_addr.ip4.addr = req->orig; - req->hop_count++; - size = sizeof(struct pico_aodv_rreq); - } - - orig = get_node_by_addr(&orig_addr); - if (!orig) - orig = aodv_peer_new(&orig_addr); - - if (!orig) - return; - - now = PICO_TIME_MS(); - - pico_aodv_dbg("Forwarding %s: last fwd_time: %lu now: %lu ttl: %d ==== \n", reply ? 
"REPLY" : "REQUEST", orig->fwd_time, now, info->ttl); - if (((orig->fwd_time == 0) || ((now - orig->fwd_time) > AODV_NODE_TRAVERSAL_TIME)) && (--info->ttl > 0)) { - orig->fwd_time = now; - info->dev = NULL; - pico_tree_foreach(index, &aodv_devices){ - dev = index->keyValue; - pico_aodv_set_dev(dev); - pico_socket_sendto_extended(aodv_socket, pkt, size, &all_bcast, short_be(PICO_AODV_PORT), info); - pico_aodv_dbg("Forwarding %s: complete! ==== \n", reply ? "REPLY" : "REQUEST"); - } - } -} - -static uint32_t aodv_lifetime(struct pico_aodv_node *node) -{ - uint32_t lifetime; - pico_time now = PICO_TIME_MS(); - if (!node->last_seen) - node->last_seen = now; - - if ((now - node->last_seen) > AODV_ACTIVE_ROUTE_TIMEOUT) - return 0; - - lifetime = AODV_ACTIVE_ROUTE_TIMEOUT - (uint32_t)(now - node->last_seen); - return lifetime; -} - -static void aodv_send_reply(struct pico_aodv_node *node, struct pico_aodv_rreq *req, int node_is_local, struct pico_msginfo *info) -{ - struct pico_aodv_rrep reply; - union pico_address dest; - union pico_address oaddr; - struct pico_aodv_node *orig; - oaddr.ip4.addr = req->orig; - orig = get_node_by_addr(&oaddr); - reply.type = AODV_TYPE_RREP; - reply.dest = req->dest; - reply.dseq = req->dseq; - reply.orig = req->orig; - if (!orig) - return; - - reply.hop_count = (uint8_t)(orig->metric - 1u); - - - dest.ip4.addr = 0xFFFFFFFF; /* wide broadcast */ - - if (short_be(req->req_flags) & AODV_RREQ_FLAG_G) { - dest.ip4.addr = req->orig; - } else { - pico_aodv_set_dev(info->dev); - } - - if (node_is_local) { - reply.lifetime = long_be(AODV_MY_ROUTE_TIMEOUT); - reply.dseq = long_be(++pico_aodv_local_id); - pico_socket_sendto(aodv_socket, &reply, sizeof(reply), &dest, short_be(PICO_AODV_PORT)); - } else if (((short_be(req->req_flags) & AODV_RREQ_FLAG_D) == 0) && (node->flags & PICO_AODV_NODE_SYNC)) { - reply.lifetime = long_be(aodv_lifetime(node)); - reply.dseq = long_be(node->dseq); - pico_aodv_dbg("Generating RREP for node %x, id=%x\n", reply.dest, 
reply.dseq); - pico_socket_sendto(aodv_socket, &reply, sizeof(reply), &dest, short_be(PICO_AODV_PORT)); - } - - pico_aodv_dbg("no rrep generated.\n"); -} - -/* Parser functions */ - -static int aodv_send_req(struct pico_aodv_node *node); - -static void aodv_reverse_path_discover(pico_time now, void *arg) -{ - struct pico_aodv_node *origin = (struct pico_aodv_node *)arg; - (void)now; - pico_aodv_dbg("Sending G RREQ to ORIGIN (metric = %d).\n", origin->metric); - origin->ring_ttl = origin->metric; - aodv_send_req(origin); -} - -static void aodv_recv_valid_rreq(struct pico_aodv_node *node, struct pico_aodv_rreq *req, struct pico_msginfo *info) -{ - struct pico_device *dev; - dev = pico_ipv4_link_find(&node->dest.ip4); - pico_aodv_dbg("Valid req.\n"); - if (dev || PICO_AODV_ACTIVE(node)) { - /* if destination is ourselves, or we have a possible route: Send reply. */ - aodv_send_reply(node, req, dev != NULL, info); - if (dev) { - /* if really for us, we need to build the return route. Initiate a gratuitous request. */ - union pico_address origin_addr; - struct pico_aodv_node *origin; - origin_addr.ip4.addr = req->orig; - origin = get_node_by_addr(&origin_addr); - if (origin) { - origin->flags |= PICO_AODV_NODE_ROUTE_DOWN; - if (!pico_timer_add(AODV_PATH_DISCOVERY_TIME, aodv_reverse_path_discover, origin)) { - pico_aodv_dbg("AODV: Failed to start path discovery timer\n"); - } - } - } - - pico_aodv_dbg("Replied.\n"); - } else { - /* destination unknown. Evaluate forwarding. 
*/ - pico_aodv_dbg(" == Forwarding == .\n"); - aodv_forward(req, info, 0); - } -} - - -static void aodv_parse_rreq(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) -{ - struct pico_aodv_rreq *req = (struct pico_aodv_rreq *) buf; - struct pico_aodv_node *node = NULL; - struct pico_device *dev; - union pico_address orig, dest; - (void)from; - if (len != (int)sizeof(struct pico_aodv_rreq)) - return; - - orig.ip4.addr = req->orig; - dev = pico_ipv4_link_find(&orig.ip4); - if (dev) { - pico_aodv_dbg("RREQ <-- myself\n"); - return; - } - - node = aodv_peer_eval(&orig, req->oseq, 1); - if (!node) { - pico_aodv_dbg("RREQ: Neighbor is not valid. oseq=%d\n", long_be(req->oseq)); - return; - } - - if (req->hop_count > 0) - aodv_elect_route(node, from, req->hop_count, msginfo->dev); - else - aodv_elect_route(node, NULL, 0, msginfo->dev); - - dest.ip4.addr = req->dest; - node = aodv_peer_eval(&dest, req->dseq, !(req->req_flags & short_be(AODV_RREQ_FLAG_U))); - if (!node) { - node = aodv_peer_new(&dest); - pico_aodv_dbg("RREQ: New peer! %08x\n", dest.ip4.addr); - } - - if (!node) - return; - - aodv_recv_valid_rreq(node, req, msginfo); -} - -static void aodv_parse_rrep(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) -{ - struct pico_aodv_rrep *rep = (struct pico_aodv_rrep *) buf; - struct pico_aodv_node *node = NULL; - union pico_address dest; - union pico_address orig; - struct pico_device *dev = NULL; - if (len != (int)sizeof(struct pico_aodv_rrep)) - return; - - dest.ip4.addr = rep->dest; - orig.ip4.addr = rep->orig; - dev = pico_ipv4_link_find(&dest.ip4); - - if (dev) /* Our reply packet got rebounced, no useful information here, no need to fwd. */ - return; - - pico_aodv_dbg("::::::::::::: Parsing RREP for node %08x\n", rep->dest); - node = aodv_peer_eval(&dest, rep->dseq, 1); - if (node) { - pico_aodv_dbg("::::::::::::: Node found. 
Electing route and forwarding.\n"); - dest.ip4.addr = node->dest.ip4.addr; - if (rep->hop_count > 0) - aodv_elect_route(node, from, rep->hop_count, msginfo->dev); - else - aodv_elect_route(node, NULL, 0, msginfo->dev); - - /* If we are the final destination for the reply (orig), no need to forward. */ - if (pico_ipv4_link_find(&orig.ip4)) { - node->flags |= PICO_AODV_NODE_ROUTE_UP; - } else { - aodv_forward(rep, msginfo, 1); - } - } -} - -static void aodv_parse_rerr(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) -{ - if ((uint32_t)len < sizeof(struct pico_aodv_rerr) || - (((uint32_t)len - sizeof(struct pico_aodv_rerr)) % sizeof(struct pico_aodv_unreachable)) > 0) - return; - - (void)from; - (void)buf; - (void)len; - (void)msginfo; - /* TODO: invalidate routes. This only makes sense if we are using HELLO messages. */ -} - -static void aodv_parse_rack(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) -{ - if (len != (int)sizeof(struct pico_aodv_rack)) - return; - - (void)from; - (void)buf; - (void)len; - (void)msginfo; -} - -struct aodv_parser_s { - void (*call)(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo); -}; - -static struct aodv_parser_s aodv_parser[5] = { - {.call = NULL}, - {.call = aodv_parse_rreq }, - {.call = aodv_parse_rrep }, - {.call = aodv_parse_rerr }, - {.call = aodv_parse_rack } -}; - - -static void pico_aodv_parse(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) -{ - struct pico_aodv_node *node; - uint8_t hopcount = 0; - if ((buf[0] < 1) || (buf[0] > 4)) { - /* Type is invalid. Discard silently. 
*/ - return; - } - - if (buf[0] == AODV_TYPE_RREQ) { - hopcount = ((struct pico_aodv_rreq *)buf)->hop_count; - } - - if (buf[0] == AODV_TYPE_RREP) { - hopcount = ((struct pico_aodv_rrep *)buf)->hop_count; - } - - node = aodv_peer_eval(from, 0, 0); - if (!node) - node = aodv_peer_new(from); - - if (node && (hopcount == 0)) { - aodv_elect_route(node, NULL, hopcount, msginfo->dev); - } - - pico_aodv_dbg("Received AODV packet, ttl = %d\n", msginfo->ttl); - aodv_parser[buf[0]].call(from, buf, len, msginfo); -} - -static void pico_aodv_socket_callback(uint16_t ev, struct pico_socket *s) -{ - static uint8_t aodv_pkt[AODV_MAX_PKT]; - static union pico_address from; - static struct pico_msginfo msginfo; - uint16_t sport; - int r; - if (s != aodv_socket) - return; - - if (ev & PICO_SOCK_EV_RD) { - r = pico_socket_recvfrom_extended(s, aodv_pkt, AODV_MAX_PKT, &from, &sport, &msginfo); - if (r <= 0) - return; - - pico_aodv_dbg("Received AODV packet: %d bytes \n", r); - - pico_aodv_parse(&from, aodv_pkt, r, &msginfo); - } -} - -static void aodv_make_rreq(struct pico_aodv_node *node, struct pico_aodv_rreq *req) -{ - memset(req, 0, sizeof(struct pico_aodv_rreq)); - req->type = AODV_TYPE_RREQ; - - if (0 == (node->flags & PICO_AODV_NODE_SYNC)) { - req->req_flags |= short_be(AODV_RREQ_FLAG_U); /* no known dseq, mark as U */ - req->dseq = 0; /* Unknown */ - } else { - req->dseq = long_be(node->dseq); - req->req_flags |= short_be(AODV_RREQ_FLAG_G); /* RFC3561 $6.3: we SHOULD set G flag as originators */ - } - - /* Hop count = 0; */ - req->rreq_id = long_be(++pico_aodv_local_id); - req->dest = node->dest.ip4.addr; - req->oseq = long_be(pico_aodv_local_id); -} - -static void aodv_retrans_rreq(pico_time now, void *arg) -{ - struct pico_aodv_node *node = (struct pico_aodv_node *)arg; - struct pico_device *dev; - struct pico_tree_node *index; - static struct pico_aodv_rreq rreq; - struct pico_ipv4_link *ip4l = NULL; - struct pico_msginfo info = { - .dev = NULL, .tos = 0, .ttl = 
AODV_TTL_START - }; - (void)now; - - memset(&rreq, 0, sizeof(rreq)); - - if (node->flags & PICO_AODV_NODE_ROUTE_UP) { - pico_aodv_dbg("------------------------------------------------------ Node %08x already active.\n", node->dest.ip4.addr); - return; - } - - if (node->ring_ttl > AODV_TTL_THRESHOLD) { - node->ring_ttl = AODV_NET_DIAMETER; - pico_aodv_dbg("----------- DIAMETER reached.\n"); - } - - - if (node->rreq_retry > AODV_RREQ_RETRIES) { - node->rreq_retry = 0; - node->ring_ttl = 0; - pico_aodv_dbg("Node is unreachable.\n"); - node->flags &= (uint16_t)(~PICO_AODV_NODE_ROUTE_DOWN); - return; - } - - if (node->ring_ttl == AODV_NET_DIAMETER) { - node->rreq_retry++; - pico_aodv_dbg("Retry #%d\n", node->rreq_retry); - } - - aodv_make_rreq(node, &rreq); - info.ttl = (uint8_t)node->ring_ttl; - pico_tree_foreach(index, &aodv_devices){ - dev = index->keyValue; - pico_aodv_set_dev(dev); - ip4l = pico_ipv4_link_by_dev(dev); - if (ip4l) { - rreq.orig = ip4l->address.addr; - pico_socket_sendto_extended(aodv_socket, &rreq, sizeof(rreq), &all_bcast, short_be(PICO_AODV_PORT), &info); - } - } - if (node->ring_ttl < AODV_NET_DIAMETER) - node->ring_ttl = (uint8_t)(node->ring_ttl + AODV_TTL_INCREMENT); - - if (!pico_timer_add((pico_time)AODV_RING_TRAVERSAL_TIME(node->ring_ttl), aodv_retrans_rreq, node)) { - pico_aodv_dbg("AODV: Failed to start retransmission timer\n"); - } -} - -static int aodv_send_req(struct pico_aodv_node *node) -{ - struct pico_device *dev; - struct pico_tree_node *index; - static struct pico_aodv_rreq rreq; - int n = 0; - struct pico_ipv4_link *ip4l = NULL; - struct pico_msginfo info = { - .dev = NULL, .tos = 0, .ttl = AODV_TTL_START - }; - memset(&rreq, 0, sizeof(rreq)); - - if (PICO_AODV_ACTIVE(node)) - return 0; - - node->flags |= PICO_AODV_NODE_REQUESTING; - - if (pico_tree_empty(&aodv_devices)) - return n; - - if (!aodv_socket) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (node->flags & PICO_AODV_NODE_ROUTE_DOWN) { - info.ttl = node->metric; 
- } - - aodv_make_rreq(node, &rreq); - pico_tree_foreach(index, &aodv_devices) { - dev = index->keyValue; - pico_aodv_set_dev(dev); - ip4l = pico_ipv4_link_by_dev(dev); - if (ip4l) { - rreq.orig = ip4l->address.addr; - pico_socket_sendto_extended(aodv_socket, &rreq, sizeof(rreq), &all_bcast, short_be(PICO_AODV_PORT), &info); - n++; - } - } - if (!pico_timer_add((pico_time)AODV_RING_TRAVERSAL_TIME(1), aodv_retrans_rreq, node)) { - pico_aodv_dbg("AODV: Failed to start retransmission timer\n"); - return -1; - } - return n; -} - -static void pico_aodv_expired(struct pico_aodv_node *node) -{ - node->flags |= PICO_AODV_NODE_UNREACH; - node->flags &= (uint8_t)(~PICO_AODV_NODE_ROUTE_UP); - node->flags &= (uint8_t)(~PICO_AODV_NODE_ROUTE_DOWN); - pico_ipv4_route_del(node->dest.ip4, HOST_NETMASK, node->metric); - node->ring_ttl = 0; - /* TODO: send err */ - -} - -static void pico_aodv_collector(pico_time now, void *arg) -{ - struct pico_tree_node *index; - struct pico_aodv_node *node; - (void)arg; - (void)now; - pico_tree_foreach(index, &aodv_nodes){ - node = index->keyValue; - if (PICO_AODV_ACTIVE(node)) { - uint32_t lifetime = aodv_lifetime(node); - if (lifetime == 0) - pico_aodv_expired(node); - } - } - if (!pico_timer_add(AODV_HELLO_INTERVAL, pico_aodv_collector, NULL)) { - pico_aodv_dbg("AODV: Failed to start collector timer\n"); - /* TODO what to do now? 
garbage collection will not be restarted, leading to memory leaks */ - } -} - -MOCKABLE int pico_aodv_init(void) -{ - struct pico_ip4 any = { - 0 - }; - uint16_t port = short_be(PICO_AODV_PORT); - if (aodv_socket) { - pico_err = PICO_ERR_EADDRINUSE; - return -1; - } - - aodv_socket = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, pico_aodv_socket_callback); - if (!aodv_socket) - return -1; - - if (pico_socket_bind(aodv_socket, &any, &port) != 0) { - uint16_t err = pico_err; - pico_socket_close(aodv_socket); - pico_err = err; - aodv_socket = NULL; - return -1; - } - - pico_aodv_local_id = pico_rand(); - if (!pico_timer_add(AODV_HELLO_INTERVAL, pico_aodv_collector, NULL)) { - pico_aodv_dbg("AODV: Failed to start collector timer\n"); - pico_socket_close(aodv_socket); - aodv_socket = NULL; - return -1; - } - return 0; -} - - -int pico_aodv_add(struct pico_device *dev) -{ - return (pico_tree_insert(&aodv_devices, dev)) ? (0) : (-1); -} - -void pico_aodv_refresh(const union pico_address *addr) -{ - struct pico_aodv_node *node = get_node_by_addr(addr); - if (node) { - node->last_seen = PICO_TIME_MS(); - } -} - -int pico_aodv_lookup(const union pico_address *addr) -{ - struct pico_aodv_node *node = get_node_by_addr(addr); - if (!node) - node = aodv_peer_new(addr); - - if (!node) - return -1; - - if ((node->flags & PICO_AODV_NODE_ROUTE_UP) || (node->flags & PICO_AODV_NODE_ROUTE_DOWN)) - return 0; - - if (node->ring_ttl < AODV_TTL_START) { - node->ring_ttl = AODV_TTL_START; - aodv_send_req(node); - return 0; - } - - pico_err = PICO_ERR_EINVAL; - return -1; -} - -#else - -int pico_aodv_init(void) -{ - return -1; -} - -int pico_aodv_add(struct pico_device *dev) -{ - (void)dev; - return -1; -} - -int pico_aodv_lookup(const union pico_address *addr) -{ - (void)addr; - return -1; -} - -void pico_aodv_refresh(const union pico_address *addr) -{ - (void)addr; -} - -#endif diff --git a/kernel/picotcp/modules/pico_aodv.h b/kernel/picotcp/modules/pico_aodv.h deleted file mode 100644 
index 2fa5cc9..0000000 --- a/kernel/picotcp/modules/pico_aodv.h +++ /dev/null @@ -1,130 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2015-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Author: Daniele Lacamera - *********************************************************************/ -#ifndef PICO_AODV_H_ -#define PICO_AODV_H_ - -/* RFC3561 */ -#define PICO_AODV_PORT (654) - -/* RFC3561 $10 */ -#define AODV_ACTIVE_ROUTE_TIMEOUT (8000u) /* Conservative value for link breakage detection */ -#define AODV_DELETE_PERIOD (5 * AODV_ACTIVE_ROUTE_TIMEOUT) /* Recommended value K = 5 */ -#define AODV_ALLOWED_HELLO_LOSS (4) /* conservative */ -#define AODV_NET_DIAMETER ((uint8_t)(35)) -#define AODV_RREQ_RETRIES (2) -#define AODV_NODE_TRAVERSAL_TIME (40) -#define AODV_HELLO_INTERVAL (1000) -#define AODV_LOCAL_ADD_TTL 2 -#define AODV_RREQ_RATELIMIT (10) -#define AODV_TIMEOUT_BUFFER (2) -#define AODV_TTL_START ((uint8_t)(1)) -#define AODV_TTL_INCREMENT 2 -#define AODV_TTL_THRESHOLD ((uint8_t)(7)) -#define AODV_RERR_RATELIMIT (10) -#define AODV_MAX_REPAIR_TTL ((uint8_t)(AODV_NET_DIAMETER / 3)) -#define AODV_MY_ROUTE_TIMEOUT (2 * AODV_ACTIVE_ROUTE_TIMEOUT) -#define AODV_NET_TRAVERSAL_TIME (2 * AODV_NODE_TRAVERSAL_TIME * AODV_NET_DIAMETER) -#define AODV_BLACKLIST_TIMEOUT (AODV_RREQ_RETRIES * AODV_NET_TRAVERSAL_TIME) -#define AODV_NEXT_HOP_WAIT (AODV_NODE_TRAVERSAL_TIME + 10) -#define AODV_PATH_DISCOVERY_TIME (2 * AODV_NET_TRAVERSAL_TIME) -#define AODV_RING_TRAVERSAL_TIME(ttl) (2 * AODV_NODE_TRAVERSAL_TIME * (ttl + AODV_TIMEOUT_BUFFER)) -/* End section RFC3561 $10 */ - - -#define AODV_TYPE_RREQ 1 -#define AODV_TYPE_RREP 2 -#define AODV_TYPE_RERR 3 -#define AODV_TYPE_RACK 4 - -PACKED_STRUCT_DEF pico_aodv_rreq -{ - uint8_t type; - uint16_t req_flags; - uint8_t hop_count; - uint32_t rreq_id; - uint32_t dest; - uint32_t dseq; - uint32_t orig; - uint32_t 
oseq; -}; - -#define AODV_RREQ_FLAG_J 0x8000 -#define AODV_RREQ_FLAG_R 0x4000 -#define AODV_RREQ_FLAG_G 0x2000 -#define AODV_RREQ_FLAG_D 0x1000 -#define AODV_RREQ_FLAG_U 0x0800 -#define AODV_RREQ_FLAG_RESERVED 0x07FF - -PACKED_STRUCT_DEF pico_aodv_rrep -{ - uint8_t type; - uint8_t rep_flags; - uint8_t prefix_sz; - uint8_t hop_count; - uint32_t dest; - uint32_t dseq; - uint32_t orig; - uint32_t lifetime; -}; - -#define AODV_RREP_MAX_PREFIX 0x1F -#define AODV_RREP_FLAG_R 0x80 -#define AODV_RREP_FLAG_A 0x40 -#define AODV_RREP_FLAG_RESERVED 0x3F - -#define PICO_AODV_NODE_NEW 0x0000 -#define PICO_AODV_NODE_SYNC 0x0001 -#define PICO_AODV_NODE_REQUESTING 0x0002 -#define PICO_AODV_NODE_ROUTE_UP 0x0004 -#define PICO_AODV_NODE_ROUTE_DOWN 0x0008 -#define PICO_AODV_NODE_IDLING 0x0010 -#define PICO_AODV_NODE_UNREACH 0x0020 - -#define PICO_AODV_ACTIVE(node) ((node->flags & PICO_AODV_NODE_ROUTE_UP) && (node->flags & PICO_AODV_NODE_ROUTE_DOWN)) - - -struct pico_aodv_node -{ - union pico_address dest; - pico_time last_seen; - pico_time fwd_time; - uint32_t dseq; - uint16_t flags; - uint8_t metric; - uint8_t ring_ttl; - uint8_t rreq_retry; -}; - -PACKED_STRUCT_DEF pico_aodv_unreachable -{ - uint32_t addr; - uint32_t dseq; -}; - -PACKED_STRUCT_DEF pico_aodv_rerr -{ - uint8_t type; - uint16_t rerr_flags; - uint8_t dst_count; - uint32_t unreach_addr; - uint32_t unreach_dseq; - struct pico_aodv_unreachable unreach[1]; /* unrechable nodes: must be at least 1. 
See dst_count field above */ -}; - -PACKED_STRUCT_DEF pico_aodv_rack -{ - uint8_t type; - uint8_t reserved; -}; - -int pico_aodv_init(void); -int pico_aodv_add(struct pico_device *dev); -int pico_aodv_lookup(const union pico_address *addr); -void pico_aodv_refresh(const union pico_address *addr); -#endif diff --git a/kernel/picotcp/modules/pico_arp.c b/kernel/picotcp/modules/pico_arp.c deleted file mode 100644 index 8b4b68f..0000000 --- a/kernel/picotcp/modules/pico_arp.c +++ /dev/null @@ -1,566 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Daniele Lacamera - *********************************************************************/ - -#include "pico_config.h" -#include "pico_arp.h" -#include "pico_tree.h" -#include "pico_ipv4.h" -#include "pico_device.h" -#include "pico_stack.h" -#include "pico_ethernet.h" - -extern const uint8_t PICO_ETHADDR_ALL[6]; -#define PICO_ARP_TIMEOUT 600000llu -#define PICO_ARP_RETRY 300lu -#define PICO_ARP_MAX_PENDING 5 - -#ifdef DEBUG_ARP - #define arp_dbg dbg -#else - #define arp_dbg(...) 
do {} while(0) -#endif - -static int max_arp_reqs = PICO_ARP_MAX_RATE; -static struct pico_frame *frames_queued[PICO_ARP_MAX_PENDING] = { - 0 -}; - -static void pico_arp_queued_trigger(void) -{ - int i; - struct pico_frame *f; - for (i = 0; i < PICO_ARP_MAX_PENDING; i++) - { - f = frames_queued[i]; - if (f) { - if (pico_datalink_send(f) <= 0) - pico_frame_discard(f); - frames_queued[i] = NULL; - } - } -} - -static void update_max_arp_reqs(pico_time now, void *unused) -{ - IGNORE_PARAMETER(now); - IGNORE_PARAMETER(unused); - if (max_arp_reqs < PICO_ARP_MAX_RATE) - max_arp_reqs++; - - if (!pico_timer_add(PICO_ARP_INTERVAL / PICO_ARP_MAX_RATE, &update_max_arp_reqs, NULL)) { - arp_dbg("ARP: Failed to start update_max_arps timer\n"); - /* TODO if this fails all incoming arps will be discarded once max_arp_reqs recahes 0 */ - } -} - -void pico_arp_init(void) -{ - if (!pico_timer_add(PICO_ARP_INTERVAL / PICO_ARP_MAX_RATE, &update_max_arp_reqs, NULL)) { - arp_dbg("ARP: Failed to start update_max_arps timer\n"); - } -} - -PACKED_STRUCT_DEF pico_arp_hdr -{ - uint16_t htype; - uint16_t ptype; - uint8_t hsize; - uint8_t psize; - uint16_t opcode; - uint8_t s_mac[PICO_SIZE_ETH]; - struct pico_ip4 src; - uint8_t d_mac[PICO_SIZE_ETH]; - struct pico_ip4 dst; -}; - - - -/* Callback handler for ip conflict service (e.g. IPv4 SLAAC) - * Whenever the IP address registered here is seen in the network, - * the callback is awaken to take countermeasures against IP collisions. - * - */ - -struct arp_service_ipconflict { - struct pico_eth mac; - struct pico_ip4 ip; - void (*conflict)(int); -}; - -static struct arp_service_ipconflict conflict_ipv4; - - - -#define PICO_SIZE_ARPHDR ((sizeof(struct pico_arp_hdr))) - -/* Arp Entries for the tables. */ -struct pico_arp { -/* CAREFUL MAN! ARP entry MUST begin with a pico_eth structure, - * due to in-place casting!!! 
*/ - struct pico_eth eth; - struct pico_ip4 ipv4; - int arp_status; - pico_time timestamp; - struct pico_device *dev; - uint32_t timer; -}; - - - -/*****************/ -/** ARP TREE **/ -/*****************/ - -/* Routing destination */ - -static int arp_compare(void *ka, void *kb) -{ - struct pico_arp *a = ka, *b = kb; - return pico_ipv4_compare(&a->ipv4, &b->ipv4); -} - -static PICO_TREE_DECLARE(arp_tree, arp_compare); - -/*********************/ -/** END ARP TREE **/ -/*********************/ - -struct pico_eth *pico_arp_lookup(struct pico_ip4 *dst) -{ - struct pico_arp search, *found; - search.ipv4.addr = dst->addr; - found = pico_tree_findKey(&arp_tree, &search); - if (found && (found->arp_status != PICO_ARP_STATUS_STALE)) - return &found->eth; - - return NULL; -} - -struct pico_ip4 *pico_arp_reverse_lookup(struct pico_eth *dst) -{ - struct pico_arp*search; - struct pico_tree_node *index; - pico_tree_foreach(index, &arp_tree){ - search = index->keyValue; - if(memcmp(&(search->eth.addr), &dst->addr, 6) == 0) - return &search->ipv4; - } - return NULL; -} - -static void pico_arp_unreachable(struct pico_ip4 *a) -{ - int i; - struct pico_frame *f; - struct pico_ipv4_hdr *hdr; - struct pico_ip4 dst; - for (i = 0; i < PICO_ARP_MAX_PENDING; i++) - { - f = frames_queued[i]; - if (f) { - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - dst = pico_ipv4_route_get_gateway(&hdr->dst); - if (!dst.addr) - dst.addr = hdr->dst.addr; - - if (dst.addr == a->addr) { - if (!pico_source_is_local(f)) { - pico_notify_dest_unreachable(f); - } - - } - } - } -} - -static void pico_arp_retry(struct pico_frame *f, struct pico_ip4 *where) -{ - if (++f->failure_count < 4) { - arp_dbg ("================= ARP REQUIRED: %d =============\n\n", f->failure_count); - /* check if dst is local (gateway = 0), or if to use gateway */ - pico_arp_request(f->dev, where, PICO_ARP_QUERY); - } else { - pico_arp_unreachable(where); - } -} - -struct pico_eth *pico_arp_get(struct pico_frame *f) -{ - struct pico_eth 
*a4; - struct pico_ip4 gateway; - struct pico_ip4 *where; - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - struct pico_ipv4_link *l; - if (!hdr) - return NULL; - - l = pico_ipv4_link_get(&hdr->dst); - if(l) { - /* address belongs to ourself */ - return &l->dev->eth->mac; - } - - gateway = pico_ipv4_route_get_gateway(&hdr->dst); - /* check if dst is local (gateway = 0), or if to use gateway */ - if (gateway.addr != 0) - where = &gateway; - else - where = &hdr->dst; - - a4 = pico_arp_lookup(where); /* check if dst ip mac in cache */ - - if (!a4) - pico_arp_retry(f, where); - - return a4; -} - - -void pico_arp_postpone(struct pico_frame *f) -{ - int i; - for (i = 0; i < PICO_ARP_MAX_PENDING; i++) - { - if (!frames_queued[i]) { - if (f->failure_count < 4) - frames_queued[i] = f; - - return; - } - } - /* Not possible to enqueue: caller will discard packet */ -} - - -#ifdef DEBUG_ARP -static void dbg_arp(void) -{ - struct pico_arp *a; - struct pico_tree_node *index; - - pico_tree_foreach(index, &arp_tree) { - a = index->keyValue; - arp_dbg("ARP to %08x, mac: %02x:%02x:%02x:%02x:%02x:%02x\n", a->ipv4.addr, a->eth.addr[0], a->eth.addr[1], a->eth.addr[2], a->eth.addr[3], a->eth.addr[4], a->eth.addr[5] ); - } -} -#endif - -static void arp_expire(pico_time now, void *_stale) -{ - struct pico_arp *stale = (struct pico_arp *) _stale; - if (now >= (stale->timestamp + PICO_ARP_TIMEOUT)) { - stale->arp_status = PICO_ARP_STATUS_STALE; - arp_dbg("ARP: Setting arp_status to STALE\n"); - pico_arp_request(stale->dev, &stale->ipv4, PICO_ARP_QUERY); - } else { - /* Timer must be rescheduled, ARP entry has been renewed lately. 
- * No action required to refresh the entry, will check on the next timeout */ - if (!pico_timer_add(PICO_ARP_TIMEOUT + stale->timestamp - now, arp_expire, stale)) { - arp_dbg("ARP: Failed to start expiration timer, destroying arp entry\n"); - pico_tree_delete(&arp_tree, stale); - PICO_FREE(stale); - } - } -} - -static int pico_arp_add_entry(struct pico_arp *entry) -{ - entry->arp_status = PICO_ARP_STATUS_REACHABLE; - entry->timestamp = PICO_TIME(); - - if (pico_tree_insert(&arp_tree, entry)) { - arp_dbg("ARP: Failed to insert new entry in tree\n"); - return -1; - } - - arp_dbg("ARP ## reachable.\n"); - pico_arp_queued_trigger(); - if (!pico_timer_add(PICO_ARP_TIMEOUT, arp_expire, entry)) { - arp_dbg("ARP: Failed to start expiration timer\n"); - pico_tree_delete(&arp_tree, entry); - return -1; - } - - return 0; -} - -int pico_arp_create_entry(uint8_t *hwaddr, struct pico_ip4 ipv4, struct pico_device *dev) -{ - struct pico_arp*arp = PICO_ZALLOC(sizeof(struct pico_arp)); - if(!arp) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - memcpy(arp->eth.addr, hwaddr, 6); - arp->ipv4.addr = ipv4.addr; - arp->dev = dev; - - if (pico_arp_add_entry(arp) < 0) { - PICO_FREE(arp); - return -1; - } - - return 0; -} - -static void pico_arp_check_conflict(struct pico_arp_hdr *hdr) -{ - if (conflict_ipv4.conflict) - { - if((conflict_ipv4.ip.addr == hdr->src.addr) && - (memcmp(hdr->s_mac, conflict_ipv4.mac.addr, PICO_SIZE_ETH) != 0)) - conflict_ipv4.conflict(PICO_ARP_CONFLICT_REASON_CONFLICT ); - - if((hdr->src.addr == 0) && (hdr->dst.addr == conflict_ipv4.ip.addr)) - conflict_ipv4.conflict(PICO_ARP_CONFLICT_REASON_PROBE ); - } -} - -static struct pico_arp *pico_arp_lookup_entry(struct pico_frame *f) -{ - struct pico_arp search; - struct pico_arp *found = NULL; - struct pico_arp_hdr *hdr = (struct pico_arp_hdr *) f->net_hdr; - /* Populate a new arp entry */ - search.ipv4.addr = hdr->src.addr; - - /* Search for already existing entry */ - found = pico_tree_findKey(&arp_tree, 
&search); - if (found) { - if (found->arp_status == PICO_ARP_STATUS_STALE) { - /* Replace if stale */ - pico_tree_delete(&arp_tree, found); - if (pico_arp_add_entry(found) < 0) { - arp_dbg("ARP: Failed to re-instert stale arp entry\n"); - PICO_FREE(found); - found = NULL; - } - } else { - /* Update mac address */ - memcpy(found->eth.addr, hdr->s_mac, PICO_SIZE_ETH); - arp_dbg("ARP entry updated!\n"); - - /* Refresh timestamp, this will force a reschedule on the next timeout*/ - found->timestamp = PICO_TIME(); - } - } - - return found; -} - - -static int pico_arp_check_incoming_hdr_type(struct pico_arp_hdr *h) -{ - /* Check the hardware type and protocol */ - if ((h->htype != PICO_ARP_HTYPE_ETH) || (h->ptype != PICO_IDETH_IPV4)) - return -1; - - return 0; -} - -static int pico_arp_check_incoming_hdr(struct pico_frame *f, struct pico_ip4 *dst_addr) -{ - struct pico_arp_hdr *hdr = (struct pico_arp_hdr *) f->net_hdr; - if (!hdr) - return -1; - - dst_addr->addr = hdr->dst.addr; - if (pico_arp_check_incoming_hdr_type(hdr) < 0) - return -1; - - /* The source mac address must not be a multicast or broadcast address */ - if (hdr->s_mac[0] & 0x01) - return -1; - - return 0; -} - -static void pico_arp_reply_on_request(struct pico_frame *f, struct pico_ip4 me) -{ - struct pico_arp_hdr *hdr; - struct pico_eth_hdr *eh; - - hdr = (struct pico_arp_hdr *) f->net_hdr; - eh = (struct pico_eth_hdr *)f->datalink_hdr; - if (hdr->opcode != PICO_ARP_REQUEST) - return; - - hdr->opcode = PICO_ARP_REPLY; - memcpy(hdr->d_mac, hdr->s_mac, PICO_SIZE_ETH); - memcpy(hdr->s_mac, f->dev->eth->mac.addr, PICO_SIZE_ETH); - hdr->dst.addr = hdr->src.addr; - hdr->src.addr = me.addr; - - /* Prepare eth header for arp reply */ - memcpy(eh->daddr, eh->saddr, PICO_SIZE_ETH); - memcpy(eh->saddr, f->dev->eth->mac.addr, PICO_SIZE_ETH); - f->start = f->datalink_hdr; - f->len = PICO_SIZE_ETHHDR + PICO_SIZE_ARPHDR; - f->dev->send(f->dev, f->start, (int)f->len); -} - -static int pico_arp_check_flooding(struct 
pico_frame *f, struct pico_ip4 me) -{ - struct pico_device *link_dev; - struct pico_arp_hdr *hdr; - hdr = (struct pico_arp_hdr *) f->net_hdr; - - /* Prevent ARP flooding */ - link_dev = pico_ipv4_link_find(&me); - if ((link_dev == f->dev) && (hdr->opcode == PICO_ARP_REQUEST)) { - if (max_arp_reqs == 0) - return -1; - else - max_arp_reqs--; - } - - /* Check if we are the target IP address */ - if (link_dev != f->dev) - return -1; - - return 0; -} - -static int pico_arp_process_in(struct pico_frame *f, struct pico_arp_hdr *hdr, struct pico_arp *found) -{ - struct pico_ip4 me; - if (pico_arp_check_incoming_hdr(f, &me) < 0) { - pico_frame_discard(f); - return -1; - } - - if (pico_arp_check_flooding(f, me) < 0) { - pico_frame_discard(f); - return -1; - } - - /* If no existing entry was found, create a new entry, or fail trying. */ - if ((!found) && (pico_arp_create_entry(hdr->s_mac, hdr->src, f->dev) < 0)) { - pico_frame_discard(f); - return -1; - } - - /* If the packet is a request, send a reply */ - pico_arp_reply_on_request(f, me); - -#ifdef DEBUG_ARP - dbg_arp(); -#endif - pico_frame_discard(f); - return 0; -} - -int pico_arp_receive(struct pico_frame *f) -{ - struct pico_arp_hdr *hdr; - struct pico_arp *found = NULL; - - hdr = (struct pico_arp_hdr *) f->net_hdr; - if (!hdr) { - pico_frame_discard(f); - return -1; - } - - pico_arp_check_conflict(hdr); - found = pico_arp_lookup_entry(f); - return pico_arp_process_in(f, hdr, found); - -} - -static int32_t pico_arp_request_xmit(struct pico_device *dev, struct pico_frame *f, struct pico_ip4 *src, struct pico_ip4 *dst, uint8_t type) -{ - struct pico_arp_hdr *ah = (struct pico_arp_hdr *) (f->start + PICO_SIZE_ETHHDR); - int ret; - - /* Fill arp header */ - ah->htype = PICO_ARP_HTYPE_ETH; - ah->ptype = PICO_IDETH_IPV4; - ah->hsize = PICO_SIZE_ETH; - ah->psize = PICO_SIZE_IP4; - ah->opcode = PICO_ARP_REQUEST; - memcpy(ah->s_mac, dev->eth->mac.addr, PICO_SIZE_ETH); - - switch (type) { - case PICO_ARP_ANNOUNCE: - ah->src.addr 
= dst->addr; - ah->dst.addr = dst->addr; - break; - case PICO_ARP_PROBE: - ah->src.addr = 0; - ah->dst.addr = dst->addr; - break; - case PICO_ARP_QUERY: - ah->src.addr = src->addr; - ah->dst.addr = dst->addr; - break; - default: - pico_frame_discard(f); - return -1; - } - arp_dbg("Sending arp request.\n"); - ret = dev->send(dev, f->start, (int) f->len); - pico_frame_discard(f); - return ret; -} - -int32_t pico_arp_request(struct pico_device *dev, struct pico_ip4 *dst, uint8_t type) -{ - struct pico_frame *q = pico_frame_alloc(PICO_SIZE_ETHHDR + PICO_SIZE_ARPHDR); - struct pico_eth_hdr *eh; - struct pico_ip4 *src = NULL; - - if (!q) - return -1; - - if (type == PICO_ARP_QUERY) - { - src = pico_ipv4_source_find(dst); - if (!src) { - pico_frame_discard(q); - return -1; - } - } - - arp_dbg("QUERY: %08x\n", dst->addr); - - eh = (struct pico_eth_hdr *)q->start; - - /* Fill eth header */ - memcpy(eh->saddr, dev->eth->mac.addr, PICO_SIZE_ETH); - memcpy(eh->daddr, PICO_ETHADDR_ALL, PICO_SIZE_ETH); - eh->proto = PICO_IDETH_ARP; - - return pico_arp_request_xmit(dev, q, src, dst, type); -} - -int pico_arp_get_neighbors(struct pico_device *dev, struct pico_ip4 *neighbors, int maxlen) -{ - struct pico_arp*search; - struct pico_tree_node *index; - int i = 0; - pico_tree_foreach(index, &arp_tree){ - search = index->keyValue; - if (search->dev == dev) { - neighbors[i++].addr = search->ipv4.addr; - if (i >= maxlen) - return i; - } - } - return i; -} - -void pico_arp_register_ipconflict(struct pico_ip4 *ip, struct pico_eth *mac, void (*cb)(int reason)) -{ - conflict_ipv4.conflict = cb; - conflict_ipv4.ip.addr = ip->addr; - if (mac != NULL) - memcpy(conflict_ipv4.mac.addr, mac, 6); -} - diff --git a/kernel/picotcp/modules/pico_arp.h b/kernel/picotcp/modules/pico_arp.h deleted file mode 100644 index e292f4a..0000000 --- a/kernel/picotcp/modules/pico_arp.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************* - PicoTCP. 
Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef INCLUDE_PICO_ARP -#define INCLUDE_PICO_ARP -#include "pico_eth.h" -#include "pico_device.h" - -int pico_arp_receive(struct pico_frame *); - - -struct pico_eth *pico_arp_get(struct pico_frame *f); -int32_t pico_arp_request(struct pico_device *dev, struct pico_ip4 *dst, uint8_t type); - -#define PICO_ARP_STATUS_REACHABLE 0x00 -#define PICO_ARP_STATUS_PERMANENT 0x01 -#define PICO_ARP_STATUS_STALE 0x02 - -#define PICO_ARP_QUERY 0x00 -#define PICO_ARP_PROBE 0x01 -#define PICO_ARP_ANNOUNCE 0x02 - -#define PICO_ARP_CONFLICT_REASON_CONFLICT 0 -#define PICO_ARP_CONFLICT_REASON_PROBE 1 - -struct pico_eth *pico_arp_lookup(struct pico_ip4 *dst); -struct pico_ip4 *pico_arp_reverse_lookup(struct pico_eth *dst); -int pico_arp_create_entry(uint8_t*hwaddr, struct pico_ip4 ipv4, struct pico_device*dev); -int pico_arp_get_neighbors(struct pico_device *dev, struct pico_ip4 *neighbors, int maxlen); -void pico_arp_register_ipconflict(struct pico_ip4 *ip, struct pico_eth *mac, void (*cb)(int reason)); -void pico_arp_postpone(struct pico_frame *f); -void pico_arp_init(void); -#endif diff --git a/kernel/picotcp/modules/pico_dev_ipc.c b/kernel/picotcp/modules/pico_dev_ipc.c deleted file mode 100644 index f8b41e0..0000000 --- a/kernel/picotcp/modules/pico_dev_ipc.c +++ /dev/null @@ -1,109 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Michiel Kustermans - *********************************************************************/ - -#include -#include -#include - -#include "pico_device.h" -#include "pico_dev_ipc.h" -#include "pico_stack.h" - -struct pico_device_ipc { - struct pico_device dev; - int fd; -}; - -#define IPC_MTU 2048 - -static int pico_ipc_send(struct pico_device *dev, void *buf, int len) -{ - struct pico_device_ipc *ipc = (struct pico_device_ipc *) dev; - return (int)write(ipc->fd, buf, (uint32_t)len); -} - -static int pico_ipc_poll(struct pico_device *dev, int loop_score) -{ - struct pico_device_ipc *ipc = (struct pico_device_ipc *) dev; - struct pollfd pfd; - unsigned char buf[IPC_MTU]; - int len; - pfd.fd = ipc->fd; - pfd.events = POLLIN; - do { - if (poll(&pfd, 1, 0) <= 0) - return loop_score; - - len = (int)read(ipc->fd, buf, IPC_MTU); - if (len > 0) { - loop_score--; - pico_stack_recv(dev, buf, (uint32_t)len); - } - } while(loop_score > 0); - return 0; -} - -/* Public interface: create/destroy. 
*/ - -void pico_ipc_destroy(struct pico_device *dev) -{ - struct pico_device_ipc *ipc = (struct pico_device_ipc *) dev; - if(ipc->fd > 0) { - close(ipc->fd); - } -} - -static int ipc_connect(const char *sock_path) -{ - struct sockaddr_un addr; - int ipc_fd; - - if((ipc_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0)) < 0) { - return(-1); - } - - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1); - addr.sun_path[sizeof(addr.sun_path) - 1] = '\0'; - - if(connect(ipc_fd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) < 0) { - return(-1); - } - - return ipc_fd; -} - -struct pico_device *pico_ipc_create(const char *sock_path, const char *name, const uint8_t *mac) -{ - struct pico_device_ipc *ipc = PICO_ZALLOC(sizeof(struct pico_device_ipc)); - - if (!ipc) - return NULL; - - ipc->dev.mtu = IPC_MTU; - - if( 0 != pico_device_init((struct pico_device *)ipc, name, mac)) { - dbg("Ipc init failed.\n"); - pico_ipc_destroy((struct pico_device *)ipc); - return NULL; - } - - ipc->dev.overhead = 0; - ipc->fd = ipc_connect(sock_path); - if (ipc->fd < 0) { - dbg("Ipc creation failed.\n"); - pico_ipc_destroy((struct pico_device *)ipc); - return NULL; - } - - ipc->dev.send = pico_ipc_send; - ipc->dev.poll = pico_ipc_poll; - ipc->dev.destroy = pico_ipc_destroy; - dbg("Device %s created.\n", ipc->dev.name); - return (struct pico_device *)ipc; -} diff --git a/kernel/picotcp/modules/pico_dev_ipc.h b/kernel/picotcp/modules/pico_dev_ipc.h deleted file mode 100644 index 0df3d64..0000000 --- a/kernel/picotcp/modules/pico_dev_ipc.h +++ /dev/null @@ -1,15 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_IPC -#define INCLUDE_PICO_IPC -#include "pico_config.h" -#include "pico_device.h" - -void pico_ipc_destroy(struct pico_device *ipc); -struct pico_device *pico_ipc_create(const char *sock_path, const char *name, const uint8_t *mac); - -#endif - diff --git a/kernel/picotcp/modules/pico_dev_loop.c b/kernel/picotcp/modules/pico_dev_loop.c deleted file mode 100644 index daba333..0000000 --- a/kernel/picotcp/modules/pico_dev_loop.c +++ /dev/null @@ -1,66 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera - *********************************************************************/ - - -#include "pico_device.h" -#include "pico_dev_loop.h" -#include "pico_stack.h" - - -#define LOOP_MTU 1500 -static uint8_t l_buf[LOOP_MTU]; -static int l_bufsize = 0; - - -static int pico_loop_send(struct pico_device *dev, void *buf, int len) -{ - IGNORE_PARAMETER(dev); - if (len > LOOP_MTU) - return 0; - - if (l_bufsize == 0) { - memcpy(l_buf, buf, (size_t)len); - l_bufsize += len; - return len; - } - - return 0; -} - -static int pico_loop_poll(struct pico_device *dev, int loop_score) -{ - if (loop_score <= 0) - return 0; - - if (l_bufsize > 0) { - pico_stack_recv(dev, l_buf, (uint32_t)l_bufsize); - l_bufsize = 0; - loop_score--; - } - - return loop_score; -} - - -struct pico_device *pico_loop_create(void) -{ - struct pico_device *loop = PICO_ZALLOC(sizeof(struct pico_device)); - if (!loop) - return NULL; - - if( 0 != pico_device_init(loop, "loop", NULL)) { - dbg ("Loop init failed.\n"); - pico_device_destroy(loop); - return NULL; - } - - loop->send = pico_loop_send; - loop->poll = pico_loop_poll; - dbg("Device %s created.\n", loop->name); - return loop; -} - diff --git 
a/kernel/picotcp/modules/pico_dev_loop.h b/kernel/picotcp/modules/pico_dev_loop.h deleted file mode 100644 index bd22451..0000000 --- a/kernel/picotcp/modules/pico_dev_loop.h +++ /dev/null @@ -1,15 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef INCLUDE_PICO_LOOP -#define INCLUDE_PICO_LOOP -#include "pico_config.h" -#include "pico_device.h" - -void pico_loop_destroy(struct pico_device *loop); -struct pico_device *pico_loop_create(void); - -#endif - diff --git a/kernel/picotcp/modules/pico_dev_mock.c b/kernel/picotcp/modules/pico_dev_mock.c deleted file mode 100644 index d87ff0f..0000000 --- a/kernel/picotcp/modules/pico_dev_mock.c +++ /dev/null @@ -1,316 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Frederik Van Slycken - *********************************************************************/ - - -#include "pico_device.h" -#include "pico_dev_mock.h" -#include "pico_stack.h" -#include "pico_tree.h" - -#define MOCK_MTU 1500 - - - -/* Tree for finding mock_device based on pico_device* */ - - -static int mock_dev_cmp(void *ka, void *kb) -{ - struct mock_device *a = ka, *b = kb; - if (a->dev < b->dev) - return -1; - - if (a->dev > b->dev) - return 1; - - return 0; -} - -static PICO_TREE_DECLARE(mock_device_tree, mock_dev_cmp); - -static int pico_mock_send(struct pico_device *dev, void *buf, int len) -{ - struct mock_device search = { - .dev = dev - }; - struct mock_device*mock = pico_tree_findKey(&mock_device_tree, &search); - struct mock_frame*frame; - - if(!mock) - return 0; - - if (len > MOCK_MTU) - return 0; - - frame = PICO_ZALLOC(sizeof(struct mock_frame)); - if(!frame) { - return 0; - } - - if(mock->out_head == NULL) - mock->out_head = frame; - else - mock->out_tail->next = frame; - - mock->out_tail = frame; - - mock->out_tail->buffer = PICO_ZALLOC((uint32_t)len); - if(!mock->out_tail->buffer) - return 0; - - memcpy(mock->out_tail->buffer, buf, (uint32_t)len); - mock->out_tail->len = len; - - return len; - -} - -static int pico_mock_poll(struct pico_device *dev, int loop_score) -{ - struct mock_device search = { - .dev = dev - }; - struct mock_device*mock = pico_tree_findKey(&mock_device_tree, &search); - struct mock_frame*nxt; - - if(!mock) - return 0; - - if (loop_score <= 0) - return 0; - - while(mock->in_head != NULL && loop_score > 0) - { - pico_stack_recv(dev, mock->in_head->buffer, (uint32_t)mock->in_head->len); - loop_score--; - - PICO_FREE(mock->in_head->buffer); - - if(mock->in_tail == mock->in_head) { - PICO_FREE(mock->in_head); - mock->in_tail = mock->in_head = NULL; - return loop_score; - } - - nxt = mock->in_head->next; - PICO_FREE(mock->in_head); - mock->in_head = nxt; - } - return loop_score; -} - -int 
pico_mock_network_read(struct mock_device*mock, void *buf, int len) -{ - struct mock_frame*nxt; - if(mock->out_head == NULL) - return 0; - - if(len > mock->out_head->len - mock->out_head->read) - len = mock->out_head->len - mock->out_head->read; - - memcpy(buf, mock->out_head->buffer, (uint32_t)len); - - if(len + mock->out_head->read != mock->out_head->len) { - mock->out_head->read += len; - return len; - } - - PICO_FREE(mock->out_head->buffer); - - if(mock->out_tail == mock->out_head) { - PICO_FREE(mock->out_head); - mock->out_tail = mock->out_head = NULL; - return len; - } - - nxt = mock->out_head->next; - PICO_FREE(mock->out_head); - mock->out_head = nxt; - - return len; -} - -int pico_mock_network_write(struct mock_device*mock, const void *buf, int len) -{ - struct mock_frame*frame; - if (len > MOCK_MTU) - return 0; - - frame = PICO_ZALLOC(sizeof(struct mock_frame)); - if(!frame) { - return 0; - } - - if(mock->in_head == NULL) - mock->in_head = frame; - else - mock->in_tail->next = frame; - - mock->in_tail = frame; - - mock->in_tail->buffer = PICO_ZALLOC((uint32_t)len); - if(!mock->in_tail->buffer) - return 0; - - memcpy(mock->in_tail->buffer, buf, (uint32_t)len); - mock->in_tail->len = len; - - return len; - -} - -/* Public interface: create/destroy. 
*/ - -void pico_mock_destroy(struct pico_device *dev) -{ - struct mock_device search = { - .dev = dev - }; - struct mock_device*mock = pico_tree_findKey(&mock_device_tree, &search); - struct mock_frame*nxt; - - if(!mock) - return; - - nxt = mock->in_head; - while(nxt != NULL) { - mock->in_head = mock->in_head->next; - PICO_FREE(nxt); - nxt = mock->in_head; - } - nxt = mock->out_head; - while(nxt != NULL) { - mock->out_head = mock->out_head->next; - PICO_FREE(nxt); - nxt = mock->out_head; - } - pico_tree_delete(&mock_device_tree, mock); -} - -struct mock_device *pico_mock_create(uint8_t*mac) -{ - - struct mock_device*mock = PICO_ZALLOC(sizeof(struct mock_device)); - if(!mock) - return NULL; - - mock->dev = PICO_ZALLOC(sizeof(struct pico_device)); - if (!mock->dev) { - PICO_FREE(mock); - return NULL; - } - - if(mac != NULL) { - mock->mac = PICO_ZALLOC(6 * sizeof(uint8_t)); - if(!mock->mac) { - PICO_FREE(mock->dev); - PICO_FREE(mock); - return NULL; - } - - memcpy(mock->mac, mac, 6); - } - - if( 0 != pico_device_init((struct pico_device *)mock->dev, "mock", mac)) { - dbg ("Loop init failed.\n"); - pico_device_destroy(mock->dev); - if(mock->mac != NULL) - PICO_FREE(mock->mac); - - PICO_FREE(mock); - return NULL; - } - - mock->dev->send = pico_mock_send; - mock->dev->poll = pico_mock_poll; - mock->dev->destroy = pico_mock_destroy; - dbg("Device %s created.\n", mock->dev->name); - - if (pico_tree_insert(&mock_device_tree, mock)) { - if (mock->mac != NULL) - PICO_FREE(mock->mac); - - pico_device_destroy(mock->dev); - - PICO_FREE(mock); - return NULL; - } - - return mock; -} - -/* - * a few utility functions that check certain fields - */ - -uint32_t mock_get_sender_ip4(struct mock_device*mock, void*buf, int len) -{ - uint32_t ret; - int start = mock->mac ? 
14 : 0; - if(start + 16 > len) { - dbg("out of range!\n"); - return 0; - } - - memcpy(&ret, buf + start + 12, 4); - return ret; -} - -/* - * TODO - * find a way to create ARP replies - * - * create the other utility functions, e.g. - * -is_arp_request - * -create_arp_reply - * -get_destination_ip4 - * -get_ip4_total_length - * -is_ip4_checksum_valid - * -is_tcp_syn - * -create_tcp_synack - * -is_tcp_checksum_valid - * etc. - * - */ - -int mock_ip_protocol(struct mock_device*mock, void*buf, int len) -{ - uint8_t type; - int start = mock->mac ? 14 : 0; - if(start + 10 > len) { - return 0; - } - - memcpy(&type, buf + start + 9, 1); - return type; -} - -/* note : this function doesn't check if the IP header has any options */ -int mock_icmp_type(struct mock_device*mock, void*buf, int len) -{ - uint8_t type; - int start = mock->mac ? 14 : 0; - if(start + 21 > len) { - return 0; - } - - memcpy(&type, buf + start + 20, 1); - return type; -} - -/* note : this function doesn't check if the IP header has any options */ -int mock_icmp_code(struct mock_device*mock, void*buf, int len) -{ - uint8_t type; - int start = mock->mac ? 14 : 0; - if(start + 22 > len) { - return 0; - } - - memcpy(&type, buf + start + 21, 1); - return type; -} diff --git a/kernel/picotcp/modules/pico_dev_mock.h b/kernel/picotcp/modules/pico_dev_mock.h deleted file mode 100644 index 137f138..0000000 --- a/kernel/picotcp/modules/pico_dev_mock.h +++ /dev/null @@ -1,47 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_MOCK -#define INCLUDE_PICO_MOCK -#include "pico_config.h" -#include "pico_device.h" - - -struct mock_frame { - uint8_t*buffer; - int len; - int read; - - struct mock_frame*next; -}; - -struct mock_device { - struct pico_device*dev; - struct mock_frame*in_head; - struct mock_frame*in_tail; - struct mock_frame*out_head; - struct mock_frame*out_tail; - - uint8_t*mac; - -}; - -struct mock_device; -/* A mockup-device for the purpose of testing. It provides a couple of extra "network"-functions, which represent the network-side of the device. A network_send will result in mock_poll reading something, a network_read will see if the stack has sent anything through our mock-device. */ -void pico_mock_destroy(struct pico_device *dev); -struct mock_device *pico_mock_create(uint8_t*mac); - -int pico_mock_network_read(struct mock_device*mock, void *buf, int len); -int pico_mock_network_write(struct mock_device*mock, const void *buf, int len); - -/* TODO */ -/* we could use a few checking functions, e.g. one to see if it's a valid IP packet, if it's TCP, if the IP-address matches,... */ -/* That would be useful to avoid having to manually create buffers of what you expect, probably with masks for things that are random,... */ -uint32_t mock_get_sender_ip4(struct mock_device*mock, void*buf, int len); - -int mock_ip_protocol(struct mock_device*mock, void*buf, int len); -int mock_icmp_type(struct mock_device*mock, void*buf, int len); -int mock_icmp_code(struct mock_device*mock, void*buf, int len); -#endif diff --git a/kernel/picotcp/modules/pico_dev_null.c b/kernel/picotcp/modules/pico_dev_null.c deleted file mode 100644 index 34e9ebc..0000000 --- a/kernel/picotcp/modules/pico_dev_null.c +++ /dev/null @@ -1,60 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. 
- See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera - *********************************************************************/ - - -#include "pico_device.h" -#include "pico_dev_null.h" -#include "pico_stack.h" - -struct pico_device_null { - struct pico_device dev; - int statistics_frames_out; -}; - -#define NULL_MTU 0 - -static int pico_null_send(struct pico_device *dev, void *buf, int len) -{ - struct pico_device_null *null = (struct pico_device_null *) dev; - IGNORE_PARAMETER(buf); - - /* Increase the statistic count */ - null->statistics_frames_out++; - - /* Discard the frame content silently. */ - return len; -} - -static int pico_null_poll(struct pico_device *dev, int loop_score) -{ - /* We never have packet to receive, no score is used. */ - IGNORE_PARAMETER(dev); - return loop_score; -} - -/* Public interface: create/destroy. */ - - -struct pico_device *pico_null_create(const char *name) -{ - struct pico_device_null *null = PICO_ZALLOC(sizeof(struct pico_device_null)); - - if (!null) - return NULL; - - if( 0 != pico_device_init((struct pico_device *)null, name, NULL)) { - return NULL; - } - - null->dev.overhead = 0; - null->statistics_frames_out = 0; - null->dev.send = pico_null_send; - null->dev.poll = pico_null_poll; - dbg("Device %s created.\n", null->dev.name); - return (struct pico_device *)null; -} - diff --git a/kernel/picotcp/modules/pico_dev_null.h b/kernel/picotcp/modules/pico_dev_null.h deleted file mode 100644 index 321375b..0000000 --- a/kernel/picotcp/modules/pico_dev_null.h +++ /dev/null @@ -1,15 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_NULL -#define INCLUDE_PICO_NULL -#include "pico_config.h" -#include "pico_device.h" - -void pico_null_destroy(struct pico_device *null); -struct pico_device *pico_null_create(const char *name); - -#endif - diff --git a/kernel/picotcp/modules/pico_dev_pcap.c b/kernel/picotcp/modules/pico_dev_pcap.c deleted file mode 100644 index 387dbd9..0000000 --- a/kernel/picotcp/modules/pico_dev_pcap.c +++ /dev/null @@ -1,96 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera - *********************************************************************/ - - -#include -#include "pico_device.h" -#include "pico_dev_pcap.h" -#include "pico_stack.h" - -#include - -struct pico_device_pcap { - struct pico_device dev; - pcap_t *conn; -}; - -#define VDE_MTU 2048 - -static int pico_pcap_send(struct pico_device *dev, void *buf, int len) -{ - struct pico_device_pcap *pcap = (struct pico_device_pcap *) dev; - /* dbg("[%s] send %d bytes.\n", dev->name, len); */ - return pcap_inject(pcap->conn, buf, (uint32_t)len); -} - -static void pico_dev_pcap_cb(u_char *u, const struct pcap_pkthdr *h, const u_char *data) -{ - struct pico_device *dev = (struct pico_device *)u; - const uint8_t *buf = (const uint8_t *)data; - pico_stack_recv(dev, buf, (uint32_t)h->len); -} - - -static int pico_pcap_poll(struct pico_device *dev, int loop_score) -{ - struct pico_device_pcap *pcap = (struct pico_device_pcap *) dev; - loop_score -= pcap_dispatch(pcap->conn, loop_score, pico_dev_pcap_cb, (u_char *) pcap); - return loop_score; -} - -/* Public interface: create/destroy. 
*/ - -void pico_pcap_destroy(struct pico_device *dev) -{ - struct pico_device_pcap *pcap = (struct pico_device_pcap *) dev; - pcap_close(pcap->conn); -} - -#define PICO_PCAP_MODE_LIVE 0 -#define PICO_PCAP_MODE_STORED 1 - -static struct pico_device *pico_pcap_create(char *if_file_name, char *name, uint8_t *mac, int mode) -{ - struct pico_device_pcap *pcap = PICO_ZALLOC(sizeof(struct pico_device_pcap)); - char errbuf[2000]; - if (!pcap) - return NULL; - - if( 0 != pico_device_init((struct pico_device *)pcap, name, mac)) { - dbg ("Pcap init failed.\n"); - pico_pcap_destroy((struct pico_device *)pcap); - return NULL; - } - - pcap->dev.overhead = 0; - - if (mode == PICO_PCAP_MODE_LIVE) - pcap->conn = pcap_open_live(if_file_name, 2000, 100, 10, errbuf); - else - pcap->conn = pcap_open_offline(if_file_name, errbuf); - - if (!pcap->conn) { - pico_pcap_destroy((struct pico_device *)pcap); - return NULL; - } - - pcap->dev.send = pico_pcap_send; - pcap->dev.poll = pico_pcap_poll; - pcap->dev.destroy = pico_pcap_destroy; - dbg("Device %s created.\n", pcap->dev.name); - return (struct pico_device *)pcap; -} - -struct pico_device *pico_pcap_create_fromfile(char *filename, char *name, uint8_t *mac) -{ - return pico_pcap_create(filename, name, mac, PICO_PCAP_MODE_STORED); -} - -struct pico_device *pico_pcap_create_live(char *ifname, char *name, uint8_t *mac) -{ - return pico_pcap_create(ifname, name, mac, PICO_PCAP_MODE_LIVE); -} diff --git a/kernel/picotcp/modules/pico_dev_pcap.h b/kernel/picotcp/modules/pico_dev_pcap.h deleted file mode 100644 index 37fb9d9..0000000 --- a/kernel/picotcp/modules/pico_dev_pcap.h +++ /dev/null @@ -1,19 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - - Author: Daniele Lacamera - *********************************************************************/ -#ifndef INCLUDE_PICO_PCAP -#define INCLUDE_PICO_PCAP -#include "pico_config.h" -#include "pico_device.h" -#include - -void pico_pcap_destroy(struct pico_device *pcap); -struct pico_device *pico_pcap_create_live(char *ifname, char *name, uint8_t *mac); -struct pico_device *pico_pcap_create_fromfile(char *filename, char *name, uint8_t *mac); - -#endif - diff --git a/kernel/picotcp/modules/pico_dev_ppp.c b/kernel/picotcp/modules/pico_dev_ppp.c deleted file mode 100644 index e778b16..0000000 --- a/kernel/picotcp/modules/pico_dev_ppp.c +++ /dev/null @@ -1,2312 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Serge Gadeyne, Daniele Lacamera, Maxime Vincent - - *********************************************************************/ - - -#include -#include -#include -#include - -#include "pico_device.h" -#include "pico_dev_ppp.h" -#include "pico_stack.h" -#include "pico_ipv4.h" -#include "pico_md5.h" -#include "pico_dns_client.h" - -#ifdef DEBUG_PPP - #define ppp_dbg dbg -#else - #define ppp_dbg(...) do {} while(0) -#endif - -/* We should define this in a global header. 
*/ -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - -#define PICO_PPP_MRU 1514 /* RFC default MRU */ -#define PICO_PPP_MTU 1500 -#define PPP_MAXPKT 2048 -#define PPP_MAX_APN 134 -#define PPP_MAX_USERNAME 134 -#define PPP_MAX_PASSWORD 134 -#define PPP_HDR_SIZE 3u -#define PPP_PROTO_SLOT_SIZE 2u -#define PPP_FCS_SIZE 2u -#define PPP_PROTO_LCP short_be(0xc021) -#define PPP_PROTO_IP short_be(0x0021) -#define PPP_PROTO_PAP short_be(0xc023) -#define PPP_PROTO_CHAP short_be(0xc223) -#define PPP_PROTO_IPCP short_be(0x8021) - -#define PICO_CONF_REQ 1 -#define PICO_CONF_ACK 2 -#define PICO_CONF_NAK 3 -#define PICO_CONF_REJ 4 -#define PICO_CONF_TERM 5 -#define PICO_CONF_TERM_ACK 6 -#define PICO_CONF_CODE_REJ 7 -#define PICO_CONF_PROTO_REJ 8 -#define PICO_CONF_ECHO_REQ 9 -#define PICO_CONF_ECHO_REP 10 -#define PICO_CONF_DISCARD_REQ 11 - -#define LCPOPT_MRU 1u /* param size: 4, fixed: MRU */ -#define LCPOPT_AUTH 3u /* param size: 4-5: AUTH proto */ -#define LCPOPT_QUALITY 4u /* unused for now */ -#define LCPOPT_MAGIC 5u /* param size: 6, fixed: Magic */ -#define LCPOPT_PROTO_COMP 7u /* param size: 0, flag */ -#define LCPOPT_ADDRCTL_COMP 8u /* param size: 0, flag */ - -#define CHAP_MD5_SIZE 16u -#define CHAP_CHALLENGE 1 -#define CHAP_RESPONSE 2 -#define CHAP_SUCCESS 3 -#define CHAP_FAILURE 4 -#define CHALLENGE_SIZE(ppp, ch) ((size_t)((1 + strlen(ppp->password) + short_be((ch)->len)))) - -#define PAP_AUTH_REQ 1 -#define PAP_AUTH_ACK 2 -#define PAP_AUTH_NAK 3 - - -#define PICO_PPP_DEFAULT_TIMER (3) /* seconds */ -#define PICO_PPP_DEFAULT_MAX_TERMINATE (2) -#define PICO_PPP_DEFAULT_MAX_CONFIGURE (10) -#define PICO_PPP_DEFAULT_MAX_FAILURE (5) -#define PICO_PPP_DEFAULT_MAX_DIALTIME (20) - -#define IPCP_ADDR_LEN 6u -#define IPCP_VJ_LEN 6u -#define IPCP_OPT_IP 0x03 -#define IPCP_OPT_VJ 0x02 -#define IPCP_OPT_DNS1 0x81 -#define IPCP_OPT_NBNS1 0x82 -#define IPCP_OPT_DNS2 0x83 -#define IPCP_OPT_NBNS2 0x84 - -static uint8_t LCPOPT_LEN[9] = { - 0, 4, 0, 4, 4, 6, 2, 2, 2 -}; - - 
-/* Protocol defines */ -static const unsigned char AT_S3 = 0x0du; -static const unsigned char AT_S4 = 0x0au; -static const unsigned char PPPF_FLAG_SEQ = 0x7eu; -static const unsigned char PPPF_CTRL_ESC = 0x7du; -static const unsigned char PPPF_ADDR = 0xffu; -static const unsigned char PPPF_CTRL = 0x03u; - -static int ppp_devnum = 0; -static uint8_t ppp_recv_buf[PPP_MAXPKT]; - -PACKED_STRUCT_DEF pico_lcp_hdr { - uint8_t code; - uint8_t id; - uint16_t len; -}; - -PACKED_STRUCT_DEF pico_chap_hdr { - uint8_t code; - uint8_t id; - uint16_t len; -}; - -PACKED_STRUCT_DEF pico_pap_hdr { - uint8_t code; - uint8_t id; - uint16_t len; -}; - -PACKED_STRUCT_DEF pico_ipcp_hdr { - uint8_t code; - uint8_t id; - uint16_t len; -}; - -enum ppp_modem_state { - PPP_MODEM_STATE_INITIAL = 0, - PPP_MODEM_STATE_RESET, - PPP_MODEM_STATE_ECHO, - PPP_MODEM_STATE_CREG, - PPP_MODEM_STATE_CGREG, - PPP_MODEM_STATE_CGDCONT, - PPP_MODEM_STATE_CGATT, - PPP_MODEM_STATE_DIAL, - PPP_MODEM_STATE_CONNECTED, - PPP_MODEM_STATE_MAX -}; - -enum ppp_modem_event { - PPP_MODEM_EVENT_START = 0, - PPP_MODEM_EVENT_STOP, - PPP_MODEM_EVENT_OK, - PPP_MODEM_EVENT_CONNECT, - PPP_MODEM_EVENT_TIMEOUT, - PPP_MODEM_EVENT_MAX -}; - -enum ppp_lcp_state { - PPP_LCP_STATE_INITIAL = 0, - PPP_LCP_STATE_STARTING, - PPP_LCP_STATE_CLOSED, - PPP_LCP_STATE_STOPPED, - PPP_LCP_STATE_CLOSING, - PPP_LCP_STATE_STOPPING, - PPP_LCP_STATE_REQ_SENT, - PPP_LCP_STATE_ACK_RCVD, - PPP_LCP_STATE_ACK_SENT, - PPP_LCP_STATE_OPENED, - PPP_LCP_STATE_MAX -}; - -enum ppp_lcp_event { - PPP_LCP_EVENT_UP = 0, - PPP_LCP_EVENT_DOWN, - PPP_LCP_EVENT_OPEN, - PPP_LCP_EVENT_CLOSE, - PPP_LCP_EVENT_TO_POS, - PPP_LCP_EVENT_TO_NEG, - PPP_LCP_EVENT_RCR_POS, - PPP_LCP_EVENT_RCR_NEG, - PPP_LCP_EVENT_RCA, - PPP_LCP_EVENT_RCN, - PPP_LCP_EVENT_RTR, - PPP_LCP_EVENT_RTA, - PPP_LCP_EVENT_RUC, - PPP_LCP_EVENT_RXJ_POS, - PPP_LCP_EVENT_RXJ_NEG, - PPP_LCP_EVENT_RXR, - PPP_LCP_EVENT_MAX -}; - -enum ppp_auth_state { - PPP_AUTH_STATE_INITIAL = 0, - PPP_AUTH_STATE_STARTING, - 
PPP_AUTH_STATE_RSP_SENT, - PPP_AUTH_STATE_REQ_SENT, - PPP_AUTH_STATE_AUTHENTICATED, - PPP_AUTH_STATE_MAX -}; - -enum ppp_auth_event { - PPP_AUTH_EVENT_UP_NONE = 0, - PPP_AUTH_EVENT_UP_PAP, - PPP_AUTH_EVENT_UP_CHAP, - PPP_AUTH_EVENT_DOWN, - PPP_AUTH_EVENT_RAC, - PPP_AUTH_EVENT_RAA, - PPP_AUTH_EVENT_RAN, - PPP_AUTH_EVENT_TO, - PPP_AUTH_EVENT_MAX -}; - -enum ppp_ipcp_state { - PPP_IPCP_STATE_INITIAL = 0, - PPP_IPCP_STATE_REQ_SENT, - PPP_IPCP_STATE_ACK_RCVD, - PPP_IPCP_STATE_ACK_SENT, - PPP_IPCP_STATE_OPENED, - PPP_IPCP_STATE_MAX -}; - -enum ppp_ipcp_event { - PPP_IPCP_EVENT_UP = 0, - PPP_IPCP_EVENT_DOWN, - PPP_IPCP_EVENT_RCR_POS, - PPP_IPCP_EVENT_RCR_NEG, - PPP_IPCP_EVENT_RCA, - PPP_IPCP_EVENT_RCN, - PPP_IPCP_EVENT_TO, - PPP_IPCP_EVENT_MAX -}; - -enum pico_ppp_state { - PPP_MODEM_RST = 0, - PPP_MODEM_CREG, - PPP_MODEM_CGREG, - PPP_MODEM_CGDCONT, - PPP_MODEM_CGATT, - PPP_MODEM_CONNECT, - /* From here on, PPP states */ - PPP_ESTABLISH, - PPP_AUTH, - PPP_NETCONFIG, - PPP_NETWORK, - PPP_TERMINATE, - /* MAXSTATE is the last one */ - PPP_MODEM_MAXSTATE -}; - - -#define IPCP_ALLOW_IP 0x01u -#define IPCP_ALLOW_DNS1 0x02u -#define IPCP_ALLOW_DNS2 0x04u -#define IPCP_ALLOW_NBNS1 0x08u -#define IPCP_ALLOW_NBNS2 0x10u - -struct pico_device_ppp { - struct pico_device dev; - int autoreconnect; - enum ppp_modem_state modem_state; - enum ppp_lcp_state lcp_state; - enum ppp_auth_state auth_state; - enum ppp_ipcp_state ipcp_state; - enum pico_ppp_state state; - char apn[PPP_MAX_APN]; - char password[PPP_MAX_PASSWORD]; - char username[PPP_MAX_USERNAME]; - uint16_t lcpopt_local; - uint16_t lcpopt_peer; - uint8_t *pkt; - uint32_t len; - uint16_t rej; - uint16_t auth; - int (*serial_recv)(struct pico_device *dev, void *buf, int len); - int (*serial_send)(struct pico_device *dev, const void *buf, int len); - int (*serial_set_speed)(struct pico_device *dev, uint32_t speed); - uint32_t ipcp_allowed_fields; - uint32_t ipcp_ip; - uint32_t ipcp_dns1; - uint32_t ipcp_nbns1; - uint32_t ipcp_dns2; 
- uint32_t ipcp_nbns2; - uint32_t timer; - uint8_t timer_val; - uint8_t timer_count; - uint8_t frame_id; - uint8_t timer_on; - uint16_t mru; -}; - - -/* Unit test interceptor */ -static void (*mock_modem_state)(struct pico_device_ppp *ppp, enum ppp_modem_event event) = NULL; -static void (*mock_lcp_state)(struct pico_device_ppp *ppp, enum ppp_lcp_event event) = NULL; -static void (*mock_auth_state)(struct pico_device_ppp *ppp, enum ppp_auth_event event) = NULL; -static void (*mock_ipcp_state)(struct pico_device_ppp *ppp, enum ppp_ipcp_event event) = NULL; - -/* Debug prints */ -#ifdef PPP_DEBUG -static void lcp_optflags_print(struct pico_device_ppp *ppp, uint8_t *opts, uint32_t opts_len); -#endif - -#define PPP_TIMER_ON_MODEM 0x01u -#define PPP_TIMER_ON_LCPREQ 0x04u -#define PPP_TIMER_ON_LCPTERM 0x08u -#define PPP_TIMER_ON_AUTH 0x10u -#define PPP_TIMER_ON_IPCP 0x20u - -/* Escape and send */ -static int ppp_serial_send_escape(struct pico_device_ppp *ppp, void *buf, int len) -{ - uint8_t *in_buf = (uint8_t *)buf; - uint8_t *out_buf = NULL; - int esc_char_count = 0; - int newlen = 0, ret = -1; - int i, j; - -#ifdef PPP_DEBUG - { - uint32_t idx; - if (len > 0) { - ppp_dbg("PPP >>>> "); - for(idx = 0; idx < (uint32_t)len; idx++) { - ppp_dbg(" %02x", ((uint8_t *)buf)[idx]); - } - ppp_dbg("\n"); - } - } -#endif - - for (i = 1; i < (len - 1); i++) /* from 1 to len -1, as start/stop are not escaped */ - { - if (((in_buf[i] + 1u) >> 1) == 0x3Fu) - esc_char_count++; - } - if (!esc_char_count) { - return ppp->serial_send(&ppp->dev, buf, len); - } - - newlen = len + esc_char_count; - out_buf = PICO_ZALLOC((uint32_t)newlen); - if(!out_buf) - return -1; - - /* Start byte. */ - out_buf[0] = in_buf[0]; - for(i = 1, j = 1; i < (len - 1); i++) { - if (((in_buf[i] + 1u) >> 1) == 0x3Fu) { - out_buf[j++] = PPPF_CTRL_ESC; - out_buf[j++] = in_buf[i] ^ 0x20; - } else { - out_buf[j++] = in_buf[i]; - } - } - /* Stop byte. 
*/ - out_buf[newlen - 1] = in_buf[len - 1]; - - ret = ppp->serial_send(&ppp->dev, out_buf, newlen); - - PICO_FREE(out_buf); - - if (ret == newlen) - return len; - - return ret; - -} - -static void lcp_timer_start(struct pico_device_ppp *ppp, uint8_t timer_type) -{ - uint8_t count = 0; - ppp->timer_on |= timer_type; - - if (ppp->timer_val == 0) { - ppp->timer_val = PICO_PPP_DEFAULT_TIMER; - } - - if (timer_type == PPP_TIMER_ON_LCPTERM) { - count = PICO_PPP_DEFAULT_MAX_TERMINATE; - } - - if (timer_type == PPP_TIMER_ON_LCPREQ) { - count = PICO_PPP_DEFAULT_MAX_CONFIGURE; - } - - if (timer_type == 0) { - ppp->timer_on |= PPP_TIMER_ON_LCPREQ; - ppp->timer_count = 0; - } - - if (ppp->timer_count == 0) - ppp->timer_count = count; -} - -static void lcp_zero_restart_count(struct pico_device_ppp *ppp) -{ - lcp_timer_start(ppp, 0); -} - -static void lcp_timer_stop(struct pico_device_ppp *ppp, uint8_t timer_type) -{ - ppp->timer_on = (uint8_t)ppp->timer_on & (uint8_t)(~timer_type); -} - - -#define PPP_FSM_MAX_ACTIONS 3 - -struct pico_ppp_fsm { - int next_state; - void (*event_handler[PPP_FSM_MAX_ACTIONS]) (struct pico_device_ppp *); -}; - -#define LCPOPT_SET_LOCAL(ppp, opt) ppp->lcpopt_local |= (uint16_t)(1u << opt) -#define LCPOPT_SET_PEER(ppp, opt) ppp->lcpopt_peer |= (uint16_t)(1u << opt) -#define LCPOPT_UNSET_LOCAL(ppp, opt) ppp->lcpopt_local &= (uint16_t) ~(1u << opt) -#define LCPOPT_UNSET_LOCAL_MASK(ppp, opt) ppp->lcpopt_local &= (uint16_t) ~(opt) -#define LCPOPT_UNSET_PEER(ppp, opt) ppp->lcpopt_peer &= (uint16_t) ~(1u << opt) -#define LCPOPT_ISSET_LOCAL(ppp, opt) ((ppp->lcpopt_local & (uint16_t)(1u << opt)) != 0) -#define LCPOPT_ISSET_PEER(ppp, opt) ((ppp->lcpopt_peer & (uint16_t)(1u << opt)) != 0) - -static void evaluate_modem_state(struct pico_device_ppp *ppp, enum ppp_modem_event event); -static void evaluate_lcp_state(struct pico_device_ppp *ppp, enum ppp_lcp_event event); -static void evaluate_auth_state(struct pico_device_ppp *ppp, enum ppp_auth_event event); 
-static void evaluate_ipcp_state(struct pico_device_ppp *ppp, enum ppp_ipcp_event event); - - -static uint32_t ppp_ctl_packet_size(struct pico_device_ppp *ppp, uint16_t proto, uint32_t *size) -{ - uint32_t prefix = 0; - - IGNORE_PARAMETER(ppp); - IGNORE_PARAMETER(proto); - - prefix += PPP_HDR_SIZE; /* 7e ff 03 ... */ - prefix += PPP_PROTO_SLOT_SIZE; - *size += prefix; - *size += PPP_FCS_SIZE; - (*size)++; /* STOP byte 0x7e */ - return prefix; -} - -/* CRC16 / FCS Calculation */ -static uint16_t ppp_fcs_char(uint16_t old_crc, uint8_t data) -{ - uint16_t word = (old_crc ^ data) & (uint16_t)0x00FFu; - word = (uint16_t)(word ^ (uint16_t)((word << 4u) & (uint16_t)0x00FFu)); - word = (uint16_t)((word << 8u) ^ (word << 3u) ^ (word >> 4u)); - return ((old_crc >> 8u) ^ word); -} - -static uint16_t ppp_fcs_continue(uint16_t fcs, uint8_t *buf, uint32_t len) -{ - uint8_t *pos = buf; - for (pos = buf; pos < buf + len; pos++) - { - fcs = ppp_fcs_char(fcs, *pos); - } - return fcs; -} - -static uint16_t ppp_fcs_finish(uint16_t fcs) -{ - return fcs ^ 0xFFFF; -} - -static uint16_t ppp_fcs_start(uint8_t *buf, uint32_t len) -{ - uint16_t fcs = 0xFFFF; - return ppp_fcs_continue(fcs, buf, len); -} - -static int ppp_fcs_verify(uint8_t *buf, uint32_t len) -{ - uint16_t fcs = ppp_fcs_start(buf, len - 2); - fcs = ppp_fcs_finish(fcs); - if ((((fcs & 0xFF00u) >> 8) != buf[len - 1]) || ((fcs & 0xFFu) != buf[len - 2])) { - return -1; - } - - return 0; -} - -/* Serial send (DTE->DCE) functions */ -static int pico_ppp_ctl_send(struct pico_device *dev, uint16_t code, uint8_t *pkt, uint32_t len) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *) dev; - uint16_t fcs; - uint8_t *ptr = pkt; - int i = 0; - - if (!ppp->serial_send) - return (int)len; - - /* PPP Header */ - ptr[i++] = PPPF_FLAG_SEQ; - ptr[i++] = PPPF_ADDR; - ptr[i++] = PPPF_CTRL; - /* protocol */ - ptr[i++] = (uint8_t)(code & 0xFFu); - ptr[i++] = (uint8_t)((code & 0xFF00u) >> 8); - - /* payload is already in place. 
Calculate FCS. */ - fcs = ppp_fcs_start(pkt + 1, len - 4); /* FCS excludes: start (1), FCS(2), stop(1), total 4 bytes */ - fcs = ppp_fcs_finish(fcs); - pkt[len - 3] = (uint8_t)(fcs & 0xFFu); - pkt[len - 2] = (uint8_t)((fcs & 0xFF00u) >> 8); - pkt[len - 1] = PPPF_FLAG_SEQ; - ppp_serial_send_escape(ppp, pkt, (int)len); - return (int)len; -} - -static uint8_t pico_ppp_data_buffer[PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + PICO_PPP_MTU + PPP_FCS_SIZE + 1]; -static int pico_ppp_send(struct pico_device *dev, void *buf, int len) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *) dev; - uint16_t fcs = 0; - int fcs_start; - int i = 0; - - ppp_dbg(" >>>>>>>>> PPP OUT\n"); - - if (ppp->ipcp_state != PPP_IPCP_STATE_OPENED) - return len; - - if (!ppp->serial_send) - return len; - - pico_ppp_data_buffer[i++] = PPPF_FLAG_SEQ; - if (!LCPOPT_ISSET_PEER(ppp, LCPOPT_ADDRCTL_COMP)) - { - pico_ppp_data_buffer[i++] = PPPF_ADDR; - pico_ppp_data_buffer[i++] = PPPF_CTRL; - } - - fcs_start = i; - - if (!LCPOPT_ISSET_PEER(ppp, LCPOPT_PROTO_COMP)) - { - pico_ppp_data_buffer[i++] = 0x00; - } - - pico_ppp_data_buffer[i++] = 0x21; - memcpy(pico_ppp_data_buffer + i, buf, (uint32_t)len); - i += len; - fcs = ppp_fcs_start(pico_ppp_data_buffer + fcs_start, (uint32_t)(i - fcs_start)); - fcs = ppp_fcs_finish(fcs); - pico_ppp_data_buffer[i++] = (uint8_t)(fcs & 0xFFu); - pico_ppp_data_buffer[i++] = (uint8_t)((fcs & 0xFF00u) >> 8); - pico_ppp_data_buffer[i++] = PPPF_FLAG_SEQ; - - ppp_serial_send_escape(ppp, pico_ppp_data_buffer, i); - return len; -} - - -/* FSM functions */ - -static void ppp_modem_start_timer(struct pico_device_ppp *ppp) -{ - ppp->timer_on = ppp->timer_on | PPP_TIMER_ON_MODEM; - ppp->timer_val = PICO_PPP_DEFAULT_TIMER; -} - -#define PPP_AT_CREG0 "ATZ\r\n" -static void ppp_modem_send_reset(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CREG0, strlen(PPP_AT_CREG0)); - - ppp_modem_start_timer(ppp); -} - -#define PPP_AT_CREG1 "ATE0\r\n" 
-static void ppp_modem_send_echo(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CREG1, strlen(PPP_AT_CREG1)); - - ppp_modem_start_timer(ppp); -} - -#define PPP_AT_CREG2 "AT+CREG=1\r\n" -static void ppp_modem_send_creg(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CREG2, strlen(PPP_AT_CREG2)); - - ppp_modem_start_timer(ppp); -} - -#define PPP_AT_CGREG "AT+CGREG=1\r\n" -static void ppp_modem_send_cgreg(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CGREG, strlen(PPP_AT_CGREG)); - - ppp_modem_start_timer(ppp); -} - - -#define PPP_AT_CGDCONT "AT+CGDCONT=1,\"IP\",\"%s\",,,\r\n" -static void ppp_modem_send_cgdcont(struct pico_device_ppp *ppp) -{ - char at_cgdcont[200]; - - if (ppp->serial_send) { - snprintf(at_cgdcont, 200, PPP_AT_CGDCONT, ppp->apn); - ppp->serial_send(&ppp->dev, at_cgdcont, (int)strlen(at_cgdcont)); - } - - ppp_modem_start_timer(ppp); -} - - -#define PPP_AT_CGATT "AT+CGATT=1\r\n" -static void ppp_modem_send_cgatt(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CGATT, strlen(PPP_AT_CGATT)); - - ppp_modem_start_timer(ppp); -} - -#ifdef PICOTCP_PPP_SUPPORT_QUERIES -#define PPP_AT_CGATT_Q "AT+CGATT?\r\n" -static void ppp_modem_send_cgatt_q(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CGATT_Q, strlen(PPP_AT_CGATT_Q)); - - ppp_modem_start_timer(ppp); -} -#define PPP_AT_CGDCONT_Q "AT+CGDCONT?\r\n" -static void ppp_modem_send_cgdcont_q(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CGDCONT_Q, strlen(PPP_AT_CGDCONT_Q)); - - ppp_modem_start_timer(ppp); -} - -#define PPP_AT_CGREG_Q "AT+CGREG?\r\n" -static void ppp_modem_send_cgreg_q(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CGREG_Q, strlen(PPP_AT_CGREG_Q)); - - ppp_modem_start_timer(ppp); -} 
- -#define PPP_AT_CREG3 "AT+CREG?\r\n" -static void ppp_modem_send_creg_q(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_CREG3, strlen(PPP_AT_CREG3)); - - ppp_modem_start_timer(ppp); -} -#endif /* PICOTCP_PPP_SUPPORT_QUERIES */ - -#define PPP_AT_DIALIN "ATD*99***1#\r\n" -static void ppp_modem_send_dial(struct pico_device_ppp *ppp) -{ - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_AT_DIALIN, strlen(PPP_AT_DIALIN)); - - ppp_modem_start_timer(ppp); - ppp->timer_val = PICO_PPP_DEFAULT_MAX_DIALTIME; -} - -static void ppp_modem_connected(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: Modem connected to peer.\n"); - evaluate_lcp_state(ppp, PPP_LCP_EVENT_UP); -} - -#define PPP_ATH "+++ATH\r\n" -static void ppp_modem_disconnected(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: Modem disconnected.\n"); - if (ppp->serial_send) - ppp->serial_send(&ppp->dev, PPP_ATH, strlen(PPP_ATH)); - - evaluate_lcp_state(ppp, PPP_LCP_EVENT_DOWN); -} - -static const struct pico_ppp_fsm ppp_modem_fsm[PPP_MODEM_STATE_MAX][PPP_MODEM_EVENT_MAX] = { - [PPP_MODEM_STATE_INITIAL] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_INITIAL, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_RESET] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_RESET, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_ECHO, { ppp_modem_send_echo } }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_RESET, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_ECHO] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_ECHO, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - 
[PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_CREG, { ppp_modem_send_creg } }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_ECHO, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_CREG] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_CREG, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_CGREG, { ppp_modem_send_cgreg } }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_CREG, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_CGREG] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_CGREG, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_CGDCONT, { ppp_modem_send_cgdcont } }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_CGREG, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_CGDCONT] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_CGDCONT, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_CGATT, { ppp_modem_send_cgatt } }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_CGDCONT, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_CGATT] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_CGATT, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_DIAL, { ppp_modem_send_dial } }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_CGATT, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_DIAL] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_DIAL, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, {} }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_DIAL, {} }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_CONNECTED, { ppp_modem_connected 
} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_RESET, {ppp_modem_send_reset} } - }, - [PPP_MODEM_STATE_CONNECTED] = { - [PPP_MODEM_EVENT_START] = { PPP_MODEM_STATE_CONNECTED, {} }, - [PPP_MODEM_EVENT_STOP] = { PPP_MODEM_STATE_INITIAL, { ppp_modem_disconnected } }, - [PPP_MODEM_EVENT_OK] = { PPP_MODEM_STATE_CONNECTED, {} }, - [PPP_MODEM_EVENT_CONNECT] = { PPP_MODEM_STATE_CONNECTED, {} }, - [PPP_MODEM_EVENT_TIMEOUT] = { PPP_MODEM_STATE_CONNECTED, {} } - } -}; -static void evaluate_modem_state(struct pico_device_ppp *ppp, enum ppp_modem_event event) -{ - const struct pico_ppp_fsm *fsm; - int i; - if (mock_modem_state) { - mock_modem_state(ppp, event); - return; - } - - fsm = &ppp_modem_fsm[ppp->modem_state][event]; - - ppp->modem_state = (enum ppp_modem_state)fsm->next_state; - - for (i = 0; i < PPP_FSM_MAX_ACTIONS; i++) { - if (fsm->event_handler[i]) - fsm->event_handler[i](ppp); - } -} - -static void ppp_modem_recv(struct pico_device_ppp *ppp, void *data, uint32_t len) -{ - IGNORE_PARAMETER(len); - - ppp_dbg("PPP: Recv: '%s'\n", (char *)data); - - if (strcmp(data, "OK") == 0) { - evaluate_modem_state(ppp, PPP_MODEM_EVENT_OK); - } - - if (strcmp(data, "ERROR") == 0) { - evaluate_modem_state(ppp, PPP_MODEM_EVENT_STOP); - } - - if (strncmp(data, "CONNECT", 7) == 0) { - evaluate_modem_state(ppp, PPP_MODEM_EVENT_CONNECT); - } -} - -static void lcp_send_configure_request(struct pico_device_ppp *ppp) -{ -# define MY_LCP_REQ_SIZE 12 /* Max value. 
*/ - struct pico_lcp_hdr *req; - uint8_t *lcpbuf, *opts; - uint32_t size = MY_LCP_REQ_SIZE; - uint32_t prefix; - uint32_t optsize = 0; - - prefix = ppp_ctl_packet_size(ppp, PPP_PROTO_LCP, &size); - lcpbuf = PICO_ZALLOC(size); - if (!lcpbuf) - return; - - req = (struct pico_lcp_hdr *)(lcpbuf + prefix); - - opts = lcpbuf + prefix + (sizeof(struct pico_lcp_hdr)); - /* uint8_t my_pkt[] = { 0x7e, 0xff, 0x03, 0xc0, 0x21, 0x01, 0x00, 0x00, 0x06, 0x07, 0x02, 0x64, 0x7b, 0x7e }; */ - - ppp_dbg("Sending LCP CONF REQ\n"); - req->code = PICO_CONF_REQ; - req->id = ppp->frame_id++; - - if (LCPOPT_ISSET_LOCAL(ppp, LCPOPT_PROTO_COMP)) { - opts[optsize++] = LCPOPT_PROTO_COMP; - opts[optsize++] = LCPOPT_LEN[LCPOPT_PROTO_COMP]; - } - - if (LCPOPT_ISSET_LOCAL(ppp, LCPOPT_MRU)) { - opts[optsize++] = LCPOPT_MRU; - opts[optsize++] = LCPOPT_LEN[LCPOPT_MRU]; - opts[optsize++] = (uint8_t)((ppp->mru >> 8) & 0xFF); - opts[optsize++] = (uint8_t)(ppp->mru & 0xFF); - } else { - ppp->mru = PICO_PPP_MRU; - } - - if (LCPOPT_ISSET_LOCAL(ppp, LCPOPT_ADDRCTL_COMP)) { - opts[optsize++] = LCPOPT_ADDRCTL_COMP; - opts[optsize++] = LCPOPT_LEN[LCPOPT_ADDRCTL_COMP]; - } - - req->len = short_be((uint16_t)((unsigned long)optsize + sizeof(struct pico_lcp_hdr))); - -#ifdef PPP_DEBUG - lcp_optflags_print(ppp, opts, optsize); -#endif - - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_LCP, - lcpbuf, /* Start of PPP packet */ - (uint32_t)(prefix + /* PPP Header, etc. 
*/ - sizeof(struct pico_lcp_hdr) + /* LCP HDR */ - optsize + /* Actual options size */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1u) /* STOP Byte */ - ); - PICO_FREE(lcpbuf); - ppp->timer_val = PICO_PPP_DEFAULT_TIMER; - lcp_timer_start(ppp, PPP_TIMER_ON_LCPREQ); -} - -#ifdef PPP_DEBUG -static void lcp_optflags_print(struct pico_device_ppp *ppp, uint8_t *opts, uint32_t opts_len) -{ - uint8_t *p = opts; - int off; - IGNORE_PARAMETER(ppp); - ppp_dbg("Parsing options:\n"); - while(p < (opts + opts_len)) { - int i; - - ppp_dbg("-- LCP opt: %d - len: %d - data:", p[0], p[1]); - for (i = 0; i < p[1] - 2; i++) - { - ppp_dbg(" %02X", p[2 + i]); - } - ppp_dbg("\n"); - - off = p[1]; - if (!off) - break; - - p += off; - } -} -#endif - -/* setting adjust_opts will adjust our options to the ones supplied */ -static uint16_t lcp_optflags(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len, int adjust_opts) -{ - uint16_t flags = 0; - uint8_t *p = pkt + sizeof(struct pico_lcp_hdr); - int off; - while(p < (pkt + len)) { - flags = (uint16_t)((uint16_t)(1u << (uint16_t)p[0]) | flags); - - if (adjust_opts && ppp) - { - switch (p[0]) - { - case LCPOPT_MRU: - /* XXX: Can we accept any MRU ? 
*/ - ppp_dbg("Adjusting MRU to %02x%02x\n", p[2], p[3]); - ppp->mru = (uint16_t)((p[2] << 8) + p[3]); - break; - case LCPOPT_AUTH: - ppp_dbg("Setting AUTH to %02x%02x\n", p[2], p[3]); - ppp->auth = (uint16_t)((p[2] << 8) + p[3]); - break; - default: - break; - } - } - - off = p[1]; /* opt length field */ - if (!off) - break; - - p += off; - } -#ifdef PPP_DEBUG - lcp_optflags_print(ppp, pkt + sizeof(struct pico_lcp_hdr), (uint32_t)(len - sizeof(struct pico_lcp_hdr))); -#endif - return flags; -} - - -static void lcp_send_configure_ack(struct pico_device_ppp *ppp) -{ - uint8_t ack[ppp->len + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_lcp_hdr) + PPP_FCS_SIZE + 1]; - struct pico_lcp_hdr *ack_hdr = (struct pico_lcp_hdr *) (ack + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - struct pico_lcp_hdr *lcpreq = (struct pico_lcp_hdr *)ppp->pkt; - memcpy(ack + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE, ppp->pkt, ppp->len); - ack_hdr->code = PICO_CONF_ACK; - ack_hdr->id = lcpreq->id; - ack_hdr->len = lcpreq->len; - ppp_dbg("Sending LCP CONF ACK\n"); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_LCP, ack, - PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. */ - short_be(lcpreq->len) + /* Actual options size + hdr (whole lcp packet) */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1 /* STOP Byte */ - ); -} - -static void lcp_send_terminate_request(struct pico_device_ppp *ppp) -{ - uint8_t term[PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_lcp_hdr) + PPP_FCS_SIZE + 1]; - struct pico_lcp_hdr *term_hdr = (struct pico_lcp_hdr *) (term + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - term_hdr->code = PICO_CONF_TERM; - term_hdr->id = ppp->frame_id++; - term_hdr->len = short_be((uint16_t)sizeof(struct pico_lcp_hdr)); - ppp_dbg("Sending LCP TERMINATE REQUEST\n"); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_LCP, term, - PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. 
*/ - sizeof(struct pico_lcp_hdr) + /* Actual options size + hdr (whole lcp packet) */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1 /* STOP Byte */ - ); - lcp_timer_start(ppp, PPP_TIMER_ON_LCPTERM); -} - -static void lcp_send_terminate_ack(struct pico_device_ppp *ppp) -{ - uint8_t ack[ppp->len + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_lcp_hdr) + PPP_FCS_SIZE + 1]; - struct pico_lcp_hdr *ack_hdr = (struct pico_lcp_hdr *) (ack + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - struct pico_lcp_hdr *lcpreq = (struct pico_lcp_hdr *)ppp->pkt; - memcpy(ack + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE, ppp->pkt, ppp->len); - ack_hdr->code = PICO_CONF_TERM_ACK; - ack_hdr->id = lcpreq->id; - ack_hdr->len = lcpreq->len; - ppp_dbg("Sending LCP TERM ACK\n"); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_LCP, ack, - PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. */ - short_be(lcpreq->len) + /* Actual options size + hdr (whole lcp packet) */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1 /* STOP Byte */ - ); -} - -static void lcp_send_configure_nack(struct pico_device_ppp *ppp) -{ - uint8_t reject[64]; - uint8_t *p = ppp->pkt + sizeof(struct pico_lcp_hdr); - struct pico_lcp_hdr *lcpreq = (struct pico_lcp_hdr *)ppp->pkt; - struct pico_lcp_hdr *lcprej = (struct pico_lcp_hdr *)(reject + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - uint8_t *dst_opts = reject + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_lcp_hdr); - uint32_t dstopts_len = 0; - ppp_dbg("CONF_NACK: rej = %04X\n", ppp->rej); - while (p < (ppp->pkt + ppp->len)) { - uint8_t i = 0; - if ((1u << p[0]) & ppp->rej || (p[0] > 8u)) { /* Reject anything we dont support or with option id >8 */ - ppp_dbg("rejecting option %d -- ", p[0]); - dst_opts[dstopts_len++] = p[0]; - - ppp_dbg("len: %d -- ", p[1]); - dst_opts[dstopts_len++] = p[1]; - - ppp_dbg("data: "); - for(i = 0; i < p[1] - 2u; i++) { /* length includes type, length and data fields */ - dst_opts[dstopts_len++] = p[2 + i]; - ppp_dbg("%02X ", p[2 + 
i]); - } - ppp_dbg("\n"); - } - - p += p[1]; - } - lcprej->code = PICO_CONF_REJ; - lcprej->id = lcpreq->id; - lcprej->len = short_be((uint16_t)(dstopts_len + sizeof(struct pico_lcp_hdr))); - - ppp_dbg("Sending LCP CONF REJ\n"); -#ifdef PPP_DEBUG - lcp_optflags_print(ppp, dst_opts, dstopts_len); -#endif - - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_LCP, reject, - (uint32_t)(PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. */ - sizeof(struct pico_lcp_hdr) + /* LCP HDR */ - dstopts_len + /* Actual options size */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1u) /* STOP Byte */ - ); -} - -static void lcp_process_in(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) -{ - uint16_t optflags; - if (!ppp) - return; - - if (pkt[0] == PICO_CONF_REQ) { - uint16_t rejected = 0; - ppp_dbg("Received LCP CONF REQ\n"); - optflags = lcp_optflags(ppp, pkt, len, 1u); - rejected = (uint16_t)(optflags & (~ppp->lcpopt_local)); - ppp->pkt = pkt; - ppp->len = len; - ppp->rej = rejected; - if (rejected) { - evaluate_lcp_state(ppp, PPP_LCP_EVENT_RCR_NEG); - } else { - ppp->lcpopt_peer = optflags; - evaluate_lcp_state(ppp, PPP_LCP_EVENT_RCR_POS); - } - - return; - } - - if (pkt[0] == PICO_CONF_ACK) { - ppp_dbg("Received LCP CONF ACK\nOptflags: %04x\n", lcp_optflags(NULL, pkt, len, 0u)); - evaluate_lcp_state(ppp, PPP_LCP_EVENT_RCA); - return; - } - - if (pkt[0] == PICO_CONF_NAK) { - /* Every instance of the received Configuration Options is recognizable, but some values are not acceptable */ - optflags = lcp_optflags(ppp, pkt, len, 1u); /* We want our options adjusted */ - ppp_dbg("Received LCP CONF NAK - changed optflags: %04X\n", optflags); - evaluate_lcp_state(ppp, PPP_LCP_EVENT_RCN); - return; - } - - if (pkt[0] == PICO_CONF_REJ) { - /* Some Configuration Options received in a Configure-Request are not recognizable or are not acceptable for negotiation */ - optflags = lcp_optflags(ppp, pkt, len, 0u); - ppp_dbg("Received LCP CONF REJ - will disable optflags: %04X\n", 
optflags); - /* Disable the options that are not supported by the peer */ - LCPOPT_UNSET_LOCAL_MASK(ppp, optflags); - evaluate_lcp_state(ppp, PPP_LCP_EVENT_RCN); - return; - } - - if (pkt[0] == PICO_CONF_ECHO_REQ) { - ppp_dbg("Received LCP ECHO REQ\n"); - evaluate_lcp_state(ppp, PPP_LCP_EVENT_RXR); - return; - } -} - -static void pap_process_in(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) -{ - struct pico_pap_hdr *p = (struct pico_pap_hdr *)pkt; - (void)len; - if (!p) - return; - - if (ppp->auth != 0xc023) - return; - - switch(p->code) { - case PAP_AUTH_ACK: - ppp_dbg("PAP: Received Authentication OK!\n"); - evaluate_auth_state(ppp, PPP_AUTH_EVENT_RAA); - break; - case PAP_AUTH_NAK: - ppp_dbg("PAP: Received Authentication Reject!\n"); - evaluate_auth_state(ppp, PPP_AUTH_EVENT_RAN); - break; - - default: - ppp_dbg("PAP: Received invalid packet with code %d\n", p->code); - } -} - - -static void chap_process_in(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) -{ - struct pico_chap_hdr *ch = (struct pico_chap_hdr *)pkt; - - if (!pkt) - return; - - if (ppp->auth != 0xc223) - return; - - switch(ch->code) { - case CHAP_CHALLENGE: - ppp_dbg("Received CHAP CHALLENGE\n"); - ppp->pkt = pkt; - ppp->len = len; - evaluate_auth_state(ppp, PPP_AUTH_EVENT_RAC); - break; - case CHAP_SUCCESS: - ppp_dbg("Received CHAP SUCCESS\n"); - evaluate_auth_state(ppp, PPP_AUTH_EVENT_RAA); - break; - case CHAP_FAILURE: - ppp_dbg("Received CHAP FAILURE\n"); - evaluate_auth_state(ppp, PPP_AUTH_EVENT_RAN); - break; - } -} - - -static void ipcp_send_ack(struct pico_device_ppp *ppp) -{ - uint8_t ack[ppp->len + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_lcp_hdr) + PPP_FCS_SIZE + 1]; - struct pico_ipcp_hdr *ack_hdr = (struct pico_ipcp_hdr *) (ack + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - struct pico_ipcp_hdr *ipcpreq = (struct pico_ipcp_hdr *)ppp->pkt; - memcpy(ack + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE, ppp->pkt, ppp->len); - ack_hdr->code = PICO_CONF_ACK; - ack_hdr->id = 
ipcpreq->id; - ack_hdr->len = ipcpreq->len; - ppp_dbg("Sending IPCP CONF ACK\n"); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_IPCP, ack, - PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. */ - short_be(ipcpreq->len) + /* Actual options size + hdr (whole ipcp packet) */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1 /* STOP Byte */ - ); -} - -static inline uint32_t ipcp_request_options_size(struct pico_device_ppp *ppp) -{ - uint32_t size = 0; - -/* if (ppp->ipcp_ip) */ - size += IPCP_ADDR_LEN; -/* if (ppp->ipcp_dns1) */ - size += IPCP_ADDR_LEN; -/* if (ppp->ipcp_dns2) */ - size += IPCP_ADDR_LEN; - if (ppp->ipcp_nbns1) - size += IPCP_ADDR_LEN; - - if (ppp->ipcp_nbns2) - size += IPCP_ADDR_LEN; - - return size; -} - -static int ipcp_request_add_address(uint8_t *dst, uint8_t tag, uint32_t arg) -{ - uint32_t addr = long_be(arg); - dst[0] = tag; - dst[1] = IPCP_ADDR_LEN; - dst[2] = (uint8_t)((addr & 0xFF000000u) >> 24); - dst[3] = (uint8_t)((addr & 0x00FF0000u) >> 16); - dst[4] = (uint8_t)((addr & 0x0000FF00u) >> 8); - dst[5] = (addr & 0x000000FFu); - return IPCP_ADDR_LEN; -} - -static void ipcp_request_fill(struct pico_device_ppp *ppp, uint8_t *opts) -{ - if (ppp->ipcp_allowed_fields & IPCP_ALLOW_IP) - opts += ipcp_request_add_address(opts, IPCP_OPT_IP, ppp->ipcp_ip); - - if (ppp->ipcp_allowed_fields & IPCP_ALLOW_DNS1) - opts += ipcp_request_add_address(opts, IPCP_OPT_DNS1, ppp->ipcp_dns1); - - if (ppp->ipcp_allowed_fields & IPCP_ALLOW_DNS2) - opts += ipcp_request_add_address(opts, IPCP_OPT_DNS2, ppp->ipcp_dns2); - - if ((ppp->ipcp_allowed_fields & IPCP_ALLOW_NBNS1) && (ppp->ipcp_nbns1)) - opts += ipcp_request_add_address(opts, IPCP_OPT_NBNS1, ppp->ipcp_nbns1); - - if ((ppp->ipcp_allowed_fields & IPCP_ALLOW_NBNS2) && (ppp->ipcp_nbns2)) - opts += ipcp_request_add_address(opts, IPCP_OPT_NBNS2, ppp->ipcp_nbns2); -} - -static void ipcp_send_req(struct pico_device_ppp *ppp) -{ - uint8_t ipcp_req[PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_ipcp_hdr) + 
ipcp_request_options_size(ppp) + PPP_FCS_SIZE + 1]; - uint32_t prefix = PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE; - struct pico_ipcp_hdr *ih = (struct pico_ipcp_hdr *) (ipcp_req + prefix); - uint8_t *p = ipcp_req + prefix + sizeof(struct pico_ipcp_hdr); - uint16_t len = (uint16_t)(ipcp_request_options_size(ppp) + sizeof(struct pico_ipcp_hdr)); - - ih->id = ppp->frame_id++; - ih->code = PICO_CONF_REQ; - ih->len = short_be(len); - ipcp_request_fill(ppp, p); - - ppp_dbg("Sending IPCP CONF REQ, ipcp size = %d\n", len); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_IPCP, - ipcp_req, /* Start of PPP packet */ - (uint32_t)(prefix + /* PPP Header, etc. */ - (uint32_t)len + /* IPCP Header + options */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1u) /* STOP Byte */ - ); -} - -static void ipcp_reject_vj(struct pico_device_ppp *ppp, uint8_t *comp_req) -{ - uint8_t ipcp_req[PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_ipcp_hdr) + IPCP_VJ_LEN + PPP_FCS_SIZE + 1]; - uint32_t prefix = PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE; - struct pico_ipcp_hdr *ih = (struct pico_ipcp_hdr *) (ipcp_req + prefix); - uint8_t *p = ipcp_req + prefix + sizeof(struct pico_ipcp_hdr); - uint32_t i; - - ih->id = ppp->frame_id++; - ih->code = PICO_CONF_REQ; - ih->len = short_be(IPCP_VJ_LEN + sizeof(struct pico_ipcp_hdr)); - for(i = 0; i < IPCP_OPT_VJ; i++) - p[i] = comp_req[i + sizeof(struct pico_ipcp_hdr)]; - ppp_dbg("Sending IPCP CONF REJ VJ\n"); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_IPCP, - ipcp_req, /* Start of PPP packet */ - (uint32_t)(prefix + /* PPP Header, etc. 
*/ - sizeof(struct pico_ipcp_hdr) + /* LCP HDR */ - IPCP_VJ_LEN + /* Actual options size */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1u) /* STOP Byte */ - ); -} - -static void ppp_ipv4_conf(struct pico_device_ppp *ppp) -{ - struct pico_ip4 ip; - struct pico_ip4 nm; - struct pico_ip4 dns1; - struct pico_ip4 dns2; - struct pico_ip4 any = { - 0 - }; - ip.addr = ppp->ipcp_ip; - nm.addr = 0xFFFFFF00; - pico_ipv4_link_add(&ppp->dev, ip, nm); - pico_ipv4_route_add(any, any, any, 1, pico_ipv4_link_by_dev(&ppp->dev)); - - dns1.addr = ppp->ipcp_dns1; - dns2.addr = ppp->ipcp_dns2; - pico_dns_client_nameserver(&dns1, PICO_DNS_NS_ADD); - pico_dns_client_nameserver(&dns2, PICO_DNS_NS_ADD); -} - - -static void ipcp_process_in(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) -{ - struct pico_ipcp_hdr *ih = (struct pico_ipcp_hdr *)pkt; - uint8_t *p = pkt + sizeof(struct pico_ipcp_hdr); - int reject = 0; - while (p < pkt + len) { - if (p[0] == IPCP_OPT_VJ) { - reject++; - } - - if (p[0] == IPCP_OPT_IP) { - if (ih->code != PICO_CONF_REJ) - ppp->ipcp_ip = long_be((uint32_t)((p[2] << 24) + (p[3] << 16) + (p[4] << 8) + p[5])); - else { - ppp->ipcp_allowed_fields &= (~IPCP_ALLOW_IP); - ppp->ipcp_ip = 0; - } - } - - if (p[0] == IPCP_OPT_DNS1) { - if (ih->code != PICO_CONF_REJ) - ppp->ipcp_dns1 = long_be((uint32_t)((p[2] << 24) + (p[3] << 16) + (p[4] << 8) + p[5])); - else { - ppp->ipcp_allowed_fields &= (~IPCP_ALLOW_DNS1); - ppp->ipcp_dns1 = 0; - } - } - - if (p[0] == IPCP_OPT_NBNS1) { - if (ih->code != PICO_CONF_REJ) - ppp->ipcp_nbns1 = long_be((uint32_t)((p[2] << 24) + (p[3] << 16) + (p[4] << 8) + p[5])); - else { - ppp->ipcp_allowed_fields &= (~IPCP_ALLOW_NBNS1); - ppp->ipcp_nbns1 = 0; - } - } - - if (p[0] == IPCP_OPT_DNS2) { - if (ih->code != PICO_CONF_REJ) - ppp->ipcp_dns2 = long_be((uint32_t)((p[2] << 24) + (p[3] << 16) + (p[4] << 8) + p[5])); - else { - ppp->ipcp_allowed_fields &= (~IPCP_ALLOW_DNS2); - ppp->ipcp_dns2 = 0; - } - } - - if (p[0] == IPCP_OPT_NBNS2) 
{ - if (ih->code != PICO_CONF_REJ) - ppp->ipcp_nbns2 = long_be((uint32_t)((p[2] << 24) + (p[3] << 16) + (p[4] << 8) + p[5])); - else { - ppp->ipcp_allowed_fields &= (~IPCP_ALLOW_NBNS2); - ppp->ipcp_nbns2 = 0; - } - } - - p += p[1]; - } - if (reject) { - ipcp_reject_vj(ppp, p); - return; - } - - ppp->pkt = pkt; - ppp->len = len; - - switch(ih->code) { - case PICO_CONF_ACK: - ppp_dbg("Received IPCP CONF ACK\n"); - evaluate_ipcp_state(ppp, PPP_IPCP_EVENT_RCA); - break; - case PICO_CONF_REQ: - ppp_dbg("Received IPCP CONF REQ\n"); - evaluate_ipcp_state(ppp, PPP_IPCP_EVENT_RCR_POS); - break; - case PICO_CONF_NAK: - ppp_dbg("Received IPCP CONF NAK\n"); - evaluate_ipcp_state(ppp, PPP_IPCP_EVENT_RCN); - break; - case PICO_CONF_REJ: - ppp_dbg("Received IPCP CONF REJ\n"); - - evaluate_ipcp_state(ppp, PPP_IPCP_EVENT_RCN); - break; - } -} - -static void ipcp6_process_in(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) -{ - IGNORE_PARAMETER(ppp); - IGNORE_PARAMETER(pkt); - IGNORE_PARAMETER(len); -} - -static void ppp_process_packet_payload(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) -{ - if (pkt[0] == 0xc0) { - /* Link control packet */ - if (pkt[1] == 0x21) { - /* LCP */ - lcp_process_in(ppp, pkt + 2, len - 2); - } - - if (pkt[1] == 0x23) { - /* PAP */ - pap_process_in(ppp, pkt + 2, len - 2); - } - - return; - } - - if ((pkt[0] == 0xc2) && (pkt[1] == 0x23)) { - /* CHAP */ - chap_process_in(ppp, pkt + 2, len - 2); - return; - } - - if (pkt[0] == 0x80) { - /* IP assignment (IPCP/IPCP6) */ - if (pkt[1] == 0x21) { - /* IPCP */ - ipcp_process_in(ppp, pkt + 2, len - 2); - } - - if (pkt[1] == 0x57) { - /* IPCP6 */ - ipcp6_process_in(ppp, pkt + 2, len - 2); - } - - return; - } - - if (pkt[0] == 0x00) { - /* Uncompressed protocol: leading zero. 
*/ - pkt++; - len--; - } - - if ((pkt[0] == 0x21) || (pkt[0] == 0x57)) { - /* IPv4 /v6 Data */ - pico_stack_recv(&ppp->dev, pkt + 1, len - 1); - return; - } - - ppp_dbg("PPP: Unrecognized protocol %02x%02x\n", pkt[0], pkt[1]); -} - -static void ppp_process_packet(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) -{ - /* Verify incoming FCS */ - if (ppp_fcs_verify(pkt, len) != 0) - return; - - /* Remove trailing FCS */ - len -= 2; - - /* Remove ADDR/CTRL, then process */ - if ((pkt[0] == PPPF_ADDR) && (pkt[1] == PPPF_CTRL)) { - pkt += 2; - len -= 2; - } - - ppp_process_packet_payload(ppp, pkt, len); - -} - -static void ppp_recv_data(struct pico_device_ppp *ppp, void *data, uint32_t len) -{ - uint8_t *pkt = (uint8_t *)data; - -#ifdef PPP_DEBUG - uint32_t idx; - if (len > 0) { - ppp_dbg("PPP <<<<< "); - for(idx = 0; idx < len; idx++) { - ppp_dbg(" %02x", ((uint8_t *)data)[idx]); - } - ppp_dbg("\n"); - } - -#endif - - ppp_process_packet(ppp, pkt, len); -} - -static void lcp_this_layer_up(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: LCP up.\n"); - - switch (ppp->auth) { - case 0x0000: - evaluate_auth_state(ppp, PPP_AUTH_EVENT_UP_NONE); - break; - case 0xc023: - evaluate_auth_state(ppp, PPP_AUTH_EVENT_UP_PAP); - break; - case 0xc223: - evaluate_auth_state(ppp, PPP_AUTH_EVENT_UP_CHAP); - break; - default: - ppp_dbg("PPP: Unknown authentication protocol.\n"); - break; - } -} - -static void lcp_this_layer_down(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: LCP down.\n"); - evaluate_auth_state(ppp, PPP_AUTH_EVENT_DOWN); -} - -static void lcp_this_layer_started(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: LCP started.\n"); - evaluate_modem_state(ppp, PPP_MODEM_EVENT_START); -} - -static void lcp_this_layer_finished(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: LCP finished.\n"); - evaluate_modem_state(ppp, PPP_MODEM_EVENT_STOP); -} - -static void lcp_initialize_restart_count(struct pico_device_ppp *ppp) -{ - lcp_timer_start(ppp, PPP_TIMER_ON_LCPREQ); -} - 
-static void lcp_send_code_reject(struct pico_device_ppp *ppp) -{ - IGNORE_PARAMETER(ppp); -} - -static void lcp_send_echo_reply(struct pico_device_ppp *ppp) -{ - uint8_t reply[ppp->len + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_lcp_hdr) + PPP_FCS_SIZE + 1]; - struct pico_lcp_hdr *reply_hdr = (struct pico_lcp_hdr *) (reply + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - struct pico_lcp_hdr *lcpreq = (struct pico_lcp_hdr *)ppp->pkt; - memcpy(reply + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE, ppp->pkt, ppp->len); - reply_hdr->code = PICO_CONF_ECHO_REP; - reply_hdr->id = lcpreq->id; - reply_hdr->len = lcpreq->len; - ppp_dbg("Sending LCP ECHO REPLY\n"); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_LCP, reply, - PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. */ - short_be(lcpreq->len) + /* Actual options size + hdr (whole lcp packet) */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1 /* STOP Byte */ - ); -} - -static const struct pico_ppp_fsm ppp_lcp_fsm[PPP_LCP_STATE_MAX][PPP_LCP_EVENT_MAX] = { - [PPP_LCP_STATE_INITIAL] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_CLOSED, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_STARTING, { lcp_this_layer_started } }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_INITIAL, {} } - }, - 
[PPP_LCP_STATE_STARTING] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_request } }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_INITIAL, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_STARTING, {} } - }, - [PPP_LCP_STATE_CLOSED] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_CLOSED, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_request} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSED, {} }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_CLOSED, {} }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_CLOSED, {} }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_CLOSED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_CLOSED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_CLOSED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_CLOSED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_CLOSED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_CLOSED, {} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_CLOSED, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_CLOSED, {} }, - 
[PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_CLOSED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_CLOSED, {} } - }, - [PPP_LCP_STATE_STOPPED] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_STOPPED, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_STARTING, { lcp_this_layer_started } }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_STOPPED, {}}, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSED, {}}, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_STOPPED, {} }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_STOPPED, {} }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_ACK_SENT, - { lcp_send_configure_request, lcp_send_configure_ack}}, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_REQ_SENT, - { lcp_send_configure_request, lcp_send_configure_nack}}, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_STOPPED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_STOPPED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_STOPPED, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_STOPPED, {} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_STOPPED, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_STOPPED, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_STOPPED, {} } - }, - [PPP_LCP_STATE_CLOSING] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_INITIAL, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_CLOSING, { lcp_send_terminate_request } }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_CLOSED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_RTR] = 
{ PPP_LCP_STATE_CLOSING, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_CLOSED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_CLOSING, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_CLOSED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_CLOSING, {} } - }, - [PPP_LCP_STATE_STOPPING] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSING, {} }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_STOPPING, { lcp_send_terminate_request } }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_STOPPING, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_STOPPING, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_STOPPING, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_STOPPING, {} } - }, - [PPP_LCP_STATE_REQ_SENT] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_REQ_SENT, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_REQ_SENT, {} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSING, { lcp_send_terminate_request } }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_request } }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RCR_POS] = { 
PPP_LCP_STATE_ACK_SENT, { lcp_send_configure_ack } }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_nack } }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_ACK_RCVD, { lcp_initialize_restart_count } }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_request} }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_REQ_SENT, {} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_REQ_SENT, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_REQ_SENT, {} } - }, - [PPP_LCP_STATE_ACK_RCVD] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_ACK_RCVD, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_ACK_RCVD, {} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSING, { lcp_send_terminate_request} }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_request } }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_OPENED, { lcp_send_configure_ack, lcp_this_layer_up} }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_ACK_RCVD, { lcp_send_configure_nack } }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_request } }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_request } }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_REQ_SENT, {} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_ACK_RCVD, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_REQ_SENT, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_ACK_RCVD, {} } - }, - [PPP_LCP_STATE_ACK_SENT] = { - 
[PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_ACK_SENT, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_STARTING, {} }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_ACK_SENT, {} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSING, { lcp_send_terminate_request} }, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_ACK_SENT, { lcp_send_configure_request } }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_ACK_SENT, { lcp_send_configure_ack } }, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_configure_nack } }, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_OPENED, { lcp_this_layer_up} }, - [PPP_LCP_EVENT_RCN] = { PPP_LCP_STATE_ACK_SENT, { lcp_send_configure_request} }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_REQ_SENT, { lcp_send_terminate_ack } }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_ACK_SENT, {} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_ACK_SENT, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_ACK_SENT, {} }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_STOPPED, { lcp_this_layer_finished } }, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_ACK_SENT, {} } - }, - [PPP_LCP_STATE_OPENED] = { - [PPP_LCP_EVENT_UP] = { PPP_LCP_STATE_OPENED, {} }, - [PPP_LCP_EVENT_DOWN] = { PPP_LCP_STATE_STARTING, {lcp_this_layer_down } }, - [PPP_LCP_EVENT_OPEN] = { PPP_LCP_STATE_OPENED, {} }, - [PPP_LCP_EVENT_CLOSE] = { PPP_LCP_STATE_CLOSING, - { lcp_this_layer_down, lcp_send_terminate_request }}, - [PPP_LCP_EVENT_TO_POS] = { PPP_LCP_STATE_OPENED, {} }, - [PPP_LCP_EVENT_TO_NEG] = { PPP_LCP_STATE_OPENED, {} }, - [PPP_LCP_EVENT_RCR_POS] = { PPP_LCP_STATE_ACK_SENT, - { lcp_this_layer_down, lcp_send_terminate_request, lcp_send_configure_ack }}, - [PPP_LCP_EVENT_RCR_NEG] = { PPP_LCP_STATE_REQ_SENT, - { lcp_this_layer_down, lcp_send_configure_request, lcp_send_configure_nack }}, - [PPP_LCP_EVENT_RCA] = { PPP_LCP_STATE_REQ_SENT, { lcp_this_layer_down, lcp_send_terminate_request } }, - [PPP_LCP_EVENT_RCN] = { 
PPP_LCP_STATE_REQ_SENT, { lcp_this_layer_down, lcp_send_terminate_request } }, - [PPP_LCP_EVENT_RTR] = { PPP_LCP_STATE_STOPPING, { lcp_this_layer_down, lcp_zero_restart_count, lcp_send_terminate_ack} }, - [PPP_LCP_EVENT_RTA] = { PPP_LCP_STATE_REQ_SENT, { lcp_this_layer_down, lcp_send_terminate_request} }, - [PPP_LCP_EVENT_RUC] = { PPP_LCP_STATE_OPENED, { lcp_send_code_reject } }, - [PPP_LCP_EVENT_RXJ_POS] = { PPP_LCP_STATE_OPENED, { } }, - [PPP_LCP_EVENT_RXJ_NEG] = { PPP_LCP_STATE_STOPPING, - {lcp_this_layer_down, lcp_send_terminate_request}}, - [PPP_LCP_EVENT_RXR] = { PPP_LCP_STATE_OPENED, { lcp_send_echo_reply} } - } -}; - -static void evaluate_lcp_state(struct pico_device_ppp *ppp, enum ppp_lcp_event event) -{ - const struct pico_ppp_fsm *fsm, *next_fsm_to; - int i; - if (!ppp) - return; - - if (mock_lcp_state) { - mock_lcp_state(ppp, event); - return; - } - - fsm = &ppp_lcp_fsm[ppp->lcp_state][event]; - ppp->lcp_state = (enum ppp_lcp_state)fsm->next_state; - /* RFC1661: The states in which the Restart timer is running are identifiable by - * the presence of TO events. - */ - next_fsm_to = &ppp_lcp_fsm[ppp->lcp_state][PPP_LCP_EVENT_TO_POS]; - if (!next_fsm_to->event_handler[0]) { - /* The Restart timer is stopped when transitioning - * from any state where the timer is running to a state where the timer - * is not running. 
- */ - lcp_timer_stop(ppp, PPP_TIMER_ON_LCPREQ); - lcp_timer_stop(ppp, PPP_TIMER_ON_LCPTERM); - } - - for (i = 0; i < PPP_FSM_MAX_ACTIONS; i++) { - if (fsm->event_handler[i]) - fsm->event_handler[i](ppp); - } -} - -static void auth(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: Authenticated.\n"); - ppp->ipcp_allowed_fields = 0xFFFF; - evaluate_ipcp_state(ppp, PPP_IPCP_EVENT_UP); -} - -static void deauth(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: De-authenticated.\n"); - evaluate_ipcp_state(ppp, PPP_IPCP_EVENT_DOWN); -} - -static void auth_abort(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: Authentication failed!\n"); - ppp->timer_on = (uint8_t) (ppp->timer_on & (~PPP_TIMER_ON_AUTH)); - evaluate_lcp_state(ppp, PPP_LCP_EVENT_CLOSE); - -} - -static void auth_req(struct pico_device_ppp *ppp) -{ - uint16_t ppp_usr_len = 0; - uint16_t ppp_pwd_len = 0; - uint8_t *req = NULL, *p; - struct pico_pap_hdr *hdr; - uint16_t pap_len = 0; - uint8_t field_len = 0; - ppp_usr_len = (uint16_t)strlen(ppp->username); - ppp_pwd_len = (uint16_t)strlen(ppp->password); - - pap_len = (uint16_t)(sizeof(struct pico_pap_hdr) + 1u + 1u + ppp_usr_len + ppp_pwd_len); - - req = PICO_ZALLOC(PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + pap_len + PPP_FCS_SIZE + 1); - if (!req) - return; - - hdr = (struct pico_pap_hdr *) (req + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - - hdr->code = PAP_AUTH_REQ; - hdr->id = ppp->frame_id++; - hdr->len = short_be(pap_len); - - p = req + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_pap_hdr); - - /* Populate authentication domain */ - field_len = (uint8_t)(ppp_usr_len & 0xFF); - *p = field_len; - ++p; - if (ppp_usr_len > 0) { - memcpy(p, ppp->username, ppp_usr_len); - p += ppp_usr_len; - } - - /* Populate authentication password */ - field_len = (uint8_t)(ppp_pwd_len & 0xFF); - *p = field_len; - ++p; - if (ppp_pwd_len > 0) { - memcpy(p, ppp->password, ppp_pwd_len); - p += ppp_pwd_len; - } - - ppp_dbg("PAP: Sending authentication request.\n"); - 
pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_PAP, - req, /* Start of PPP packet */ - (uint32_t)( - PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. */ - pap_len + /* Authentication packet len */ - PPP_FCS_SIZE + /* FCS */ - 1) /* STOP Byte */ - ); - PICO_FREE(req); -} - -static void auth_rsp(struct pico_device_ppp *ppp) -{ - struct pico_chap_hdr *ch = (struct pico_chap_hdr *)ppp->pkt; - uint8_t resp[PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_chap_hdr) + CHAP_MD5_SIZE + PPP_FCS_SIZE + 2]; - struct pico_chap_hdr *rh = (struct pico_chap_hdr *) (resp + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE); - uint8_t *md5resp = resp + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_chap_hdr) + 1; - uint8_t *md5resp_len = resp + PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + sizeof(struct pico_chap_hdr); - uint8_t *challenge; - uint32_t i = 0, pwdlen; - uint8_t *recvd_challenge_len = ppp->pkt + sizeof(struct pico_chap_hdr); - uint8_t *recvd_challenge = recvd_challenge_len + 1; - size_t challenge_size = CHALLENGE_SIZE(ppp, ch); - - challenge = PICO_ZALLOC(challenge_size); - - if (!challenge) - return; - - - pwdlen = (uint32_t)strlen(ppp->password); - challenge[i++] = ch->id; - memcpy(challenge + i, ppp->password, pwdlen); - i += pwdlen; - memcpy(challenge + i, recvd_challenge, *recvd_challenge_len); - i += *recvd_challenge_len; - pico_md5sum(md5resp, challenge, i); - PICO_FREE(challenge); - rh->id = ch->id; - rh->code = CHAP_RESPONSE; - rh->len = short_be(CHAP_MD5_SIZE + sizeof(struct pico_chap_hdr) + 1); - *md5resp_len = CHAP_MD5_SIZE; - ppp_dbg("Sending CHAP RESPONSE, \n"); - pico_ppp_ctl_send(&ppp->dev, PPP_PROTO_CHAP, - resp, /* Start of PPP packet */ - (uint32_t)( - PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE + /* PPP Header, etc. 
*/ - sizeof(struct pico_chap_hdr) + /* CHAP HDR */ - 1 + /* Value length */ - CHAP_MD5_SIZE + /* Actual payload size */ - PPP_FCS_SIZE + /* FCS at the end of the frame */ - 1) /* STOP Byte */ - ); -} - -static void auth_start_timer(struct pico_device_ppp *ppp) -{ - ppp->timer_on = ppp->timer_on | PPP_TIMER_ON_AUTH; - ppp->timer_val = PICO_PPP_DEFAULT_TIMER; -} - -static const struct pico_ppp_fsm ppp_auth_fsm[PPP_AUTH_STATE_MAX][PPP_AUTH_EVENT_MAX] = { - [PPP_AUTH_STATE_INITIAL] = { - [PPP_AUTH_EVENT_UP_NONE] = { PPP_AUTH_STATE_AUTHENTICATED, {auth} }, - [PPP_AUTH_EVENT_UP_PAP] = { PPP_AUTH_STATE_REQ_SENT, {auth_req, auth_start_timer} }, - [PPP_AUTH_EVENT_UP_CHAP] = { PPP_AUTH_STATE_STARTING, {} }, - [PPP_AUTH_EVENT_DOWN] = { PPP_AUTH_STATE_INITIAL, {} }, - [PPP_AUTH_EVENT_RAC] = { PPP_AUTH_STATE_INITIAL, {} }, - [PPP_AUTH_EVENT_RAA] = { PPP_AUTH_STATE_INITIAL, {} }, - [PPP_AUTH_EVENT_RAN] = { PPP_AUTH_STATE_INITIAL, {auth_abort} }, - [PPP_AUTH_EVENT_TO] = { PPP_AUTH_STATE_INITIAL, {} } - }, - [PPP_AUTH_STATE_STARTING] = { - [PPP_AUTH_EVENT_UP_NONE] = { PPP_AUTH_STATE_STARTING, {} }, - [PPP_AUTH_EVENT_UP_PAP] = { PPP_AUTH_STATE_STARTING, {} }, - [PPP_AUTH_EVENT_UP_CHAP] = { PPP_AUTH_STATE_STARTING, {} }, - [PPP_AUTH_EVENT_DOWN] = { PPP_AUTH_STATE_INITIAL, {deauth} }, - [PPP_AUTH_EVENT_RAC] = { PPP_AUTH_STATE_RSP_SENT, {auth_rsp, auth_start_timer} }, - [PPP_AUTH_EVENT_RAA] = { PPP_AUTH_STATE_STARTING, {auth_start_timer} }, - [PPP_AUTH_EVENT_RAN] = { PPP_AUTH_STATE_STARTING, {auth_abort} }, - [PPP_AUTH_EVENT_TO] = { PPP_AUTH_STATE_INITIAL, {auth_req, auth_start_timer} } - }, - [PPP_AUTH_STATE_RSP_SENT] = { - [PPP_AUTH_EVENT_UP_NONE] = { PPP_AUTH_STATE_RSP_SENT, {} }, - [PPP_AUTH_EVENT_UP_PAP] = { PPP_AUTH_STATE_RSP_SENT, {} }, - [PPP_AUTH_EVENT_UP_CHAP] = { PPP_AUTH_STATE_RSP_SENT, {} }, - [PPP_AUTH_EVENT_DOWN] = { PPP_AUTH_STATE_INITIAL, {deauth} }, - [PPP_AUTH_EVENT_RAC] = { PPP_AUTH_STATE_RSP_SENT, {auth_rsp, auth_start_timer} }, - [PPP_AUTH_EVENT_RAA] = { 
PPP_AUTH_STATE_AUTHENTICATED, {auth} }, - [PPP_AUTH_EVENT_RAN] = { PPP_AUTH_STATE_STARTING, {auth_abort} }, - [PPP_AUTH_EVENT_TO] = { PPP_AUTH_STATE_STARTING, {auth_start_timer} } - }, - [PPP_AUTH_STATE_REQ_SENT] = { - [PPP_AUTH_EVENT_UP_NONE] = { PPP_AUTH_STATE_REQ_SENT, {} }, - [PPP_AUTH_EVENT_UP_PAP] = { PPP_AUTH_STATE_REQ_SENT, {} }, - [PPP_AUTH_EVENT_UP_CHAP] = { PPP_AUTH_STATE_REQ_SENT, {} }, - [PPP_AUTH_EVENT_DOWN] = { PPP_AUTH_STATE_INITIAL, {deauth} }, - [PPP_AUTH_EVENT_RAC] = { PPP_AUTH_STATE_REQ_SENT, {} }, - [PPP_AUTH_EVENT_RAA] = { PPP_AUTH_STATE_AUTHENTICATED, {auth} }, - [PPP_AUTH_EVENT_RAN] = { PPP_AUTH_STATE_REQ_SENT, {auth_abort} }, - [PPP_AUTH_EVENT_TO] = { PPP_AUTH_STATE_REQ_SENT, {auth_req, auth_start_timer} } - }, - [PPP_AUTH_STATE_AUTHENTICATED] = { - [PPP_AUTH_EVENT_UP_NONE] = { PPP_AUTH_STATE_AUTHENTICATED, {} }, - [PPP_AUTH_EVENT_UP_PAP] = { PPP_AUTH_STATE_AUTHENTICATED, {} }, - [PPP_AUTH_EVENT_UP_CHAP] = { PPP_AUTH_STATE_AUTHENTICATED, {} }, - [PPP_AUTH_EVENT_DOWN] = { PPP_AUTH_STATE_INITIAL, {deauth} }, - [PPP_AUTH_EVENT_RAC] = { PPP_AUTH_STATE_RSP_SENT, {auth_rsp} }, - [PPP_AUTH_EVENT_RAA] = { PPP_AUTH_STATE_AUTHENTICATED, {} }, - [PPP_AUTH_EVENT_RAN] = { PPP_AUTH_STATE_AUTHENTICATED, {} }, - [PPP_AUTH_EVENT_TO] = { PPP_AUTH_STATE_AUTHENTICATED, {} }, - } -}; - -static void evaluate_auth_state(struct pico_device_ppp *ppp, enum ppp_auth_event event) -{ - const struct pico_ppp_fsm *fsm; - int i; - if (mock_auth_state) { - mock_auth_state(ppp, event); - return; - } - - fsm = &ppp_auth_fsm[ppp->auth_state][event]; - - ppp->auth_state = (enum ppp_auth_state)fsm->next_state; - for (i = 0; i < PPP_FSM_MAX_ACTIONS; i++) { - if (fsm->event_handler[i]) - fsm->event_handler[i](ppp); - } -} - -static void ipcp_send_nack(struct pico_device_ppp *ppp) -{ - IGNORE_PARAMETER(ppp); -} - -static void ipcp_bring_up(struct pico_device_ppp *ppp) -{ - ppp_dbg("PPP: IPCP up.\n"); - - if (ppp->ipcp_ip) { - char my_ip[16], my_dns[16]; - 
pico_ipv4_to_string(my_ip, ppp->ipcp_ip); - ppp_dbg("Received IP config %s\n", my_ip); - pico_ipv4_to_string(my_dns, ppp->ipcp_dns1); - ppp_dbg("Received DNS: %s\n", my_dns); - ppp_ipv4_conf(ppp); - } -} - -static void ipcp_bring_down(struct pico_device_ppp *ppp) -{ - IGNORE_PARAMETER(ppp); - - ppp_dbg("PPP: IPCP down.\n"); -} - -static void ipcp_start_timer(struct pico_device_ppp *ppp) -{ - ppp->timer_on = ppp->timer_on | PPP_TIMER_ON_IPCP; - ppp->timer_val = PICO_PPP_DEFAULT_TIMER * PICO_PPP_DEFAULT_MAX_FAILURE; -} - -static const struct pico_ppp_fsm ppp_ipcp_fsm[PPP_IPCP_STATE_MAX][PPP_IPCP_EVENT_MAX] = { - [PPP_IPCP_STATE_INITIAL] = { - [PPP_IPCP_EVENT_UP] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_req, ipcp_start_timer} }, - [PPP_IPCP_EVENT_DOWN] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_RCR_POS] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_RCR_NEG] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_RCA] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_RCN] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_TO] = { PPP_IPCP_STATE_INITIAL, {} } - }, - [PPP_IPCP_STATE_REQ_SENT] = { - [PPP_IPCP_EVENT_UP] = { PPP_IPCP_STATE_REQ_SENT, {} }, - [PPP_IPCP_EVENT_DOWN] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_RCR_POS] = { PPP_IPCP_STATE_ACK_SENT, {ipcp_send_ack} }, - [PPP_IPCP_EVENT_RCR_NEG] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_nack} }, - [PPP_IPCP_EVENT_RCA] = { PPP_IPCP_STATE_ACK_RCVD, {} }, - [PPP_IPCP_EVENT_RCN] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_req, ipcp_start_timer} }, - [PPP_IPCP_EVENT_TO] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_req, ipcp_start_timer} } - }, - [PPP_IPCP_STATE_ACK_RCVD] = { - [PPP_IPCP_EVENT_UP] = { PPP_IPCP_STATE_ACK_RCVD, {} }, - [PPP_IPCP_EVENT_DOWN] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_RCR_POS] = { PPP_IPCP_STATE_OPENED, {ipcp_send_ack, ipcp_bring_up} }, - [PPP_IPCP_EVENT_RCR_NEG] = { PPP_IPCP_STATE_ACK_RCVD, {ipcp_send_nack} }, - [PPP_IPCP_EVENT_RCA] = { PPP_IPCP_STATE_REQ_SENT, 
{ipcp_send_req, ipcp_start_timer} }, - [PPP_IPCP_EVENT_RCN] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_req, ipcp_start_timer} }, - [PPP_IPCP_EVENT_TO] = { PPP_IPCP_STATE_ACK_RCVD, {ipcp_send_req, ipcp_start_timer} } - }, - [PPP_IPCP_STATE_ACK_SENT] = { - [PPP_IPCP_EVENT_UP] = { PPP_IPCP_STATE_ACK_SENT, {} }, - [PPP_IPCP_EVENT_DOWN] = { PPP_IPCP_STATE_INITIAL, {} }, - [PPP_IPCP_EVENT_RCR_POS] = { PPP_IPCP_STATE_ACK_SENT, {ipcp_send_ack} }, - [PPP_IPCP_EVENT_RCR_NEG] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_nack} }, - [PPP_IPCP_EVENT_RCA] = { PPP_IPCP_STATE_OPENED, {ipcp_bring_up} }, - [PPP_IPCP_EVENT_RCN] = { PPP_IPCP_STATE_ACK_SENT, {ipcp_send_req, ipcp_start_timer} }, - [PPP_IPCP_EVENT_TO] = { PPP_IPCP_STATE_ACK_SENT, {ipcp_send_req, ipcp_start_timer} } - }, - [PPP_IPCP_STATE_OPENED] = { - [PPP_IPCP_EVENT_UP] = { PPP_IPCP_STATE_OPENED, {} }, - [PPP_IPCP_EVENT_DOWN] = { PPP_IPCP_STATE_INITIAL, {ipcp_bring_down} }, - [PPP_IPCP_EVENT_RCR_POS] = { PPP_IPCP_STATE_ACK_SENT, {ipcp_bring_down, ipcp_send_req, ipcp_send_ack} }, - [PPP_IPCP_EVENT_RCR_NEG] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_bring_down, ipcp_send_req, ipcp_send_nack} }, - [PPP_IPCP_EVENT_RCA] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_req} }, - [PPP_IPCP_EVENT_RCN] = { PPP_IPCP_STATE_REQ_SENT, {ipcp_send_req} }, - [PPP_IPCP_EVENT_TO] = { PPP_IPCP_STATE_OPENED, {} } - } -}; - -static void evaluate_ipcp_state(struct pico_device_ppp *ppp, enum ppp_ipcp_event event) -{ - const struct pico_ppp_fsm *fsm; - int i; - if (mock_ipcp_state) { - mock_ipcp_state(ppp, event); - return; - } - - fsm = &ppp_ipcp_fsm[ppp->ipcp_state][event]; - - ppp->ipcp_state = (enum ppp_ipcp_state)fsm->next_state; - for (i = 0; i < PPP_FSM_MAX_ACTIONS; i++) { - if (fsm->event_handler[i]) - fsm->event_handler[i](ppp); - } -} - -static int pico_ppp_poll(struct pico_device *dev, int loop_score) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *) dev; - static uint32_t len = 0; - int r; - if (ppp->serial_recv) { - do { - r = 
ppp->serial_recv(&ppp->dev, &ppp_recv_buf[len], 1); - if (r <= 0) - break; - - if (ppp->modem_state == PPP_MODEM_STATE_CONNECTED) { - static int control_escape = 0; - - if (ppp_recv_buf[len] == PPPF_FLAG_SEQ) { - if (control_escape) { - /* Illegal sequence, discard frame */ - ppp_dbg("Illegal sequence, ppp_recv_buf[%d] = %d\n", len, ppp_recv_buf[len]); - control_escape = 0; - len = 0; - } - - if (len > 1) { - ppp_recv_data(ppp, ppp_recv_buf, len); - loop_score--; - len = 0; - } - } else if (control_escape) { - ppp_recv_buf[len] ^= 0x20; - control_escape = 0; - len++; - } else if (ppp_recv_buf[len] == PPPF_CTRL_ESC) { - control_escape = 1; - } else { - len++; - } - } else { - static int s3 = 0; - - if (ppp_recv_buf[len] == AT_S3) { - s3 = 1; - if (len > 0) { - ppp_recv_buf[len] = '\0'; - ppp_modem_recv(ppp, ppp_recv_buf, len); - len = 0; - } - } else if (ppp_recv_buf[len] == AT_S4) { - if (!s3) { - len++; - } - - s3 = 0; - } else { - s3 = 0; - len++; - } - } - } while ((r > 0) && (len < ARRAY_SIZE(ppp_recv_buf)) && (loop_score > 0)); - } - - return loop_score; -} - -/* Public interface: create/destroy. */ - -static int pico_ppp_link_state(struct pico_device *dev) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - if (ppp->ipcp_state == PPP_IPCP_STATE_OPENED) - return 1; - - return 0; -} - -void pico_ppp_destroy(struct pico_device *ppp) -{ - if (!ppp) - return; - - /* Perform custom cleanup here before calling 'pico_device_destroy' - * or register a custom cleanup function during initialization - * by setting 'ppp->dev.destroy'. 
*/ - - pico_device_destroy(ppp); -} - -static void check_to_modem(struct pico_device_ppp *ppp) -{ - if (ppp->timer_on & PPP_TIMER_ON_MODEM) { - if (ppp->timer_val == 0) { - ppp->timer_on = (uint8_t) (ppp->timer_on & (~PPP_TIMER_ON_MODEM)); - evaluate_modem_state(ppp, PPP_MODEM_EVENT_TIMEOUT); - } - } -} - -static void check_to_lcp(struct pico_device_ppp *ppp) -{ - if (ppp->timer_on & (PPP_TIMER_ON_LCPREQ | PPP_TIMER_ON_LCPTERM)) { - if (ppp->timer_val == 0) { - if (ppp->timer_count == 0) - evaluate_lcp_state(ppp, PPP_LCP_EVENT_TO_NEG); - else{ - evaluate_lcp_state(ppp, PPP_LCP_EVENT_TO_POS); - ppp->timer_count--; - } - } - } -} - -static void check_to_auth(struct pico_device_ppp *ppp) -{ - if (ppp->timer_on & PPP_TIMER_ON_AUTH) { - if (ppp->timer_val == 0) { - ppp->timer_on = (uint8_t) (ppp->timer_on & (~PPP_TIMER_ON_AUTH)); - evaluate_auth_state(ppp, PPP_AUTH_EVENT_TO); - } - } -} - -static void check_to_ipcp(struct pico_device_ppp *ppp) -{ - if (ppp->timer_on & PPP_TIMER_ON_IPCP) { - if (ppp->timer_val == 0) { - ppp->timer_on = (uint8_t) (ppp->timer_on & (~PPP_TIMER_ON_IPCP)); - evaluate_ipcp_state(ppp, PPP_IPCP_EVENT_TO); - } - } -} - -static void pico_ppp_tick(pico_time t, void *arg) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *) arg; - (void)t; - if (ppp->timer_val > 0) - ppp->timer_val--; - - check_to_modem(ppp); - check_to_lcp(ppp); - check_to_auth(ppp); - check_to_ipcp(ppp); - - if (ppp->autoreconnect && ppp->lcp_state == PPP_LCP_STATE_INITIAL) { - ppp_dbg("(Re)connecting...\n"); - evaluate_lcp_state(ppp, PPP_LCP_EVENT_OPEN); - } - - if (!pico_timer_add(1000, pico_ppp_tick, arg)) { - ppp_dbg("PPP: Failed to start tick timer\n"); - /* TODO No more PPP ticks now */ - } -} - -struct pico_device *pico_ppp_create(void) -{ - struct pico_device_ppp *ppp = PICO_ZALLOC(sizeof(struct pico_device_ppp)); - char devname[MAX_DEVICE_NAME]; - - if (!ppp) - return NULL; - - snprintf(devname, MAX_DEVICE_NAME, "ppp%d", ppp_devnum++); - - if( 0 != 
pico_device_init((struct pico_device *)ppp, devname, NULL)) { - return NULL; - } - - ppp->dev.overhead = PPP_HDR_SIZE; - ppp->dev.mtu = PICO_PPP_MTU; - ppp->dev.send = pico_ppp_send; - ppp->dev.poll = pico_ppp_poll; - ppp->dev.link_state = pico_ppp_link_state; - ppp->frame_id = (uint8_t)(pico_rand() % 0xFF); - - ppp->modem_state = PPP_MODEM_STATE_INITIAL; - ppp->lcp_state = PPP_LCP_STATE_INITIAL; - ppp->auth_state = PPP_AUTH_STATE_INITIAL; - ppp->ipcp_state = PPP_IPCP_STATE_INITIAL; - - ppp->timer = pico_timer_add(1000, pico_ppp_tick, ppp); - if (!ppp->timer) { - ppp_dbg("PPP: Failed to start tick timer\n"); - pico_device_destroy((struct pico_device*) ppp); - return NULL; - } - ppp->mru = PICO_PPP_MRU; - - LCPOPT_SET_LOCAL(ppp, LCPOPT_MRU); - LCPOPT_SET_LOCAL(ppp, LCPOPT_AUTH); /* We support authentication, even if it's not part of the req */ - LCPOPT_SET_LOCAL(ppp, LCPOPT_PROTO_COMP); - LCPOPT_SET_LOCAL(ppp, LCPOPT_ADDRCTL_COMP); - - - ppp_dbg("Device %s created.\n", ppp->dev.name); - return (struct pico_device *)ppp; -} - -int pico_ppp_connect(struct pico_device *dev) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - ppp->autoreconnect = 1; - return 0; -} - -int pico_ppp_disconnect(struct pico_device *dev) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - ppp->autoreconnect = 0; - evaluate_lcp_state(ppp, PPP_LCP_EVENT_CLOSE); - - pico_ipv4_cleanup_links(dev); - - return 0; -} - -int pico_ppp_set_serial_read(struct pico_device *dev, int (*sread)(struct pico_device *, void *, int)) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - - if (!dev) - return -1; - - ppp->serial_recv = sread; - return 0; -} - -int pico_ppp_set_serial_write(struct pico_device *dev, int (*swrite)(struct pico_device *, const void *, int)) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - - if (!dev) - return -1; - - ppp->serial_send = swrite; - return 0; -} - -int pico_ppp_set_serial_set_speed(struct pico_device *dev, int 
(*sspeed)(struct pico_device *, uint32_t)) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - - if (!dev) - return -1; - - ppp->serial_set_speed = sspeed; - return 0; -} - -int pico_ppp_set_apn(struct pico_device *dev, const char *apn) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - - if (!dev) - return -1; - - if (!apn) - return -1; - - strncpy(ppp->apn, apn, sizeof(ppp->apn) - 1); - return 0; -} - -int pico_ppp_set_username(struct pico_device *dev, const char *username) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - - if (!dev) - return -1; - - if (!username) - return -1; - - strncpy(ppp->username, username, sizeof(ppp->username) - 1); - return 0; -} - -int pico_ppp_set_password(struct pico_device *dev, const char *password) -{ - struct pico_device_ppp *ppp = (struct pico_device_ppp *)dev; - - if (!dev) - return -1; - - if (!password) - return -1; - - strncpy(ppp->password, password, sizeof(ppp->password) - 1); - return 0; -} diff --git a/kernel/picotcp/modules/pico_dev_ppp.h b/kernel/picotcp/modules/pico_dev_ppp.h deleted file mode 100644 index 5d479c4..0000000 --- a/kernel/picotcp/modules/pico_dev_ppp.h +++ /dev/null @@ -1,26 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_PPP -#define INCLUDE_PICO_PPP - -#include "pico_config.h" -#include "pico_device.h" - -void pico_ppp_destroy(struct pico_device *ppp); -struct pico_device *pico_ppp_create(void); - -int pico_ppp_connect(struct pico_device *dev); -int pico_ppp_disconnect(struct pico_device *dev); - -int pico_ppp_set_serial_read(struct pico_device *dev, int (*sread)(struct pico_device *, void *, int)); -int pico_ppp_set_serial_write(struct pico_device *dev, int (*swrite)(struct pico_device *, const void *, int)); -int pico_ppp_set_serial_set_speed(struct pico_device *dev, int (*sspeed)(struct pico_device *, uint32_t)); - -int pico_ppp_set_apn(struct pico_device *dev, const char *apn); -int pico_ppp_set_username(struct pico_device *dev, const char *username); -int pico_ppp_set_password(struct pico_device *dev, const char *password); - -#endif /* INCLUDE_PICO_PPP */ diff --git a/kernel/picotcp/modules/pico_dev_radio_mgr.c b/kernel/picotcp/modules/pico_dev_radio_mgr.c deleted file mode 100644 index 162caed..0000000 --- a/kernel/picotcp/modules/pico_dev_radio_mgr.c +++ /dev/null @@ -1,357 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See LICENSE and COPYING for usage. - - Authors: Jelle De Vleeschouwer - *********************************************************************/ - -/* - * For testing purposes. pico_dev_radio_manager allows simulating a mesh - * network for smoke tests. I previously used geomess, but that's another - * dependency to add then. Then @danielinux wrote the pico_dev_radiotest but - * that required adding a multicast route on the host which in its turn - * required 'sudo'. So I wrote a small simulator which doesn't require sudo. 
- * - Jelle - */ - -#include "pico_dev_radiotest.h" -#include "pico_addressing.h" -#include "pico_dev_tap.h" -#include "pico_802154.h" -#include "pico_device.h" -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_dev_radio_mgr.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG_RADIOTEST -#define RADIO_DBG dbg -#else -#define RADIO_DBG(...) do { } while (0) -#endif - -#define LISTENING_PORT 7777 -#define MESSAGE_MTU 150 -#define EVER (;;) - -struct socket { - int s; - uint8_t mgr; - uint8_t id; - uint8_t area0; - uint8_t area1; -}; - -/* Compare two application sockets */ -static int -pico_radio_mgr_sock_cmp(void *a, void *b) -{ - struct socket *sa = a, *sb = b; - return (int)(sa->id - sb->id); -} - -PICO_TREE_DECLARE(Sockets, pico_radio_mgr_sock_cmp); - -/* Insert a new socket in the tree */ -static int -pico_radio_mgr_socket_insert(int socket, uint8_t id, uint8_t area0, uint8_t area1, uint8_t mgr) -{ - struct socket *s = PICO_ZALLOC(sizeof(struct socket)); - if (s) { - s->area0 = area0; - s->area1 = area1; - s->s = socket; - s->mgr = mgr; - s->id = id; - if (!pico_tree_insert(&Sockets, s)) - return 0; - PICO_FREE(s); - } - return -1; -} - -/* Gather an array of poll descriptors with all sockets */ -static struct pollfd * -pico_radio_mgr_socket_all(int *n) -{ - struct pico_tree_node *i = NULL; - struct socket *key = NULL; - struct pollfd *fds = NULL; - int j = 1; - *n = 0; - - /* Retrieve all sockets */ - pico_tree_foreach(i, &Sockets) { - (*n)++; - } - - /* Create array from tree */ - fds = PICO_ZALLOC(sizeof(struct pollfd) * (size_t)*n); - if (fds) { - /* Put every socket in array */ - pico_tree_foreach(i, &Sockets) { - if (i && (key = i->keyValue)) { - if (!key->id) { - fds[0].fd = key->s; - fds[0].events = POLLIN; - } else { - fds[j].fd = key->s; - fds[j].events = POLLIN | POLLHUP; - j++; - } - } - } - } - - return fds; -} - -/* Get connection socket that belongs to a particular node */ -static 
struct socket * -pico_radio_mgr_socket_node(uint8_t id) -{ - struct socket test = { 0, 0, id }; - return pico_tree_findKey(&Sockets, &test); -} - -/* Handle POLLHUP event */ -static int -pico_radio_mgr_socket_hup(int socket) -{ - struct pico_tree_node *i = NULL; - struct socket *key = NULL; - - pico_tree_foreach(i, &Sockets) { - key = i->keyValue; - if (key && key->s == socket) { - pico_tree_delete(&Sockets, key); - RADIO_DBG("Radio %d detached from network\n", key->id); - PICO_FREE(key); - close(socket); - return 0; - } - } - return -1; -} - -/* Receive's an 'Hello'-message from the node that contains the id, the inserts - * an entry in the Sockets-tree */ -static int -pico_radio_mgr_welcome(int socket) -{ - int ret_len = sizeof(uint8_t); - uint8_t id = 0, area0, area1; - - errno = 0; - while ((ret_len = recv(socket, &id, (size_t)ret_len, 0)) != 1) { - if (errno && EINTR != errno) - goto hup; - } - while ((ret_len = recv(socket, &area0, (size_t)ret_len, 0)) != 1) { - if (errno && EINTR != errno) - goto hup; - } - while ((ret_len = recv(socket, &area1, (size_t)ret_len, 0)) != 1) { - if (errno && EINTR != errno) - goto hup; - } - - if (id <= 0) { // Node's can't have ID '0'. 
- RADIO_DBG("Invalid socket\n"); - close(socket); - return -1; - } - - RADIO_DBG("Connected to node %u in area %u and %u on socket %d.\n", id, area0, area1, socket); - if (pico_radio_mgr_socket_insert(socket, id, area0, area1, 0)) { - RADIO_DBG("Failed inserting new socket\n"); - close(socket); - return -1; - } - - return 0; -hup: - RADIO_DBG("recv() failed with error: %s\n", strerror(errno)); - close(socket); - return -1; -} - -/* Accepts a new TCP connection request */ -static int -pico_radio_mgr_accept(int socket) -{ - unsigned int len = sizeof(struct sockaddr_in); - struct sockaddr_in addr; - int s = accept(socket, (struct sockaddr *)&addr, &len); - if (s < 0) { - RADIO_DBG("Failed accepting connection\n"); - return s; - } else if (!s) { - RADIO_DBG("accept() returned file descriptor '%d'\n", s); - return s; - } - return pico_radio_mgr_welcome(s); -} - -/* Start listening for TCP connection requests on 'LISTENING_PORT' */ -static int -pico_radio_mgr_listen(void) -{ - struct sockaddr_in addr; - int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - int ret = 0, yes = 1; - - memset(&addr, 0, sizeof(struct sockaddr_in)); - addr.sin_family = AF_INET; - addr.sin_port = htons(LISTENING_PORT); - addr.sin_addr.s_addr = INADDR_ANY; - - setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int)); - - ret = bind(s, (struct sockaddr *)&addr, sizeof(struct sockaddr_in)); - if (ret < 0) { - RADIO_DBG("Failed binding socket to address: %s\n", strerror(ret)); - return -1; - } - - ret = listen(s, 5); - if (ret < 0) { - RADIO_DBG("Failed start listening\n"); - return -1; - } - - /* Also insert server socket in tree for polling */ - if (pico_radio_mgr_socket_insert(s, 0, 0, 0, 1)) { - close(s); - return -1; - } - - dbg("Started listening on port %d\n", LISTENING_PORT); - return s; -} - -/* Distribute received frame over all the areas where the node is attached to */ -static void -pico_radio_mgr_distribute(uint8_t *buf, int len, uint8_t id) -{ - struct socket *node = 
pico_radio_mgr_socket_node(id); - uint8_t area0 = 0, area1 = 0, ar0 = 0, ar1 = 0, phy = (uint8_t)len; - struct pico_tree_node *i = NULL; - struct socket *key = NULL; - if (node) { - RADIO_DBG("Received frame from node '%d' of '%d' bytes\n", id, len); - area0 = node->area0; - area1 = node->area1; - } else { - RADIO_DBG("Received frame from node not connected to network, weird..\n"); - return; - } - - pico_tree_foreach(i, &Sockets) { - key = i->keyValue; - if (key && key->id != id && key->id) { // Do not sent to ourselves or manager - ar0 = key->area0; - ar1 = key->area1; - if (area0 == ar0 || area0 == ar1 || (area1 && (area1 == ar0 || area1 == ar1))) { - len = (int)sendto(key->s, &phy, (size_t)1, 0, NULL, 0); - if (len != 1) return; - len = (int)sendto(key->s, buf, (size_t)phy, 0, NULL, 0); - if (len == (int)phy) - RADIO_DBG("Forwarded from '%u' of %d bytes sent to '%u'\n", id, len, key->id); - } - } - } -} - -/* Process poll-events */ -static void -pico_radio_mgr_process(struct pollfd *fds, int n) -{ - uint8_t buf[MESSAGE_MTU] = { 0 }, node = 0, phy = 0; - int i = 0, ret_len = 0; - short event = 0; - - for (i = 0; i < n; i++) { - event = fds[i].revents; - if (event && (event & POLLIN)) { // POLLIN - if (!i) { - /* Accept a new connection */ - pico_radio_mgr_accept(fds[i].fd); - continue; - } - - /* Read from node */ - ret_len = (int)recv(fds[i].fd, &phy, (size_t)1, 0); - if (ret_len <= 0) - goto hup; - ret_len = (int)recv(fds[i].fd, buf, (size_t)phy, 0); - if (ret_len <= 0 || ret_len != phy) - goto hup; - node = buf[ret_len - 2]; - pico_radio_mgr_distribute(buf, ret_len, node); - } else if (event && (event & POLLHUP)) { - goto hup; - } - } - - return; -hup: - pico_radio_mgr_socket_hup(fds[i].fd); -} - -static void -pico_radio_mgr_quit(int signum) -{ - struct pico_tree_node *i = NULL, *tmp = NULL; - struct socket *key = NULL; - IGNORE_PARAMETER(signum); - - dbg("Closing all sockets..."); - pico_tree_foreach_safe(i, &Sockets, tmp) { - key = i->keyValue; - if (key) { 
- pico_tree_delete(&Sockets, key); - shutdown(key->s, SHUT_RDWR); - PICO_FREE(key); - } - } - dbg("done.\n"); - exit(0); -} - -/* Create and start a radio-manager instance */ -int -pico_radio_mgr_start(void) -{ - int server = pico_radio_mgr_listen(); - struct pollfd *fds = NULL; - nfds_t n = 0; - int ret = 0; - if (server < 0) - return -1; - - signal(SIGQUIT, pico_radio_mgr_quit); - - for EVER { - if (fds) - PICO_FREE(fds); - fds = pico_radio_mgr_socket_all((int *)&n); - errno = 0; - ret = poll(fds, n, 1); - if (errno != EINTR && ret < 0) { - RADIO_DBG("Socket error: %s\n", strerror(ret)); - return ret; - } else if (!ret) { - continue; - } - pico_radio_mgr_process(fds, (int)n); - } -} diff --git a/kernel/picotcp/modules/pico_dev_radio_mgr.h b/kernel/picotcp/modules/pico_dev_radio_mgr.h deleted file mode 100644 index 37da415..0000000 --- a/kernel/picotcp/modules/pico_dev_radio_mgr.h +++ /dev/null @@ -1,14 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See LICENSE and COPYING for usage. - - Authors: Jelle De Vleeschouwer - *********************************************************************/ - -#ifndef __PICO_DEV_RADIO_MGR_H_ -#define __PICO_DEV_RADIO_MGR_H_ - -/* Start listening for TCP connection requests on 'LISTENING_PORT' */ -int pico_radio_mgr_start(void); - -#endif diff --git a/kernel/picotcp/modules/pico_dev_radiotest.c b/kernel/picotcp/modules/pico_dev_radiotest.c deleted file mode 100644 index a1b75be..0000000 --- a/kernel/picotcp/modules/pico_dev_radiotest.c +++ /dev/null @@ -1,486 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See LICENSE and COPYING for usage. 
- - Authors: Daniele Lacamera, Jelle De Vleeschouwer - *********************************************************************/ - -/******************************************************************************* - * PicoTCP - ******************************************************************************/ - -#include "pico_dev_radiotest.h" -#include "pico_6lowpan_ll.h" -#include "pico_addressing.h" -#include "pico_dev_tap.h" -#include "pico_802154.h" -#include "pico_device.h" -#include "pico_config.h" -#include "pico_stack.h" - -/******************************************************************************* - * System sockets - ******************************************************************************/ - -#include -#include -#include -#include -#include -#include - -#define LISTENING_PORT 7777 -#define MESSAGE_MTU 150 - -#ifdef RADIO_PCAP -#include -#endif - -#ifdef DEBUG_RADIOTEST -#define RADIO_DBG dbg -#else -#define RADIO_DBG(...) do { } while (0) -#endif - -/******************************************************************************* - * Constants - ******************************************************************************/ - -/* Uncomment next line to enable Random packet loss (specify percentage) */ -//#define PACKET_LOSS 3 - -#define RFDEV_PANID 0xABCD -#define MC_ADDR_BE 0x010101EBU -#define LO_ADDR 0x0100007FU - -#define LOOP_MTU 127 - -/******************************************************************************* - * Type definitions - ******************************************************************************/ - -struct radiotest_radio { - struct pico_dev_6lowpan dev; - struct pico_6lowpan_info addr; - int sock0; - int sock1; -#ifdef RADIO_PCAP - pcap_t *pcap; - pcap_dumper_t *pcapd; -#endif -}; - -struct radiotest_frame -{ - uint8_t *buf; - int len; - uint32_t id; - union pico_ll_addr src; - union pico_ll_addr dst; -}; - -/******************************************************************************* - * Global variables - 
******************************************************************************/ - -static int connection = 0; - -static uint32_t tx_id = 0; -static uint32_t rx_id = 0; - -/******************************************************************************* - * pcap - ******************************************************************************/ - -#ifdef RADIO_PCAP - -static void radiotest_pcap_open(struct radiotest_radio *dev, char *dump) -{ - char path[100]; - - /* Open offline packet capture */ -#ifdef PICO_SUPPORT_802154 - dev->pcap = pcap_open_dead(DLT_IEEE802_15_4, 65535); -#elif defined (PICO_SUPPORT_802154_NO_MAC) - dev->pcap = pcap_open_dead(DLT_RAW, 65535); -#endif - if (!dev->pcap) { - perror("LibPCAP"); - exit (1); - } - - /* Construct file path */ - snprintf(path, 100, dump, dev->addr); - - /* Open dump */ - dev->pcapd = pcap_dump_open(dev->pcap, path); - if (dev->pcapd) - dbg("PCAP Enabled\n"); - else - dbg("PCAP Disabled\n"); -} - -static void radiotest_pcap_write(struct radiotest_radio *dev, uint8_t *buf, int len) -{ - struct pcap_pkthdr ph; - if (!dev || !dev->pcapd) - return; - ph.caplen = (uint32_t)len; - ph.len = (uint32_t)len; - gettimeofday(&ph.ts, NULL); - pcap_dump((u_char *)dev->pcapd, &ph, buf); - pcap_dump_flush(dev->pcapd); -} - -#else - -static void radiotest_pcap_open(struct radiotest_radio *dev, char *dump) -{ - (void)dev; - (void)dump; -} - -static void radiotest_pcap_write(struct radiotest_radio *dev, uint8_t *buf, int len) -{ - (void)dev; - (void)buf; - (void)len; -} - -#endif - -static int radiotest_cmp(void *a, void *b) -{ - struct radiotest_frame *fa = (struct radiotest_frame *)a; - struct radiotest_frame *fb = (struct radiotest_frame *)b; - return (int)(fa->id - fb->id); -} - -PICO_TREE_DECLARE(LoopFrames, radiotest_cmp); - -static uint8_t *radiotest_nxt_rx(int *len, union pico_ll_addr *src, union pico_ll_addr *dst) -{ - struct radiotest_frame test, *found = NULL; - uint8_t *ret = NULL; - test.id = rx_id++; - - found = 
pico_tree_findKey(&LoopFrames, &test); - if (found) { - ret = found->buf; - *len = found->len; - *src = found->src; - *dst = found->dst; - pico_tree_delete(&LoopFrames, found); - PICO_FREE(found); - } else { - rx_id--; - } - return ret; -} - -static void radiotest_nxt_tx(uint8_t *buf, int len, union pico_ll_addr src, union pico_ll_addr dst) -{ - struct radiotest_frame *new = PICO_ZALLOC(sizeof(struct radiotest_frame)); - if (new) { - new->buf = PICO_ZALLOC((uint16_t)len); - if (new->buf) { - memcpy(new->buf, buf, (size_t)len); - new->len = len; - new->id = tx_id++; - new->src = src; - new->dst = dst; - if (pico_tree_insert(&LoopFrames, new)) { - PICO_FREE(new); - tx_id--; - } - } else { - PICO_FREE(new); - } - } -} - -static int pico_loop_send(struct pico_device *dev, void *buf, int len, union pico_ll_addr src, union pico_ll_addr dst) -{ - IGNORE_PARAMETER(dev); - if (len > LOOP_MTU) - return 0; - RADIO_DBG("Looping back frame of %d bytes.\n", len); - radiotest_nxt_tx(buf, len, src, dst); - return len; -} - -static int pico_loop_poll(struct pico_device *dev, int loop_score) -{ - union pico_ll_addr src, dst; - uint8_t *buf = NULL; - int len = 0; - - if (loop_score <= 0) - return 0; - - buf = radiotest_nxt_rx(&len, &src, &dst); - if (buf) { - RADIO_DBG("Receiving frame of %d bytes.\n", len); - pico_6lowpan_stack_recv(dev, buf, (uint32_t)len, &src, &dst); - PICO_FREE(buf); - loop_score--; - } - - return loop_score; -} - - -/* Generates a simple extended address */ -static void radiotest_gen_ex(struct pico_6lowpan_short addr_short, uint8_t *buf) -{ - uint16_t sh = addr_short.addr; - buf[0] = 0x00; - buf[1] = 0x00; - buf[2] = 0x00; - buf[3] = 0xaa; - buf[4] = 0xab; - buf[5] = 0x00; - buf[6] = (uint8_t)((uint8_t)(short_be(sh) & 0xFF00) >> 8u); - buf[7] = (uint8_t)(short_be(sh) & 0xFFu); -} - -/** - * Simulated CRC16-CITT Kermit generation - * - * @param buf uint8_t *, buffer to generate FCS for. 
- * @param len uint8_t, len of the buffer - * - * @return CITT Kermit CRC16 of the buffer - */ -static uint16_t calculate_crc16(uint8_t *buf, uint8_t len) -{ - uint16_t crc = 0x0000; - uint16_t q = 0, i = 0; - uint8_t c = 0; - - for (i = 0; i < len; i++) { - c = buf[i]; - q = (crc ^ c) & 0x0F; - crc = (uint16_t)((uint16_t)(crc >> 4) ^ (q * 0x1081)); - q = (crc ^ (c >> 4)) & 0xF; - crc = (uint16_t)((uint16_t)(crc >> 4) ^ (q * 0x1081)); - } - - return crc; -} - -/* Poll-function for the pico_device-structure */ -static int radiotest_poll(struct pico_device *dev, int loop_score) -{ - struct radiotest_radio *radio = (struct radiotest_radio *)dev; - union pico_ll_addr src = {0}, dst = {0}; - int pollret, ret_len; - struct pollfd p; - uint8_t buf[128]; - uint8_t phy = 0; - - if (loop_score <= 0) - return 0; - - if (!dev) - return loop_score; - - p.fd = connection; - p.events = POLLIN | POLLHUP; - - /* Poll for data from radio management */ - errno = 0; - pollret = poll(&p, (nfds_t)1, 1); - if (errno == EINTR || pollret == 0) - return loop_score; - - if (pollret < 0) { - fprintf(stderr, "Socket error %s!\n", strerror(errno)); - exit(5); - } - - if (p.revents & POLLIN) { - ret_len = (int)recv(connection, &phy, (size_t)1, 0); - if (ret_len != 1) return loop_score; - ret_len = (int)recv(connection, buf, (size_t)phy, 0); - if (ret_len != (int)phy) - return loop_score; - else if (!ret_len) { - RADIO_DBG("Radio manager detached from network\n"); - exit(1); - } - } - - if (ret_len < 2) { /* Not valid */ - return loop_score; - } - -#ifdef P_LOSS - long n = lrand48(); - n = n % 100; - if (n < P_LOSS) { - RADIO_DBG("Packet got lost!\n"); - return loop_score; - } -#endif - - /* ADDRESS FILTER */ - if (buf[ret_len - 1] != 0xFF && buf[ret_len - 1] != (uint8_t)short_be(radio->addr.addr_short.addr)) { - RADIO_DBG("Packet is not for me!\n"); - return loop_score; - } - - /* Get src and destination address */ - dst.pan.addr._ext = radio->addr.addr_ext; - src.pan.addr.data[3] = 0xAA; - 
src.pan.addr.data[4] = 0xAB; - src.pan.addr.data[7] = buf[ret_len - 1]; - src.pan.mode = AM_6LOWPAN_EXT; - ret_len -= 2; - - /* Write the received frame to the pcap-dump */ - radiotest_pcap_write(radio, buf, ret_len); - - /* Hand the frame over to pico */ - pico_6lowpan_stack_recv(dev, buf, (uint32_t)(ret_len - 2), &src, &dst); - loop_score--; - - return loop_score; -} - -#define RADIO_OVERHEAD 4 - -/* Send-function for the pico_device-structure */ -static int radiotest_send(struct pico_device *dev, void *_buf, int len, union pico_ll_addr src, union pico_ll_addr dst) -{ - struct radiotest_radio *radio = (struct radiotest_radio *)dev; - uint8_t *buf = PICO_ZALLOC((size_t)(len + RADIO_OVERHEAD)); - uint8_t phy = 0, did = 0; - uint16_t crc = 0; - int ret = 0, dlen = 0; - IGNORE_PARAMETER(src); - - if (!buf) - return -1; - - /* Try to get node-ID from address */ - if (dev && pico_6lowpan_lls[dev->mode].addr_len) { - dlen = pico_6lowpan_lls[dev->mode].addr_len(&dst); - if (dlen < 0) - return -1; - did = dst.pan.addr.data[dlen - 1]; - } - - /* Store the addresses in buffer for management */ - memcpy(buf, _buf, (size_t)len); - len = (uint16_t)(len + (uint16_t)RADIO_OVERHEAD); // CRC + ID - buf[len - 2] = (uint8_t)short_be(radio->addr.addr_short.addr); - buf[len - 1] = did; - - /* Generate FCS, keep pcap happy ... 
*/ - crc = calculate_crc16(_buf, (uint8_t)(len - RADIO_OVERHEAD)); - memcpy(buf + len - RADIO_OVERHEAD, (void *)&crc, 2); - - /* Send frame to radio management */ - phy = (uint8_t)(len); - ret = (int)sendto(connection, &phy, 1, 0, NULL, 0); - if (ret != 1) - return -1; - ret = (int)sendto(connection, buf, (size_t)(len), 0, NULL, 0); - RADIO_DBG("Radio '%u' transmitted a frame of %d bytes.\n", buf[len - 2], ret); - - /* Write the sent frame to the pcap-dump */ - radiotest_pcap_write(radio, buf, len - 2); - - PICO_FREE(buf); - return ret; -} - -static int radiotest_hello(int s, uint8_t id, uint8_t area0, uint8_t area1) -{ - uint8_t buf[3] = { id, area0, area1 }; - if (sendto(s, buf, (size_t)3, 0, NULL, 0) != 3) { - RADIO_DBG("Radio '%u' failed to send hello message\n", id); - return -1; - } - - RADIO_DBG("Radio '%u' attached to network\n", id); - return s; -} - -static int radiotest_connect(uint8_t id, uint8_t area0, uint8_t area1) -{ - struct sockaddr_in addr; - int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - int ret = 0; - - memset(&addr, 0, sizeof(struct sockaddr_in)); - addr.sin_family = AF_INET; - addr.sin_port = htons(LISTENING_PORT); - inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr); - - ret = connect(s, (struct sockaddr *)&addr, sizeof(struct sockaddr_in)); - if (ret) { - RADIO_DBG("Radio '%u' could not attach to network\n", id); - return ret; - } - - return radiotest_hello(s, id, area0, area1); -} - -static void -pico_radiotest_quit(int signum) -{ - IGNORE_PARAMETER(signum); - dbg("Quitting radiotest\n"); - exit(0); -} - -/* Creates a radiotest-device */ -struct pico_device *pico_radiotest_create(uint8_t addr, uint8_t area0, uint8_t area1, int loop, char *dump) -{ - struct radiotest_radio *radio = PICO_ZALLOC(sizeof(struct radiotest_radio)); - struct pico_dev_6lowpan *lp = (struct pico_dev_6lowpan *)radio; - if (!radio) - return NULL; - if (!addr || (addr && !area0)) { - RADIO_DBG("Usage (node): -6 [1-255],[1-255],[0-255] ...\n"); - } - - 
signal(SIGQUIT, pico_radiotest_quit); - - radio->addr.pan_id.addr = short_be(RFDEV_PANID); - radio->addr.addr_short.addr = short_be((uint16_t)addr); - radiotest_gen_ex(radio->addr.addr_short, radio->addr.addr_ext.addr); - RADIO_DBG("Radiotest short address: 0x%04X\n", short_be(radio->addr.addr_short.addr)); - RADIO_DBG("Radiotest ext address: %02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n", - radio->addr.addr_ext.addr[0],radio->addr.addr_ext.addr[1], - radio->addr.addr_ext.addr[2],radio->addr.addr_ext.addr[3], - radio->addr.addr_ext.addr[4],radio->addr.addr_ext.addr[5], - radio->addr.addr_ext.addr[6],radio->addr.addr_ext.addr[7]); - - if (!loop) { - if ((connection = radiotest_connect(addr, area0, area1)) <= 0) { - return NULL; - } - if (pico_dev_6lowpan_init(lp, "radio", (uint8_t *)&radio->addr, LL_MODE_IEEE802154, MTU_802154_MAC, 0, radiotest_send, radiotest_poll)) { - RADIO_DBG("pico_device_init failed.\n"); - pico_device_destroy((struct pico_device *)lp); - return NULL; - } - } else { - if (pico_dev_6lowpan_init(lp, "radio", (uint8_t *)&radio->addr, LL_MODE_IEEE802154, MTU_802154_MAC, 0, pico_loop_send, pico_loop_poll)) { - RADIO_DBG("pico_device_init failed.\n"); - pico_device_destroy((struct pico_device *)lp); - return NULL; - } - } - - if (dump) { - dbg("Dump: %s\n", dump); - radiotest_pcap_open(radio, dump); - } - - return (struct pico_device *)lp; -} - diff --git a/kernel/picotcp/modules/pico_dev_radiotest.h b/kernel/picotcp/modules/pico_dev_radiotest.h deleted file mode 100644 index eb484ee..0000000 --- a/kernel/picotcp/modules/pico_dev_radiotest.h +++ /dev/null @@ -1,16 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See LICENSE and COPYING for usage. 
- - Authors: Daniele Lacamera, Jelle De Vleeschouwer - *********************************************************************/ - -#ifndef INCLUDE_PICO_DEV_RADIOTEST -#define INCLUDE_PICO_DEV_RADIOTEST - -#include "pico_device.h" -#include "pico_config.h" - -struct pico_device *pico_radiotest_create(uint8_t addr, uint8_t area0, uint8_t area1, int loop, char *dump); - -#endif /* INCLUDE_PICO_DEV_RADIOTEST */ diff --git a/kernel/picotcp/modules/pico_dev_tap.c b/kernel/picotcp/modules/pico_dev_tap.c deleted file mode 100644 index 877941f..0000000 --- a/kernel/picotcp/modules/pico_dev_tap.c +++ /dev/null @@ -1,230 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera - *********************************************************************/ - - -#include -#include -#include -#include -#include "pico_device.h" -#include "pico_dev_tap.h" -#include "pico_stack.h" - -#ifndef __FreeBSD__ -#include -#endif - -#include - -struct pico_device_tap { - struct pico_device dev; - int fd; -}; - -#define TUN_MTU 2048 - -/* We only support one global link state - we only have two USR signals, we */ -/* can't spread these out over an arbitrary amount of devices. When you unplug */ -/* one tap, you unplug all of them. 
*/ - -static int tapdev_link_state = 0; - -static void sig_handler(int signo) -{ - if (signo == SIGUSR1) { - tapdev_link_state = 0; - } - - if (signo == SIGUSR2) { - tapdev_link_state = 1; - } -} - -static int tap_link_state(__attribute__((unused)) struct pico_device *self) -{ - return tapdev_link_state; -} - - -static int pico_tap_send(struct pico_device *dev, void *buf, int len) -{ - struct pico_device_tap *tap = (struct pico_device_tap *) dev; - return (int)write(tap->fd, buf, (uint32_t)len); -} - -static int pico_tap_poll(struct pico_device *dev, int loop_score) -{ - struct pico_device_tap *tap = (struct pico_device_tap *) dev; - struct pollfd pfd; - unsigned char buf[TUN_MTU]; - int len; - pfd.fd = tap->fd; - pfd.events = POLLIN; - do { - if (poll(&pfd, 1, 0) <= 0) { - return loop_score; - } - - len = (int)read(tap->fd, buf, TUN_MTU); - if (len > 0) { - loop_score--; - pico_stack_recv(dev, buf, (uint32_t)len); - } - } while(loop_score > 0); - return 0; -} - -/* Public interface: create/destroy. 
*/ - -void pico_tap_destroy(struct pico_device *dev) -{ - struct pico_device_tap *tap = (struct pico_device_tap *) dev; - if(tap->fd > 0) { - close(tap->fd); - } -} - -#ifndef __FreeBSD__ -static int tap_open(char *name) -{ - struct ifreq ifr; - int tap_fd; - if((tap_fd = open("/dev/net/tun", O_RDWR)) < 0) { - return -1; - } - - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strncpy(ifr.ifr_name, name, IFNAMSIZ); - if(ioctl(tap_fd, TUNSETIFF, &ifr) < 0) { - return -1; - } - - return tap_fd; -} -#else -static int tap_open(char *name) -{ - int tap_fd; - (void)name; - tap_fd = open("/dev/tap0", O_RDWR); - return tap_fd; -} -#endif - - -#ifndef __FreeBSD__ -static int tap_get_mac(char *name, uint8_t *mac) -{ - int sck; - struct ifreq eth; - int retval = -1; - - sck = socket(AF_INET, SOCK_DGRAM, 0); - if(sck < 0) { - return retval; - } - - memset(ð, 0, sizeof(struct ifreq)); - strcpy(eth.ifr_name, name); - /* call the IOCTL */ - if (ioctl(sck, SIOCGIFHWADDR, ð) < 0) { - perror("ioctl(SIOCGIFHWADDR)"); - return -1; - } - - memcpy (mac, ð.ifr_hwaddr.sa_data, 6); - close(sck); - return 0; - -} -#else -#include -#include -#include -static int tap_get_mac(char *name, uint8_t *mac) -{ - struct sockaddr_dl *sdl; - struct ifaddrs *ifap, *root; - if (getifaddrs(&ifap) != 0) - return -1; - - root = ifap; - while(ifap) { - if (strcmp(name, ifap->ifa_name) == 0) { - sdl = (struct sockaddr_dl *) ifap->ifa_addr; - } - - if (sdl->sdl_type == IFT_ETHER) { - memcpy(mac, LLADDR(sdl), 6); - freeifaddrs(root); - return 0; - } - - ifap = ifap->ifa_next; - } - freeifaddrs(root); - return 0; -} -#endif - -struct pico_device *pico_tap_create(char *name) -{ - struct pico_device_tap *tap = PICO_ZALLOC(sizeof(struct pico_device_tap)); - uint8_t mac[6] = {}; - struct sigaction sa; - - if (!tap) { - return NULL; - } - - sa.sa_flags = 0; - sigemptyset(&sa.sa_mask); - sa.sa_handler = sig_handler; - - if ((sigaction(SIGUSR1, &sa, NULL) == 0) && - (sigaction(SIGUSR2, &sa, NULL) 
== 0)) { - tap->dev.link_state = &tap_link_state; - } - - tap->dev.overhead = 0; - tap->fd = tap_open(name); - if (tap->fd < 0) { - dbg("Tap creation failed.\n"); - pico_tap_destroy((struct pico_device *)tap); - return NULL; - } - - /* Host's mac address is generated * by the host kernel and is - * retrieved via tap_get_mac(). - */ - if (tap_get_mac(name, mac) < 0) { - dbg("Tap mac query failed.\n"); - pico_tap_destroy((struct pico_device *)tap); - return NULL; - } - - /* To act as a second endpoint in the same subnet, the picoTCP - * app using the tap device must have a different mac address. - * For simplicity, we just add 1 to the last byte of the linux - * endpoint so the two addresses are consecutive. - */ - mac[5]++; - - if( 0 != pico_device_init((struct pico_device *)tap, name, mac)) { - dbg("Tap init failed.\n"); - pico_tap_destroy((struct pico_device *)tap); - return NULL; - } - - tap->dev.send = pico_tap_send; - tap->dev.poll = pico_tap_poll; - tap->dev.destroy = pico_tap_destroy; - dbg("Device %s created.\n", tap->dev.name); - return (struct pico_device *)tap; -} - diff --git a/kernel/picotcp/modules/pico_dev_tap.h b/kernel/picotcp/modules/pico_dev_tap.h deleted file mode 100644 index 384226e..0000000 --- a/kernel/picotcp/modules/pico_dev_tap.h +++ /dev/null @@ -1,15 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_TAP -#define INCLUDE_PICO_TAP -#include "pico_config.h" -#include "pico_device.h" - -void pico_tap_destroy(struct pico_device *tap); -struct pico_device *pico_tap_create(char *name); - -#endif - diff --git a/kernel/picotcp/modules/pico_dev_tap_windows.c b/kernel/picotcp/modules/pico_dev_tap_windows.c deleted file mode 100644 index 252f32a..0000000 --- a/kernel/picotcp/modules/pico_dev_tap_windows.c +++ /dev/null @@ -1,1101 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2014-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Maxime Vincent - Based on the OpenVPN tun.c driver, under GPL - - NOTES: This is the Windows-only driver, a Linux-equivalent is available, too - You need to have an OpenVPN TUN/TAP network adapter installed, first - This driver is barely working: - * Only TAP-mode is supported (TUN is not) - * it will simply open the first TAP device it can find - * there is memory being allocated that's never freed - * there is no destroy function, yet - * it has only been tested on a Windows 7 machine - *********************************************************************/ - -#include "pico_device.h" -#include "pico_dev_null.h" -#include "pico_stack.h" -#include "pico_dev_tap_windows.h" - -#include -#include -#include -#include "pico_dev_tap_windows_private.h" - -/* - * Debugging info - */ -#ifdef DEBUG_TAP_GENERAL -#define dbg_tap dbg /* first level debug */ -#else -#define dbg_tap(...) do{} while(0) -#endif - -#ifdef DEBUG_TAP_INFO -#define dbg_tap_info dbg /* tap info messages */ -#else -#define dbg_tap_info(...) do{} while(0) -#endif - -#ifdef DEBUG_TAP_WIN -#define dbg_tap_win32 dbg /* second level detailed win32 debug */ -#else -#define dbg_tap_win32(...) 
do{} while(0) -#endif - -#ifdef DEBUG_TAP_REG -#define dbg_tap_reg dbg /* third level: registry debug */ -#else -#define dbg_tap_reg(...) do{} while(0) -#endif - -/* - * Tunnel types - */ -#define DEV_TYPE_UNDEF 0 -#define DEV_TYPE_NULL 1 -#define DEV_TYPE_TUN 2 /* point-to-point IP tunnel */ -#define DEV_TYPE_TAP 3 /* ethernet (802.3) tunnel */ - - -/* - * We try to do all Win32 I/O using overlapped - * (i.e. asynchronous) I/O for a performance win. - */ -struct overlapped_io { -# define IOSTATE_INITIAL 0 -# define IOSTATE_QUEUED 1 /* overlapped I/O has been queued */ -# define IOSTATE_IMMEDIATE_RETURN 2 /* I/O function returned immediately without queueing */ - int iostate; - OVERLAPPED overlapped; - DWORD size; - DWORD flags; - int status; - int addr_defined; - uint8_t *buf_init; - uint32_t buf_init_len; - uint8_t *buf; - uint32_t buf_len; -}; - -struct rw_handle { - HANDLE read; - HANDLE write; -}; - -struct tuntap -{ - int type; /* DEV_TYPE_x as defined in proto.h */ - int ipv6; - int persistent_if; /* if existed before, keep on program end */ - char *actual_name; /* actual name of TUN/TAP dev, usually including unit number */ - int post_open_mtu; - uint8_t mac[6]; - - /* Windows stuff */ - DWORD adapter_index; /*adapter index for TAP-Windows adapter, ~0 if undefined */ - HANDLE hand; - struct overlapped_io reads; /* for overlapped IO */ - struct overlapped_io writes; - struct rw_handle rw_handle; - -}; - - -struct pico_device_tap { - struct pico_device dev; - int statistics_frames_out; - struct tuntap *tt; -}; - - -/* - * Private function prototypes - */ -const struct tap_reg *get_tap_reg (void); -const struct panel_reg *get_panel_reg (void); - - -/* - * Private functions - */ - -/* Get TAP info from Windows registry */ -const struct tap_reg *get_tap_reg (void) -{ - HKEY adapter_key; - LONG status; - DWORD len; - struct tap_reg *first = NULL; - struct tap_reg *last = NULL; - int i = 0; - - status = RegOpenKeyEx( - HKEY_LOCAL_MACHINE, - ADAPTER_KEY, - 0, - 
KEY_READ, - &adapter_key); - - if (status != ERROR_SUCCESS) - { - dbg_tap_reg("Error opening registry key: %s\n", ADAPTER_KEY); - return NULL; - } - - while (1) - { - char enum_name[256]; - char unit_string[256]; - HKEY unit_key; - char component_id_string[] = "ComponentId"; - char component_id[256]; - char net_cfg_instance_id_string[] = "NetCfgInstanceId"; - char net_cfg_instance_id[256]; - DWORD data_type; - - len = sizeof (enum_name); - status = RegEnumKeyEx( - adapter_key, - i, - enum_name, - &len, - NULL, - NULL, - NULL, - NULL); - if (status == ERROR_NO_MORE_ITEMS) - break; - else if (status != ERROR_SUCCESS) - dbg_tap_reg("Error enumerating registry subkeys of key: %s.\n", ADAPTER_KEY); - - snprintf (unit_string, sizeof(unit_string), "%s\\%s", - ADAPTER_KEY, enum_name); - - status = RegOpenKeyEx( - HKEY_LOCAL_MACHINE, - unit_string, - 0, - KEY_READ, - &unit_key); - - if (status != ERROR_SUCCESS) - { - dbg_tap_reg("Error opening registry key: %s\n", unit_string); - } - else - { - len = sizeof (component_id); - status = RegQueryValueEx( - unit_key, - component_id_string, - NULL, - &data_type, - (LPBYTE)component_id, - &len); - - if (status != ERROR_SUCCESS || data_type != REG_SZ) - { - dbg_tap_reg("Error opening registry key: %s\\%s\n", unit_string, component_id_string); - } - else - { - len = sizeof (net_cfg_instance_id); - status = RegQueryValueEx( - unit_key, - net_cfg_instance_id_string, - NULL, - &data_type, - (LPBYTE)net_cfg_instance_id, - &len); - - if (status == ERROR_SUCCESS && data_type == REG_SZ) - { - if (!strcmp (component_id, TAP_WIN_COMPONENT_ID)) - { - struct tap_reg *reg; - reg = PICO_ZALLOC(sizeof(struct tap_reg), 1); - /* ALLOC_OBJ_CLEAR_GC (reg, struct tap_reg, gc); */ - if (!reg) - return NULL; - - /* reg->guid = string_alloc (net_cfg_instance_id, gc); */ - reg->guid = PICO_ZALLOC (strlen(net_cfg_instance_id) + 1, 1); - if (!(reg->guid)) - { - PICO_FREE(reg); - return NULL; - } - - strcpy((char *)reg->guid, net_cfg_instance_id); - /* link 
into return list */ - if (!first) - first = reg; - - if (last) - last->next = reg; - - last = reg; - } - } - } - - RegCloseKey (unit_key); - } - - ++i; - } - RegCloseKey (adapter_key); - return first; -} - -/* Get Panel info from Windows registry */ -const struct panel_reg *get_panel_reg (void) -{ - LONG status; - HKEY network_connections_key; - DWORD len; - struct panel_reg *first = NULL; - struct panel_reg *last = NULL; - int i = 0; - - status = RegOpenKeyEx( - HKEY_LOCAL_MACHINE, - NETWORK_CONNECTIONS_KEY, - 0, - KEY_READ, - &network_connections_key); - - if (status != ERROR_SUCCESS) - { - dbg_tap_reg("Error opening registry key: %s\n", NETWORK_CONNECTIONS_KEY); - return NULL; - } - - while (1) - { - char enum_name[256]; - char connection_string[256]; - HKEY connection_key; - WCHAR name_data[256]; - DWORD name_type; - const WCHAR name_string[] = L"Name"; - - len = sizeof (enum_name); - status = RegEnumKeyEx( - network_connections_key, - i, - enum_name, - &len, - NULL, - NULL, - NULL, - NULL); - if (status == ERROR_NO_MORE_ITEMS) - break; - else if (status != ERROR_SUCCESS) - dbg_tap_reg("Error enumerating registry subkeys of key: %s.\n", NETWORK_CONNECTIONS_KEY); - - snprintf (connection_string, sizeof(connection_string), "%s\\%s\\Connection", NETWORK_CONNECTIONS_KEY, enum_name); - - status = RegOpenKeyEx( - HKEY_LOCAL_MACHINE, - connection_string, - 0, - KEY_READ, - &connection_key); - if (status != ERROR_SUCCESS) - dbg_tap_reg("Error opening registry key: %s\n", connection_string); - else - { - len = sizeof (name_data); - status = RegQueryValueExW( - connection_key, - name_string, - NULL, - &name_type, - (LPBYTE) name_data, - &len); - - if (status != ERROR_SUCCESS || name_type != REG_SZ) - dbg_tap_reg("Error opening registry key: %s\\%s\\%S\n", NETWORK_CONNECTIONS_KEY, connection_string, name_string); - else - { - int n; - LPSTR name; - struct panel_reg *reg; - - /* ALLOC_OBJ_CLEAR_GC (reg, struct panel_reg, gc); */ - reg = PICO_ZALLOC(sizeof(struct 
panel_reg), 1); - if (!reg) - return NULL; - - n = WideCharToMultiByte (CP_UTF8, 0, name_data, -1, NULL, 0, NULL, NULL); - /* name = gc_malloc (n, false, gc); */ - name = PICO_ZALLOC(n, 1); - if (!name) - { - PICO_FREE(reg); - return NULL; - } - - WideCharToMultiByte (CP_UTF8, 0, name_data, -1, name, n, NULL, NULL); - reg->name = name; - /* reg->guid = string_alloc (enum_name, gc); */ - reg->guid = PICO_ZALLOC(strlen(enum_name) + 1, 1); - if (!reg->guid) - { - PICO_FREE((void *)reg->name); - PICO_FREE((void *)reg); - return NULL; - } - - strcpy((char *)reg->guid, enum_name); - - /* link into return list */ - if (!first) - first = reg; - - if (last) - last->next = reg; - - last = reg; - } - - RegCloseKey (connection_key); - } - - ++i; - } - RegCloseKey (network_connections_key); - - return first; -} - - -void show_tap_win_adapters (void) -{ - int warn_panel_null = 0; - int warn_panel_dup = 0; - int warn_tap_dup = 0; - - int links; - - const struct tap_reg *tr; - const struct tap_reg *tr1; - const struct panel_reg *pr; - - const struct tap_reg *tap_reg = get_tap_reg (); - const struct panel_reg *panel_reg = get_panel_reg (); - - if (!(tap_reg && panel_reg)) - return; - - dbg_tap_info("Available TAP-WIN32 adapters [name, GUID]:\n"); - - /* loop through each TAP-Windows adapter registry entry */ - for (tr = tap_reg; tr != NULL; tr = tr->next) - { - links = 0; - - /* loop through each network connections entry in the control panel */ - for (pr = panel_reg; pr != NULL; pr = pr->next) - { - if (!strcmp (tr->guid, pr->guid)) - { - dbg_tap_info("\t>> '%s' %s\n", pr->name, tr->guid); - ++links; - } - } - if (links > 1) - { - warn_panel_dup = 1; - } - else if (links == 0) - { - /* a TAP adapter exists without a link from the network - connections control panel */ - warn_panel_null = 1; - dbg_tap_info("\t>> [NULL] %s\n", tr->guid); - } - } - /* check for TAP-Windows adapter duplicated GUIDs */ - for (tr = tap_reg; tr != NULL; tr = tr->next) - { - for (tr1 = tap_reg; tr1 != 
NULL; tr1 = tr1->next) - { - if (tr != tr1 && !strcmp (tr->guid, tr1->guid)) - warn_tap_dup = 1; - } - } - /* warn on registry inconsistencies */ - if (warn_tap_dup) - dbg_tap_info("WARNING: Some TAP-Windows adapters have duplicate GUIDs\n"); - - if (warn_panel_dup) - dbg_tap_info("WARNING: Some TAP-Windows adapters have duplicate links from the Network Connections control panel\n"); - - if (warn_panel_null) - dbg_tap_info("WARNING: Some TAP-Windows adapters have no link from the Network Connections control panel\n"); -} - - -/* Get the GUID of the first TAP device found */ -const char *get_first_device_guid(const struct tap_reg *tap_reg, const struct panel_reg *panel_reg, char *name) -{ - const struct tap_reg *tr; - const struct panel_reg *pr; - /* loop through each TAP-Windows adapter registry entry */ - for (tr = tap_reg; tr != NULL; tr = tr->next) - { - /* loop through each network connections entry in the control panel */ - for (pr = panel_reg; pr != NULL; pr = pr->next) - { - if (!strcmp (tr->guid, pr->guid)) - { - dbg_tap_info("Using first TAP device: '%s' %s\n", pr->name, tr->guid); - if (name) - strcpy(name, pr->name); - - return tr->guid; - } - } - } - return NULL; -} - - - -int open_tun (const char *dev, const char *dev_type, const char *dev_node, struct tuntap *tt) -{ - char device_path[256]; - const char *device_guid = NULL; - DWORD len; - - dbg_tap_info("open_tun, tt->ipv6=%d\n", tt->ipv6 ); - - if (!(tt->type == DEV_TYPE_TAP || tt->type == DEV_TYPE_TUN)) - { - dbg_tap_info("Unknown virtual device type: '%s'\n", dev); - return -1; - } - - /* - * Lookup the device name in the registry, using the --dev-node high level name. - */ - { - const struct tap_reg *tap_reg = get_tap_reg(); - const struct panel_reg *panel_reg = get_panel_reg(); - char name[256]; - - if (!(tap_reg && panel_reg)) - { - dbg_tap_info("No TUN/TAP devices found\n"); - return -1; - } - - /* Get the device GUID for the device specified with --dev-node. 
*/ - device_guid = get_first_device_guid (tap_reg, panel_reg, name); - - if (!device_guid) - dbg_tap_info("TAP-Windows adapter '%s' not found\n", dev_node); - - /* Open Windows TAP-Windows adapter */ - snprintf (device_path, sizeof(device_path), "%s%s%s", - USERMODEDEVICEDIR, - device_guid, - TAP_WIN_SUFFIX); - - tt->hand = CreateFile ( - device_path, - GENERIC_READ | GENERIC_WRITE, - 0, /* was: FILE_SHARE_READ */ - 0, - OPEN_EXISTING, - FILE_ATTRIBUTE_SYSTEM | FILE_FLAG_OVERLAPPED, - 0 - ); - - if (tt->hand == INVALID_HANDLE_VALUE) - dbg_tap_info("CreateFile failed on TAP device: %s\n", device_path); - - /* translate high-level device name into a device instance - GUID using the registry */ - tt->actual_name = PICO_ZALLOC(strlen(name) + 1); - if (tt->actual_name) - strcpy(tt->actual_name, name); - } - - dbg_tap_info("TAP-WIN32 device [%s] opened: %s\n", tt->actual_name, device_path); - /* TODO TODO TODO */ - /* tt->adapter_index = get_adapter_index (device_guid); */ - - /* get driver version info */ - { - ULONG info[3]; - /* TODO TODO TODO */ - /* CLEAR (info); */ - if (DeviceIoControl (tt->hand, TAP_WIN_IOCTL_GET_VERSION, &info, sizeof (info), &info, sizeof (info), &len, NULL)) - { - dbg_tap_info ("TAP-Windows Driver Version %d.%d %s\n", - (int) info[0], - (int) info[1], - (info[2] ? 
"(DEBUG)" : "")); - - } - - if (!(info[0] == TAP_WIN_MIN_MAJOR && info[1] >= TAP_WIN_MIN_MINOR)) - dbg_tap_info ("ERROR: This version of " PACKAGE_NAME " requires a TAP-Windows driver that is at least version %d.%d \ - -- If you recently upgraded your " PACKAGE_NAME " distribution, \ - a reboot is probably required at this point to get Windows to see the new driver.\n", - TAP_WIN_MIN_MAJOR, - TAP_WIN_MIN_MINOR); - - /* usage of numeric constants is ugly, but this is really tied to - * *this* version of the driver - */ - if ( tt->ipv6 && tt->type == DEV_TYPE_TUN && info[0] == 9 && info[1] < 8) - { - dbg_tap_info("WARNING: Tap-Win32 driver version %d.%d does not support IPv6 in TUN mode. IPv6 will be disabled. \ - Upgrade to Tap-Win32 9.8 (2.2-beta3 release or later) or use TAP mode to get IPv6\n", (int) info[0], (int) info[1] ); - tt->ipv6 = 0; - } - - /* tap driver 9.8 (2.2.0 and 2.2.1 release) is buggy - */ - if ( tt->type == DEV_TYPE_TUN && info[0] == 9 && info[1] == 8) - { - dbg_tap_info("ERROR: Tap-Win32 driver version %d.%d is buggy regarding small IPv4 packets in TUN mode. 
Upgrade to Tap-Win32 9.9 (2.2.2 release or later) or use TAP mode\n", (int) info[0], (int) info[1] ); - } - } - - /* get driver MTU */ - { - ULONG mtu; - if (DeviceIoControl (tt->hand, TAP_WIN_IOCTL_GET_MTU, - &mtu, sizeof (mtu), - &mtu, sizeof (mtu), &len, NULL)) - { - tt->post_open_mtu = (int) mtu; - dbg_tap_info("TAP-Windows MTU=%d\n", (int) mtu); - } - } - - - /* get driver MAC */ - { - uint8_t mac[6] = { - 0, 0, 0, 0, 0, 0 - }; - if (DeviceIoControl (tt->hand, TAP_WIN_IOCTL_GET_MAC, - mac, sizeof (mac), - mac, sizeof (mac), &len, NULL)) - { - dbg_tap_info("TAP-Windows MAC=[%x,%x,%x,%x,%x,%x]\n", mac[0], mac[1], mac[2], - mac[2], mac[4], mac[5]); - memcpy(tt->mac, mac, sizeof(mac)); - } - } - - /* set point-to-point mode if TUN device */ - - if (tt->type == DEV_TYPE_TUN) - { - dbg_tap_info("TUN type not supported for now...\n"); - return -1; - } - else if (tt->type == DEV_TYPE_TAP) - { /* TAP DEVICE */ - dbg_tap_info("TODO: Set Point-to-point through DeviceIoControl\n"); - } - - /* set driver media status to 'connected' */ - { - ULONG status = TRUE; - if (!DeviceIoControl (tt->hand, TAP_WIN_IOCTL_SET_MEDIA_STATUS, - &status, sizeof (status), - &status, sizeof (status), &len, NULL)) - dbg_tap_info("WARNING: The TAP-Windows driver rejected a TAP_WIN_IOCTL_SET_MEDIA_STATUS DeviceIoControl call."); - } - - return 0; -} - - - - -/* TODO: Closing a TUN device is currently not implemented */ -/* - void close_tun (struct tuntap *tt) - { - (void)tt; - } - */ - - -int tap_win_getinfo (const struct tuntap *tt, char *buf, int bufsize) -{ - if (tt && tt->hand != NULL && buf != NULL) - { - DWORD len; - if (DeviceIoControl (tt->hand, TAP_WIN_IOCTL_GET_INFO, - buf, bufsize, - buf, bufsize, - &len, NULL)) - { - return 0; - } - } - - return -1; -} - -void tun_show_debug (struct tuntap *tt, char *buf, int bufsize) -{ - if (tt && tt->hand != NULL && buf != NULL) - { - DWORD len; - while (DeviceIoControl (tt->hand, TAP_WIN_IOCTL_GET_LOG_LINE, - buf, bufsize, - buf, bufsize, - &len, 
NULL)) - { - dbg_tap_info("TAP-Windows: %s\n", buf); - } - } -} - - -/* returns the state */ -int tun_read_queue (struct tuntap *tt, uint8_t *buffer, int maxsize) -{ - if (tt->reads.iostate == IOSTATE_INITIAL) - { - DWORD len = 1500; - BOOL status; - int err; - - /* reset buf to its initial state */ - tt->reads.buf = tt->reads.buf_init; - tt->reads.buf_len = tt->reads.buf_init_len; - - len = maxsize ? maxsize : (tt->reads.buf_len); - if (len > (tt->reads.buf_len)) /* clip to buffer len */ - len = tt->reads.buf_len; - - /* the overlapped read will signal this event on I/O completion */ - if (!ResetEvent (tt->reads.overlapped.hEvent)) - dbg_tap("ResetEvent failed\n"); - - status = ReadFile( - tt->hand, - buffer, - len, - &tt->reads.size, - &tt->reads.overlapped - ); - - if (status) /* operation completed immediately? */ - { - /* since we got an immediate return, we must signal the event object ourselves */ - /* ASSERT (SetEvent (tt->reads.overlapped.hEvent)); */ - if (!SetEvent (tt->reads.overlapped.hEvent)) - dbg_tap("SetEvent failed\n"); - - tt->reads.iostate = IOSTATE_IMMEDIATE_RETURN; - tt->reads.status = 0; - - dbg_tap_win32 ("WIN32 I/O: TAP Read immediate return [%d,%d]\n", - (int) len, - (int) tt->reads.size); - } - else - { - err = GetLastError (); - if (err == ERROR_IO_PENDING) /* operation queued? 
*/ - { - tt->reads.iostate = IOSTATE_QUEUED; - tt->reads.status = err; - dbg_tap_win32 ("WIN32 I/O: TAP Read queued [%d]\n", (int) len); - } - else /* error occurred */ - { - if (!SetEvent (tt->reads.overlapped.hEvent)) - dbg_tap("SetEvent failed\n"); - - tt->reads.iostate = IOSTATE_IMMEDIATE_RETURN; - tt->reads.status = err; - dbg_tap ("WIN32 I/O: TAP Read error [%d] : %d\n", (int) len, (int) err); - } - } - } - - return tt->reads.iostate; -} - -/* Finalize any pending overlapped IO's */ -int tun_finalize(HANDLE h, struct overlapped_io *io, uint8_t **buf, uint32_t *buf_len) -{ - int ret = -1; - BOOL status; - - switch (io->iostate) - { - case IOSTATE_QUEUED: - status = GetOverlappedResult( - h, - &io->overlapped, - &io->size, - 0u - ); - if (status) - { - /* successful return for a queued operation */ - if (buf) - { - *buf = io->buf; - *buf_len = io->buf_len; - } - - ret = io->size; - io->iostate = IOSTATE_INITIAL; - - if (!ResetEvent (io->overlapped.hEvent)) - dbg_tap("ResetEvent in finalize failed!\n"); - - dbg_tap_win32 ("WIN32 I/O: TAP Completion success: QUEUED! [%d]\n", ret); - } - else - { - /* error during a queued operation */ - /* error, or just not completed? */ - ret = 0; - if (GetLastError() != ERROR_IO_INCOMPLETE) - { - /* if no error (i.e. 
just not finished yet), - then DON'T execute this code */ - io->iostate = IOSTATE_INITIAL; - if (!ResetEvent (io->overlapped.hEvent)) - dbg_tap("ResetEvent in finalize failed!\n"); - - dbg_tap("WIN32 I/O: TAP Completion error\n"); - ret = -1; /* There actually was an error */ - } - } - - break; - - case IOSTATE_IMMEDIATE_RETURN: - io->iostate = IOSTATE_INITIAL; - if (!ResetEvent (io->overlapped.hEvent)) - dbg_tap("ResetEvent in finalize failed!\n"); - - if (io->status) - { - /* error return for a non-queued operation */ - SetLastError (io->status); - ret = -1; - dbg_tap("WIN32 I/O: TAP Completion non-queued error\n"); - } - else - { - /* successful return for a non-queued operation */ - if (buf) - *buf = io->buf; - - ret = io->size; - dbg_tap_win32 ("WIN32 I/O: TAP Completion non-queued success [%d]\n", ret); - } - - break; - - case IOSTATE_INITIAL: /* were we called without proper queueing? */ - SetLastError (ERROR_INVALID_FUNCTION); - ret = -1; - dbg_tap ("WIN32 I/O: TAP Completion BAD STATE\n"); - break; - - default: - dbg_tap ("Some weird case happened..\n"); - } - - if (buf) - *buf_len = ret; - - return ret; -} - - - -/* returns the amount of bytes written */ -int tun_write_queue (struct tuntap *tt, uint8_t *buf, uint32_t buf_len) -{ - if (tt->writes.iostate == IOSTATE_INITIAL) - { - BOOL status; - int err; - - /* make a private copy of buf */ - tt->writes.buf = tt->writes.buf_init; - tt->writes.buf_len = buf_len; - memcpy(tt->writes.buf, buf, buf_len); - - /* the overlapped write will signal this event on I/O completion */ - if (!ResetEvent (tt->writes.overlapped.hEvent)) - dbg_tap("ResetEvent in write_queue failed!\n"); - - status = WriteFile( - tt->hand, - tt->writes.buf, - tt->writes.buf_len, - &tt->writes.size, - &tt->writes.overlapped - ); - - if (status) /* operation completed immediately? 
*/ - { - tt->writes.iostate = IOSTATE_IMMEDIATE_RETURN; - - /* since we got an immediate return, we must signal the event object ourselves */ - if (!SetEvent (tt->writes.overlapped.hEvent)) - dbg_tap("SetEvent in write_queue failed!\n"); - - tt->writes.status = 0; - - dbg_tap_win32 ("WIN32 I/O: TAP Write immediate return [%d,%d]\n", - (int)(tt->writes.buf_len), - (int)tt->writes.size); - } - else - { - err = GetLastError (); - if (err == ERROR_IO_PENDING) /* operation queued? */ - { - tt->writes.iostate = IOSTATE_QUEUED; - tt->writes.status = err; - dbg_tap_win32("WIN32 I/O: TAP Write queued [%d]\n", - (tt->writes.buf_len)); - } - else /* error occurred */ - { - if (!SetEvent (tt->writes.overlapped.hEvent)) - dbg_tap("SetEvent in write_queue failed!\n"); - - tt->writes.iostate = IOSTATE_IMMEDIATE_RETURN; - tt->writes.status = err; - dbg_tap ("WIN32 I/O: TAP Write error [%d] : %d\n", (int) &tt->writes.buf_len, (int) err); - } - } - } - - return tt->writes.iostate; -} - -static inline int overlapped_io_active (struct overlapped_io *o) -{ - return o->iostate == IOSTATE_QUEUED || o->iostate == IOSTATE_IMMEDIATE_RETURN; -} - -/* if >= 0: returns the amount of bytes read, otherwise error! */ -static int tun_write_win32 (struct tuntap *tt, uint8_t *buf, uint32_t buf_len) -{ - int err = 0; - int status = 0; - if (overlapped_io_active (&tt->writes)) - { - status = tun_finalize (tt->hand, &tt->writes, NULL, 0); - if (status == 0) - { - /* busy, just wait, do not schedule a new write */ - return 0; - } - - if (status < 0) - err = GetLastError (); - } - - /* the overlapped IO is done, now we can schedule a new write */ - tun_write_queue (tt, buf, buf_len); - if (status < 0) - { - SetLastError (err); - return status; - } - else - return buf_len; -} - - -/* if >= 0: returns the amount of bytes read, otherwise error! 
*/ -static int tun_read_win32 (struct tuntap *tt, uint8_t *buf, uint32_t buf_len) -{ - int err = 0; - int status = 0; - - - /* First, finish possible pending IOs */ - if (overlapped_io_active (&tt->reads)) - { - status = tun_finalize (tt->hand, &tt->reads, &buf, &buf_len); - if (status == 0) - { - /* busy, just wait, do not schedule a new read */ - return 0; - } - - if (status < 0) - { - dbg_tap ("tun_finalize status < 0: %d\n", status); - err = GetLastError (); - } - - if (status > 0) - { - return buf_len; - } - } - - /* If no pending IOs, schedule a new read */ - /* queue, or immediate return */ - if (IOSTATE_IMMEDIATE_RETURN == tun_read_queue(tt, buf, buf_len)) - { - return tt->reads.size; - } - - /* If the pending IOs gave an error, report it */ - if (status < 0) - { - SetLastError (err); - return status; - } - else - { - /* no errors, but the newly scheduled read is now pending */ - return 0; - } -} - - -static int read_tun_buffered(struct tuntap *tt, uint8_t *buf, uint32_t buf_len) -{ - return tun_read_win32 (tt, buf, buf_len); -} - -static int write_tun_buffered(struct tuntap *tt, uint8_t *buf, uint32_t buf_len) -{ - return tun_write_win32 (tt, buf, buf_len); -} - - -static int pico_tap_send(struct pico_device *dev, void *buf, int len) -{ - uint32_t bytes_sent = 0; - struct pico_device_tap *tap = (struct pico_device_tap *) dev; - - /* Increase the statistic count */ - tap->statistics_frames_out++; - - bytes_sent = write_tun_buffered (tap->tt, buf, len); - dbg_tap("TX> sent %d bytes\n", bytes_sent); - - /* Discard the frame content silently. 
*/ - return bytes_sent; -} - -uint8_t recv_buffer[1500]; - -static int pico_tap_poll(struct pico_device *dev, int loop_score) -{ - struct pico_device_tap *tap = (struct pico_device_tap *) dev; - while (loop_score) - { - int bytes_read = read_tun_buffered(tap->tt, recv_buffer, 1500); - loop_score--; - if (bytes_read > 0) - { - dbg_tap("RX< recvd: %d bytes\n", bytes_read); - pico_stack_recv(dev, recv_buffer, bytes_read); - /* break; */ - } - else - break; - } - return loop_score; -} - - - -#define CLEAR(x) memset(&(x), 0, sizeof(x)) - -void overlapped_io_init (struct overlapped_io *o, int event_state) -{ - CLEAR (*o); - - /* manual reset event, initially set according to event_state */ - o->overlapped.hEvent = CreateEvent (NULL, TRUE, event_state, NULL); - if (o->overlapped.hEvent == NULL) - dbg_tap ("Error: overlapped_io_init: CreateEvent failed\n"); - - /* allocate buffer for overlapped I/O */ - o->buf_init = PICO_ZALLOC(1500); /* XXX: MTU */ - o->buf_init_len = 1500; /* XXX: MTU */ - if (!(o->buf_init)) - dbg_tap("buffer alloc failed!\n"); /* XXX: return -1 or so? 
*/ - else - dbg_tap("overlapped_io_init buffer allocated!\n"); -} - -void init_tun_post (struct tuntap *tt) -{ - dbg_tap("TUN post init (for overlapped io)\n"); - overlapped_io_init (&tt->reads, FALSE); - overlapped_io_init (&tt->writes, TRUE); - tt->rw_handle.read = tt->reads.overlapped.hEvent; - tt->rw_handle.write = tt->writes.overlapped.hEvent; -} - - -/* - * Public interface: pico_tap_create - * TODO: pico_tap_destroy - */ - -struct pico_device *pico_tap_create(char *name, uint8_t *mac) -{ - struct pico_device_tap *tap = PICO_ZALLOC(sizeof(struct pico_device_tap)); - struct tuntap *tt = PICO_ZALLOC(sizeof(struct tuntap), 1); - - if (!(tap) || !(tt)) - return NULL; - - tap->dev.overhead = 0; - tap->statistics_frames_out = 0; - tap->dev.send = pico_tap_send; - tap->dev.poll = pico_tap_poll; - - show_tap_win_adapters(); - - tt->type = DEV_TYPE_TAP; - if (open_tun(NULL, NULL, "tap0", tt)) - { - dbg_tap("Failed to create TAP device!\n"); - PICO_FREE(tt); - PICO_FREE(tap); - return NULL; - } - - tap->tt = tt; - - if( 0 != pico_device_init((struct pico_device *)tap, name, mac)) { - return NULL; - } - - init_tun_post(tt); /* init overlapped io */ - - dbg_tap("Device %s created.\n", tap->dev.name); - - return (struct pico_device *)tap; -} diff --git a/kernel/picotcp/modules/pico_dev_tap_windows.h b/kernel/picotcp/modules/pico_dev_tap_windows.h deleted file mode 100644 index bdc6977..0000000 --- a/kernel/picotcp/modules/pico_dev_tap_windows.h +++ /dev/null @@ -1,17 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2014-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_TAP -#define INCLUDE_PICO_TAP -#include "pico_config.h" -#include "pico_device.h" - -/* will look for the first TAP device available, and use it */ -struct pico_device *pico_tap_create(char *name, uint8_t *mac); -/* TODO: not implemented yet */ -/* void pico_tap_destroy(struct pico_device *null); */ - -#endif - diff --git a/kernel/picotcp/modules/pico_dev_tap_windows_private.h b/kernel/picotcp/modules/pico_dev_tap_windows_private.h deleted file mode 100644 index ef52354..0000000 --- a/kernel/picotcp/modules/pico_dev_tap_windows_private.h +++ /dev/null @@ -1,89 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2014-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Maxime Vincent - Based on the OpenVPN tun.c driver, under GPL - - NOTES: This is the Windows-only driver, a Linux-equivalent is available, too - You need to have an OpenVPN TUN/TAP network adapter installed, first - This driver is barely working: - * Only TAP-mode is supported (TUN is not) - * it will simply open the first TAP device it can find - * there is memory being allocated that's never freed - * there is no destroy function, yet - * it has only been tested on a Windows 7 machine - *********************************************************************/ - -#ifndef __PICO_DEV_TAP_WINDOWS_PRIVATE_H -#define __PICO_DEV_TAP_WINDOWS_PRIVATE_H - -/* Extra defines (vnz) */ -#define TAP_WIN_COMPONENT_ID "tap0901" -#define TAP_WIN_MIN_MAJOR 9 -#define TAP_WIN_MIN_MINOR 9 -#define PACKAGE_NAME "PicoTCP WinTAP" - -/* Extra structs */ -struct tap_reg -{ - const char *guid; - struct tap_reg *next; -}; - -struct panel_reg -{ - const char *name; - const char *guid; - struct panel_reg *next; -}; - - -/* - * ============= - * TAP IOCTLs - * ============= - */ - -#define 
TAP_WIN_CONTROL_CODE(request, method) \ - CTL_CODE (FILE_DEVICE_UNKNOWN, request, method, FILE_ANY_ACCESS) - -/* Present in 8.1 */ - -#define TAP_WIN_IOCTL_GET_MAC TAP_WIN_CONTROL_CODE (1, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_GET_VERSION TAP_WIN_CONTROL_CODE (2, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_GET_MTU TAP_WIN_CONTROL_CODE (3, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_GET_INFO TAP_WIN_CONTROL_CODE (4, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_CONFIG_POINT_TO_POINT TAP_WIN_CONTROL_CODE (5, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_SET_MEDIA_STATUS TAP_WIN_CONTROL_CODE (6, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_CONFIG_DHCP_MASQ TAP_WIN_CONTROL_CODE (7, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_GET_LOG_LINE TAP_WIN_CONTROL_CODE (8, METHOD_BUFFERED) -#define TAP_WIN_IOCTL_CONFIG_DHCP_SET_OPT TAP_WIN_CONTROL_CODE (9, METHOD_BUFFERED) - -/* Added in 8.2 */ - -/* obsoletes TAP_WIN_IOCTL_CONFIG_POINT_TO_POINT */ -#define TAP_WIN_IOCTL_CONFIG_TUN TAP_WIN_CONTROL_CODE (10, METHOD_BUFFERED) - -/* - * ================= - * Registry keys - * ================= - */ - -#define ADAPTER_KEY "SYSTEM\\CurrentControlSet\\Control\\Class\\{4D36E972-E325-11CE-BFC1-08002BE10318}" - -#define NETWORK_CONNECTIONS_KEY "SYSTEM\\CurrentControlSet\\Control\\Network\\{4D36E972-E325-11CE-BFC1-08002BE10318}" - -/* - * ====================== - * Filesystem prefixes - * ====================== - */ - -#define USERMODEDEVICEDIR "\\\\.\\Global\\" -#define SYSDEVICEDIR "\\Device\\" -#define USERDEVICEDIR "\\DosDevices\\Global\\" -#define TAP_WIN_SUFFIX ".tap" - -#endif diff --git a/kernel/picotcp/modules/pico_dev_tun.c b/kernel/picotcp/modules/pico_dev_tun.c deleted file mode 100644 index 148ad92..0000000 --- a/kernel/picotcp/modules/pico_dev_tun.c +++ /dev/null @@ -1,110 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Daniele Lacamera - *********************************************************************/ - - -#include -#include -#include -#include -#include "pico_device.h" -#include "pico_dev_tun.h" -#include "pico_stack.h" - -#include - -struct pico_device_tun { - struct pico_device dev; - int fd; -}; - -#define TUN_MTU 2048 - -static int pico_tun_send(struct pico_device *dev, void *buf, int len) -{ - struct pico_device_tun *tun = (struct pico_device_tun *) dev; - return (int)write(tun->fd, buf, (uint32_t)len); -} - -static int pico_tun_poll(struct pico_device *dev, int loop_score) -{ - struct pico_device_tun *tun = (struct pico_device_tun *) dev; - struct pollfd pfd; - unsigned char buf[TUN_MTU]; - int len; - pfd.fd = tun->fd; - pfd.events = POLLIN; - do { - if (poll(&pfd, 1, 0) <= 0) - return loop_score; - - len = (int)read(tun->fd, buf, TUN_MTU); - if (len > 0) { - loop_score--; - pico_stack_recv(dev, buf, (uint32_t)len); - } - } while(loop_score > 0); - return 0; -} - -/* Public interface: create/destroy. 
*/ - -void pico_tun_destroy(struct pico_device *dev) -{ - struct pico_device_tun *tun = (struct pico_device_tun *) dev; - if(tun->fd > 0) - close(tun->fd); -} - - -static int tun_open(char *name) -{ - struct ifreq ifr; - int tun_fd; - if((tun_fd = open("/dev/net/tun", O_RDWR)) < 0) { - return(-1); - } - - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TUN | IFF_NO_PI; - strncpy(ifr.ifr_name, name, IFNAMSIZ); - if(ioctl(tun_fd, TUNSETIFF, &ifr) < 0) { - return(-1); - } - - return tun_fd; -} - - - -struct pico_device *pico_tun_create(char *name) -{ - struct pico_device_tun *tun = PICO_ZALLOC(sizeof(struct pico_device_tun)); - - if (!tun) - return NULL; - - if( 0 != pico_device_init((struct pico_device *)tun, name, NULL)) { - dbg("Tun init failed.\n"); - pico_tun_destroy((struct pico_device *)tun); - return NULL; - } - - tun->dev.overhead = 0; - tun->fd = tun_open(name); - if (tun->fd < 0) { - dbg("Tun creation failed.\n"); - pico_tun_destroy((struct pico_device *)tun); - return NULL; - } - - tun->dev.send = pico_tun_send; - tun->dev.poll = pico_tun_poll; - tun->dev.destroy = pico_tun_destroy; - dbg("Device %s created.\n", tun->dev.name); - return (struct pico_device *)tun; -} - diff --git a/kernel/picotcp/modules/pico_dev_tun.h b/kernel/picotcp/modules/pico_dev_tun.h deleted file mode 100644 index 85b9dbf..0000000 --- a/kernel/picotcp/modules/pico_dev_tun.h +++ /dev/null @@ -1,15 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_TUN -#define INCLUDE_PICO_TUN -#include "pico_config.h" -#include "pico_device.h" - -void pico_tun_destroy(struct pico_device *tun); -struct pico_device *pico_tun_create(char *name); - -#endif - diff --git a/kernel/picotcp/modules/pico_dev_vde.c b/kernel/picotcp/modules/pico_dev_vde.c deleted file mode 100644 index 3358162..0000000 --- a/kernel/picotcp/modules/pico_dev_vde.c +++ /dev/null @@ -1,122 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera - *********************************************************************/ - -#ifndef UNIT_TEST -#include -#endif -#include "pico_device.h" -#include "pico_dev_vde.h" -#include "pico_stack.h" - -#include - -struct pico_device_vde { - struct pico_device dev; - char *sock; - VDECONN *conn; - uint32_t counter_in; - uint32_t counter_out; - uint32_t lost_in; - uint32_t lost_out; -}; - -#define VDE_MTU 65536 - -/* Mockables */ -#if defined UNIT_TEST -# define MOCKABLE __attribute__((weak)) -#else -# define MOCKABLE -#endif - -static int pico_vde_send(struct pico_device *dev, void *buf, int len) -{ - struct pico_device_vde *vde = (struct pico_device_vde *) dev; - /* dbg("[%s] send %d bytes.\n", dev->name, len); */ - if ((vde->lost_out == 0) || ((pico_rand() % 100) > vde->lost_out)) - return (int)vde_send(vde->conn, buf, (uint32_t)len, 0); - else - return len; /* Silently discarded "on the wire" */ - -} - -static int pico_vde_poll(struct pico_device *dev, int loop_score) -{ - struct pico_device_vde *vde = (struct pico_device_vde *) dev; - struct pollfd pfd; - unsigned char buf[VDE_MTU]; - int len; - pfd.fd = vde_datafd(vde->conn); - pfd.events = POLLIN; - do { - if (poll(&pfd, 1, 0) <= 0) - return loop_score; - - len = 
(int)vde_recv(vde->conn, buf, VDE_MTU, 0); - if (len > 0) { - /* dbg("Received pkt.\n"); */ - if ((vde->lost_in == 0) || ((pico_rand() % 100) > vde->lost_in)) { - loop_score--; - pico_stack_recv(dev, buf, (uint32_t)len); - } - } - } while(loop_score > 0); - return 0; -} - -/* Public interface: create/destroy. */ - -void pico_vde_destroy(struct pico_device *dev) -{ - struct pico_device_vde *vde = (struct pico_device_vde *) dev; - vde_close(vde->conn); - usleep(100000); - sync(); -} - -void pico_vde_set_packetloss(struct pico_device *dev, uint32_t in_pct, uint32_t out_pct) -{ - struct pico_device_vde *vde = (struct pico_device_vde *) dev; - vde->lost_in = in_pct; - vde->lost_out = out_pct; -} - - - -struct pico_device *MOCKABLE pico_vde_create(char *sock, char *name, uint8_t *mac) -{ - struct pico_device_vde *vde = PICO_ZALLOC(sizeof(struct pico_device_vde)); - struct vde_open_args open_args = { - .mode = 0700 - }; - char vdename[] = "picotcp"; - - if (!vde) - return NULL; - - if( 0 != pico_device_init((struct pico_device *)vde, name, mac)) { - dbg ("Vde init failed.\n"); - pico_vde_destroy((struct pico_device *)vde); - return NULL; - } - - vde->dev.overhead = 0; - vde->sock = PICO_ZALLOC(strlen(sock) + 1); - memcpy(vde->sock, sock, strlen(sock)); - vde->conn = vde_open(sock, vdename, &open_args); - if (!vde->conn) { - pico_vde_destroy((struct pico_device *)vde); - return NULL; - } - - vde->dev.send = pico_vde_send; - vde->dev.poll = pico_vde_poll; - vde->dev.destroy = pico_vde_destroy; - dbg("Device %s created.\n", vde->dev.name); - return (struct pico_device *)vde; -} - diff --git a/kernel/picotcp/modules/pico_dev_vde.h b/kernel/picotcp/modules/pico_dev_vde.h deleted file mode 100644 index 4b13087..0000000 --- a/kernel/picotcp/modules/pico_dev_vde.h +++ /dev/null @@ -1,18 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. 
- See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - - *********************************************************************/ -#ifndef INCLUDE_PICO_VDE -#define INCLUDE_PICO_VDE -#include "pico_config.h" -#include "pico_device.h" -#include - -void pico_vde_destroy(struct pico_device *vde); -struct pico_device *pico_vde_create(char *sock, char *name, uint8_t *mac); -void pico_vde_set_packetloss(struct pico_device *dev, uint32_t in_pct, uint32_t out_pct); - -#endif - diff --git a/kernel/picotcp/modules/pico_dhcp_client.c b/kernel/picotcp/modules/pico_dhcp_client.c deleted file mode 100644 index 5b56962..0000000 --- a/kernel/picotcp/modules/pico_dhcp_client.c +++ /dev/null @@ -1,1074 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Kristof Roelants, Frederik Van Slycken, Maxime Vincent - *********************************************************************/ - - -#include "pico_dhcp_client.h" -#include "pico_stack.h" -#include "pico_config.h" -#include "pico_device.h" -#include "pico_ipv4.h" -#include "pico_socket.h" -#include "pico_eth.h" - -#if (defined PICO_SUPPORT_DHCPC && defined PICO_SUPPORT_UDP) - -#ifdef DEBUG_DHCP_CLIENT - #define dhcpc_dbg dbg -#else - #define dhcpc_dbg(...) 
do {} while(0) -#endif - -/* timer values */ -#define DHCP_CLIENT_REINIT 6000 /* msec */ -#define DHCP_CLIENT_RETRANS 4 /* sec */ -#define DHCP_CLIENT_RETRIES 3 - -#define DHCP_CLIENT_TIMER_STOPPED 0 -#define DHCP_CLIENT_TIMER_STARTED 1 - -/* maximum size of a DHCP message */ -#define DHCP_CLIENT_MAXMSGZISE (PICO_IP_MRU - PICO_SIZE_IP4HDR) -#define PICO_DHCP_HOSTNAME_MAXLEN 64U - -/* Mockables */ -#if defined UNIT_TEST -# define MOCKABLE __attribute__((weak)) -#else -# define MOCKABLE -#endif - -static char dhcpc_host_name[PICO_DHCP_HOSTNAME_MAXLEN] = ""; -static char dhcpc_domain_name[PICO_DHCP_HOSTNAME_MAXLEN] = ""; - - -enum dhcp_client_state { - DHCP_CLIENT_STATE_INIT_REBOOT = 0, - DHCP_CLIENT_STATE_REBOOTING, - DHCP_CLIENT_STATE_INIT, - DHCP_CLIENT_STATE_SELECTING, - DHCP_CLIENT_STATE_REQUESTING, - DHCP_CLIENT_STATE_BOUND, - DHCP_CLIENT_STATE_RENEWING, - DHCP_CLIENT_STATE_REBINDING -}; - - -#define PICO_DHCPC_TIMER_INIT 0 -#define PICO_DHCPC_TIMER_REQUEST 1 -#define PICO_DHCPC_TIMER_RENEW 2 -#define PICO_DHCPC_TIMER_REBIND 3 -#define PICO_DHCPC_TIMER_T1 4 -#define PICO_DHCPC_TIMER_T2 5 -#define PICO_DHCPC_TIMER_LEASE 6 -#define PICO_DHCPC_TIMER_ARRAY_SIZE 7 - -struct dhcp_client_timer -{ - uint8_t state; - unsigned int type; - uint32_t xid; - uint32_t timer_id; -}; - -struct pico_dhcp_client_cookie -{ - uint8_t event; - uint8_t retry; - uint32_t xid; - uint32_t *uid; - enum dhcp_client_state state; - void (*cb)(void*dhcpc, int code); - pico_time init_timestamp; - struct pico_socket *s; - struct pico_ip4 address; - struct pico_ip4 netmask; - struct pico_ip4 gateway; - struct pico_ip4 nameserver[2]; - struct pico_ip4 server_id; - struct pico_device *dev; - struct dhcp_client_timer *timer[PICO_DHCPC_TIMER_ARRAY_SIZE]; - uint32_t t1_time; - uint32_t t2_time; - uint32_t lease_time; - uint32_t renew_time; - uint32_t rebind_time; -}; - -static int pico_dhcp_client_init(struct pico_dhcp_client_cookie *dhcpc); -static int reset(struct pico_dhcp_client_cookie *dhcpc, 
uint8_t *buf); -static int8_t pico_dhcp_client_msg(struct pico_dhcp_client_cookie *dhcpc, uint8_t msg_type); -static void pico_dhcp_client_wakeup(uint16_t ev, struct pico_socket *s); -static void pico_dhcp_state_machine(uint8_t event, struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); -static void pico_dhcp_client_callback(struct pico_dhcp_client_cookie *dhcpc, int code); - -static const struct pico_ip4 bcast_netmask = { - .addr = 0xFFFFFFFF -}; - -static struct pico_ip4 inaddr_any = { - 0 -}; - - -static int dhcp_cookies_cmp(void *ka, void *kb) -{ - struct pico_dhcp_client_cookie *a = ka, *b = kb; - if (a->xid == b->xid) - return 0; - - return (a->xid < b->xid) ? (-1) : (1); -} -static PICO_TREE_DECLARE(DHCPCookies, dhcp_cookies_cmp); - -static struct pico_dhcp_client_cookie *pico_dhcp_client_add_cookie(uint32_t xid, struct pico_device *dev, void (*cb)(void *dhcpc, int code), uint32_t *uid) -{ - struct pico_dhcp_client_cookie *dhcpc = NULL, *found = NULL, test = { - 0 - }; - - test.xid = xid; - found = pico_tree_findKey(&DHCPCookies, &test); - if (found) { - pico_err = PICO_ERR_EAGAIN; - return NULL; - } - - dhcpc = PICO_ZALLOC(sizeof(struct pico_dhcp_client_cookie)); - if (!dhcpc) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - dhcpc->state = DHCP_CLIENT_STATE_INIT; - dhcpc->xid = xid; - dhcpc->uid = uid; - *(dhcpc->uid) = 0; - dhcpc->cb = cb; - dhcpc->dev = dev; - - if (pico_tree_insert(&DHCPCookies, dhcpc)) { - PICO_FREE(dhcpc); - return NULL; - } - - return dhcpc; -} - -static void pico_dhcp_client_stop_timers(struct pico_dhcp_client_cookie *dhcpc); -static int pico_dhcp_client_del_cookie(uint32_t xid) -{ - struct pico_dhcp_client_cookie test = { - 0 - }, *found = NULL; - - test.xid = xid; - found = pico_tree_findKey(&DHCPCookies, &test); - if (!found) - return -1; - - pico_dhcp_client_stop_timers(found); - pico_socket_close(found->s); - found->s = NULL; - pico_ipv4_link_del(found->dev, found->address); - pico_tree_delete(&DHCPCookies, found); - 
PICO_FREE(found); - return 0; -} - -static struct pico_dhcp_client_cookie *pico_dhcp_client_find_cookie(uint32_t xid) -{ - struct pico_dhcp_client_cookie test = { - 0 - }, *found = NULL; - - test.xid = xid; - found = pico_tree_findKey(&DHCPCookies, &test); - if (found) - return found; - else - return NULL; -} - -static void pico_dhcp_client_timer_handler(pico_time now, void *arg); -static void pico_dhcp_client_reinit(pico_time now, void *arg); -static struct dhcp_client_timer *pico_dhcp_timer_add(uint8_t type, uint32_t time, struct pico_dhcp_client_cookie *ck) -{ - struct dhcp_client_timer *t = ck->timer[type]; - - if (t) { - /* Stale timer, mark to be freed in the callback */ - t->state = DHCP_CLIENT_TIMER_STOPPED; - } - - /* allocate a new timer, the old one is still in the timer tree, and will be freed as soon as it expires */ - t = PICO_ZALLOC(sizeof(struct dhcp_client_timer)); - - if (!t) - return NULL; - - t->state = DHCP_CLIENT_TIMER_STARTED; - t->xid = ck->xid; - t->type = type; - t->timer_id = pico_timer_add(time, pico_dhcp_client_timer_handler, t); - if (!t->timer_id) { - dhcpc_dbg("DHCP: Failed to start timer\n"); - PICO_FREE(t); - return NULL; - } - - /* store timer struct reference in cookie */ - ck->timer[type] = t; - return t; -} - -static int dhcp_get_timer_event(struct pico_dhcp_client_cookie *dhcpc, unsigned int type) -{ - const int events[PICO_DHCPC_TIMER_ARRAY_SIZE] = - { - PICO_DHCP_EVENT_RETRANSMIT, - PICO_DHCP_EVENT_RETRANSMIT, - PICO_DHCP_EVENT_RETRANSMIT, - PICO_DHCP_EVENT_RETRANSMIT, - PICO_DHCP_EVENT_T1, - PICO_DHCP_EVENT_T2, - PICO_DHCP_EVENT_LEASE - }; - - if (type == PICO_DHCPC_TIMER_REQUEST) { - if (++dhcpc->retry > DHCP_CLIENT_RETRIES) { - reset(dhcpc, NULL); - return PICO_DHCP_EVENT_NONE; - } - } else if (type < PICO_DHCPC_TIMER_T1) { - dhcpc->retry++; - } - - return events[type]; -} - -static void pico_dhcp_client_timer_handler(pico_time now, void *arg) -{ - struct dhcp_client_timer *t = (struct dhcp_client_timer *)arg; - struct 
pico_dhcp_client_cookie *dhcpc; - - if (!t) - return; - - (void) now; - if (t->state != DHCP_CLIENT_TIMER_STOPPED) { - dhcpc = pico_dhcp_client_find_cookie(t->xid); - if (dhcpc) { - t->state = DHCP_CLIENT_TIMER_STOPPED; - if ((t->type == PICO_DHCPC_TIMER_INIT) && (dhcpc->state < DHCP_CLIENT_STATE_SELECTING)) { - /* this was an INIT timer */ - pico_dhcp_client_reinit(now, dhcpc); - } else if (t->type != PICO_DHCPC_TIMER_INIT) { - /* this was NOT an INIT timer */ - dhcpc->event = (uint8_t)dhcp_get_timer_event(dhcpc, t->type); - if (dhcpc->event != PICO_DHCP_EVENT_NONE) - pico_dhcp_state_machine(dhcpc->event, dhcpc, NULL); - } - } - } - - /* stale timer, it's associated struct should be freed */ - if (t->state == DHCP_CLIENT_TIMER_STOPPED) - PICO_FREE(t); -} - -static void pico_dhcp_client_reinit(pico_time now, void *arg) -{ - struct pico_dhcp_client_cookie *dhcpc = (struct pico_dhcp_client_cookie *)arg; - (void) now; - - if (dhcpc->s) { - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - } - - if (++dhcpc->retry > DHCP_CLIENT_RETRIES) { - pico_err = PICO_ERR_EAGAIN; - pico_dhcp_client_callback(dhcpc, PICO_DHCP_ERROR); - - pico_dhcp_client_del_cookie(dhcpc->xid); - return; - } - - pico_dhcp_client_init(dhcpc); - return; -} - - -static void pico_dhcp_client_stop_timers(struct pico_dhcp_client_cookie *dhcpc) -{ - int i; - dhcpc->retry = 0; - for (i = 0; i < PICO_DHCPC_TIMER_ARRAY_SIZE; i++) - { - if (dhcpc->timer[i]) { - /* Do not cancel timer, but rather set it's state to be freed when it expires */ - dhcpc->timer[i]->state = DHCP_CLIENT_TIMER_STOPPED; - dhcpc->timer[i] = NULL; - } - } -} - -static int pico_dhcp_client_start_init_timer(struct pico_dhcp_client_cookie *dhcpc) -{ - uint32_t time = 0; - /* timer value is doubled with every retry (exponential backoff) */ - time = (uint32_t) (DHCP_CLIENT_RETRANS << dhcpc->retry); - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_INIT, time * 1000, dhcpc)) - return -1; - - return 0; -} - -static int 
pico_dhcp_client_start_requesting_timer(struct pico_dhcp_client_cookie *dhcpc) -{ - uint32_t time = 0; - - /* timer value is doubled with every retry (exponential backoff) */ - time = (uint32_t)(DHCP_CLIENT_RETRANS << dhcpc->retry); - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_REQUEST, time * 1000, dhcpc)) - return -1; - - return 0; -} - -static int pico_dhcp_client_start_renewing_timer(struct pico_dhcp_client_cookie *dhcpc) -{ - uint32_t halftime = 0; - - /* wait one-half of the remaining time until T2, down to a minimum of 60 seconds */ - /* (dhcpc->retry + 1): initial -> divide by 2, 1st retry -> divide by 4, 2nd retry -> divide by 8, etc */ - pico_dhcp_client_stop_timers(dhcpc); - halftime = dhcpc->renew_time >> (dhcpc->retry + 1); - if (halftime < 60) - halftime = 60; - - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_RENEW, halftime * 1000, dhcpc)) - return -1; - - return 0; -} - -static int pico_dhcp_client_start_rebinding_timer(struct pico_dhcp_client_cookie *dhcpc) -{ - uint32_t halftime = 0; - - pico_dhcp_client_stop_timers(dhcpc); - halftime = dhcpc->rebind_time >> (dhcpc->retry + 1); - if (halftime < 60) - halftime = 60; - - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_REBIND, halftime * 1000, dhcpc)) - return -1; - - return 0; -} - -static int pico_dhcp_client_start_reacquisition_timers(struct pico_dhcp_client_cookie *dhcpc) -{ - - pico_dhcp_client_stop_timers(dhcpc); - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_T1, dhcpc->t1_time * 1000, dhcpc)) - goto fail; - - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_T2, dhcpc->t2_time * 1000, dhcpc)) - goto fail; - - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_LEASE, dhcpc->lease_time * 1000, dhcpc)) - goto fail; - - return 0; - -fail: - pico_dhcp_client_stop_timers(dhcpc); - return -1; -} - -static int pico_dhcp_client_init(struct pico_dhcp_client_cookie *dhcpc) -{ - uint16_t port = PICO_DHCP_CLIENT_PORT; - if (!dhcpc) - return -1; - - /* adding a link with address 0.0.0.0 and netmask 0.0.0.0, - * automatically adds a route 
for a global broadcast */ - pico_ipv4_link_add(dhcpc->dev, inaddr_any, bcast_netmask); - if (!dhcpc->s) - dhcpc->s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &pico_dhcp_client_wakeup); - - if (!dhcpc->s) { - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_INIT, DHCP_CLIENT_REINIT, dhcpc)) - return -1; - - return 0; - } - - dhcpc->s->dev = dhcpc->dev; - if (pico_socket_bind(dhcpc->s, &inaddr_any, &port) < 0) { - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_INIT, DHCP_CLIENT_REINIT, dhcpc)) - return -1; - - return 0; - } - - if (pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_DISCOVER) < 0) { - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - if (!pico_dhcp_timer_add(PICO_DHCPC_TIMER_INIT, DHCP_CLIENT_REINIT, dhcpc)) - return -1; - - return 0; - } - - dhcpc->retry = 0; - dhcpc->init_timestamp = PICO_TIME_MS(); - if (pico_dhcp_client_start_init_timer(dhcpc) < 0) { - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - return -1; - } - - return 0; -} - -int MOCKABLE pico_dhcp_initiate_negotiation(struct pico_device *dev, void (*cb)(void *dhcpc, int code), uint32_t *uid) -{ - uint8_t retry = 32; - uint32_t xid = 0; - struct pico_dhcp_client_cookie *dhcpc = NULL; - - if (!dev || !cb || !uid) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (!dev->eth) { - pico_err = PICO_ERR_EOPNOTSUPP; - return -1; - } - - /* attempt to generate a correct xid, else fail */ - do { - xid = pico_rand(); - } while (!xid && --retry); - - if (!xid) { - pico_err = PICO_ERR_EAGAIN; - return -1; - } - - dhcpc = pico_dhcp_client_add_cookie(xid, dev, cb, uid); - if (!dhcpc) - return -1; - - dhcpc_dbg("DHCP client: cookie with xid %u\n", dhcpc->xid); - *uid = xid; - return pico_dhcp_client_init(dhcpc); -} - -static void pico_dhcp_client_recv_params(struct pico_dhcp_client_cookie *dhcpc, struct pico_dhcp_opt *opt) -{ - do { - switch (opt->code) - { - case PICO_DHCP_OPT_PAD: - break; - - case PICO_DHCP_OPT_END: - break; - - case PICO_DHCP_OPT_MSGTYPE: 
- dhcpc->event = opt->ext.msg_type.type; - dhcpc_dbg("DHCP client: message type %u\n", dhcpc->event); - break; - - case PICO_DHCP_OPT_LEASETIME: - dhcpc->lease_time = long_be(opt->ext.lease_time.time); - dhcpc_dbg("DHCP client: lease time %u\n", dhcpc->lease_time); - break; - - case PICO_DHCP_OPT_RENEWALTIME: - dhcpc->t1_time = long_be(opt->ext.renewal_time.time); - dhcpc_dbg("DHCP client: renewal time %u\n", dhcpc->t1_time); - break; - - case PICO_DHCP_OPT_REBINDINGTIME: - dhcpc->t2_time = long_be(opt->ext.rebinding_time.time); - dhcpc_dbg("DHCP client: rebinding time %u\n", dhcpc->t2_time); - break; - - case PICO_DHCP_OPT_ROUTER: - dhcpc->gateway = opt->ext.router.ip; - dhcpc_dbg("DHCP client: router %08X\n", dhcpc->gateway.addr); - break; - - case PICO_DHCP_OPT_DNS: - dhcpc->nameserver[0] = opt->ext.dns1.ip; - dhcpc_dbg("DHCP client: dns1 %08X\n", dhcpc->nameserver[0].addr); - if (opt->len >= 8) { - dhcpc->nameserver[1] = opt->ext.dns2.ip; - dhcpc_dbg("DHCP client: dns1 %08X\n", dhcpc->nameserver[1].addr); - } - - break; - - case PICO_DHCP_OPT_NETMASK: - dhcpc->netmask = opt->ext.netmask.ip; - dhcpc_dbg("DHCP client: netmask %08X\n", dhcpc->netmask.addr); - break; - - case PICO_DHCP_OPT_SERVERID: - dhcpc->server_id = opt->ext.server_id.ip; - dhcpc_dbg("DHCP client: server ID %08X\n", dhcpc->server_id.addr); - break; - - case PICO_DHCP_OPT_OPTOVERLOAD: - dhcpc_dbg("DHCP client: WARNING option overload present (not processed)"); - break; - - case PICO_DHCP_OPT_HOSTNAME: - { - uint32_t maxlen = PICO_DHCP_HOSTNAME_MAXLEN; - if (opt->len < maxlen) - maxlen = opt->len; - - strncpy(dhcpc_host_name, opt->ext.string.txt, maxlen); - } - break; - - case PICO_DHCP_OPT_DOMAINNAME: - { - uint32_t maxlen = PICO_DHCP_HOSTNAME_MAXLEN; - if (opt->len < maxlen) - maxlen = opt->len; - - strncpy(dhcpc_domain_name, opt->ext.string.txt, maxlen); - } - break; - - default: - dhcpc_dbg("DHCP client: WARNING unsupported option %u\n", opt->code); - break; - } - } while 
(pico_dhcp_next_option(&opt)); - - /* default values for T1 and T2 when not provided */ - if (!dhcpc->t1_time) - dhcpc->t1_time = dhcpc->lease_time >> 1; - - if (!dhcpc->t2_time) - dhcpc->t2_time = (dhcpc->lease_time * 875) / 1000; - - return; -} - -static int recv_offer(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - struct pico_dhcp_hdr *hdr = (struct pico_dhcp_hdr *)buf; - struct pico_dhcp_opt *opt = DHCP_OPT(hdr, 0); - - pico_dhcp_client_recv_params(dhcpc, opt); - if ((dhcpc->event != PICO_DHCP_MSG_OFFER) || !dhcpc->server_id.addr || !dhcpc->netmask.addr || !dhcpc->lease_time) - return -1; - - dhcpc->address.addr = hdr->yiaddr; - - /* we skip state SELECTING, process first offer received */ - dhcpc->state = DHCP_CLIENT_STATE_REQUESTING; - dhcpc->retry = 0; - pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_REQUEST); - if (pico_dhcp_client_start_requesting_timer(dhcpc) < 0) - return -1; - - return 0; -} - -static void pico_dhcp_client_update_link(struct pico_dhcp_client_cookie *dhcpc) -{ - struct pico_ip4 any_address = { - 0 - }; - struct pico_ip4 address = { - 0 - }; - struct pico_ipv4_link *l; - - dbg("DHCP client: update link\n"); - - pico_ipv4_link_del(dhcpc->dev, address); - l = pico_ipv4_link_by_dev(dhcpc->dev); - while(l) { - pico_ipv4_link_del(dhcpc->dev, l->address); - l = pico_ipv4_link_by_dev_next(dhcpc->dev, l); - } - pico_ipv4_link_add(dhcpc->dev, dhcpc->address, dhcpc->netmask); - - /* If router option is received, use it as default gateway */ - if (dhcpc->gateway.addr != 0U) { - pico_ipv4_route_add(any_address, any_address, dhcpc->gateway, 1, NULL); - } -} - -static int recv_ack(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - struct pico_dhcp_hdr *hdr = (struct pico_dhcp_hdr *)buf; - struct pico_dhcp_opt *opt = DHCP_OPT(hdr, 0); - struct pico_ipv4_link *l; - - pico_dhcp_client_recv_params(dhcpc, opt); - if ((dhcpc->event != PICO_DHCP_MSG_ACK) || !dhcpc->server_id.addr || !dhcpc->netmask.addr || !dhcpc->lease_time) - return -1; - - /* 
Issue #20 the server can transmit on ACK a different IP than the one in OFFER */ - /* RFC2131 ch 4.3.2 ... The client SHOULD use the parameters in the DHCPACK message for configuration */ - if (dhcpc->state == DHCP_CLIENT_STATE_REQUESTING) - dhcpc->address.addr = hdr->yiaddr; - - - /* close the socket used for address (re)acquisition */ - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - - /* Delete all the links before adding the new ip address - * in case the new address doesn't match the old one */ - l = pico_ipv4_link_by_dev(dhcpc->dev); - if (dhcpc->address.addr != (l->address).addr) { - pico_dhcp_client_update_link(dhcpc); - } - - dbg("DHCP client: renewal time (T1) %u\n", (unsigned int)dhcpc->t1_time); - dbg("DHCP client: rebinding time (T2) %u\n", (unsigned int)dhcpc->t2_time); - dbg("DHCP client: lease time %u\n", (unsigned int)dhcpc->lease_time); - - dhcpc->retry = 0; - dhcpc->renew_time = dhcpc->t2_time - dhcpc->t1_time; - dhcpc->rebind_time = dhcpc->lease_time - dhcpc->t2_time; - if (pico_dhcp_client_start_reacquisition_timers(dhcpc) < 0) { - pico_dhcp_client_callback(dhcpc, PICO_DHCP_ERROR); - return -1; - } - - - *(dhcpc->uid) = dhcpc->xid; - pico_dhcp_client_callback(dhcpc, PICO_DHCP_SUCCESS); - - dhcpc->state = DHCP_CLIENT_STATE_BOUND; - return 0; -} - -static int renew(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - uint16_t port = PICO_DHCP_CLIENT_PORT; - (void) buf; - dhcpc->state = DHCP_CLIENT_STATE_RENEWING; - dhcpc->s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &pico_dhcp_client_wakeup); - if (!dhcpc->s) { - dhcpc_dbg("DHCP client ERROR: failure opening socket on renew, aborting DHCP! (%s)\n", strerror(pico_err)); - pico_dhcp_client_callback(dhcpc, PICO_DHCP_ERROR); - - return -1; - } - - if (pico_socket_bind(dhcpc->s, &dhcpc->address, &port) != 0) { - dhcpc_dbg("DHCP client ERROR: failure binding socket on renew, aborting DHCP! 
(%s)\n", strerror(pico_err)); - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - pico_dhcp_client_callback(dhcpc, PICO_DHCP_ERROR); - - return -1; - } - - dhcpc->retry = 0; - pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_REQUEST); - if (pico_dhcp_client_start_renewing_timer(dhcpc) < 0) { - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - pico_dhcp_client_callback(dhcpc, PICO_DHCP_ERROR); - - return -1; - } - - return 0; -} - -static int rebind(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - (void) buf; - - dhcpc->state = DHCP_CLIENT_STATE_REBINDING; - dhcpc->retry = 0; - pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_REQUEST); - if (pico_dhcp_client_start_rebinding_timer(dhcpc) < 0) - return -1; - - return 0; -} - -static int reset(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - struct pico_ip4 address = { - 0 - }; - (void) buf; - - if (dhcpc->state == DHCP_CLIENT_STATE_REQUESTING) - address.addr = PICO_IP4_ANY; - else - address.addr = dhcpc->address.addr; - - /* close the socket used for address (re)acquisition */ - pico_socket_close(dhcpc->s); - dhcpc->s = NULL; - /* delete the link with the currently in use address */ - pico_ipv4_link_del(dhcpc->dev, address); - - pico_dhcp_client_callback(dhcpc, PICO_DHCP_RESET); - - if (dhcpc->state < DHCP_CLIENT_STATE_BOUND) - { - /* pico_dhcp_client_timer_stop(dhcpc, PICO_DHCPC_TIMER_INIT); */ - } - - - dhcpc->state = DHCP_CLIENT_STATE_INIT; - pico_dhcp_client_stop_timers(dhcpc); - pico_dhcp_client_init(dhcpc); - return 0; -} - -static int retransmit(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - (void) buf; - switch (dhcpc->state) - { - case DHCP_CLIENT_STATE_INIT: - pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_DISCOVER); - if (pico_dhcp_client_start_init_timer(dhcpc) < 0) - return -1; - break; - - case DHCP_CLIENT_STATE_REQUESTING: - pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_REQUEST); - if (pico_dhcp_client_start_requesting_timer(dhcpc) < 0) - return -1; - break; - - case DHCP_CLIENT_STATE_RENEWING: - 
pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_REQUEST); - if (pico_dhcp_client_start_renewing_timer(dhcpc) < 0) - return -1; - break; - - case DHCP_CLIENT_STATE_REBINDING: - pico_dhcp_client_msg(dhcpc, PICO_DHCP_MSG_DISCOVER); - if (pico_dhcp_client_start_rebinding_timer(dhcpc) < 0) - return -1; - break; - - default: - dhcpc_dbg("DHCP client WARNING: retransmit in incorrect state (%u)!\n", dhcpc->state); - return -1; - } - - return 0; -} - -struct dhcp_action_entry { - int (*offer)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); - int (*ack)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); - int (*nak)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); - int (*timer1)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); - int (*timer2)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); - int (*timer_lease)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); - int (*timer_retransmit)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf); -}; - -static struct dhcp_action_entry dhcp_fsm[] = -{ /* event |offer |ack |nak |T1 |T2 |lease |retransmit */ -/* state init-reboot */ { NULL, NULL, NULL, NULL, NULL, NULL, NULL }, -/* state rebooting */ { NULL, NULL, NULL, NULL, NULL, NULL, NULL }, -/* state init */ { recv_offer, NULL, NULL, NULL, NULL, NULL, retransmit }, -/* state selecting */ { NULL, NULL, NULL, NULL, NULL, NULL, NULL }, -/* state requesting */ { NULL, recv_ack, reset, NULL, NULL, NULL, retransmit }, -/* state bound */ { NULL, NULL, NULL, renew, NULL, NULL, NULL }, -/* state renewing */ { NULL, recv_ack, reset, NULL, rebind, NULL, retransmit }, -/* state rebinding */ { NULL, recv_ack, reset, NULL, NULL, reset, retransmit }, -}; - -static void dhcp_action_call( int (*call)(struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf), struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - if (call) - call(dhcpc, buf); -} - -/* TIMERS REMARK: - * In state bound we have T1, T2 and the lease timer running. If T1 goes off, we attempt to renew. 
- * If the renew succeeds a new T1, T2 and lease timer is started. The former T2 and lease timer is - * still running though. This poses no concerns as the T2 and lease event in state bound have a NULL - * pointer in the fsm. If the former T2 or lease timer goes off, nothing happens. Same situation - * applies for T2 and a succesfull rebind. */ - -static void pico_dhcp_state_machine(uint8_t event, struct pico_dhcp_client_cookie *dhcpc, uint8_t *buf) -{ - switch (event) - { - case PICO_DHCP_MSG_OFFER: - dhcpc_dbg("DHCP client: received OFFER\n"); - dhcp_action_call(dhcp_fsm[dhcpc->state].offer, dhcpc, buf); - break; - - case PICO_DHCP_MSG_ACK: - dhcpc_dbg("DHCP client: received ACK\n"); - dhcp_action_call(dhcp_fsm[dhcpc->state].ack, dhcpc, buf); - break; - - case PICO_DHCP_MSG_NAK: - dhcpc_dbg("DHCP client: received NAK\n"); - dhcp_action_call(dhcp_fsm[dhcpc->state].nak, dhcpc, buf); - break; - - case PICO_DHCP_EVENT_T1: - dhcpc_dbg("DHCP client: received T1 timeout\n"); - dhcp_action_call(dhcp_fsm[dhcpc->state].timer1, dhcpc, buf); - break; - - case PICO_DHCP_EVENT_T2: - dhcpc_dbg("DHCP client: received T2 timeout\n"); - dhcp_action_call(dhcp_fsm[dhcpc->state].timer2, dhcpc, buf); - break; - - case PICO_DHCP_EVENT_LEASE: - dhcpc_dbg("DHCP client: received LEASE timeout\n"); - dhcp_action_call(dhcp_fsm[dhcpc->state].timer_lease, dhcpc, buf); - break; - - case PICO_DHCP_EVENT_RETRANSMIT: - dhcpc_dbg("DHCP client: received RETRANSMIT timeout\n"); - dhcp_action_call(dhcp_fsm[dhcpc->state].timer_retransmit, dhcpc, buf); - break; - - default: - dhcpc_dbg("DHCP client WARNING: unrecognized event (%u)!\n", dhcpc->event); - return; - } - return; -} - -static int16_t pico_dhcp_client_opt_parse(void *ptr, uint16_t len) -{ - uint32_t optlen = len - (uint32_t)sizeof(struct pico_dhcp_hdr); - struct pico_dhcp_hdr *hdr = (struct pico_dhcp_hdr *)ptr; - struct pico_dhcp_opt *opt = DHCP_OPT(hdr, 0); - - if (hdr->dhcp_magic != PICO_DHCPD_MAGIC_COOKIE) - return -1; - - if 
(!pico_dhcp_are_options_valid(opt, (int32_t)optlen)) - return -1; - - do { - if (opt->code == PICO_DHCP_OPT_MSGTYPE) - return opt->ext.msg_type.type; - } while (pico_dhcp_next_option(&opt)); - - return -1; -} - -static int8_t pico_dhcp_client_msg(struct pico_dhcp_client_cookie *dhcpc, uint8_t msg_type) -{ - int32_t r = 0; - uint16_t optlen = 0, offset = 0; - struct pico_ip4 destination = { - .addr = 0xFFFFFFFF - }; - struct pico_dhcp_hdr *hdr = NULL; - - - /* RFC 2131 3.1.3: Request is always BROADCAST */ - - /* Set again default route for the bcast request */ - pico_ipv4_route_set_bcast_link(pico_ipv4_link_by_dev(dhcpc->dev)); - - switch (msg_type) - { - case PICO_DHCP_MSG_DISCOVER: - dhcpc_dbg("DHCP client: sent DHCPDISCOVER\n"); - optlen = PICO_DHCP_OPTLEN_MSGTYPE + PICO_DHCP_OPTLEN_MAXMSGSIZE + PICO_DHCP_OPTLEN_PARAMLIST + PICO_DHCP_OPTLEN_END; - hdr = PICO_ZALLOC((size_t)(sizeof(struct pico_dhcp_hdr) + optlen)); - if (!hdr) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - /* specific options */ - offset = (uint16_t)(offset + pico_dhcp_opt_maxmsgsize(DHCP_OPT(hdr, offset), DHCP_CLIENT_MAXMSGZISE)); - break; - - case PICO_DHCP_MSG_REQUEST: - optlen = PICO_DHCP_OPTLEN_MSGTYPE + PICO_DHCP_OPTLEN_MAXMSGSIZE + PICO_DHCP_OPTLEN_PARAMLIST + PICO_DHCP_OPTLEN_REQIP + PICO_DHCP_OPTLEN_SERVERID - + PICO_DHCP_OPTLEN_END; - hdr = PICO_ZALLOC(sizeof(struct pico_dhcp_hdr) + optlen); - if (!hdr) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - /* specific options */ - offset = (uint16_t)(offset + pico_dhcp_opt_maxmsgsize(DHCP_OPT(hdr, offset), DHCP_CLIENT_MAXMSGZISE)); - if (dhcpc->state == DHCP_CLIENT_STATE_REQUESTING) { - offset = (uint16_t)(offset + pico_dhcp_opt_reqip(DHCP_OPT(hdr, offset), &dhcpc->address)); - offset = (uint16_t)(offset + pico_dhcp_opt_serverid(DHCP_OPT(hdr, offset), &dhcpc->server_id)); - } - - break; - - default: - return -1; - } - - /* common options */ - offset = (uint16_t)(offset + pico_dhcp_opt_msgtype(DHCP_OPT(hdr, offset), msg_type)); - 
offset = (uint16_t)(offset + pico_dhcp_opt_paramlist(DHCP_OPT(hdr, offset))); - offset = (uint16_t)(offset + pico_dhcp_opt_end(DHCP_OPT(hdr, offset))); - - switch (dhcpc->state) - { - case DHCP_CLIENT_STATE_BOUND: - destination.addr = dhcpc->server_id.addr; - hdr->ciaddr = dhcpc->address.addr; - break; - - case DHCP_CLIENT_STATE_RENEWING: - destination.addr = dhcpc->server_id.addr; - hdr->ciaddr = dhcpc->address.addr; - break; - - case DHCP_CLIENT_STATE_REBINDING: - hdr->ciaddr = dhcpc->address.addr; - break; - - default: - /* do nothing */ - break; - } - - /* header information */ - hdr->op = PICO_DHCP_OP_REQUEST; - hdr->htype = PICO_DHCP_HTYPE_ETH; - hdr->hlen = PICO_SIZE_ETH; - hdr->xid = dhcpc->xid; - /* hdr->flags = short_be(PICO_DHCP_FLAG_BROADCAST); / * Nope: see bug #96! * / */ - hdr->dhcp_magic = PICO_DHCPD_MAGIC_COOKIE; - /* copy client hardware address */ - memcpy(hdr->hwaddr, &dhcpc->dev->eth->mac, PICO_SIZE_ETH); - - if (destination.addr == PICO_IP4_BCAST) - pico_ipv4_route_set_bcast_link(pico_ipv4_link_get(&dhcpc->address)); - - r = pico_socket_sendto(dhcpc->s, hdr, (int)(sizeof(struct pico_dhcp_hdr) + optlen), &destination, PICO_DHCPD_PORT); - PICO_FREE(hdr); - if (r < 0) - return -1; - - return 0; -} - -static void pico_dhcp_client_wakeup(uint16_t ev, struct pico_socket *s) -{ - - uint8_t *buf; - int r = 0; - struct pico_dhcp_hdr *hdr = NULL; - struct pico_dhcp_client_cookie *dhcpc = NULL; - - if ((ev & PICO_SOCK_EV_RD) == 0) - return; - - buf = PICO_ZALLOC(DHCP_CLIENT_MAXMSGZISE); - if (!buf) { - return; - } - - r = pico_socket_recvfrom(s, buf, DHCP_CLIENT_MAXMSGZISE, NULL, NULL); - if (r < 0) - goto out_discard_buf; - - /* If the 'xid' of an arriving message does not match the 'xid' - * of the most recent transmitted message, the message must be - * silently discarded. 
*/ - hdr = (struct pico_dhcp_hdr *)buf; - dhcpc = pico_dhcp_client_find_cookie(hdr->xid); - if (!dhcpc) - goto out_discard_buf; - - dhcpc->event = (uint8_t)pico_dhcp_client_opt_parse(buf, (uint16_t)r); - pico_dhcp_state_machine(dhcpc->event, dhcpc, buf); - -out_discard_buf: - PICO_FREE(buf); -} - -static void pico_dhcp_client_callback(struct pico_dhcp_client_cookie *dhcpc, int code) -{ - if(dhcpc->cb) - dhcpc->cb(dhcpc, code); -} - -void *MOCKABLE pico_dhcp_get_identifier(uint32_t xid) -{ - return (void *)pico_dhcp_client_find_cookie(xid); -} - -struct pico_ip4 MOCKABLE pico_dhcp_get_address(void*dhcpc) -{ - return ((struct pico_dhcp_client_cookie*)dhcpc)->address; -} - -struct pico_ip4 MOCKABLE pico_dhcp_get_gateway(void*dhcpc) -{ - return ((struct pico_dhcp_client_cookie*)dhcpc)->gateway; -} - -struct pico_ip4 pico_dhcp_get_netmask(void *dhcpc) -{ - return ((struct pico_dhcp_client_cookie*)dhcpc)->netmask; -} - -struct pico_ip4 pico_dhcp_get_nameserver(void*dhcpc, int index) -{ - struct pico_ip4 fault = { - .addr = 0xFFFFFFFFU - }; - if ((index != 0) && (index != 1)) - return fault; - - return ((struct pico_dhcp_client_cookie*)dhcpc)->nameserver[index]; -} - -int pico_dhcp_client_abort(uint32_t xid) -{ - return pico_dhcp_client_del_cookie(xid); -} - - -char *pico_dhcp_get_hostname(void) -{ - return dhcpc_host_name; -} - -char *pico_dhcp_get_domain(void) -{ - return dhcpc_domain_name; -} - -#endif diff --git a/kernel/picotcp/modules/pico_dhcp_client.h b/kernel/picotcp/modules/pico_dhcp_client.h deleted file mode 100644 index 8ec8e27..0000000 --- a/kernel/picotcp/modules/pico_dhcp_client.h +++ /dev/null @@ -1,32 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_DHCP_CLIENT -#define INCLUDE_PICO_DHCP_CLIENT -#include "pico_defines.h" -#ifdef PICO_SUPPORT_UDP -#include "pico_dhcp_common.h" -#include "pico_addressing.h" -#include "pico_protocol.h" - -int pico_dhcp_initiate_negotiation(struct pico_device *device, void (*callback)(void*cli, int code), uint32_t *xid); -void *pico_dhcp_get_identifier(uint32_t xid); -struct pico_ip4 pico_dhcp_get_address(void *cli); -struct pico_ip4 pico_dhcp_get_gateway(void *cli); -struct pico_ip4 pico_dhcp_get_netmask(void *cli); -struct pico_ip4 pico_dhcp_get_nameserver(void*cli, int index); -int pico_dhcp_client_abort(uint32_t xid); -char *pico_dhcp_get_hostname(void); -char *pico_dhcp_get_domain(void); - -/* possible codes for the callback */ -#define PICO_DHCP_SUCCESS 0 -#define PICO_DHCP_ERROR 1 -#define PICO_DHCP_RESET 2 - -#endif -#endif diff --git a/kernel/picotcp/modules/pico_dhcp_common.c b/kernel/picotcp/modules/pico_dhcp_common.c deleted file mode 100644 index ccd15f2..0000000 --- a/kernel/picotcp/modules/pico_dhcp_common.c +++ /dev/null @@ -1,190 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - Authors: Frederik Van Slycken - *********************************************************************/ - -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_dhcp_common.h" - -#if defined (PICO_SUPPORT_DHCPC) || defined (PICO_SUPPORT_DHCPD) -/* pico_dhcp_are_options_valid needs to be called first to prevent illegal memory access */ -/* The argument pointer is moved forward to the next option */ -struct pico_dhcp_opt *pico_dhcp_next_option(struct pico_dhcp_opt **ptr) -{ - uint8_t **p = (uint8_t **)ptr; - struct pico_dhcp_opt *opt = *ptr; - - if (opt->code == PICO_DHCP_OPT_END) - return NULL; - - if (opt->code == PICO_DHCP_OPT_PAD) { - *p += 1; - return *ptr; - } - - *p += (opt->len + 2); /* (len + 2) to account for code and len octet */ - return *ptr; -} - -uint8_t pico_dhcp_are_options_valid(void *ptr, int32_t len) -{ - uint8_t optlen = 0, *p = ptr; - - while (len > 0) { - switch (*p) - { - case PICO_DHCP_OPT_END: - return 1; - - case PICO_DHCP_OPT_PAD: - p++; - len--; - break; - - default: - p++; /* move pointer from code octet to len octet */ - len--; - if ((len <= 0) || (len - (*p + 1) < 0)) /* (*p + 1) to account for len octet */ - return 0; - - optlen = *p; - p += optlen + 1; - len -= optlen; - break; - } - } - return 0; -} - -uint8_t pico_dhcp_opt_netmask(void *ptr, struct pico_ip4 *ip) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: netmask */ - opt->code = PICO_DHCP_OPT_NETMASK; - opt->len = PICO_DHCP_OPTLEN_NETMASK - PICO_DHCP_OPTLEN_HDR; - opt->ext.netmask.ip = *ip; - return PICO_DHCP_OPTLEN_NETMASK; -} - -uint8_t pico_dhcp_opt_router(void *ptr, struct pico_ip4 *ip) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: router */ - opt->code = PICO_DHCP_OPT_ROUTER; - opt->len = PICO_DHCP_OPTLEN_ROUTER - PICO_DHCP_OPTLEN_HDR; - opt->ext.router.ip = *ip; - return PICO_DHCP_OPTLEN_ROUTER; -} - -uint8_t pico_dhcp_opt_dns(void *ptr, struct pico_ip4 *ip) -{ - struct pico_dhcp_opt *opt = 
(struct pico_dhcp_opt *)ptr; - - /* option: dns */ - opt->code = PICO_DHCP_OPT_DNS; - opt->len = PICO_DHCP_OPTLEN_DNS - PICO_DHCP_OPTLEN_HDR; - opt->ext.dns1.ip = *ip; - return PICO_DHCP_OPTLEN_DNS; -} - -uint8_t pico_dhcp_opt_broadcast(void *ptr, struct pico_ip4 *ip) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: broadcast */ - opt->code = PICO_DHCP_OPT_BROADCAST; - opt->len = PICO_DHCP_OPTLEN_BROADCAST - PICO_DHCP_OPTLEN_HDR; - opt->ext.broadcast.ip = *ip; - return PICO_DHCP_OPTLEN_BROADCAST; -} - -uint8_t pico_dhcp_opt_reqip(void *ptr, struct pico_ip4 *ip) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: request IP address */ - opt->code = PICO_DHCP_OPT_REQIP; - opt->len = PICO_DHCP_OPTLEN_REQIP - PICO_DHCP_OPTLEN_HDR; - opt->ext.req_ip.ip = *ip; - return PICO_DHCP_OPTLEN_REQIP; -} - -uint8_t pico_dhcp_opt_leasetime(void *ptr, uint32_t time) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: lease time */ - opt->code = PICO_DHCP_OPT_LEASETIME; - opt->len = PICO_DHCP_OPTLEN_LEASETIME - PICO_DHCP_OPTLEN_HDR; - opt->ext.lease_time.time = time; - return PICO_DHCP_OPTLEN_LEASETIME; -} - -uint8_t pico_dhcp_opt_msgtype(void *ptr, uint8_t type) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: message type */ - opt->code = PICO_DHCP_OPT_MSGTYPE; - opt->len = PICO_DHCP_OPTLEN_MSGTYPE - PICO_DHCP_OPTLEN_HDR; - opt->ext.msg_type.type = type; - return PICO_DHCP_OPTLEN_MSGTYPE; -} - -uint8_t pico_dhcp_opt_serverid(void *ptr, struct pico_ip4 *ip) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: server identifier */ - opt->code = PICO_DHCP_OPT_SERVERID; - opt->len = PICO_DHCP_OPTLEN_SERVERID - PICO_DHCP_OPTLEN_HDR; - opt->ext.server_id.ip = *ip; - return PICO_DHCP_OPTLEN_SERVERID; -} - -uint8_t pico_dhcp_opt_paramlist(void *ptr) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - uint8_t *param_code = 
&(opt->ext.param_list.code[0]); - - /* option: parameter list */ - opt->code = PICO_DHCP_OPT_PARAMLIST; - opt->len = PICO_DHCP_OPTLEN_PARAMLIST - PICO_DHCP_OPTLEN_HDR; - param_code[0] = PICO_DHCP_OPT_NETMASK; - param_code[1] = PICO_DHCP_OPT_TIME; - param_code[2] = PICO_DHCP_OPT_ROUTER; - param_code[3] = PICO_DHCP_OPT_HOSTNAME; - param_code[4] = PICO_DHCP_OPT_RENEWALTIME; - param_code[5] = PICO_DHCP_OPT_REBINDINGTIME; - param_code[6] = PICO_DHCP_OPT_DNS; - return PICO_DHCP_OPTLEN_PARAMLIST; -} - -uint8_t pico_dhcp_opt_maxmsgsize(void *ptr, uint16_t size) -{ - struct pico_dhcp_opt *opt = (struct pico_dhcp_opt *)ptr; - - /* option: maximum message size */ - opt->code = PICO_DHCP_OPT_MAXMSGSIZE; - opt->len = PICO_DHCP_OPTLEN_MAXMSGSIZE - PICO_DHCP_OPTLEN_HDR; - opt->ext.max_msg_size.size = short_be(size); - return PICO_DHCP_OPTLEN_MAXMSGSIZE; -} - -uint8_t pico_dhcp_opt_end(void *ptr) -{ - uint8_t *opt = (uint8_t *)ptr; - - /* option: end of options */ - *opt = PICO_DHCP_OPT_END; - return PICO_DHCP_OPTLEN_END; -} - -#endif diff --git a/kernel/picotcp/modules/pico_dhcp_common.h b/kernel/picotcp/modules/pico_dhcp_common.h deleted file mode 100644 index 4ccdc67..0000000 --- a/kernel/picotcp/modules/pico_dhcp_common.h +++ /dev/null @@ -1,191 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_DHCP_COMMON -#define INCLUDE_PICO_DHCP_COMMON -#include "pico_config.h" -#include "pico_addressing.h" - -#define PICO_DHCPD_PORT (short_be(67)) -#define PICO_DHCP_CLIENT_PORT (short_be(68)) -#define PICO_DHCPD_MAGIC_COOKIE (long_be(0x63825363)) -#define PICO_DHCP_HTYPE_ETH 1 - -/* Macro to get DHCP option field */ -#define DHCP_OPT(hdr, off) ((struct pico_dhcp_opt *)(((uint8_t *)hdr) + sizeof(struct pico_dhcp_hdr) + off)) - -/* flags */ -#define PICO_DHCP_FLAG_BROADCAST 0x8000 - -/* options */ -#define PICO_DHCP_OPT_PAD 0x00 -#define PICO_DHCP_OPT_NETMASK 0x01 -#define PICO_DHCP_OPT_TIME 0x02 -#define PICO_DHCP_OPT_ROUTER 0x03 -#define PICO_DHCP_OPT_DNS 0x06 -#define PICO_DHCP_OPT_HOSTNAME 0x0c -#define PICO_DHCP_OPT_DOMAINNAME 0x0f -#define PICO_DHCP_OPT_MTU 0x1a -#define PICO_DHCP_OPT_BROADCAST 0x1c -#define PICO_DHCP_OPT_NETBIOSNS 0x2c -#define PICO_DHCP_OPT_NETBIOSSCOPE 0x2f -#define PICO_DHCP_OPT_REQIP 0x32 -#define PICO_DHCP_OPT_LEASETIME 0x33 -#define PICO_DHCP_OPT_OPTOVERLOAD 0x34 -#define PICO_DHCP_OPT_MSGTYPE 0x35 -#define PICO_DHCP_OPT_SERVERID 0x36 -#define PICO_DHCP_OPT_PARAMLIST 0x37 -#define PICO_DHCP_OPT_MESSAGE 0x38 -#define PICO_DHCP_OPT_MAXMSGSIZE 0x39 -#define PICO_DHCP_OPT_RENEWALTIME 0x3a -#define PICO_DHCP_OPT_REBINDINGTIME 0x3b -#define PICO_DHCP_OPT_VENDORID 0x3c -#define PICO_DHCP_OPT_CLIENTID 0x3d -#define PICO_DHCP_OPT_DOMAINSEARCH 0x77 -#define PICO_DHCP_OPT_STATICROUTE 0x79 -#define PICO_DHCP_OPT_END 0xFF - -/* options len */ -#define PICO_DHCP_OPTLEN_HDR 2 /* account for code and len field */ -#define PICO_DHCP_OPTLEN_NETMASK 6 -#define PICO_DHCP_OPTLEN_ROUTER 6 -#define PICO_DHCP_OPTLEN_DNS 6 -#define PICO_DHCP_OPTLEN_BROADCAST 6 -#define PICO_DHCP_OPTLEN_REQIP 6 -#define PICO_DHCP_OPTLEN_LEASETIME 6 -#define PICO_DHCP_OPTLEN_OPTOVERLOAD 3 -#define PICO_DHCP_OPTLEN_MSGTYPE 3 -#define PICO_DHCP_OPTLEN_SERVERID 6 -#define 
PICO_DHCP_OPTLEN_PARAMLIST 9 /* PicoTCP specific */ -#define PICO_DHCP_OPTLEN_MAXMSGSIZE 4 -#define PICO_DHCP_OPTLEN_RENEWALTIME 6 -#define PICO_DHCP_OPTLEN_REBINDINGTIME 6 -#define PICO_DHCP_OPTLEN_END 1 - -/* op codes */ -#define PICO_DHCP_OP_REQUEST 1 -#define PICO_DHCP_OP_REPLY 2 - -/* rfc message types */ -#define PICO_DHCP_MSG_DISCOVER 1 -#define PICO_DHCP_MSG_OFFER 2 -#define PICO_DHCP_MSG_REQUEST 3 -#define PICO_DHCP_MSG_DECLINE 4 -#define PICO_DHCP_MSG_ACK 5 -#define PICO_DHCP_MSG_NAK 6 -#define PICO_DHCP_MSG_RELEASE 7 -#define PICO_DHCP_MSG_INFORM 8 - -/* custom message types */ -#define PICO_DHCP_EVENT_T1 9 -#define PICO_DHCP_EVENT_T2 10 -#define PICO_DHCP_EVENT_LEASE 11 -#define PICO_DHCP_EVENT_RETRANSMIT 12 -#define PICO_DHCP_EVENT_NONE 0xff - -PACKED_STRUCT_DEF pico_dhcp_hdr -{ - uint8_t op; - uint8_t htype; - uint8_t hlen; - uint8_t hops; /* zero */ - uint32_t xid; /* store this in the request */ - uint16_t secs; /* ignore */ - uint16_t flags; - uint32_t ciaddr; /* client address - if asking for renewal */ - uint32_t yiaddr; /* your address (client) */ - uint32_t siaddr; /* dhcp offered address */ - uint32_t giaddr; /* relay agent, bootp. 
*/ - uint8_t hwaddr[6]; - uint8_t hwaddr_padding[10]; - char hostname[64]; - char bootp_filename[128]; - uint32_t dhcp_magic; -}; - -PACKED_STRUCT_DEF pico_dhcp_opt -{ - uint8_t code; - uint8_t len; - PACKED_UNION_DEF dhcp_opt_ext_u { - PEDANTIC_STRUCT_DEF netmask_s { - struct pico_ip4 ip; - } netmask; - PEDANTIC_STRUCT_DEF router_s { - struct pico_ip4 ip; - } router; - PEDANTIC_STRUCT_DEF dns_s { - struct pico_ip4 ip; - } dns1; - struct dns_s dns2; - PEDANTIC_STRUCT_DEF broadcast_s { - struct pico_ip4 ip; - } broadcast; - PEDANTIC_STRUCT_DEF req_ip_s { - struct pico_ip4 ip; - } req_ip; - PEDANTIC_STRUCT_DEF lease_time_s { - uint32_t time; - } lease_time; - PEDANTIC_STRUCT_DEF opt_overload_s { - uint8_t value; - } opt_overload; - PEDANTIC_STRUCT_DEF tftp_server_s { - char name[1]; - } tftp_server; - PEDANTIC_STRUCT_DEF bootfile_s { - char name[1]; - } bootfile; - PEDANTIC_STRUCT_DEF msg_type_s { - uint8_t type; - } msg_type; - PEDANTIC_STRUCT_DEF server_id_s { - struct pico_ip4 ip; - } server_id; - PEDANTIC_STRUCT_DEF param_list_s { - uint8_t code[1]; - } param_list; - PEDANTIC_STRUCT_DEF message_s { - char error[1]; - } message; - PEDANTIC_STRUCT_DEF max_msg_size_s { - uint16_t size; - } max_msg_size; - PEDANTIC_STRUCT_DEF renewal_time_s { - uint32_t time; - } renewal_time; - PEDANTIC_STRUCT_DEF rebinding_time_s { - uint32_t time; - } rebinding_time; - PEDANTIC_STRUCT_DEF vendor_id_s { - uint8_t id[1]; - } vendor_id; - PEDANTIC_STRUCT_DEF client_id_s { - uint8_t id[1]; - } client_id; - PEDANTIC_STRUCT_DEF text_s { - char txt[1]; - } string; - } ext; -}; - -uint8_t dhcp_get_next_option(uint8_t *begin, uint8_t *data, int *len, uint8_t **nextopt); -struct pico_dhcp_opt *pico_dhcp_next_option(struct pico_dhcp_opt **ptr); -uint8_t pico_dhcp_are_options_valid(void *ptr, int32_t len); - -uint8_t pico_dhcp_opt_netmask(void *ptr, struct pico_ip4 *ip); -uint8_t pico_dhcp_opt_router(void *ptr, struct pico_ip4 *ip); -uint8_t pico_dhcp_opt_dns(void *ptr, struct pico_ip4 *ip); 
-uint8_t pico_dhcp_opt_broadcast(void *ptr, struct pico_ip4 *ip); -uint8_t pico_dhcp_opt_reqip(void *ptr, struct pico_ip4 *ip); -uint8_t pico_dhcp_opt_leasetime(void *ptr, uint32_t time); -uint8_t pico_dhcp_opt_msgtype(void *ptr, uint8_t type); -uint8_t pico_dhcp_opt_serverid(void *ptr, struct pico_ip4 *ip); -uint8_t pico_dhcp_opt_paramlist(void *ptr); -uint8_t pico_dhcp_opt_maxmsgsize(void *ptr, uint16_t size); -uint8_t pico_dhcp_opt_end(void *ptr); -#endif diff --git a/kernel/picotcp/modules/pico_dhcp_server.c b/kernel/picotcp/modules/pico_dhcp_server.c deleted file mode 100644 index 03ce387..0000000 --- a/kernel/picotcp/modules/pico_dhcp_server.c +++ /dev/null @@ -1,426 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - - Authors: Frederik Van Slycken, Kristof Roelants - *********************************************************************/ - -#include "pico_dhcp_server.h" -#include "pico_config.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_udp.h" -#include "pico_stack.h" -#include "pico_arp.h" - -#if (defined PICO_SUPPORT_DHCPD && defined PICO_SUPPORT_UDP) - -#ifdef DEBUG_DHCP_SERVER - #define dhcps_dbg dbg -#else - #define dhcps_dbg(...) 
do {} while(0) -#endif - -/* default configurations */ -#define DHCP_SERVER_OPENDNS long_be(0xd043dede) /* OpenDNS DNS server 208.67.222.222 */ -#define DHCP_SERVER_POOL_START long_be(0x00000064) -#define DHCP_SERVER_POOL_END long_be(0x000000fe) -#define DHCP_SERVER_LEASE_TIME long_be(0x00000078) - -/* maximum size of a DHCP message */ -#define DHCP_SERVER_MAXMSGSIZE (PICO_IP_MRU - sizeof(struct pico_ipv4_hdr) - sizeof(struct pico_udp_hdr)) - -enum dhcp_server_state { - PICO_DHCP_STATE_DISCOVER = 0, - PICO_DHCP_STATE_OFFER, - PICO_DHCP_STATE_REQUEST, - PICO_DHCP_STATE_BOUND, - PICO_DHCP_STATE_RENEWING -}; - -struct pico_dhcp_server_negotiation { - uint32_t xid; - enum dhcp_server_state state; - struct pico_dhcp_server_setting *dhcps; - struct pico_ip4 ciaddr; - struct pico_eth hwaddr; - uint8_t bcast; -}; - -static inline int ip_address_is_in_dhcp_range(struct pico_dhcp_server_negotiation *n, uint32_t x) -{ - uint32_t ip_hostendian = long_be(x); - if (ip_hostendian < long_be(n->dhcps->pool_start)) - return 0; - - if (ip_hostendian > long_be(n->dhcps->pool_end)) - return 0; - - return 1; -} - - -static void pico_dhcpd_wakeup(uint16_t ev, struct pico_socket *s); - -static int dhcp_settings_cmp(void *ka, void *kb) -{ - struct pico_dhcp_server_setting *a = ka, *b = kb; - if (a->dev == b->dev) - return 0; - - return (a->dev < b->dev) ? (-1) : (1); -} -static PICO_TREE_DECLARE(DHCPSettings, dhcp_settings_cmp); - -static int dhcp_negotiations_cmp(void *ka, void *kb) -{ - struct pico_dhcp_server_negotiation *a = ka, *b = kb; - if (a->xid == b->xid) - return 0; - - return (a->xid < b->xid) ? 
(-1) : (1); -} -static PICO_TREE_DECLARE(DHCPNegotiations, dhcp_negotiations_cmp); - - -static inline void dhcps_set_default_pool_start_if_not_provided(struct pico_dhcp_server_setting *dhcps) -{ - if (!dhcps->pool_start) - dhcps->pool_start = (dhcps->server_ip.addr & dhcps->netmask.addr) | DHCP_SERVER_POOL_START; -} - -static inline void dhcps_set_default_pool_end_if_not_provided(struct pico_dhcp_server_setting *dhcps) -{ - if (!dhcps->pool_end) - dhcps->pool_end = (dhcps->server_ip.addr & dhcps->netmask.addr) | DHCP_SERVER_POOL_END; -} -static inline void dhcps_set_default_lease_time_if_not_provided(struct pico_dhcp_server_setting *dhcps) -{ - if (!dhcps->lease_time) - dhcps->lease_time = DHCP_SERVER_LEASE_TIME; -} - -static inline struct pico_dhcp_server_setting *dhcps_try_open_socket(struct pico_dhcp_server_setting *dhcps) -{ - uint16_t port = PICO_DHCPD_PORT; - dhcps->s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &pico_dhcpd_wakeup); - if (!dhcps->s) { - dhcps_dbg("DHCP server ERROR: failure opening socket (%s)\n", strerror(pico_err)); - PICO_FREE(dhcps); - return NULL; - } - - if (pico_socket_bind(dhcps->s, &dhcps->server_ip, &port) < 0) { - dhcps_dbg("DHCP server ERROR: failure binding socket (%s)\n", strerror(pico_err)); - PICO_FREE(dhcps); - return NULL; - } - - if (pico_tree_insert(&DHCPSettings, dhcps)) { - dhcps_dbg("DHCP server ERROR: could not insert settings in tree\n"); - PICO_FREE(dhcps); - return NULL; - } - - return dhcps; -} - -static struct pico_dhcp_server_setting *pico_dhcp_server_add_setting(struct pico_dhcp_server_setting *setting) -{ - struct pico_dhcp_server_setting *dhcps = NULL, *found = NULL, test = { - 0 - }; - struct pico_ipv4_link *link = NULL; - - link = pico_ipv4_link_get(&setting->server_ip); - if (!link) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - test.dev = setting->dev; - found = pico_tree_findKey(&DHCPSettings, &test); - if (found) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - dhcps = 
PICO_ZALLOC(sizeof(struct pico_dhcp_server_setting)); - if (!dhcps) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - dhcps->lease_time = setting->lease_time; - dhcps->pool_start = setting->pool_start; - dhcps->pool_next = setting->pool_next; - dhcps->pool_end = setting->pool_end; - dhcps->dev = link->dev; - dhcps->server_ip = link->address; - dhcps->netmask = link->netmask; - - /* default values if not provided */ - dhcps_set_default_lease_time_if_not_provided(dhcps); - dhcps_set_default_pool_end_if_not_provided(dhcps); - dhcps_set_default_pool_start_if_not_provided(dhcps); - - dhcps->pool_next = dhcps->pool_start; - - return dhcps_try_open_socket(dhcps); - -} - -static struct pico_dhcp_server_negotiation *pico_dhcp_server_find_negotiation(uint32_t xid) -{ - struct pico_dhcp_server_negotiation test = { - 0 - }, *found = NULL; - - test.xid = xid; - found = pico_tree_findKey(&DHCPNegotiations, &test); - if (found) - return found; - else - return NULL; -} - -static inline void dhcp_negotiation_set_ciaddr(struct pico_dhcp_server_negotiation *dhcpn) -{ - struct pico_ip4 *ciaddr = NULL; - ciaddr = pico_arp_reverse_lookup(&dhcpn->hwaddr); - if (!ciaddr) { - dhcpn->ciaddr.addr = dhcpn->dhcps->pool_next; - dhcpn->dhcps->pool_next = long_be(long_be(dhcpn->dhcps->pool_next) + 1); - pico_arp_create_entry(dhcpn->hwaddr.addr, dhcpn->ciaddr, dhcpn->dhcps->dev); - } else { - dhcpn->ciaddr = *ciaddr; - } -} - -static struct pico_dhcp_server_negotiation *pico_dhcp_server_add_negotiation(struct pico_device *dev, struct pico_dhcp_hdr *hdr) -{ - struct pico_dhcp_server_negotiation *dhcpn = NULL; - struct pico_dhcp_server_setting test = { - 0 - }; - - if (pico_dhcp_server_find_negotiation(hdr->xid)) - return NULL; - - dhcpn = PICO_ZALLOC(sizeof(struct pico_dhcp_server_negotiation)); - if (!dhcpn) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - dhcpn->xid = hdr->xid; - dhcpn->state = PICO_DHCP_STATE_DISCOVER; - dhcpn->bcast = ((short_be(hdr->flags) & 
PICO_DHCP_FLAG_BROADCAST) != 0) ? (1) : (0); - memcpy(dhcpn->hwaddr.addr, hdr->hwaddr, PICO_SIZE_ETH); - - test.dev = dev; - dhcpn->dhcps = pico_tree_findKey(&DHCPSettings, &test); - if (!dhcpn->dhcps) { - dhcps_dbg("DHCP server WARNING: received DHCP message on unconfigured link %s\n", dev->name); - PICO_FREE(dhcpn); - return NULL; - } - - dhcp_negotiation_set_ciaddr(dhcpn); - if (pico_tree_insert(&DHCPNegotiations, dhcpn)) { - dhcps_dbg("DHCP server ERROR: could not insert negotiations in tree\n"); - PICO_FREE(dhcpn); - return NULL; - } - - return dhcpn; -} - -static void dhcpd_make_reply(struct pico_dhcp_server_negotiation *dhcpn, uint8_t msg_type) -{ - int r = 0, optlen = 0, offset = 0; - struct pico_ip4 broadcast = { - 0 - }, dns = { - 0 - }, destination = { - .addr = 0xFFFFFFFF - }; - struct pico_dhcp_hdr *hdr = NULL; - - dns.addr = DHCP_SERVER_OPENDNS; - broadcast.addr = dhcpn->dhcps->server_ip.addr | ~(dhcpn->dhcps->netmask.addr); - - optlen = PICO_DHCP_OPTLEN_MSGTYPE + PICO_DHCP_OPTLEN_SERVERID + PICO_DHCP_OPTLEN_LEASETIME + PICO_DHCP_OPTLEN_NETMASK + PICO_DHCP_OPTLEN_ROUTER - + PICO_DHCP_OPTLEN_BROADCAST + PICO_DHCP_OPTLEN_DNS + PICO_DHCP_OPTLEN_END; - hdr = PICO_ZALLOC(sizeof(struct pico_dhcp_hdr) + (uint32_t)optlen); - if (!hdr) { - return; - } - - hdr->op = PICO_DHCP_OP_REPLY; - hdr->htype = PICO_DHCP_HTYPE_ETH; - hdr->hlen = PICO_SIZE_ETH; - hdr->xid = dhcpn->xid; - hdr->yiaddr = dhcpn->ciaddr.addr; - hdr->siaddr = dhcpn->dhcps->server_ip.addr; - hdr->dhcp_magic = PICO_DHCPD_MAGIC_COOKIE; - memcpy(hdr->hwaddr, dhcpn->hwaddr.addr, PICO_SIZE_ETH); - - /* options */ - offset += pico_dhcp_opt_msgtype(DHCP_OPT(hdr, offset), msg_type); - offset += pico_dhcp_opt_serverid(DHCP_OPT(hdr, offset), &dhcpn->dhcps->server_ip); - offset += pico_dhcp_opt_leasetime(DHCP_OPT(hdr, offset), dhcpn->dhcps->lease_time); - offset += pico_dhcp_opt_netmask(DHCP_OPT(hdr, offset), &dhcpn->dhcps->netmask); - offset += pico_dhcp_opt_router(DHCP_OPT(hdr, offset), 
&dhcpn->dhcps->server_ip); - offset += pico_dhcp_opt_broadcast(DHCP_OPT(hdr, offset), &broadcast); - offset += pico_dhcp_opt_dns(DHCP_OPT(hdr, offset), &dns); - offset += pico_dhcp_opt_end(DHCP_OPT(hdr, offset)); - - if (dhcpn->bcast == 0) - destination.addr = hdr->yiaddr; - else { - hdr->flags |= short_be(PICO_DHCP_FLAG_BROADCAST); - destination.addr = broadcast.addr; - } - - r = pico_socket_sendto(dhcpn->dhcps->s, hdr, (int)(sizeof(struct pico_dhcp_hdr) + (uint32_t)optlen), &destination, PICO_DHCP_CLIENT_PORT); - if (r < 0) - dhcps_dbg("DHCP server WARNING: failure sending: %s!\n", strerror(pico_err)); - - PICO_FREE(hdr); - - return; -} - -static inline void parse_opt_msgtype(struct pico_dhcp_opt *opt, uint8_t *msgtype) -{ - if (opt->code == PICO_DHCP_OPT_MSGTYPE) { - *msgtype = opt->ext.msg_type.type; - dhcps_dbg("DHCP server: message type %u\n", *msgtype); - } -} - -static inline void parse_opt_reqip(struct pico_dhcp_opt *opt, struct pico_ip4 *reqip) -{ - if (opt->code == PICO_DHCP_OPT_REQIP) - reqip->addr = opt->ext.req_ip.ip.addr; -} - -static inline void parse_opt_serverid(struct pico_dhcp_opt *opt, struct pico_ip4 *serverid) -{ - if (opt->code == PICO_DHCP_OPT_SERVERID) - *serverid = opt->ext.server_id.ip; -} - -static inline void dhcps_make_reply_to_request_msg(struct pico_dhcp_server_negotiation *dhcpn, int bound_valid_flag) -{ - if ((dhcpn->state == PICO_DHCP_STATE_BOUND) && bound_valid_flag) - dhcpd_make_reply(dhcpn, PICO_DHCP_MSG_ACK); - - if (dhcpn->state == PICO_DHCP_STATE_OFFER) { - dhcpn->state = PICO_DHCP_STATE_BOUND; - dhcpd_make_reply(dhcpn, PICO_DHCP_MSG_ACK); - } -} - -static inline void dhcps_make_reply_to_discover_or_request(struct pico_dhcp_server_negotiation *dhcpn, uint8_t msgtype, int bound_valid_flag) -{ - if (PICO_DHCP_MSG_DISCOVER == msgtype) { - dhcpd_make_reply(dhcpn, PICO_DHCP_MSG_OFFER); - dhcpn->state = PICO_DHCP_STATE_OFFER; - } else if (PICO_DHCP_MSG_REQUEST == msgtype) { - dhcps_make_reply_to_request_msg(dhcpn, 
bound_valid_flag); - } -} - -static inline void dhcps_parse_options_loop(struct pico_dhcp_server_negotiation *dhcpn, struct pico_dhcp_hdr *hdr) -{ - struct pico_dhcp_opt *opt = DHCP_OPT(hdr, 0); - uint8_t msgtype = 0; - struct pico_ip4 reqip = { - 0 - }, server_id = { - 0 - }; - - do { - parse_opt_msgtype(opt, &msgtype); - parse_opt_reqip(opt, &reqip); - parse_opt_serverid(opt, &server_id); - } while (pico_dhcp_next_option(&opt)); - dhcps_make_reply_to_discover_or_request(dhcpn, msgtype, (!reqip.addr) && (!server_id.addr) && (hdr->ciaddr == dhcpn->ciaddr.addr)); -} - -static void pico_dhcp_server_recv(struct pico_socket *s, uint8_t *buf, uint32_t len) -{ - int32_t optlen = (int32_t)(len - sizeof(struct pico_dhcp_hdr)); - struct pico_dhcp_hdr *hdr = (struct pico_dhcp_hdr *)buf; - struct pico_dhcp_server_negotiation *dhcpn = NULL; - struct pico_device *dev = NULL; - - if (!pico_dhcp_are_options_valid(DHCP_OPT(hdr, 0), optlen)) - return; - - dev = pico_ipv4_link_find(&s->local_addr.ip4); - dhcpn = pico_dhcp_server_find_negotiation(hdr->xid); - if (!dhcpn) - dhcpn = pico_dhcp_server_add_negotiation(dev, hdr); - - if (!ip_address_is_in_dhcp_range(dhcpn, dhcpn->ciaddr.addr)) - return; - - dhcps_parse_options_loop(dhcpn, hdr); - -} - -static void pico_dhcpd_wakeup(uint16_t ev, struct pico_socket *s) -{ - uint8_t buf[DHCP_SERVER_MAXMSGSIZE] = { - 0 - }; - int r = 0; - - if (ev != PICO_SOCK_EV_RD) - return; - - r = pico_socket_recvfrom(s, buf, DHCP_SERVER_MAXMSGSIZE, NULL, NULL); - if (r < 0) - return; - - pico_dhcp_server_recv(s, buf, (uint32_t)r); - return; -} - -int pico_dhcp_server_initiate(struct pico_dhcp_server_setting *setting) -{ - if (!setting || !setting->server_ip.addr) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (pico_dhcp_server_add_setting(setting) == NULL) - return -1; - - return 0; -} - -int pico_dhcp_server_destroy(struct pico_device *dev) -{ - struct pico_dhcp_server_setting *found, test = { - 0 - }; - test.dev = dev; - found = 
pico_tree_findKey(&DHCPSettings, &test); - if (!found) { - pico_err = PICO_ERR_ENOENT; - return -1; - } - - pico_tree_delete(&DHCPSettings, found); - PICO_FREE(found); - return 0; -} - -#endif /* PICO_SUPPORT_DHCP */ diff --git a/kernel/picotcp/modules/pico_dhcp_server.h b/kernel/picotcp/modules/pico_dhcp_server.h deleted file mode 100644 index 45a9b1b..0000000 --- a/kernel/picotcp/modules/pico_dhcp_server.h +++ /dev/null @@ -1,34 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef INCLUDE_PICO_DHCP_SERVER -#define INCLUDE_PICO_DHCP_SERVER -#include "pico_defines.h" -#ifdef PICO_SUPPORT_UDP - -#include "pico_dhcp_common.h" -#include "pico_addressing.h" - -struct pico_dhcp_server_setting -{ - uint32_t pool_start; - uint32_t pool_next; - uint32_t pool_end; - uint32_t lease_time; - struct pico_device *dev; - struct pico_socket *s; - struct pico_ip4 server_ip; - struct pico_ip4 netmask; - uint8_t flags; /* unused atm */ -}; - -/* required field: IP address of the interface to serve, only IPs of this network will be served. */ -int pico_dhcp_server_initiate(struct pico_dhcp_server_setting *dhcps); - -/* To destroy an existing DHCP server configuration, running on a given interface */ -int pico_dhcp_server_destroy(struct pico_device *dev); - -#endif /* _INCLUDE_PICO_DHCP_SERVER */ -#endif diff --git a/kernel/picotcp/modules/pico_dns_client.c b/kernel/picotcp/modules/pico_dns_client.c deleted file mode 100644 index 91d9cc3..0000000 --- a/kernel/picotcp/modules/pico_dns_client.c +++ /dev/null @@ -1,846 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - . 
- Authors: Kristof Roelants - *********************************************************************/ -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_dns_client.h" -#include "pico_dns_common.h" -#include "pico_tree.h" - -#ifdef PICO_SUPPORT_DNS_CLIENT - -#ifdef PICO_SUPPORT_IPV4 - -#ifdef DEBUG_DNS - #define dns_dbg dbg -#else - #define dns_dbg(...) do {} while(0) -#endif - -/* DNS response length */ -#define PICO_DNS_MAX_QUERY_LEN 255 -#define PICO_DNS_MAX_QUERY_LABEL_LEN 63 - -/* DNS client retransmission time (msec) + frequency */ -#define PICO_DNS_CLIENT_RETRANS 4000 -#define PICO_DNS_CLIENT_MAX_RETRANS 3 - -static void pico_dns_client_callback(uint16_t ev, struct pico_socket *s); -static void pico_dns_client_retransmission(pico_time now, void *arg); -static int pico_dns_client_getaddr_init(const char *url, uint16_t proto, void (*callback)(char *, void *), void *arg); - -struct pico_dns_ns -{ - struct pico_ip4 ns; /* nameserver */ -}; - -static int dns_ns_cmp(void *ka, void *kb) -{ - struct pico_dns_ns *a = ka, *b = kb; - return pico_ipv4_compare(&a->ns, &b->ns); -} -static PICO_TREE_DECLARE(NSTable, dns_ns_cmp); - -struct pico_dns_query -{ - char *query; - uint16_t len; - uint16_t id; - uint16_t qtype; - uint16_t qclass; - uint8_t retrans; - struct pico_dns_ns q_ns; - struct pico_socket *s; - void (*callback)(char *, void *); - void *arg; -}; - -static int dns_query_cmp(void *ka, void *kb) -{ - struct pico_dns_query *a = ka, *b = kb; - if (a->id == b->id) - return 0; - - return (a->id < b->id) ? 
(-1) : (1); -} -static PICO_TREE_DECLARE(DNSTable, dns_query_cmp); - -static int pico_dns_client_del_ns(struct pico_ip4 *ns_addr) -{ - struct pico_dns_ns test = {{0}}, *found = NULL; - - test.ns = *ns_addr; - found = pico_tree_findKey(&NSTable, &test); - if (!found) - return -1; - - pico_tree_delete(&NSTable, found); - PICO_FREE(found); - - /* no NS left, add default NS */ - if (pico_tree_empty(&NSTable)) - pico_dns_client_init(); - - return 0; -} - -static struct pico_dns_ns *pico_dns_client_add_ns(struct pico_ip4 *ns_addr) -{ - struct pico_dns_ns *dns = NULL, *found = NULL, test = {{0}}; - struct pico_ip4 zero = { - 0 - }; /* 0.0.0.0 */ - - /* Do not add 0.0.0.0 addresses, which some DHCP servers might reply */ - if (!pico_ipv4_compare(ns_addr, &zero)) - { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - dns = PICO_ZALLOC(sizeof(struct pico_dns_ns)); - if (!dns) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - dns->ns = *ns_addr; - - found = pico_tree_insert(&NSTable, dns); - if (found) { /* nameserver already present or out of memory */ - PICO_FREE(dns); - if ((void *)found == (void *)&LEAF) - return NULL; - else - return found; - } - - /* default NS found, remove it */ - pico_string_to_ipv4(PICO_DNS_NS_DEFAULT, (uint32_t *)&test.ns.addr); - found = pico_tree_findKey(&NSTable, &test); - if (found && (found->ns.addr != ns_addr->addr)) - pico_dns_client_del_ns(&found->ns); - - return dns; -} - -static struct pico_dns_ns pico_dns_client_next_ns(struct pico_ip4 *ns_addr) -{ - struct pico_dns_ns dns = {{0}}, *nxtdns = NULL; - struct pico_tree_node *node = NULL, *nxtnode = NULL; - - dns.ns = *ns_addr; - node = pico_tree_findNode(&NSTable, &dns); - if (!node) - return dns; /* keep using current NS */ - - nxtnode = pico_tree_next(node); - nxtdns = nxtnode->keyValue; - if (!nxtdns) - nxtdns = (struct pico_dns_ns *)pico_tree_first(&NSTable); - - return *nxtdns; -} - -static struct pico_dns_query *pico_dns_client_add_query(struct pico_dns_header *hdr, uint16_t 
len, struct pico_dns_question_suffix *suffix, - void (*callback)(char *, void *), void *arg) -{ - struct pico_dns_query *q = NULL, *found = NULL; - - q = PICO_ZALLOC(sizeof(struct pico_dns_query)); - if (!q) - return NULL; - - q->query = (char *)hdr; - q->len = len; - q->id = short_be(hdr->id); - q->qtype = short_be(suffix->qtype); - q->qclass = short_be(suffix->qclass); - q->retrans = 1; - q->q_ns = *((struct pico_dns_ns *)pico_tree_first(&NSTable)); - q->callback = callback; - q->arg = arg; - q->s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &pico_dns_client_callback); - if (!q->s) { - PICO_FREE(q); - return NULL; - } - - found = pico_tree_insert(&DNSTable, q); - if (found) { - if ((void *)found != (void *)&LEAF) /* If found == &LEAF we're out of memory and pico_err is set */ - pico_err = PICO_ERR_EAGAIN; - pico_socket_close(q->s); - PICO_FREE(q); - return NULL; - } - - return q; -} - -static int pico_dns_client_del_query(uint16_t id) -{ - struct pico_dns_query test = { - 0 - }, *found = NULL; - - test.id = id; - found = pico_tree_findKey(&DNSTable, &test); - if (!found) - return -1; - - PICO_FREE(found->query); - pico_socket_close(found->s); - pico_tree_delete(&DNSTable, found); - PICO_FREE(found); - return 0; -} - -static struct pico_dns_query *pico_dns_client_find_query(uint16_t id) -{ - struct pico_dns_query test = { - 0 - }, *found = NULL; - - test.id = id; - found = pico_tree_findKey(&DNSTable, &test); - if (found) - return found; - else - return NULL; -} - -/* seek end of string */ -static char *pico_dns_client_seek(char *ptr) -{ - if (!ptr) - return NULL; - - while (*ptr != 0) - ptr++; - return ptr + 1; -} - -static struct pico_dns_query *pico_dns_client_idcheck(uint16_t id) -{ - struct pico_dns_query test = { - 0 - }; - - test.id = id; - return pico_tree_findKey(&DNSTable, &test); -} - -static int pico_dns_client_query_header(struct pico_dns_header *hdr) -{ - uint16_t id = 0; - uint8_t retry = 32; - - do { - id = (uint16_t)(pico_rand() & 0xFFFFU); 
- dns_dbg("DNS: generated id %u\n", id); - } while (retry-- && pico_dns_client_idcheck(id)); - if (!retry) - return -1; - - hdr->id = short_be(id); - pico_dns_fill_packet_header(hdr, 1, 0, 0, 0); /* 1 question, 0 answers */ - - return 0; -} - -static int pico_dns_client_check_header(struct pico_dns_header *pre) -{ - if (pre->qr != PICO_DNS_QR_RESPONSE || pre->opcode != PICO_DNS_OPCODE_QUERY || pre->rcode != PICO_DNS_RCODE_NO_ERROR) { - dns_dbg("DNS ERROR: OPCODE %d | TC %d | RCODE %d\n", pre->opcode, pre->tc, pre->rcode); - return -1; - } - - if (short_be(pre->ancount) < 1) { - dns_dbg("DNS ERROR: ancount < 1\n"); - return -1; - } - - return 0; -} - -static int pico_dns_client_check_qsuffix(struct pico_dns_question_suffix *suf, struct pico_dns_query *q) -{ - if (!suf) - return -1; - - if (short_be(suf->qtype) != q->qtype || short_be(suf->qclass) != q->qclass) { - dns_dbg("DNS ERROR: received qtype (%u) or qclass (%u) incorrect\n", short_be(suf->qtype), short_be(suf->qclass)); - return -1; - } - - return 0; -} - -static int pico_dns_client_check_url(struct pico_dns_header *resp, struct pico_dns_query *q) -{ - char *recv_name = (char*)(resp) + sizeof(struct pico_dns_header) + PICO_DNS_LABEL_INITIAL; - char *exp_name = (char *)(q->query) + sizeof(struct pico_dns_header) + PICO_DNS_LABEL_INITIAL; - if (strcasecmp(recv_name, exp_name) != 0) - return -1; - - return 0; -} - -static int pico_dns_client_check_asuffix(struct pico_dns_record_suffix *suf, struct pico_dns_query *q) -{ - if (!suf) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (short_be(suf->rtype) != q->qtype || short_be(suf->rclass) != q->qclass) { - dns_dbg("DNS WARNING: received qtype (%u) or qclass (%u) incorrect\n", short_be(suf->rtype), short_be(suf->rclass)); - return -1; - } - - if (long_be(suf->rttl) > PICO_DNS_MAX_TTL) { - dns_dbg("DNS WARNING: received TTL (%u) > MAX (%u)\n", long_be(suf->rttl), PICO_DNS_MAX_TTL); - return -1; - } - - return 0; -} - -static char 
*pico_dns_client_seek_suffix(char *suf, struct pico_dns_header *pre, struct pico_dns_query *q) -{ - struct pico_dns_record_suffix *asuffix = NULL; - uint16_t comp = 0, compression = 0; - uint16_t i = 0; - - if (!suf) - return NULL; - - while (i++ < short_be(pre->ancount)) { - comp = short_from(suf); - compression = short_be(comp); - switch (compression >> 14) - { - case PICO_DNS_POINTER: - while (compression >> 14 == PICO_DNS_POINTER) { - dns_dbg("DNS: pointer\n"); - suf += sizeof(uint16_t); - comp = short_from(suf); - compression = short_be(comp); - } - break; - - case PICO_DNS_LABEL: - dns_dbg("DNS: label\n"); - suf = pico_dns_client_seek(suf); - break; - - default: - dns_dbg("DNS ERROR: incorrect compression (%u) value\n", compression); - return NULL; - } - - asuffix = (struct pico_dns_record_suffix *)suf; - if (!asuffix) - break; - - if (pico_dns_client_check_asuffix(asuffix, q) < 0) { - suf += (sizeof(struct pico_dns_record_suffix) + short_be(asuffix->rdlength)); - continue; - } - - return suf; - } - return NULL; -} - -static int pico_dns_client_send(struct pico_dns_query *q) -{ - uint16_t *paramID = PICO_ZALLOC(sizeof(uint16_t)); - if (!paramID) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - dns_dbg("DNS: sending query to %08X\n", q->q_ns.ns.addr); - if (!q->s) - goto failure; - - if (pico_socket_connect(q->s, &q->q_ns.ns, short_be(PICO_DNS_NS_PORT)) < 0) - goto failure; - - pico_socket_send(q->s, q->query, q->len); - *paramID = q->id; - if (!pico_timer_add(PICO_DNS_CLIENT_RETRANS, pico_dns_client_retransmission, paramID)) { - dns_dbg("DNS: Failed to start retransmission timer\n"); - goto failure; - } - - return 0; - -failure: - PICO_FREE(paramID); - return -1; -} - -static void pico_dns_client_retransmission(pico_time now, void *arg) -{ - struct pico_dns_query *q = NULL; - struct pico_dns_query dummy; - IGNORE_PARAMETER(now); - - if(!arg) - return; - - /* search for the dns query and free used space */ - dummy.id = *(uint16_t *)arg; - q = (struct 
pico_dns_query *)pico_tree_findKey(&DNSTable, &dummy); - PICO_FREE(arg); - - /* dns query successful? */ - if (!q) { - return; - } - - q->retrans++; - if (q->retrans <= PICO_DNS_CLIENT_MAX_RETRANS) { - q->q_ns = pico_dns_client_next_ns(&q->q_ns.ns); - pico_dns_client_send(q); - } else { - pico_err = PICO_ERR_EIO; - q->callback(NULL, q->arg); - pico_dns_client_del_query(q->id); - } -} - -static int pico_dns_client_check_rdlength(uint16_t qtype, uint16_t rdlength) -{ - switch (qtype) - { - case PICO_DNS_TYPE_A: - if (rdlength != PICO_DNS_RR_A_RDLENGTH) - return -1; - break; -#ifdef PICO_SUPPORT_IPV6 - case PICO_DNS_TYPE_AAAA: - if (rdlength != PICO_DNS_RR_AAAA_RDLENGTH) - return -1; - break; -#endif - default: - break; - } - - return 0; -} - -static int pico_dns_client_user_callback(struct pico_dns_record_suffix *asuffix, struct pico_dns_query *q) -{ - uint32_t ip = 0; - char *str = NULL; - char *rdata = (char *) asuffix + sizeof(struct pico_dns_record_suffix); - - if (pico_dns_client_check_rdlength(q->qtype, short_be(asuffix->rdlength)) < 0) { - dns_dbg("DNS ERROR: Invalid RR rdlength: %u\n", short_be(asuffix->rdlength)); - return -1; - } - - switch (q->qtype) - { - case PICO_DNS_TYPE_A: - ip = long_from(rdata); - str = PICO_ZALLOC(PICO_DNS_IPV4_ADDR_LEN); - pico_ipv4_to_string(str, ip); - break; -#ifdef PICO_SUPPORT_IPV6 - case PICO_DNS_TYPE_AAAA: - { - struct pico_ip6 ip6; - memcpy(&ip6.addr, rdata, sizeof(struct pico_ip6)); - str = PICO_ZALLOC(PICO_DNS_IPV6_ADDR_LEN); - pico_ipv6_to_string(str, ip6.addr); - break; - } -#endif - case PICO_DNS_TYPE_PTR: - /* TODO: check for decompression / rdlength vs. 
decompressed length */ - pico_dns_notation_to_name(rdata, short_be(asuffix->rdlength)); - str = PICO_ZALLOC((size_t)(short_be(asuffix->rdlength) - - PICO_DNS_LABEL_INITIAL)); - if (!str) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - memcpy(str, rdata + PICO_DNS_LABEL_INITIAL, short_be(asuffix->rdlength) - PICO_DNS_LABEL_INITIAL); - break; - - default: - dns_dbg("DNS ERROR: incorrect qtype (%u)\n", q->qtype); - break; - } - - if (q->retrans) { - q->callback(str, q->arg); - q->retrans = 0; - pico_dns_client_del_query(q->id); - } - - if (str) - PICO_FREE(str); - - return 0; -} - -static char dns_response[PICO_IP_MRU] = { - 0 -}; - -static void pico_dns_try_fallback_cname(struct pico_dns_query *q, struct pico_dns_header *h, struct pico_dns_question_suffix *qsuffix) -{ - uint16_t type = q->qtype; - uint16_t proto = PICO_PROTO_IPV4; - struct pico_dns_record_suffix *asuffix = NULL; - char *p_asuffix = NULL; - char *cname_orig = NULL; - char *cname = NULL; - uint16_t cname_len; - - /* Try to use CNAME only if A or AAAA query is ongoing */ - if (type != PICO_DNS_TYPE_A && type != PICO_DNS_TYPE_AAAA) - return; - - if (type == PICO_DNS_TYPE_AAAA) - proto = PICO_PROTO_IPV6; - - q->qtype = PICO_DNS_TYPE_CNAME; - p_asuffix = (char *)qsuffix + sizeof(struct pico_dns_question_suffix); - p_asuffix = pico_dns_client_seek_suffix(p_asuffix, h, q); - if (!p_asuffix) { - return; - } - - /* Found CNAME response. Re-initiating query. */ - asuffix = (struct pico_dns_record_suffix *)p_asuffix; - cname = pico_dns_decompress_name((char *)asuffix + sizeof(struct pico_dns_record_suffix), (pico_dns_packet *)h); /* allocates memory! 
*/ - cname_orig = cname; /* to free later */ - - if (cname == NULL) - return; - - cname_len = (uint16_t)(pico_dns_strlen(cname) + 1); - - pico_dns_notation_to_name(cname, cname_len); - if (cname[0] == '.') - cname++; - - dns_dbg("Restarting query for name '%s'\n", cname); - pico_dns_client_getaddr_init(cname, proto, q->callback, q->arg); - PICO_FREE(cname_orig); - pico_dns_client_del_query(q->id); -} - -static void pico_dns_client_callback(uint16_t ev, struct pico_socket *s) -{ - struct pico_dns_header *header = NULL; - char *domain; - struct pico_dns_question_suffix *qsuffix = NULL; - struct pico_dns_record_suffix *asuffix = NULL; - struct pico_dns_query *q = NULL; - char *p_asuffix = NULL; - - if (ev == PICO_SOCK_EV_ERR) { - dns_dbg("DNS: socket error received\n"); - return; - } - - if (ev & PICO_SOCK_EV_RD) { - if (pico_socket_read(s, dns_response, PICO_IP_MRU) < 0) - return; - } - - header = (struct pico_dns_header *)dns_response; - domain = (char *)header + sizeof(struct pico_dns_header); - qsuffix = (struct pico_dns_question_suffix *)pico_dns_client_seek(domain); - /* valid asuffix is determined dynamically later on */ - - if (pico_dns_client_check_header(header) < 0) - return; - - q = pico_dns_client_find_query(short_be(header->id)); - if (!q) - return; - - if (pico_dns_client_check_qsuffix(qsuffix, q) < 0) - return; - - if (pico_dns_client_check_url(header, q) < 0) - return; - - p_asuffix = (char *)qsuffix + sizeof(struct pico_dns_question_suffix); - p_asuffix = pico_dns_client_seek_suffix(p_asuffix, header, q); - if (!p_asuffix) { - pico_dns_try_fallback_cname(q, header, qsuffix); - return; - } - - asuffix = (struct pico_dns_record_suffix *)p_asuffix; - pico_dns_client_user_callback(asuffix, q); - - return; -} - -static int pico_dns_create_message(struct pico_dns_header **header, struct pico_dns_question_suffix **qsuffix, enum pico_dns_arpa arpa, const char *url, uint16_t *urlen, uint16_t *hdrlen) -{ - char *domain; - char inaddr_arpa[14]; - uint16_t 
strlen = 0, arpalen = 0; - - if (!url) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if(arpa == PICO_DNS_ARPA4) { - strcpy(inaddr_arpa, ".in-addr.arpa"); - strlen = pico_dns_strlen(url); - } - -#ifdef PICO_SUPPORT_IPV6 - else if (arpa == PICO_DNS_ARPA6) { - strcpy(inaddr_arpa, ".IP6.ARPA"); - strlen = STRLEN_PTR_IP6; - } -#endif - else { - strcpy(inaddr_arpa, ""); - strlen = pico_dns_strlen(url); - } - - arpalen = pico_dns_strlen(inaddr_arpa); - *urlen = (uint16_t)(PICO_DNS_LABEL_INITIAL + strlen + arpalen + PICO_DNS_LABEL_ROOT); - *hdrlen = (uint16_t)(sizeof(struct pico_dns_header) + *urlen + sizeof(struct pico_dns_question_suffix)); - *header = PICO_ZALLOC(*hdrlen); - if (!*header) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - *header = (struct pico_dns_header *)*header; - domain = (char *) *header + sizeof(struct pico_dns_header); - *qsuffix = (struct pico_dns_question_suffix *)(domain + *urlen); - - if(arpa == PICO_DNS_ARPA4) { - memcpy(domain + PICO_DNS_LABEL_INITIAL, url, strlen); - pico_dns_mirror_addr(domain + PICO_DNS_LABEL_INITIAL); - memcpy(domain + PICO_DNS_LABEL_INITIAL + strlen, inaddr_arpa, arpalen); - } - -#ifdef PICO_SUPPORT_IPV6 - else if (arpa == PICO_DNS_ARPA6) { - pico_dns_ipv6_set_ptr(url, domain + PICO_DNS_LABEL_INITIAL); - memcpy(domain + PICO_DNS_LABEL_INITIAL + STRLEN_PTR_IP6, inaddr_arpa, arpalen); - } -#endif - else { - memcpy(domain + PICO_DNS_LABEL_INITIAL, url, strlen); - } - - /* assemble dns message */ - pico_dns_client_query_header(*header); - pico_dns_name_to_dns_notation(domain, strlen); - - return 0; -} - -static int pico_dns_client_addr_label_check_len(const char *url) -{ - const char *p, *label; - int count; - label = url; - p = label; - - while(*p != (char) 0) { - count = 0; - while((*p != (char)0)) { - if (*p == '.') { - label = ++p; - break; - } - - count++; - p++; - if (count > PICO_DNS_MAX_QUERY_LABEL_LEN) - return -1; - } - } - return 0; -} - -static int pico_dns_client_getaddr_check(const char *url, void 
(*callback)(char *, void *)) -{ - if (!url || !callback) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (strlen(url) > PICO_DNS_MAX_QUERY_LEN) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (pico_dns_client_addr_label_check_len(url) < 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; -} - -static int pico_dns_client_getaddr_init(const char *url, uint16_t proto, void (*callback)(char *, void *), void *arg) -{ - struct pico_dns_header *header = NULL; - struct pico_dns_question_suffix *qsuffix = NULL; - struct pico_dns_query *q = NULL; - uint16_t len = 0, lblen = 0; - (void)proto; - - if (pico_dns_client_getaddr_check(url, callback) < 0) - return -1; - - if(pico_dns_create_message(&header, &qsuffix, PICO_DNS_NO_ARPA, url, &lblen, &len) != 0) - return -1; - -#ifdef PICO_SUPPORT_IPV6 - if (proto == PICO_PROTO_IPV6) { - pico_dns_question_fill_suffix(qsuffix, PICO_DNS_TYPE_AAAA, PICO_DNS_CLASS_IN); - } else -#endif - pico_dns_question_fill_suffix(qsuffix, PICO_DNS_TYPE_A, PICO_DNS_CLASS_IN); - - q = pico_dns_client_add_query(header, len, qsuffix, callback, arg); - if (!q) { - PICO_FREE(header); - return -1; - } - - if (pico_dns_client_send(q) < 0) { - pico_dns_client_del_query(q->id); /* frees msg */ - return -1; - } - - return 0; -} - -int pico_dns_client_getaddr(const char *url, void (*callback)(char *, void *), void *arg) -{ - return pico_dns_client_getaddr_init(url, PICO_PROTO_IPV4, callback, arg); -} - -int pico_dns_client_getaddr6(const char *url, void (*callback)(char *, void *), void *arg) -{ - return pico_dns_client_getaddr_init(url, PICO_PROTO_IPV6, callback, arg); -} - -static int pico_dns_getname_univ(const char *ip, void (*callback)(char *, void *), void *arg, enum pico_dns_arpa arpa) -{ - struct pico_dns_header *header = NULL; - struct pico_dns_question_suffix *qsuffix = NULL; - struct pico_dns_query *q = NULL; - uint16_t len = 0, lblen = 0; - - if (!ip || !callback) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - 
if(pico_dns_create_message(&header, &qsuffix, arpa, ip, &lblen, &len) != 0) - return -1; - - pico_dns_question_fill_suffix(qsuffix, PICO_DNS_TYPE_PTR, PICO_DNS_CLASS_IN); - q = pico_dns_client_add_query(header, len, qsuffix, callback, arg); - if (!q) { - PICO_FREE(header); - return -1; - } - - if (pico_dns_client_send(q) < 0) { - pico_dns_client_del_query(q->id); /* frees header */ - return -1; - } - - return 0; -} - -int pico_dns_client_getname(const char *ip, void (*callback)(char *, void *), void *arg) -{ - return pico_dns_getname_univ(ip, callback, arg, PICO_DNS_ARPA4); -} - - -int pico_dns_client_getname6(const char *ip, void (*callback)(char *, void *), void *arg) -{ - return pico_dns_getname_univ(ip, callback, arg, PICO_DNS_ARPA6); -} - -int pico_dns_client_nameserver(struct pico_ip4 *ns, uint8_t flag) -{ - if (!ns) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - switch (flag) - { - case PICO_DNS_NS_ADD: - if (!pico_dns_client_add_ns(ns)) - return -1; - - break; - - case PICO_DNS_NS_DEL: - if (pico_dns_client_del_ns(ns) < 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - break; - - default: - pico_err = PICO_ERR_EINVAL; - return -1; - } - return 0; -} - -int pico_dns_client_init(void) -{ - struct pico_ip4 default_ns = { - 0 - }; - - if (pico_string_to_ipv4(PICO_DNS_NS_DEFAULT, (uint32_t *)&default_ns.addr) < 0) - return -1; - - return pico_dns_client_nameserver(&default_ns, PICO_DNS_NS_ADD); -} - -#else - -int pico_dns_client_init(void) -{ - dbg("ERROR Trying to initialize DNS module: IPv4 not supported in this build.\n"); - return -1; -} -#endif /* PICO_SUPPORT_IPV4 */ - - -#endif /* PICO_SUPPORT_DNS_CLIENT */ - diff --git a/kernel/picotcp/modules/pico_dns_client.h b/kernel/picotcp/modules/pico_dns_client.h deleted file mode 100644 index da3f313..0000000 --- a/kernel/picotcp/modules/pico_dns_client.h +++ /dev/null @@ -1,50 +0,0 @@ -/********************************************************************* - PicoTCP. 
Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Kristof Roelants - *********************************************************************/ - -#ifndef INCLUDE_PICO_DNS_CLIENT -#define INCLUDE_PICO_DNS_CLIENT - -#define PICO_DNS_NS_DEL 0 -#define PICO_DNS_NS_ADD 1 -#include "pico_config.h" - -/* Compression values */ -#define PICO_DNS_LABEL 0 -#define PICO_DNS_POINTER 3 - -/* Label len */ -#define PICO_DNS_LABEL_INITIAL 1u -#define PICO_DNS_LABEL_ROOT 1 - -/* TTL values */ -#define PICO_DNS_MAX_TTL 604800 /* one week */ - -/* Len of an IPv4 address string */ -#define PICO_DNS_IPV4_ADDR_LEN 16 -#define PICO_DNS_IPV6_ADDR_LEN 54 - -/* Default nameservers + port */ -#define PICO_DNS_NS_DEFAULT "208.67.222.222" -#define PICO_DNS_NS_PORT 53 - -/* RDLENGTH for A and AAAA RR's */ -#define PICO_DNS_RR_A_RDLENGTH 4 -#define PICO_DNS_RR_AAAA_RDLENGTH 16 - -int pico_dns_client_init(void); -/* flag is PICO_DNS_NS_DEL or PICO_DNS_NS_ADD */ -int pico_dns_client_nameserver(struct pico_ip4 *ns, uint8_t flag); -int pico_dns_client_getaddr(const char *url, void (*callback)(char *ip, void *arg), void *arg); -int pico_dns_client_getname(const char *ip, void (*callback)(char *url, void *arg), void *arg); -#ifdef PICO_SUPPORT_IPV6 -int pico_dns_client_getaddr6(const char *url, void (*callback)(char *, void *), void *arg); -int pico_dns_client_getname6(const char *url, void (*callback)(char *, void *), void *arg); -#endif - -#endif /* _INCLUDE_PICO_DNS_CLIENT */ diff --git a/kernel/picotcp/modules/pico_dns_common.c b/kernel/picotcp/modules/pico_dns_common.c deleted file mode 100644 index 44d89da..0000000 --- a/kernel/picotcp/modules/pico_dns_common.c +++ /dev/null @@ -1,1784 +0,0 @@ -/* **************************************************************************** - * PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - * See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - * . 
- * Authors: Toon Stegen, Jelle De Vleeschouwer - * ****************************************************************************/ -#include "pico_config.h" -#include "pico_protocol.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_dns_common.h" -#include "pico_tree.h" - -#ifdef DEBUG_DNS - #define dns_dbg dbg -#else - #define dns_dbg(...) do {} while(0) -#endif - -/* MARK: v NAME & IP FUNCTIONS */ -#define dns_name_foreach_label_safe(label, name, next, maxlen) \ - for ((label) = (name), (next) = (char *)((name) + *(unsigned char*)(name) + 1); \ - (*(label) != '\0') && ((uint16_t)((label) - (name)) < (maxlen)); \ - (label) = (next), (next) = (char *)((next) + *(unsigned char*)(next) + 1)) - -/* **************************************************************************** - * Checks if the DNS name doesn't exceed 256 bytes including zero-byte. - * - * @param namelen Length of the DNS name-string including zero-byte - * @return 0 when the length is correct - * ****************************************************************************/ -int -pico_dns_check_namelen( uint16_t namelen ) -{ - return ((namelen > 2u) && (namelen < 256u)) ? (0) : (-1); -} - -/* **************************************************************************** - * Returns the length of a name in a DNS-packet as if DNS name compression - * would be applied to the packet. 
If there's no compression present - * - * @param name Compressed name you want the calculate the strlen from - * @return Returns strlen of a compressed name, takes the first byte of compr- - * ession pointer into account but not the second byte, which acts - * like a trailing zero-byte - * ****************************************************************************/ -uint16_t -pico_dns_namelen_comp( char *name ) -{ - uint16_t len = 0; - char *label = NULL, *next = NULL; - - /* Check params */ - if (!name) { - pico_err = PICO_ERR_EINVAL; - return 0; - } - - /* Just count until the zero-byte or a pointer */ - dns_name_foreach_label_safe(label, name, next, 255) { - if ((0xC0 & *label)) - break; - } - - /* Calculate the length */ - len = (uint16_t)(label - name); - if(*label != '\0') - len++; - - return len; -} - -/* **************************************************************************** - * Returns the uncompressed name in DNS name format when DNS name compression - * is applied to the packet-buffer. - * - * @param name Compressed name, should be in the bounds of the actual packet - * @param packet Packet that contains the compressed name - * @return Returns the decompressed name, NULL on failure. 
- * ****************************************************************************/ -char * -pico_dns_decompress_name( char *name, pico_dns_packet *packet ) -{ - char decompressed_name[PICO_DNS_NAMEBUF_SIZE] = { - 0 - }; - char *return_name = NULL; - uint8_t *dest_iterator = NULL; - uint8_t *iterator = NULL; - uint16_t ptr = 0, nslen = 0; - - /* Initialise iterators */ - iterator = (uint8_t *) name; - dest_iterator = (uint8_t *) decompressed_name; - while (*iterator != '\0') { - if ((*iterator) & 0xC0) { - /* We have a pointer */ - ptr = (uint16_t)((((uint16_t) *iterator) & 0x003F) << 8); - ptr = (uint16_t)(ptr | (uint16_t) *(iterator + 1)); - iterator = (uint8_t *)((uint8_t *)packet + ptr); - } else { - /* We want to keep the label lengths */ - *dest_iterator = (uint8_t) *iterator; - /* Copy the label */ - memcpy(dest_iterator + 1, iterator + 1, *iterator); - /* Move to next length label */ - dest_iterator += (*iterator) + 1; - iterator += (*iterator) + 1; - } - } - /* Append final zero-byte */ - *dest_iterator = (uint8_t) '\0'; - - /* Provide storage for the name to return */ - nslen = (uint16_t)(pico_dns_strlen(decompressed_name) + 1); - if(!(return_name = PICO_ZALLOC((size_t)nslen))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - memcpy((void *)return_name, (void *)decompressed_name, (size_t)nslen); - - return return_name; -} - -/* **************************************************************************** - * Determines the length of a given url as if it where a DNS name in reverse - * resolution format. - * - * @param url URL wanted to create a reverse resolution name from. - * @param arpalen Will get filled with the length of the ARPA-suffix depending - * on the proto-parameter. - * @param proto The protocol to create a ARPA-suffix for. 
Can be either - * 'PICO_PROTO_IPV4' or 'PICO_PROTO_IPV6' - * @return Returns the length of the reverse name - * ****************************************************************************/ -static uint16_t -pico_dns_url_get_reverse_len( const char *url, - uint16_t *arpalen, - uint16_t proto ) -{ - uint16_t slen = (uint16_t)(pico_dns_strlen(url) + 2u); - - /* Check if pointers given are not NULL */ - if (pico_dns_check_namelen(slen) && !arpalen) { - pico_err = PICO_ERR_EINVAL; - return 0; - } - - /* Get the length of arpa-suffix if needed */ - if (proto == PICO_PROTO_IPV4) - *arpalen = (uint16_t) pico_dns_strlen(PICO_ARPA_IPV4_SUFFIX); - -#ifdef PICO_SUPPORT_IPV6 - else if (proto == PICO_PROTO_IPV6) - { - *arpalen = (uint16_t) pico_dns_strlen(PICO_ARPA_IPV6_SUFFIX); - slen = STRLEN_PTR_IP6 + 2u; - } -#endif - return slen; -} - -/* **************************************************************************** - * Converts a DNS name in URL format to a reverse name in DNS name format. - * Provides space for the DNS name as well. PICO_FREE() should be called on the - * returned string buffer that contains the reverse DNS name. - * - * @param url DNS name in URL format to convert to reverse name - * @param proto Depending on the protocol given the ARPA-suffix will be added. - * @return Returns a pointer to a string-buffer with the reverse DNS name. 
- * ****************************************************************************/ -static char * -pico_dns_url_to_reverse_qname( const char *url, uint8_t proto ) -{ - char *reverse_qname = NULL; - uint16_t arpalen = 0; - uint16_t slen = pico_dns_url_get_reverse_len(url, &arpalen, proto); - - /* Check namelen */ - if (pico_dns_check_namelen(slen)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Provide space for the reverse name */ - if (!(reverse_qname = PICO_ZALLOC((size_t)(slen + arpalen)))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* If reverse IPv4 address resolving, convert to IPv4 arpa-format */ - if (PICO_PROTO_IPV4 == proto) { - memcpy(reverse_qname + 1u, url, slen - 1u); - pico_dns_mirror_addr(reverse_qname + 1u); - memcpy(reverse_qname + slen - 1, PICO_ARPA_IPV4_SUFFIX, arpalen); - } - - /* If reverse IPv6 address resolving, convert to IPv6 arpa-format */ -#ifdef PICO_SUPPORT_IPV6 - else if (proto == PICO_PROTO_IPV6) { - pico_dns_ipv6_set_ptr(url, reverse_qname + 1u); - memcpy(reverse_qname + 1u + STRLEN_PTR_IP6, - PICO_ARPA_IPV6_SUFFIX, arpalen); - } -#endif - else { /* This shouldn't happen */ - PICO_FREE(reverse_qname); - return NULL; - } - - pico_dns_name_to_dns_notation(reverse_qname, (uint16_t)(slen + arpalen)); - return reverse_qname; -} - -/* **************************************************************************** - * Converts a DNS name in DNS name format to a name in URL format. Provides - * space for the name in URL format as well. PICO_FREE() should be called on - * the returned string buffer that contains the name in URL format. - * - * @param qname DNS name in DNS name format to convert - * @return Returns a pointer to a string-buffer with the URL name on success. 
- * ****************************************************************************/ -char * -pico_dns_qname_to_url( const char *qname ) -{ - char *url = NULL; - char temp[256] = { - 0 - }; - uint16_t namelen = pico_dns_strlen(qname); - - /* Check if qname is not a NULL-pointer and if the length is OK */ - if (pico_dns_check_namelen(namelen)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Provide space for the URL */ - if (!(url = PICO_ZALLOC(namelen))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Convert qname to an URL */ - memcpy(temp, qname, namelen); - pico_dns_notation_to_name(temp, namelen); - memcpy((void *)url, (void *)(temp + 1), (size_t)(namelen - 1)); - - return url; -} - -/* **************************************************************************** - * Converts a DNS name in URL format to a name in DNS name format. Provides - * space for the DNS name as well. PICO_FREE() should be called on the returned - * string buffer that contains the DNS name. - * - * @param url DNS name in URL format to convert - * @return Returns a pointer to a string-buffer with the DNS name on success. 
- * ****************************************************************************/ -char * -pico_dns_url_to_qname( const char *url ) -{ - char *qname = NULL; - uint16_t namelen = (uint16_t)(pico_dns_strlen(url) + 2u); - - /* Check if url or qname_addr is not a NULL-pointer */ - if (pico_dns_check_namelen(namelen)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Provide space for the qname */ - if (!(qname = PICO_ZALLOC(namelen))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Copy in the URL (+1 to leave space for leading '.') */ - memcpy(qname + 1, url, (size_t)(namelen - 1)); - pico_dns_name_to_dns_notation(qname, namelen); - return qname; -} - -/* **************************************************************************** - * @param url String-buffer - * @return Length of string-buffer in an uint16_t - * ****************************************************************************/ -uint16_t -pico_dns_strlen( const char *url ) -{ - if (!url) - return 0; - - return (uint16_t) strlen(url); -} - -/* **************************************************************************** - * Replaces .'s in a DNS name in URL format by the label lengths. So it - * actually converts a name in URL format to a name in DNS name format. - * f.e. "*www.google.be" => "3www6google2be0" - * - * @param url Location to buffer with name in URL format. The URL needs to - * be +1 byte offset in the actual buffer. Size is should be - * pico_dns_strlen(url) + 2. - * @param maxlen Maximum length of buffer so it doesn't cause a buffer overflow - * @return 0 on success, something else on failure. - * ****************************************************************************/ -int pico_dns_name_to_dns_notation( char *url, uint16_t maxlen ) -{ - char c = '\0'; - char *lbl = url, *i = url; - - /* Check params */ - if (!url || pico_dns_check_namelen(maxlen)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Iterate over url */ - while ((c = *++i) != '\0') { - if ('.' 
== c) { - *lbl = (char)(i - lbl - 1); - lbl = i; - } - - if ((uint16_t)(i - url) > (uint16_t)maxlen) break; - } - *lbl = (char)(i - lbl - 1); - - return 0; -} - -/* **************************************************************************** - * Replaces the label lengths in a DNS-name by .'s. So it actually converts a - * name in DNS format to a name in URL format. - * f.e. 3www6google2be0 => .www.google.be - * - * @param ptr Location to buffer with name in DNS name format - * @param maxlen Maximum length of buffer so it doesn't cause a buffer overflow - * @return 0 on success, something else on failure. - * ****************************************************************************/ -int pico_dns_notation_to_name( char *ptr, uint16_t maxlen ) -{ - char *label = NULL, *next = NULL; - - /* Iterate safely over the labels and update each label */ - dns_name_foreach_label_safe(label, ptr, next, maxlen) { - *label = '.'; - } - - return 0; -} - -/* **************************************************************************** - * Determines the length of the first label of a DNS name in URL-format - * - * @param url DNS name in URL-format - * @return Length of the first label of DNS name in URL-format - * ****************************************************************************/ -uint16_t -pico_dns_first_label_length( const char *url ) -{ - const char *i = NULL; - uint16_t len = 0; - - /* Check params */ - if (!url) return 0; - - /* Count */ - i = url; - while (*i != '.' && *i != '\0') { - ++i; - ++len; - } - return len; -} - -/* **************************************************************************** - * Mirrors a dotted IPv4-address string. - * f.e. 192.168.0.1 => 1.0.168.192 - * - * @param ptr - * @return 0 on success, something else on failure. 
- * ****************************************************************************/ -int -pico_dns_mirror_addr( char *ip ) -{ - uint32_t addr = 0; - - /* Convert IPv4-string to network-order 32-bit number */ - if (pico_string_to_ipv4(ip, &addr) < 0) - return -1; - - /* Mirror the 32-bit number */ - addr = (uint32_t)((uint32_t)((addr & (uint32_t)0xFF000000u) >> 24) | - (uint32_t)((addr & (uint32_t)0xFF0000u) >> 8) | - (uint32_t)((addr & (uint32_t)0xFF00u) << 8) | - (uint32_t)((addr & (uint32_t)0xFFu) << 24)); - - return pico_ipv4_to_string(ip, addr); -} - -#ifdef PICO_SUPPORT_IPV6 -/* **************************************************************************** - * Get the ASCII value of the Most Significant Nibble of a byte - * - * @param byte Byte you want to extract the MSN from. - * @return The ASCII value of the Most Significant Nibble of the byte - * ****************************************************************************/ -static inline char -dns_ptr_ip6_nibble_lo( uint8_t byte ) -{ - uint8_t nibble = byte & 0x0f; - if (nibble < 10) - return (char)(nibble + '0'); - else - return (char)(nibble - 0xa + 'a'); -} - -/* **************************************************************************** - * Get the ASCII value of the Least Significant Nibble of a byte - * - * @param byte Byte you want to extract the LSN from. - * @return The ASCII value of the Least Significant Nibble of the byte - * ****************************************************************************/ -static inline char -dns_ptr_ip6_nibble_hi( uint8_t byte ) -{ - uint8_t nibble = (byte & 0xf0u) >> 4u; - if (nibble < 10u) - return (char)(nibble + '0'); - else - return (char)(nibble - 0xa + 'a'); -} - -/* **************************************************************************** - * Convert an IPv6-address in string-format to a IPv6-address in nibble-format. - * Doesn't add a IPv6 ARPA-suffix though. 
- * - * @param ip IPv6-address stored as a string - * @param dst Destination to store IPv6-address in nibble-format - * ****************************************************************************/ -void -pico_dns_ipv6_set_ptr( const char *ip, char *dst ) -{ - int i = 0, j = 0; - struct pico_ip6 ip6; - memset(&ip6, 0, sizeof(struct pico_ip6)); - pico_string_to_ipv6(ip, ip6.addr); - for (i = 15; i >= 0; i--) { - if ((j + 3) > 64) return; /* Don't want j to go out of bounds */ - - dst[j++] = dns_ptr_ip6_nibble_lo(ip6.addr[i]); - dst[j++] = '.'; - dst[j++] = dns_ptr_ip6_nibble_hi(ip6.addr[i]); - dst[j++] = '.'; - } -} -#endif - -/* MARK: ^ NAME & IP FUNCTIONS */ -/* MARK: v QUESTION FUNCTIONS */ - -/* **************************************************************************** - * Calculates the size of a single DNS Question. Void-pointer allows this - * function to be used with pico_tree_size. - * - * @param question Void-point to DNS Question - * @return Size in bytes of single DNS Question if it was copied flat. - * ****************************************************************************/ -static uint16_t pico_dns_question_size( void *question ) -{ - uint16_t size = 0; - struct pico_dns_question *q = (struct pico_dns_question *)question; - if (!q) - return 0; - - size = q->qname_length; - size = (uint16_t)(size + sizeof(struct pico_dns_question_suffix)); - return size; -} - -/* **************************************************************************** - * Deletes a single DNS Question. - * - * @param question Void-pointer to DNS Question. Can be used with pico_tree_- - * destroy. - * @return Returns 0 on success, something else on failure. 
- * ****************************************************************************/ -int -pico_dns_question_delete( void **question ) -{ - struct pico_dns_question **q = (struct pico_dns_question **)question; - - /* Check params */ - if ((!q) || !(*q)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if ((*q)->qname) - PICO_FREE(((*q)->qname)); - - if ((*q)->qsuffix) - PICO_FREE((*q)->qsuffix); - - PICO_FREE((*q)); - *question = NULL; - - return 0; -} - -/* **************************************************************************** - * Fills in the DNS question suffix-fields with the correct values. - * - * todo: Update pico_dns_client to make the same mechanism possible like with - * filling DNS Resource Record-suffixes. - * - * @param suf Pointer to the suffix member of the DNS question. - * @param qtype DNS type of the DNS question to be. - * @param qclass DNS class of the DNS question to be. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_dns_question_fill_suffix( struct pico_dns_question_suffix *suf, - uint16_t qtype, - uint16_t qclass ) -{ - if (!suf) - return -1; - - suf->qtype = short_be(qtype); - suf->qclass = short_be(qclass); - return 0; -} - -/* **************************************************************************** - * Fills in the name of the DNS question. - * - * @param qname Pointer-pointer to the name-member of the DNS-question - * @param url Name in URL format you want to convert to a name in DNS name - * format. When reverse resolving, only the IP, either IPV4 or - * IPV6, should be given in string format. - * f.e. => for IPv4: "192.168.2.1" - * => for IPv6: "2001:0db8:85a3:0042:1000:8a2e:0370:7334" - * @param qtype DNS type type of the DNS question to be. - * @param proto When reverse is true the reverse resolution name will be - * generated depending on the protocol. Can be either - * PICO_PROTO_IPV4 or PICO_PROTO_IPV6. 
- * @param reverse When this is true a reverse resolution name will be generated - * from the URL. - * @return The eventual length of the generated name, 0 on failure. - * ****************************************************************************/ -static uint16_t -pico_dns_question_fill_name( char **qname, - const char *url, - uint16_t qtype, - uint8_t proto, - uint8_t reverse ) -{ - uint16_t slen = 0; - - /* Try to convert the URL to an FQDN */ - if (reverse && qtype == PICO_DNS_TYPE_PTR) - *qname = pico_dns_url_to_reverse_qname(url, proto); - else { - (*qname) = pico_dns_url_to_qname(url); - } - - if (!(*qname)) { - return 0; - } - - slen = (uint16_t)(pico_dns_strlen(*qname) + 1u); - return (pico_dns_check_namelen(slen)) ? ((uint16_t)0) : (slen); -} - -/* **************************************************************************** - * Creates a standalone DNS Question with a given name and type. - * - * @param url DNS question name in URL format. Will be converted to DNS - * name notation format. - * @param len Will be filled with the total length of the DNS question. - * @param proto Protocol for which you want to create a question. Can be - * either PICO_PROTO_IPV4 or PICO_PROTO_IPV6. - * @param qtype DNS type of the question to be. - * @param qclass DNS class of the question to be. - * @param reverse When this is true, a reverse resolution name will be - * generated from the URL - * @return Returns pointer to the created DNS Question on success, NULL on - * failure. 
- * ****************************************************************************/ -struct pico_dns_question * -pico_dns_question_create( const char *url, - uint16_t *len, - uint8_t proto, - uint16_t qtype, - uint16_t qclass, - uint8_t reverse ) -{ - struct pico_dns_question *question = NULL; - uint16_t slen = 0; - int ret = 0; - - /* Check if valid arguments are provided */ - if (!url || !len) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Allocate space for the question and the subfields */ - if (!(question = PICO_ZALLOC(sizeof(struct pico_dns_question)))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Fill name field */ - slen = pico_dns_question_fill_name(&(question->qname), url, - qtype, proto, reverse); - question->qname_length = (uint8_t)(slen); - question->proto = proto; - - /* Provide space for the question suffix & try to fill in */ - question->qsuffix = PICO_ZALLOC(sizeof(struct pico_dns_question_suffix)); - ret = pico_dns_question_fill_suffix(question->qsuffix, qtype, qclass); - if (ret || pico_dns_check_namelen(slen)) { - pico_dns_question_delete((void **)&question); - return NULL; - } - - /* Determine the entire length of the question */ - *len = (uint16_t)(slen + (uint16_t)sizeof(struct pico_dns_question_suffix)); - - return question; -} - -/* **************************************************************************** - * Decompresses the name of a single DNS question. - * - * @param question Question you want to decompress the name of - * @param packet Packet in which the DNS question is contained. - * @return Pointer to original name of the DNS question before decompressing. 
- * ****************************************************************************/ -char * -pico_dns_question_decompress( struct pico_dns_question *question, - pico_dns_packet *packet ) -{ - char *qname_original = question->qname; - - /* Try to decompress the question name */ - if (!(question->qname = pico_dns_decompress_name(question->qname, packet))) { - question->qname = qname_original; - } - - return qname_original; -} - - -/* MARK: ^ QUESTION FUNCTIONS */ -/* MARK: v RESOURCE RECORD FUNCTIONS */ - -/* **************************************************************************** - * Copies the contents of DNS Resource Record to a single flat memory-buffer. - * - * @param record Pointer to DNS record you want to copy flat. - * @param destination Pointer-pointer to flat memory buffer to copy DNS record - * to. When function returns, this will point to location - * right after the flat copied DNS Resource Record. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -static int -pico_dns_record_copy_flat( struct pico_dns_record *record, - uint8_t **destination ) -{ - char *dest_rname = NULL; /* rname destination location */ - struct pico_dns_record_suffix *dest_rsuffix = NULL; /* rsuffix destin. 
*/ - uint8_t *dest_rdata = NULL; /* rdata destination location */ - - /* Check if there are no NULL-pointers given */ - if (!record || !destination || !(*destination)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Initialise the destination pointers to the right locations */ - dest_rname = (char *) *destination; - dest_rsuffix = (struct pico_dns_record_suffix *) - (dest_rname + record->rname_length); - dest_rdata = ((uint8_t *)dest_rsuffix + - sizeof(struct pico_dns_record_suffix)); - - /* Copy the rname of the resource record into the flat location */ - strcpy(dest_rname, record->rname); - - /* Copy the question suffix fields */ - dest_rsuffix->rtype = record->rsuffix->rtype; - dest_rsuffix->rclass = record->rsuffix->rclass; - dest_rsuffix->rttl = record->rsuffix->rttl; - dest_rsuffix->rdlength = record->rsuffix->rdlength; - - /* Copy the rdata of the resource */ - memcpy(dest_rdata, record->rdata, short_be(dest_rsuffix->rdlength)); - - /* Point to location right after flat resource record */ - *destination = (uint8_t *)(dest_rdata + - short_be(record->rsuffix->rdlength)); - return 0; -} - -/* **************************************************************************** - * Calculates the size of a single DNS Resource Record. Void-pointer allows - * this function to be used with pico_tree_size. - * - * @param record void-pointer to DNS record you want to know the size of. - * @return Size of single DNS record if it was copied flat. 
- * ****************************************************************************/ -static uint16_t -pico_dns_record_size( void *record ) -{ - uint16_t size = 0; - struct pico_dns_record *rr = (struct pico_dns_record *)record; - - if (!rr || !(rr->rsuffix)) - return 0; - - size = rr->rname_length; - size = (uint16_t)(size + sizeof(struct pico_dns_record_suffix)); - size = (uint16_t)(size + short_be(rr->rsuffix->rdlength)); - return size; -} - -/* **************************************************************************** - * Deletes a single DNS resource record. Void-pointer-pointer allows this - * function to be used with pico_tree_destroy. - * - * @param record void-pointer-pointer to DNS record you want to delete. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_dns_record_delete( void **record ) -{ - struct pico_dns_record **rr = (struct pico_dns_record **)record; - - if ((!rr) || !(*rr)) - return 0; - - if ((*rr)->rname) - PICO_FREE((*rr)->rname); - - if ((*rr)->rsuffix) - PICO_FREE((*rr)->rsuffix); - - if ((*rr)->rdata) - PICO_FREE((*rr)->rdata); - - PICO_FREE((*rr)); - *record = NULL; - - return 0; -} - -/* **************************************************************************** - * Just copies a resource record hard. - * - * @param record DNS record you want to copy - * @return Pointer to copy of DNS record. 
- * ****************************************************************************/ -struct pico_dns_record * -pico_dns_record_copy( struct pico_dns_record *record ) -{ - struct pico_dns_record *copy = NULL; - - /* Check params */ - if (!record || !(record->rname) || !(record->rdata) || !(record->rsuffix)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Provide space for the copy */ - if (!(copy = PICO_ZALLOC(sizeof(struct pico_dns_record)))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Provide space for the subfields */ - copy->rname = PICO_ZALLOC((size_t)record->rname_length); - copy->rsuffix = PICO_ZALLOC(sizeof(struct pico_dns_record_suffix)); - copy->rdata = PICO_ZALLOC((size_t)short_be(record->rsuffix->rdlength)); - if (!(copy->rname) || !(copy->rsuffix) || !(copy->rdata)) { - pico_dns_record_delete((void **)©); - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Fill in the rname field */ - memcpy((void *)(copy->rname), (void *)(record->rname), - (size_t)(record->rname_length)); - copy->rname_length = record->rname_length; - - /* Fill in the rsuffix fields */ - copy->rsuffix->rtype = record->rsuffix->rtype; - copy->rsuffix->rclass = record->rsuffix->rclass; - copy->rsuffix->rttl = record->rsuffix->rttl; - copy->rsuffix->rdlength = record->rsuffix->rdlength; - - /* Fill in the rdata field */ - memcpy(copy->rdata, record->rdata, short_be(record->rsuffix->rdlength)); - - return copy; -} - -/* **************************************************************************** - * Fills in the DNS resource record suffix-fields with the correct values. - * - * @param suf Pointer-pointer to rsuffix-member of struct pico_dns_record. - * @param rtype DNS type of the resource record to be. - * @param rclass DNS class of the resource record to be. - * @param rttl DNS ttl of the resource record to be. - * @param rdlength DNS rdlength of the resource record to be. - * @return Returns 0 on success, something else on failure. 
- * ****************************************************************************/ -static int -pico_dns_record_fill_suffix( struct pico_dns_record_suffix **suf, - uint16_t rtype, - uint16_t rclass, - uint32_t rttl, - uint16_t rdlength ) -{ - /* Try to provide space for the rsuffix */ - if (!(*suf = PICO_ZALLOC(sizeof(struct pico_dns_record_suffix)))) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - /* Fill in the fields */ - (*suf)->rtype = short_be(rtype); - (*suf)->rclass = short_be(rclass); - (*suf)->rttl = long_be(rttl); - (*suf)->rdlength = short_be(rdlength); - - return 0; -} - -/* **************************************************************************** - * Fills the data-buffer of a DNS resource record. - * - * @param rdata Pointer-pointer to rdata-member of struct pico_dns_record. - * @param _rdata Memory buffer with data to insert in the resource record. If - * data should contain a DNS name, the name in the databuffer - * needs to be in URL-format. - * @param datalen The exact length in bytes of the _rdata-buffer. If data of - * record should contain a DNS name, datalen needs to be - * pico_dns_strlen(_rdata). - * @param rtype DNS type of the resource record to be - * @return Returns 0 on failure, length of filled in rdata-member on success. - * Can differ from datalen-param because of URL to DNS Name conversion. 
- * ****************************************************************************/ -static uint16_t -pico_dns_record_fill_rdata( uint8_t **rdata, - void *_rdata, - uint16_t datalen, - uint16_t rtype ) -{ - uint16_t _datalen = 0; - - /* If type is PTR, rdata will be a DNS name in URL format */ - if (rtype == PICO_DNS_TYPE_PTR) { - _datalen = (uint16_t)(datalen + 2u); - if (!(*rdata = (uint8_t *)pico_dns_url_to_qname(_rdata))) { - pico_err = PICO_ERR_ENOMEM; - return 0; - } - } else { - /* Otherwise just copy in the databuffer */ - if (datalen == 0) { - return datalen; - } - - _datalen = datalen; - if (!(*rdata = (uint8_t *)PICO_ZALLOC((size_t)datalen))) { - pico_err = PICO_ERR_ENOMEM; - return 0; - } - - memcpy((void *)*rdata, (void *)_rdata, datalen); - } - - return _datalen; -} - -/* **************************************************************************** - * Create a standalone DNS Resource Record with a given name. - * - * @param url DNS rrecord name in URL format. Will be converted to DNS - * name notation format. - * @param _rdata Memory buffer with data to insert in the resource record. If - * data should contain a DNS name, the name in the databuffer - * needs to be in URL-format. - * @param datalen The exact length in bytes of the _rdata-buffer. If data of - * record should contain a DNS name, datalen needs to be - * pico_dns_strlen(_rdata). - * @param len Will be filled with the total length of the DNS rrecord. - * @param rtype DNS type of the resource record to be. - * @param rclass DNS class of the resource record to be. - * @param rttl DNS ttl of the resource record to be. 
- * @return Returns pointer to the created DNS Resource Record - * ****************************************************************************/ -struct pico_dns_record * -pico_dns_record_create( const char *url, - void *_rdata, - uint16_t datalen, - uint16_t *len, - uint16_t rtype, - uint16_t rclass, - uint32_t rttl ) -{ - struct pico_dns_record *record = NULL; - uint16_t slen = (uint16_t)(pico_dns_strlen(url) + 2u); - int ret = 0; - - /* Check params */ - if (pico_dns_check_namelen(slen) || !_rdata || !len) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Allocate space for the record and subfields */ - if (!(record = PICO_ZALLOC(sizeof(struct pico_dns_record)))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Provide space and convert the URL to a DNS name */ - record->rname = pico_dns_url_to_qname(url); - record->rname_length = slen; - - /* Provide space & fill in the rdata field */ - datalen = pico_dns_record_fill_rdata(&(record->rdata), _rdata, - datalen, rtype); - - /* Provide space & fill in the rsuffix */ - ret = pico_dns_record_fill_suffix(&(record->rsuffix), rtype, rclass, rttl, - datalen); - - /* Check if everything succeeded */ - if (!(record->rname) || ret) { - pico_dns_record_delete((void **)&record); - return NULL; - } - - /* Determine the complete length of resource record */ - *len = (uint16_t)(slen + sizeof(struct pico_dns_record_suffix) + datalen); - return record; -} - -/* **************************************************************************** - * Decompresses the name of single DNS record. - * - * @param record DNS record to decompress the name of. - * @param packet Packet in which is DNS record is present - * @return Pointer to original name of the DNS record before decompressing. 
- * ****************************************************************************/ -char * -pico_dns_record_decompress( struct pico_dns_record *record, - pico_dns_packet *packet ) -{ - char *rname_original = record->rname; - - /* Try to decompress the record name */ - if (!(record->rname = pico_dns_decompress_name(record->rname, packet))) { - record->rname = rname_original; - } - - return rname_original; -} - -static int pico_tolower(int c) -{ - if ((c >= 'A') && (c <= 'Z')) - c += 'a' - 'A'; - - return c; -} - -/* MARK: ^ RESOURCE RECORD FUNCTIONS */ -/* MARK: v COMPARING */ - -/* **************************************************************************** - * Compares two databuffers against each other. - * - * @param a 1st Memory buffer to compare - * @param b 2nd Memory buffer to compare - * @param rdlength_a Length of 1st memory buffer - * @param rdlength_b Length of 2nd memory buffer - * @param caseinsensitive Whether or not the bytes are compared - * case-insensitive. Should be either - * PICO_DNS_CASE_SENSITIVE or PICO_DNS_CASE_INSENSITIVE - * @return 0 when the buffers are equal, returns difference when they're not. 
- * ****************************************************************************/ -int -pico_dns_rdata_cmp( uint8_t *a, uint8_t *b, - uint16_t rdlength_a, uint16_t rdlength_b, uint8_t caseinsensitive ) -{ - uint16_t i = 0; - uint16_t slen = 0; - int dif = 0; - - /* Check params */ - if (!a || !b) { - if (!a && !b) - return 0; - - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Determine the smallest length */ - slen = rdlength_a; - if (rdlength_b < slen) - slen = rdlength_b; - - /* loop over slen */ - if(caseinsensitive) { - for (i = 0; i < slen; i++) { - if ((dif = pico_tolower((int)a[i]) - pico_tolower((int)b[i]))) { - return dif; - } - } - }else{ - for (i = 0; i < slen; i++) { - if ((dif = (int)a[i] - (int)b[i])) { - return dif; - } - } - } - - /* Return difference of buffer lengths */ - return (int)((int)rdlength_a - (int)rdlength_b); -} - -/* **************************************************************************** - * Compares 2 DNS questions - * - * @param qa DNS question A as a void-pointer (for pico_tree) - * @param qb DNS question A as a void-pointer (for pico_tree) - * @return 0 when questions are equal, returns difference when they're not. 
- * ****************************************************************************/ -int -pico_dns_question_cmp( void *qa, - void *qb ) -{ - int dif = 0; - uint16_t at = 0, bt = 0; - struct pico_dns_question *a = (struct pico_dns_question *)qa; - struct pico_dns_question *b = (struct pico_dns_question *)qb; - - /* Check params */ - if (!a || !b) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* First, compare the qtypes */ - at = short_be(a->qsuffix->qtype); - bt = short_be(b->qsuffix->qtype); - if ((dif = (int)((int)at - (int)bt))) - return dif; - - /* Then compare qnames */ - return pico_dns_rdata_cmp((uint8_t *)a->qname, (uint8_t *)b->qname, - pico_dns_strlen(a->qname), - pico_dns_strlen(b->qname), PICO_DNS_CASE_INSENSITIVE); -} - -/* **************************************************************************** - * Compares 2 DNS records by type and name only - * - * @param ra DNS record A as a void-pointer (for pico_tree) - * @param rb DNS record B as a void-pointer (for pico_tree) - * @return 0 when name and type of records are equal, returns difference when - * they're not. 
- * ****************************************************************************/ -int -pico_dns_record_cmp_name_type( void *ra, - void *rb ) -{ - int dif; - uint16_t at = 0, bt = 0; - struct pico_dns_record *a = (struct pico_dns_record *)ra; - struct pico_dns_record *b = (struct pico_dns_record *)rb; - - /* Check params */ - if (!a || !b) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* First, compare the rrtypes */ - at = short_be(a->rsuffix->rtype); - bt = short_be(b->rsuffix->rtype); - if ((dif = (int)((int)at - (int)bt))) - return dif; - - /* Then compare names */ - return pico_dns_rdata_cmp((uint8_t *)(a->rname), (uint8_t *)(b->rname), - (uint16_t)strlen(a->rname), - (uint16_t)strlen(b->rname), PICO_DNS_CASE_INSENSITIVE); -} - -/* **************************************************************************** - * Compares 2 DNS records by type, name AND rdata for a truly unique result - * - * @param ra DNS record A as a void-pointer (for pico_tree) - * @param rb DNS record B as a void-pointer (for pico_tree) - * @return 0 when records are equal, returns difference when they're not - * ****************************************************************************/ -int -pico_dns_record_cmp( void *ra, - void *rb ) -{ - int dif = 0; - struct pico_dns_record *a = (struct pico_dns_record *)ra; - struct pico_dns_record *b = (struct pico_dns_record *)rb; - - /* Check params */ - if (!a || !b) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Compare type and name */ - if ((dif = pico_dns_record_cmp_name_type(a, b))) - return dif; - - /* Then compare rdata */ - return pico_dns_rdata_cmp(a->rdata, b->rdata, - short_be(a->rsuffix->rdlength), - short_be(b->rsuffix->rdlength), PICO_DNS_CASE_SENSITIVE); -} - -/* MARK: ^ COMPARING */ -/* MARK: v PICO_TREE */ - -/* **************************************************************************** - * Erases a pico_tree entirely. 
- * - * @param tree Pointer to a pico_tree-instance - * @param node_delete Helper-function for type-specific deleting. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_tree_destroy( struct pico_tree *tree, int (*node_delete)(void **)) -{ - struct pico_tree_node *node = NULL, *next = NULL; - void *item = NULL; - - /* Check params */ - if (!tree) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - pico_tree_foreach_safe(node, tree, next) { - item = node->keyValue; - pico_tree_delete(tree, node->keyValue); - if (item && node_delete) { - node_delete((void **)&item); - } - } - - return 0; -} - -/* **************************************************************************** - * Calculates the size in bytes of all the nodes contained in the tree summed - * up. And gets the amount of items in the tree as well. - * - * @param tree Pointer to pico_tree-instance - * @param size Will get filled with the size of all the nodes summed up. - * Make sure you clear out (set to 0) this param before you - * call this function because it doesn't happen inside and - * each size will be added to the initial value. - * @param node_size Helper-function for type-specific size-determination - * @return Amount of items in the tree. 
- * ****************************************************************************/ -static uint16_t -pico_tree_size( struct pico_tree *tree, - uint16_t *size, - uint16_t (*node_size)(void *)) -{ - struct pico_tree_node *node = NULL; - void *node_item = NULL; - uint16_t count = 0; - - /* Check params */ - if (!tree || !size) { - pico_err = PICO_ERR_EINVAL; - return 0; - } - - /* Add up the node sizes */ - pico_tree_foreach(node, tree) { - if ((node_item = node->keyValue)) { - *size = (uint16_t)((*size) + node_size(node_item)); - count++; - } - } - - return count; -} - -/* **************************************************************************** - * Determines the amount of nodes in a pico_tere - * - * @param tree Pointer to pico_tree-instance - * @return Amount of items in the tree. - * ****************************************************************************/ -uint16_t -pico_tree_count( struct pico_tree *tree ) -{ - struct pico_tree_node *node = NULL; - uint16_t count = 0; - - pico_tree_foreach(node, tree) { - if (node->keyValue) - count++; - } - - return count; -} - -/* **************************************************************************** - * Deletes all the questions with given DNS name from a pico_tree - * - * @param qtree Pointer to pico_tree-instance which contains DNS questions - * @param name Name of the questions you want to delete - * @return Returns 0 on success, something else on failure. 
- * ****************************************************************************/ -int -pico_dns_qtree_del_name( struct pico_tree *qtree, - const char *name ) -{ - struct pico_tree_node *node = NULL, *next = NULL; - struct pico_dns_question *question = NULL; - - /* Check params */ - if (!qtree || !name) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Iterate over tree and delete every node with given name */ - pico_tree_foreach_safe(node, qtree, next) { - question = (struct pico_dns_question *)node->keyValue; - if ((question) && (strcasecmp(question->qname, name) == 0)) { - question = pico_tree_delete(qtree, (void *)question); - pico_dns_question_delete((void **)&question); - } - } - - return 0; -} - -/* **************************************************************************** - * Checks whether a question with given name is in the tree or not. - * - * @param qtree Pointer to pico_tree-instance which contains DNS questions - * @param name Name you want to check for - * @return 1 when the name is present in the qtree, 0 when it's not. - * ****************************************************************************/ -int -pico_dns_qtree_find_name( struct pico_tree *qtree, - const char *name ) -{ - struct pico_tree_node *node = NULL; - struct pico_dns_question *question = NULL; - - /* Check params */ - if (!qtree || !name) { - pico_err = PICO_ERR_EINVAL; - return 0; - } - - /* Iterate over tree and compare names */ - pico_tree_foreach(node, qtree) { - question = (struct pico_dns_question *)node->keyValue; - if ((question) && (strcasecmp(question->qname, name) == 0)) - return 1; - } - - return 0; -} - -/* MARK: ^ PICO_TREE */ -/* MARK: v DNS PACKET FUNCTIONS */ - -/* **************************************************************************** - * Fills the header section of a DNS packet with the correct flags and section - * -counts. - * - * @param hdr Header to fill in. 
- * @param qdcount Amount of questions added to the packet - * @param ancount Amount of answer records added to the packet - * @param nscount Amount of authority records added to the packet - * @param arcount Amount of additional records added to the packet - * ****************************************************************************/ -void -pico_dns_fill_packet_header( struct pico_dns_header *hdr, - uint16_t qdcount, - uint16_t ancount, - uint16_t nscount, - uint16_t arcount ) -{ - /* ID should be filled by caller */ - - if(qdcount > 0) { /* Questions present? Make it a query */ - hdr->qr = PICO_DNS_QR_QUERY; - hdr->aa = PICO_DNS_AA_NO_AUTHORITY; - } else { /* No questions present? Make it an answer*/ - hdr->qr = PICO_DNS_QR_RESPONSE; - hdr->aa = PICO_DNS_AA_IS_AUTHORITY; - } - - /* Fill in the flags and the fields */ - hdr->opcode = PICO_DNS_OPCODE_QUERY; - hdr->tc = PICO_DNS_TC_NO_TRUNCATION; - hdr->rd = PICO_DNS_RD_IS_DESIRED; - hdr->ra = PICO_DNS_RA_NO_SUPPORT; - hdr->z = 0; /* Z, AD, CD are 0 */ - hdr->rcode = PICO_DNS_RCODE_NO_ERROR; - hdr->qdcount = short_be(qdcount); - hdr->ancount = short_be(ancount); - hdr->nscount = short_be(nscount); - hdr->arcount = short_be(arcount); -} - -/* **************************************************************************** - * Fills a single DNS resource record section of a DNS packet. - * - * @param rtree Tree that contains the DNS resource records. - * @param dest Pointer-pointer to location where you want to insert records. - * Will point to location after current section on return. - * @return 0 on success, something else on failure. 
- * ****************************************************************************/ -static int -pico_dns_fill_packet_rr_section( struct pico_tree *rtree, - uint8_t **dest ) -{ - struct pico_tree_node *node = NULL; - struct pico_dns_record *record = NULL; - - pico_tree_foreach(node, rtree) { - record = node->keyValue; - if ((record) && pico_dns_record_copy_flat(record, dest)) { - dns_dbg("Could not copy record into Answer Section!\n"); - return -1; - } - } - return 0; -} - -/* **************************************************************************** - * Fills the resource record sections of a DNS packet with provided record- - * trees. - * - * @param packet Packet you want to fill - * @param qtree Question tree to determine where the rrsections begin. - * @param antree DNS records to put in Answer section - * @param nstree DNS records to put in Authority section - * @param artree DNS records to put in Additional section - * @return 0 on success, something else on failure. - * ****************************************************************************/ -static int -pico_dns_fill_packet_rr_sections( pico_dns_packet *packet, - struct pico_tree *qtree, - struct pico_tree *antree, - struct pico_tree *nstree, - struct pico_tree *artree ) -{ - int anret = 0, nsret = 0, arret = 0; - uint16_t temp = 0; - uint8_t *destination = NULL; - - /* Check params */ - if (!packet || !qtree || !antree || !nstree || !artree) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Initialise the destination pointers before iterating */ - destination = (uint8_t *)packet + sizeof(struct pico_dns_header); - pico_tree_size(qtree, &temp, &pico_dns_question_size); - destination = destination + temp; - - /* Iterate over ANSWERS */ - anret = pico_dns_fill_packet_rr_section(antree, &destination); - - /* Iterate over AUTHORITIES */ - nsret = pico_dns_fill_packet_rr_section(nstree, &destination); - - /* Iterate over ADDITIONALS */ - arret = pico_dns_fill_packet_rr_section(artree, &destination); - 
- if (anret || nsret || arret) - return -1; - - return 0; -} - -/* **************************************************************************** - * Fills the question section of a DNS packet with provided questions in the - * tree. - * - * @param packet Packet you want to fill - * @param qtree Question tree with question you want to insert - * @return 0 on success, something else on failure. - * ****************************************************************************/ -static int -pico_dns_fill_packet_question_section( pico_dns_packet *packet, - struct pico_tree *qtree ) -{ - struct pico_tree_node *node = NULL; - struct pico_dns_question *question = NULL; - struct pico_dns_question_suffix *dest_qsuffix = NULL; - char *dest_qname = NULL; - - /* Check params */ - if (!packet || !qtree) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Initialise pointer */ - dest_qname = (char *)((char *)packet + sizeof(struct pico_dns_header)); - - pico_tree_foreach(node, qtree) { - question = node->keyValue; - if (question) { - /* Copy the name */ - memcpy(dest_qname, question->qname, question->qname_length); - - /* Copy the suffix */ - dest_qsuffix = (struct pico_dns_question_suffix *) - (dest_qname + question->qname_length); - dest_qsuffix->qtype = question->qsuffix->qtype; - dest_qsuffix->qclass = question->qsuffix->qclass; - - /* Move to next question */ - dest_qname = (char *)((char *)dest_qsuffix + - sizeof(struct pico_dns_question_suffix)); - } - } - return 0; -} - -/* **************************************************************************** - * Looks for a name somewhere else in packet, more specifically between the - * beginning of the data buffer and the name itself. 
- * ****************************************************************************/ -static uint8_t * -pico_dns_packet_compress_find_ptr( uint8_t *name, - uint8_t *data, - uint16_t len ) -{ - uint8_t *iterator = NULL; - - /* Check params */ - if (!name || !data || !len) - return NULL; - - if ((name < data) || (name > (data + len))) - return NULL; - - iterator = data; - - /* Iterate from the beginning of data up until the name-ptr */ - while (iterator < name) { - /* Compare in each iteration of current name is equal to a section of - the DNS packet and if so return the pointer to that section */ - if (memcmp((void *)iterator++, (void *)name, - pico_dns_strlen((char *)name) + 1u) == 0) - return (iterator - 1); - } - return NULL; -} - -/* **************************************************************************** - * Compresses a single name by looking for the same name somewhere else in the - * packet-buffer. - * ****************************************************************************/ -static int -pico_dns_packet_compress_name( uint8_t *name, - uint8_t *packet, - uint16_t *len) -{ - uint8_t *lbl_iterator = NULL; /* To iterate over labels */ - uint8_t *compression_ptr = NULL; /* PTR to somewhere else in the packet */ - uint8_t *offset = NULL; /* PTR after compression pointer */ - uint8_t *ptr_after_str = NULL; - uint8_t *last_byte = NULL; - uint8_t *i = NULL; - uint16_t ptr = 0; - uint16_t difference = 0; - - /* Check params */ - if (!name || !packet || !len) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if ((name < packet) || (name > (packet + *len))) { - dns_dbg("Name ptr OOB. 
name: %p max: %p\n", name, packet + *len); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Try to compress name */ - lbl_iterator = name; - while (lbl_iterator != '\0') { - /* Try to find a compression pointer with current name */ - compression_ptr = pico_dns_packet_compress_find_ptr(lbl_iterator, - packet + 12, *len); - /* If name can be compressed */ - if (compression_ptr) { - /* Point to place after current string */ - ptr_after_str = lbl_iterator + strlen((char *)lbl_iterator) + 1u; - - /* Calculate the compression pointer value */ - ptr = (uint16_t)(compression_ptr - packet); - - /* Set the compression pointer in the packet */ - *lbl_iterator = (uint8_t)(0xC0 | (uint8_t)(ptr >> 8)); - *(lbl_iterator + 1) = (uint8_t)(ptr & 0xFF); - - /* Move up the rest of the packet data to right after the pointer */ - offset = lbl_iterator + 2; - - /* Move up left over data */ - difference = (uint16_t)(ptr_after_str - offset); - last_byte = packet + *len; - for (i = ptr_after_str; i < last_byte; i++) { - *((uint8_t *)(i - difference)) = *i; - } - /* Update length */ - *len = (uint16_t)(*len - difference); - break; - } - - /* Move to next length label */ - lbl_iterator = lbl_iterator + *(lbl_iterator) + 1; - } - return 0; -} - -/* **************************************************************************** - * Utility function compress a record section - * ****************************************************************************/ -static int -pico_dns_compress_record_sections( uint16_t qdcount, uint16_t count, - uint8_t *buf, uint8_t **iterator, - uint16_t *len ) -{ - struct pico_dns_record_suffix *rsuffix = NULL; - uint8_t *_iterator = NULL; - uint16_t i = 0; - - /* Check params */ - if (!iterator || !(*iterator) || !buf || !len) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - _iterator = *iterator; - - for (i = 0; i < count; i++) { - if (qdcount || i) - pico_dns_packet_compress_name(_iterator, buf, len); - - /* To get rdlength */ - rsuffix = (struct 
pico_dns_record_suffix *) - (_iterator + pico_dns_namelen_comp((char *)_iterator) + 1u); - - /* Move to next res record */ - _iterator = ((uint8_t *)rsuffix + - sizeof(struct pico_dns_record_suffix) + - short_be(rsuffix->rdlength)); - } - *iterator = _iterator; - return 0; -} - -/* **************************************************************************** - * Applies DNS name compression to an entire DNS packet - * ****************************************************************************/ -static int -pico_dns_packet_compress( pico_dns_packet *packet, uint16_t *len ) -{ - uint8_t *packet_buf = NULL; - uint8_t *iterator = NULL; - uint16_t qdcount = 0, rcount = 0, i = 0; - - /* Check params */ - if (!packet || !len) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - packet_buf = (uint8_t *)packet; - - /* Temporarily store the question & record counts */ - qdcount = short_be(packet->qdcount); - rcount = (uint16_t)(rcount + short_be(packet->ancount)); - rcount = (uint16_t)(rcount + short_be(packet->nscount)); - rcount = (uint16_t)(rcount + short_be(packet->arcount)); - - /* Move past the DNS packet header */ - iterator = (uint8_t *)((uint8_t *) packet + 12u); - - /* Start with the questions */ - for (i = 0; i < qdcount; i++) { - if(i) { /* First question can't be compressed */ - pico_dns_packet_compress_name(iterator, packet_buf, len); - } - - /* Move to next question */ - iterator = (uint8_t *)(iterator + - pico_dns_namelen_comp((char *)iterator) + - sizeof(struct pico_dns_question_suffix) + 1u); - } - /* Then onto the answers */ - pico_dns_compress_record_sections(qdcount, rcount, packet_buf, &iterator, - len); - return 0; -} - -/* **************************************************************************** - * Calculates how big a packet needs be in order to store all the questions & - * records in the tree. Also determines the amount of questions and records. - * - * @param qtree Tree with Questions. - * @param antree Tree with Answer Records. 
- * @param nstree Tree with Authority Records. - * @param artree Tree with Additional Records. - * @param qdcount Pointer to var to store amount of questions - * @param ancount Pointer to var to store amount of answers. - * @param nscount Pointer to var to store amount of authorities. - * @param arcount Pointer to var to store amount of additionals. - * @return Returns the total length that the DNS packet needs to be. - * ****************************************************************************/ -static uint16_t -pico_dns_packet_len( struct pico_tree *qtree, - struct pico_tree *antree, - struct pico_tree *nstree, - struct pico_tree *artree, - uint8_t *qdcount, uint8_t *ancount, - uint8_t *nscount, uint8_t *arcount ) -{ - uint16_t len = (uint16_t) sizeof(pico_dns_packet); - - /* Check params */ - if (!qtree || !antree || !nstree || !artree) { - pico_err = PICO_ERR_EINVAL; - return 0; - } - - *qdcount = (uint8_t)pico_tree_size(qtree, &len, &pico_dns_question_size); - *ancount = (uint8_t)pico_tree_size(antree, &len, &pico_dns_record_size); - *nscount = (uint8_t)pico_tree_size(nstree, &len, &pico_dns_record_size); - *arcount = (uint8_t)pico_tree_size(artree, &len, &pico_dns_record_size); - return len; -} - -/* **************************************************************************** - * Generic packet creation utility that just creates a DNS packet with given - * questions and resource records to put in the Resource Record Sections. If a - * NULL-pointer is provided for a certain tree, no records will be added to - * that particular section of the packet. - * - * @param qtree DNS Questions to put in the Question Section. - * @param antree DNS Records to put in the Answer Section. - * @param nstree DNS Records to put in the Authority Section. - * @param artree DNS Records to put in the Additional Section. 
- * @param len Will get fill with the entire size of the packet - * @return Pointer to created DNS packet - * ****************************************************************************/ -static pico_dns_packet * -pico_dns_packet_create( struct pico_tree *qtree, - struct pico_tree *antree, - struct pico_tree *nstree, - struct pico_tree *artree, - uint16_t *len ) -{ - PICO_DNS_QTREE_DECLARE(_qtree); - PICO_DNS_RTREE_DECLARE(_antree); - PICO_DNS_RTREE_DECLARE(_nstree); - PICO_DNS_RTREE_DECLARE(_artree); - pico_dns_packet *packet = NULL; - uint8_t qdcount = 0, ancount = 0, nscount = 0, arcount = 0; - - /* Set default vector, if arguments are NULL-pointers */ - _qtree = (qtree) ? (*qtree) : (_qtree); - _antree = (antree) ? (*antree) : (_antree); - _nstree = (nstree) ? (*nstree) : (_nstree); - _artree = (artree) ? (*artree) : (_artree); - - /* Get the size of the entire packet and determine the header counters */ - *len = pico_dns_packet_len(&_qtree, &_antree, &_nstree, &_artree, - &qdcount, &ancount, &nscount, &arcount); - - /* Provide space for the entire packet */ - if (!(packet = PICO_ZALLOC(*len))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Fill the Question Section with questions */ - if (qtree && pico_tree_count(&_qtree) != 0) { - if (pico_dns_fill_packet_question_section(packet, &_qtree)) { - dns_dbg("Could not fill Question Section correctly!\n"); - PICO_FREE(packet); - return NULL; - } - } - - /* Fill the Resource Record Sections with resource records */ - if (pico_dns_fill_packet_rr_sections(packet, &_qtree, &_antree, - &_nstree, &_artree)) { - dns_dbg("Could not fill Resource Record Sections correctly!\n"); - PICO_FREE(packet); - return NULL; - } - - /* Fill the DNS packet header and try to compress */ - pico_dns_fill_packet_header(packet, qdcount, ancount, nscount, arcount); - pico_dns_packet_compress(packet, len); - - return packet; -} - -/* **************************************************************************** - * Creates a DNS Query 
packet with given question and resource records to put - * the Resource Record Sections. If a NULL-pointer is provided for a certain - * tree, no records will be added to that particular section of the packet. - * - * @param qtree DNS Questions to put in the Question Section - * @param antree DNS Records to put in the Answer Section - * @param nstree DNS Records to put in the Authority Section - * @param artree DNS Records to put in the Additional Section - * @param len Will get filled with the entire size of the packet - * @return Pointer to created DNS packet - * ****************************************************************************/ -pico_dns_packet * -pico_dns_query_create( struct pico_tree *qtree, - struct pico_tree *antree, - struct pico_tree *nstree, - struct pico_tree *artree, - uint16_t *len ) -{ - return pico_dns_packet_create(qtree, antree, nstree, artree, len); -} - -/* **************************************************************************** - * Creates a DNS Answer packet with given resource records to put in the - * Resource Record Sections. If a NULL-pointer is provided for a certain tree, - * no records will be added to that particular section of the packet. - * - * @param antree DNS Records to put in the Answer Section - * @param nstree DNS Records to put in the Authority Section - * @param artree DNS Records to put in the Additional Section - * @param len Will get filled with the entire size of the packet - * @return Pointer to created DNS packet. 
- * ****************************************************************************/ -pico_dns_packet * -pico_dns_answer_create( struct pico_tree *antree, - struct pico_tree *nstree, - struct pico_tree *artree, - uint16_t *len ) -{ - return pico_dns_packet_create(NULL, antree, nstree, artree, len); -} -/* MARK: ^ DNS PACKET FUNCTIONS */ diff --git a/kernel/picotcp/modules/pico_dns_common.h b/kernel/picotcp/modules/pico_dns_common.h deleted file mode 100644 index 8272128..0000000 --- a/kernel/picotcp/modules/pico_dns_common.h +++ /dev/null @@ -1,528 +0,0 @@ - -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - . - Authors: Toon Stegen, Jelle De Vleeschouwer - *********************************************************************/ - -#ifndef INCLUDE_PICO_DNS_COMMON -#define INCLUDE_PICO_DNS_COMMON - -#include "pico_config.h" -#include "pico_tree.h" - -/* TYPE values */ -#define PICO_DNS_TYPE_A 1 -#define PICO_DNS_TYPE_CNAME 5 -#define PICO_DNS_TYPE_PTR 12 -#define PICO_DNS_TYPE_TXT 16 -#define PICO_DNS_TYPE_AAAA 28 -#define PICO_DNS_TYPE_SRV 33 -#define PICO_DNS_TYPE_NSEC 47 -#define PICO_DNS_TYPE_ANY 255 - -/* CLASS values */ -#define PICO_DNS_CLASS_IN 1 - -/* FLAG values */ -#define PICO_DNS_QR_QUERY 0 -#define PICO_DNS_QR_RESPONSE 1 -#define PICO_DNS_OPCODE_QUERY 0 -#define PICO_DNS_OPCODE_IQUERY 1 -#define PICO_DNS_OPCODE_STATUS 2 -#define PICO_DNS_AA_NO_AUTHORITY 0 -#define PICO_DNS_AA_IS_AUTHORITY 1 -#define PICO_DNS_TC_NO_TRUNCATION 0 -#define PICO_DNS_TC_IS_TRUNCATED 1 -#define PICO_DNS_RD_NO_DESIRE 0 -#define PICO_DNS_RD_IS_DESIRED 1 -#define PICO_DNS_RA_NO_SUPPORT 0 -#define PICO_DNS_RA_IS_SUPPORTED 1 -#define PICO_DNS_RCODE_NO_ERROR 0 -#define PICO_DNS_RCODE_EFORMAT 1 -#define PICO_DNS_RCODE_ESERVER 2 -#define PICO_DNS_RCODE_ENAME 3 -#define PICO_DNS_RCODE_ENOIMP 4 -#define PICO_DNS_RCODE_EREFUSED 5 - -#define 
PICO_ARPA_IPV4_SUFFIX ".in-addr.arpa" - -#ifdef PICO_SUPPORT_IPV6 -#define STRLEN_PTR_IP6 63 -#define PICO_ARPA_IPV6_SUFFIX ".IP6.ARPA" -#endif - -/* Used in pico_dns_rdata_cmp */ -#define PICO_DNS_CASE_SENSITIVE 0x00u -#define PICO_DNS_CASE_INSENSITIVE 0x01u - -#define PICO_DNS_NAMEBUF_SIZE (256) - -enum pico_dns_arpa -{ - PICO_DNS_ARPA4, - PICO_DNS_ARPA6, - PICO_DNS_NO_ARPA, -}; - -/* flags split in 2x uint8 due to endianness */ -PACKED_STRUCT_DEF pico_dns_header -{ - uint16_t id; /* Packet id */ - uint8_t rd : 1; /* Recursion Desired */ - uint8_t tc : 1; /* TrunCation */ - uint8_t aa : 1; /* Authoritative Answer */ - uint8_t opcode : 4; /* Opcode */ - uint8_t qr : 1; /* Query/Response */ - uint8_t rcode : 4; /* Response code */ - uint8_t z : 3; /* Zero */ - uint8_t ra : 1; /* Recursion Available */ - uint16_t qdcount; /* Question count */ - uint16_t ancount; /* Answer count */ - uint16_t nscount; /* Authority count */ - uint16_t arcount; /* Additional count */ -}; -typedef struct pico_dns_header pico_dns_packet; - -/* Question fixed-sized fields */ -PACKED_STRUCT_DEF pico_dns_question_suffix -{ - uint16_t qtype; - uint16_t qclass; -}; - -/* Resource record fixed-sized fields */ -PACKED_STRUCT_DEF pico_dns_record_suffix -{ - uint16_t rtype; - uint16_t rclass; - uint32_t rttl; - uint16_t rdlength; -}; - -/* DNS QUESTION */ -struct pico_dns_question -{ - char *qname; - struct pico_dns_question_suffix *qsuffix; - uint16_t qname_length; - uint8_t proto; -}; - -/* DNS RECORD */ -struct pico_dns_record -{ - char *rname; - struct pico_dns_record_suffix *rsuffix; - uint8_t *rdata; - uint16_t rname_length; -}; - -/* MARK: v NAME & IP FUNCTIONS */ - -/* **************************************************************************** - * Checks if the DNS name doesn't exceed 256 bytes including zero-byte. 
- * - * @param namelen Length of the DNS name-string including zero-byte - * @return 0 when the length is correct - * ****************************************************************************/ -int -pico_dns_check_namelen( uint16_t namelen ); - -/* **************************************************************************** - * Returns the length of a name in a DNS-packet as if DNS name compression - * would be applied to the packet. If there's no compression present this - * returns the strlen. If there's compression present this returns the length - * until the compression-pointer + 1. - * - * @param name Compressed name you want the calculate the strlen from - * @return Returns strlen of a compressed name, takes the first byte of compr- - * ession pointer into account but not the second byte, which acts - * like a trailing zero-byte. - * ****************************************************************************/ -uint16_t -pico_dns_namelen_comp( char *name ); - -/* **************************************************************************** - * Returns the uncompressed name in DNS name format when DNS name compression - * is applied to the packet-buffer. - * - * @param name Compressed name, should be in the bounds of the actual packet - * @param packet Packet that contains the compressed name - * @return Returns the decompressed name, NULL on failure. - * ****************************************************************************/ -char * -pico_dns_decompress_name( char *name, pico_dns_packet *packet ); - -/* **************************************************************************** - * Converts a DNS name in DNS name format to a name in URL format. Provides - * space for the name in URL format as well. PICO_FREE() should be called on - * the returned string buffer that contains the name in URL format. - * - * @param qname DNS name in DNS name format to convert - * @return Returns a pointer to a string-buffer with the URL name on success. 
- * ****************************************************************************/ -char * -pico_dns_qname_to_url( const char *qname ); - -/* **************************************************************************** - * Converts a DNS name in URL format to name in DNS name format. Provides - * space for the DNS name as well. PICO_FREE() should be called on the returned - * string buffer that contains the DNS name. - * - * @param url DNS name in URL format to convert - * @return Returns a pointer to a string-buffer with the DNS name on success. - * ****************************************************************************/ -char * -pico_dns_url_to_qname( const char *url ); - -/* **************************************************************************** - * @param url String-buffer - * @return Length of string-buffer in an uint16_t - * ****************************************************************************/ -uint16_t -pico_dns_strlen( const char *url ); - -/* **************************************************************************** - * Replaces .'s in a DNS name in URL format by the label lengths. So it - * actually converts a name in URL format to a name in DNS name format. - * f.e. "*www.google.be" => "3www6google2be0" - * - * @param url Location to buffer with name in URL format. The URL needs to - * be +1 byte offset in the actual buffer. Size is should be - * strlen(url) + 2. - * @param maxlen Maximum length of buffer so it doesn't cause a buffer overflow - * @return 0 on success, something else on failure. - * ****************************************************************************/ -int pico_dns_name_to_dns_notation( char *url, uint16_t maxlen ); - -/* **************************************************************************** - * Replaces the label lengths in a DNS-name by .'s. So it actually converts a - * name in DNS format to a name in URL format. - * f.e. 
3www6google2be0 => .www.google.be - * - * @param ptr Location to buffer with name in DNS name format - * @param maxlen Maximum length of buffer so it doesn't cause a buffer overflow - * @return 0 on success, something else on failure. - * ****************************************************************************/ -int pico_dns_notation_to_name( char *ptr, uint16_t maxlen ); - -/* **************************************************************************** - * Determines the length of the first label of a DNS name in URL-format - * - * @param url DNS name in URL-format - * @return Length of the first label of DNS name in URL-format - * ****************************************************************************/ -uint16_t -pico_dns_first_label_length( const char *url ); - -/* **************************************************************************** - * Mirrors a dotted IPv4-address string. - * f.e. 192.168.0.1 => 1.0.168.192 - * - * @param ptr - * @return 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_dns_mirror_addr( char *ptr ); - -/* **************************************************************************** - * Convert an IPv6-address in string-format to a IPv6-address in nibble-format. - * Doesn't add a IPv6 ARPA-suffix though. - * - * @param ip IPv6-address stored as a string - * @param dst Destination to store IPv6-address in nibble-format - * ****************************************************************************/ -void -pico_dns_ipv6_set_ptr( const char *ip, char *dst ); - -/* MARK: QUESTION FUNCTIONS */ - -/* **************************************************************************** - * Deletes a single DNS Question. - * - * @param question Void-pointer to DNS Question. Can be used with pico_tree_- - * destroy. - * @return Returns 0 on success, something else on failure. 
- * ****************************************************************************/ -int -pico_dns_question_delete( void **question); - -/* **************************************************************************** - * Fills in the DNS question suffix-fields with the correct values. - * - * todo: Update pico_dns_client to make the same mechanism possible as with - * filling DNS Resource Record-suffixes. This function shouldn't be an - * API-function. - * - * @param suf Pointer to the suffix member of the DNS question. - * @param qtype DNS type of the DNS question to be. - * @param qclass DNS class of the DNS question to be. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_dns_question_fill_suffix( struct pico_dns_question_suffix *suf, - uint16_t qtype, - uint16_t qclass ); - -/* **************************************************************************** - * Creates a standalone DNS Question with a given name and type. - * - * @param url DNS question name in URL format. Will be converted to DNS - * name notation format. - * @param len Will be filled with the total length of the DNS question. - * @param proto Protocol for which you want to create a question. Can be - * either PICO_PROTO_IPV4 or PICO_PROTO_IPV6. - * @param qtype DNS type of the question to be. - * @param qclass DNS class of the question to be. - * @param reverse When this is true, a reverse resolution name will be gene- - * from the URL - * @return Returns pointer to the created DNS Question on success, NULL on - * failure. - * ****************************************************************************/ -struct pico_dns_question * -pico_dns_question_create( const char *url, - uint16_t *len, - uint8_t proto, - uint16_t qtype, - uint16_t qclass, - uint8_t reverse ); - -/* **************************************************************************** - * Decompresses the name of a single DNS question. 
- * - * @param question Question you want to decompress the name of - * @param packet Packet in which the DNS question is contained. - * @return Pointer to original name of the DNS question before decompressing. - * ****************************************************************************/ -char * -pico_dns_question_decompress( struct pico_dns_question *question, - pico_dns_packet *packet ); - -/* MARK: RESOURCE RECORD FUNCTIONS */ - -/* **************************************************************************** - * Deletes a single DNS resource record. - * - * @param record Void-pointer to DNS record. Can be used with pico_tree_destroy - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_dns_record_delete( void **record ); - -/* **************************************************************************** - * Just makes a hardcopy from a single DNS Resource Record - * - * @param record DNS record you want to copy - * @return Pointer to copy of DNS record. - * ****************************************************************************/ -struct pico_dns_record * -pico_dns_record_copy( struct pico_dns_record *record ); - -/* **************************************************************************** - * Create a standalone DNS Resource Record with given name, type and data. - * - * @param url DNS rrecord name in URL format. Will be converted to DNS - * name notation format. - * @param _rdata Memory buffer with data to insert in the resource record. If - * data of record should contain a DNS name, the name in the - * databuffer needs to be in URL-format. - * @param datalen The exact length in bytes of the _rdata-buffer. If data of - * record should contain a DNS name, datalen needs to be - * pico_dns_strlen(_rdata). - * @param len Will be filled with the total length of the DNS rrecord. - * @param rtype DNS type of the resource record to be. 
- * @param rclass DNS class of the resource record to be. - * @param rttl DNS ttl of the resource record to be. - * @return Returns pointer to the created DNS Resource Record - * ****************************************************************************/ -struct pico_dns_record * -pico_dns_record_create( const char *url, - void *_rdata, - uint16_t datalen, - uint16_t *len, - uint16_t rtype, - uint16_t rclass, - uint32_t rttl ); - -/* **************************************************************************** - * Decompresses the name of single DNS record. - * - * @param record DNS record to decompress the name of. - * @param packet Packet in which is DNS record is present - * @return Pointer to original name of the DNS record before decompressing. - * ****************************************************************************/ -char * -pico_dns_record_decompress( struct pico_dns_record *record, - pico_dns_packet *packet ); - -/* MARK: COMPARING */ - -/* **************************************************************************** - * Compares two databuffers against each other. - * - * @param a 1st Memory buffer to compare - * @param b 2nd Memory buffer to compare - * @param rdlength_a Length of 1st memory buffer - * @param rdlength_b Length of 2nd memory buffer - * @param caseinsensitive Whether or not the bytes are compared - * case-insensitive. Should be either - * PICO_DNS_CASE_SENSITIVE or PICO_DNS_CASE_INSENSITIVE - * @return 0 when the buffers are equal, returns difference when they're not. 
- * ****************************************************************************/ -int -pico_dns_rdata_cmp( uint8_t *a, uint8_t *b, - uint16_t rdlength_a, uint16_t rdlength_b, uint8_t caseinsensitive ); - -/* **************************************************************************** - * Compares 2 DNS questions - * - * @param qa DNS question A as a void-pointer (for pico_tree) - * @param qb DNS question A as a void-pointer (for pico_tree) - * @return 0 when questions are equal, returns difference when they're not. - * ****************************************************************************/ -int -pico_dns_question_cmp( void *qa, - void *qb ); - -/* **************************************************************************** - * Compares 2 DNS records by type and name only - * - * @param ra DNS record A as a void-pointer (for pico_tree) - * @param rb DNS record B as a void-pointer (for pico_tree) - * @return 0 when name and type of records are equal, returns difference when - * they're not. - * ****************************************************************************/ -int -pico_dns_record_cmp_name_type( void *ra, - void *rb ); - -/* **************************************************************************** - * Compares 2 DNS records by type, name AND rdata for a truly unique result - * - * @param ra DNS record A as a void-pointer (for pico_tree) - * @param rb DNS record B as a void-pointer (for pico_tree) - * @return 0 when records are equal, returns difference when they're not - * ****************************************************************************/ -int -pico_dns_record_cmp( void *ra, - void *rb ); - -/* MARK: PICO_TREE */ - -/* **************************************************************************** - * Erases a pico_tree entirely. - * - * @param tree Pointer to a pico_tree-instance - * @param node_delete Helper-function for type-specific deleting. - * @return Returns 0 on success, something else on failure. 
- * ****************************************************************************/ -int -pico_tree_destroy( struct pico_tree *tree, int (*node_delete)(void **)); - -/* **************************************************************************** - * Determines the amount of nodes in a pico_tree - * - * @param tree Pointer to pico_tree-instance - * @return Amount of items in the tree. - * ****************************************************************************/ -uint16_t -pico_tree_count( struct pico_tree *tree ); - -/* **************************************************************************** - * Definition of DNS question tree - * ****************************************************************************/ -typedef struct pico_tree pico_dns_qtree; -#define PICO_DNS_QTREE_DECLARE(name) \ - pico_dns_qtree (name) = {&LEAF, pico_dns_question_cmp} -#define PICO_DNS_QTREE_DESTROY(qtree) \ - pico_tree_destroy(qtree, pico_dns_question_delete) - -/* **************************************************************************** - * Deletes all the questions with given DNS name from a pico_tree - * - * @param qtree Pointer to pico_tree-instance which contains DNS questions - * @param name Name of the questions you want to delete - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_dns_qtree_del_name( struct pico_tree *qtree, - const char *name ); - -/* **************************************************************************** - * Checks whether a question with given name is in the tree or not. - * - * @param qtree Pointer to pico_tree-instance which contains DNS questions - * @param name Name you want to check for - * @return 1 when the name is present in the qtree, 0 when it's not. 
- * ****************************************************************************/ -int -pico_dns_qtree_find_name( struct pico_tree *qtree, - const char *name ); - -/* **************************************************************************** - * Definition of DNS record tree - * ****************************************************************************/ -typedef struct pico_tree pico_dns_rtree; -#define PICO_DNS_RTREE_DECLARE(name) \ - pico_dns_rtree (name) = {&LEAF, pico_dns_record_cmp} -#define PICO_DNS_RTREE_DESTROY(rtree) \ - pico_tree_destroy((rtree), pico_dns_record_delete) - -/* MARK: DNS PACKET FUNCTIONS */ - -/* **************************************************************************** - * Fills the header section of a DNS packet with the correct flags and section - * -counts. - * - * @param hdr Header to fill in. - * @param qdcount Amount of questions added to the packet - * @param ancount Amount of answer records added to the packet - * @param nscount Amount of authority records added to the packet - * @param arcount Amount of additional records added to the packet - * ****************************************************************************/ -void -pico_dns_fill_packet_header( struct pico_dns_header *hdr, - uint16_t qdcount, - uint16_t ancount, - uint16_t authcount, - uint16_t addcount ); - -/* **************************************************************************** - * Creates a DNS Query packet with given question and resource records to put - * the Resource Record Sections. If a NULL-pointer is provided for a certain - * tree, no records will be added to that particular section of the packet. 
- * - * @param qtree DNS Questions to put in the Question Section - * @param antree DNS Records to put in the Answer Section - * @param nstree DNS Records to put in the Authority Section - * @param artree DNS Records to put in the Additional Section - * @param len Will get filled with the entire size of the packet - * @return Pointer to created DNS packet - * ****************************************************************************/ -pico_dns_packet * -pico_dns_query_create( struct pico_tree *qtree, - struct pico_tree *antree, - struct pico_tree *nstree, - struct pico_tree *artree, - uint16_t *len ); - -/* **************************************************************************** - * Creates a DNS Answer packet with given resource records to put in the - * Resource Record Sections. If a NULL-pointer is provided for a certain tree, - * no records will be added to that particular section of the packet. - * - * @param antree DNS Records to put in the Answer Section - * @param nstree DNS Records to put in the Authority Section - * @param artree DNS Records to put in the Additional Section - * @param len Will get filled with the entire size of the packet - * @return Pointer to created DNS packet. - * ****************************************************************************/ -pico_dns_packet * -pico_dns_answer_create( struct pico_tree *antree, - struct pico_tree *nstree, - struct pico_tree *artree, - uint16_t *len ); - -#endif /* _INCLUDE_PICO_DNS_COMMON */ diff --git a/kernel/picotcp/modules/pico_dns_sd.c b/kernel/picotcp/modules/pico_dns_sd.c deleted file mode 100644 index d845b73..0000000 --- a/kernel/picotcp/modules/pico_dns_sd.c +++ /dev/null @@ -1,568 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2014-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - . 
- Author: Jelle De Vleeschouwer - *********************************************************************/ - -#include "pico_dns_sd.h" - -#ifdef PICO_SUPPORT_DNS_SD - -/* --- Debugging --- */ -#ifdef DEBUG_DNS_SD - #define dns_sd_dbg dbg -#else - #define dns_sd_dbg(...) do {} while(0) -#endif - -/* --- PROTOTYPES --- */ -key_value_pair_t * -pico_dns_sd_kv_vector_get( kv_vector *vector, uint16_t index ); -int -pico_dns_sd_kv_vector_erase( kv_vector *vector ); -/* ------------------- */ - -typedef PACKED_STRUCT_DEF pico_dns_srv_record_prefix -{ - uint16_t priority; - uint16_t weight; - uint16_t port; -} pico_dns_srv_record; - -/* **************************************************************************** - * Determines the length of the resulting string when a string would be - * created from a key-value pair vector. - * - * @param vector Key-Value pair vector to determine the length of. - * @return The length of the key-value pair vector in bytes as if it would be - * converted to a string. - * ****************************************************************************/ -static uint16_t -pico_dns_sd_kv_vector_strlen( kv_vector *vector ) -{ - key_value_pair_t *iterator = NULL; - uint16_t i = 0, len = 0; - - /* Check params */ - if (!vector) { - pico_err = PICO_ERR_EINVAL; - return 0; - } - - /* Iterate over the key-value pairs */ - for (i = 0; i < vector->count; i++) { - iterator = pico_dns_sd_kv_vector_get(vector, i); - len = (uint16_t) (len + 1u + /* Length byte */ - strlen(iterator->key) /* Length of the key */); - if (iterator->value) { - len = (uint16_t) (len + 1u /* '=' char */ + - strlen(iterator->value) /* Length of value */); - } - } - return len; -} - -/* **************************************************************************** - * Creates an mDNS record with the SRV record format. - * - * @param url Name of the SRV record in URL format. - * @param priority Priority, should be 0. - * @param weight Weight, should be 0. 
- * @param port Port to register the service on. - * @param target_url Hostname of the service-target, in URL-format - * @param ttl TTL of the SRV Record - * @param flags mDNS record flags to set specifications of the record. - * @return Pointer to newly created record on success, NULL on failure. - * ****************************************************************************/ -static struct pico_mdns_record * -pico_dns_sd_srv_record_create( const char *url, - uint16_t priority, - uint16_t weight, - uint16_t port, - const char *target_url, - uint32_t ttl, - uint8_t flags ) -{ - struct pico_mdns_record *record = NULL; - pico_dns_srv_record *srv_data = NULL; - char *target_rname = NULL; - uint16_t srv_length = 0; - - /* Check params */ - if (!url || !target_url) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Determine the length the rdata buf needs to be */ - srv_length = (uint16_t) (6u + strlen(target_url) + 2u); - - /* Provide space for the data-buf */ - if (!(srv_data = (pico_dns_srv_record *) PICO_ZALLOC(srv_length))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Set the fields */ - srv_data->priority = short_be(priority); - srv_data->weight = short_be(weight); - srv_data->port = short_be(port); - - /* Copy in the URL and convert to DNS notation */ - if (!(target_rname = pico_dns_url_to_qname(target_url))) { - dns_sd_dbg("Could not convert URL to qname!\n"); - PICO_FREE(srv_data); - return NULL; - } - - strcpy((char *)srv_data + 6u, target_rname); - PICO_FREE(target_rname); - - /* Create and return new mDNS record */ - record = pico_mdns_record_create(url, srv_data, srv_length, - PICO_DNS_TYPE_SRV, - ttl, flags); - PICO_FREE(srv_data); - return record; -} - -/* **************************************************************************** - * Creates an mDNS record with the TXT record format. - * - * @param url Name of the TXT record in URL format. - * @param key_value_pairs Key-Value pair vector to generate the data from. 
- * @param ttl TTL of the TXT record. - * @param flags mDNS record flags to set specifications of the record - * @return Pointer to newly created record on success, NULL on failure. - * ****************************************************************************/ -static struct pico_mdns_record * -pico_dns_sd_txt_record_create( const char *url, - kv_vector key_value_pairs, - uint32_t ttl, - uint8_t flags ) -{ - struct pico_mdns_record *record = NULL; - key_value_pair_t *iterator = NULL; - char *txt = NULL; - uint16_t i = 0, txt_i = 0, pair_len = 0, key_len = 0, value_len = 0; - - /* Determine the length of the string to fit in all pairs */ - uint16_t len = (uint16_t)(pico_dns_sd_kv_vector_strlen(&key_value_pairs) + 1u); - - /* If kv-vector is empty don't bother to create a TXT record */ - if (len <= 1) { - return NULL; - } - - /* Provide space for the txt buf */ - if (!(txt = (char *)PICO_ZALLOC(len))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Iterate over all the key-value pairs */ - for (i = 0; i < key_value_pairs.count; i++) { - iterator = pico_dns_sd_kv_vector_get(&key_value_pairs, i); - - /* Determine the length of the key */ - key_len = (uint16_t) strlen(iterator->key); - pair_len = key_len; - - /* If value is not a NULL-ptr */ - if (iterator->value) { - value_len = (uint16_t) strlen(iterator->value); - pair_len = (uint16_t) (pair_len + 1u + value_len); - } - - /* Set the pair length label */ - txt[txt_i] = (char)pair_len; - - /* Copy the key */ - strcpy(txt + txt_i + 1u, iterator->key); - - /* Copy the value if it is not a NULL-ptr */ - if (iterator->value) { - strcpy(txt + txt_i + 1u + key_len, "="); - strcpy(txt + txt_i + 2u + key_len, iterator->value); - txt_i = (uint16_t) (txt_i + 2u + key_len + value_len); - } else { - txt_i = (uint16_t) (txt_i + 1u + key_len); - } - } - record = pico_mdns_record_create(url, txt, (uint16_t)(len - 1u), PICO_DNS_TYPE_TXT, ttl, flags); - PICO_FREE(txt); - - return record; -} - -/* 
**************************************************************************** - * Deletes a single key-value pair instance - * - * @param kv_pair Pointer-pointer to to delete instance - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -static int -pico_dns_sd_kv_delete( key_value_pair_t **kv_pair ) -{ - /* Check params */ - if (!kv_pair || !(*kv_pair)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Delete the fields */ - if ((*kv_pair)->key) - PICO_FREE((*kv_pair)->key); - - if ((*kv_pair)->value) - PICO_FREE((*kv_pair)->value); - - PICO_FREE(*kv_pair); - *kv_pair = NULL; - kv_pair = NULL; - - return 0; -} - -/* **************************************************************************** - * Creates a single key-value pair-instance - * - * @param key Key of the pair, cannot be NULL. - * @param value Value of the pair, can be NULL, empty ("") or filled ("qkejq") - * @return Pointer to newly created KV-instance on success, NULL on failure. 
- * ****************************************************************************/ -static key_value_pair_t * -pico_dns_sd_kv_create( const char *key, const char *value ) -{ - key_value_pair_t *kv_pair = NULL; - - /* Check params */ - if (!key || !(kv_pair = PICO_ZALLOC(sizeof(key_value_pair_t)))) { - pico_dns_sd_kv_delete(&kv_pair); - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Provide space to copy the values */ - if (!(kv_pair->key = PICO_ZALLOC((size_t)(strlen(key) + 1)))) { - pico_err = PICO_ERR_ENOMEM; - pico_dns_sd_kv_delete(&kv_pair); - return NULL; - } - - strcpy(kv_pair->key, key); - - if (value) { - if (!(kv_pair->value = PICO_ZALLOC((size_t)(strlen(value) + 1)))) { - pico_err = PICO_ERR_ENOMEM; - pico_dns_sd_kv_delete(&kv_pair); - return NULL; - } - - strcpy(kv_pair->value, value); - } else - kv_pair->value = NULL; - - return kv_pair; -} - -/* **************************************************************************** - * Checks whether the type is correctly formatted ant it's label length are - * between the allowed boundaries. - * - * @param type Servicetype to check the format of. - * @return Returns 0 when the type is correctly formatted, something else when - * it's not. 
- * ****************************************************************************/ -static int -pico_dns_sd_check_type_format( const char *type ) -{ - uint16_t first_lbl = 0; - int8_t subtype_present = 0; - - /* Check params */ - if (!(first_lbl = pico_dns_first_label_length(type))) - return -1; - - subtype_present = !memcmp(type + first_lbl + 1, "_sub", 4); - - /* Check if there is a subtype present */ - if (subtype_present && (first_lbl > 63)) - return -1; - else if (subtype_present) - /* Get the length of the service name */ - first_lbl = pico_dns_first_label_length(type + first_lbl + 6); - else { - /* Check if type is not greater then 21 bytes (22 - 1, since the length - byte of the service name isn't included yet) */ - if (strlen(type) > (size_t) 21) - return -1; - } - - /* Check if the service name is not greater then 16 bytes (17 - 1) */ - return (first_lbl > ((uint16_t) 16u)); -} - -/* **************************************************************************** - * Checks whether the service instance name is correctly formatted and it's - * label length falls between the allowed boundaries. - * - * @param name Instance name to check the format of. - * @return Returns 0 when the name is correctly formatted, something else when - * it's not. - * ****************************************************************************/ -static int -pico_dns_sd_check_instance_name_format( const char *name ) -{ - /* First of all check if the total length is larger than 63 bytes */ - if (pico_dns_strlen(name) > 63 || !pico_dns_strlen(name)) - return -1; - - return 0; -} - -/* **************************************************************************** - * Append the instance name adn service type to create a '.local' service SIN. - * - * @param name Instance Name of the service, f.e. "Printer 2nd Floor". - * @param type ServiceType of the service, f.e. "_http._tcp". - * @return Pointer to newly created SIN on success, NULL on failure. 
- * ****************************************************************************/ -static char * -pico_dns_sd_create_service_url( const char *name, - const char *type ) -{ - char *url = NULL; - uint16_t len = 0, namelen = 0, typelen = 0; - - if (pico_dns_sd_check_type_format(type)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - if (pico_dns_sd_check_instance_name_format(name)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - namelen = (uint16_t)strlen(name); - typelen = (uint16_t)strlen(type); - - /* Determine the length that the URL needs to be */ - len = (uint16_t)(namelen + 1u /* for '.'*/ + - typelen + 7u /* for '.local\0' */); - url = (char *)PICO_ZALLOC(len); - if (!url) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Append the parts together */ - strcpy(url, name); - strcpy(url + namelen, "."); - strcpy(url + namelen + 1, type); - strcpy(url + namelen + 1 + typelen, ".local"); - - return url; -} - -/* **************************************************************************** - * This function actually does exactly the same as pico_mdns_init(); - * ****************************************************************************/ -int -pico_dns_sd_init( const char *_hostname, - struct pico_ip4 address, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - return pico_mdns_init(_hostname, address, callback, arg); -} - -/* **************************************************************************** - * Just calls pico_mdns_init in its turn to initialise the mDNS-module. - * See pico_mdns.h for description. 
- * ****************************************************************************/ -int -pico_dns_sd_register_service( const char *name, - const char *type, - uint16_t port, - kv_vector *txt_data, - uint16_t ttl, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg) -{ - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *srv_record = NULL; - struct pico_mdns_record *txt_record = NULL; - const char *hostname = pico_mdns_get_hostname(); - char *url = NULL; - - /* Try to create a service URL to create records with */ - if (!(url = pico_dns_sd_create_service_url(name, type)) || !txt_data || !hostname) { - if (url) { - PICO_FREE(url); - } - - pico_err = PICO_ERR_EINVAL; - return -1; - } - - dns_sd_dbg("\n>>>>>>>>>> Target: %s <<<<<<<<<<\n\n", hostname); - - /* Create the SRV record */ - srv_record = pico_dns_sd_srv_record_create(url, 0, 0, port, hostname, ttl, PICO_MDNS_RECORD_UNIQUE); - if (!srv_record) { - PICO_FREE(url); - return -1; - } - - /* Create the TXT record */ - txt_record = pico_dns_sd_txt_record_create(url, *txt_data, ttl, PICO_MDNS_RECORD_UNIQUE); - PICO_FREE(url); - - /* Erase the key-value pair vector, it's no longer needed */ - pico_dns_sd_kv_vector_erase(txt_data); - - if (txt_record) { - if (pico_tree_insert(&rtree, txt_record) == &LEAF) { - PICO_MDNS_RTREE_DESTROY(&rtree); - pico_mdns_record_delete((void **)&txt_record); - pico_mdns_record_delete((void **)&srv_record); - return -1; - } - } - - if (pico_tree_insert(&rtree, srv_record) == &LEAF) { - PICO_MDNS_RTREE_DESTROY(&rtree); - pico_mdns_record_delete((void **)&srv_record); - return -1; - } - - if (pico_mdns_claim(rtree, callback, arg)) { - PICO_MDNS_RTREE_DESTROY(&rtree); - return -1; - } - - /* Only destroy the tree, not its elements since they still exist in another tree */ - pico_tree_destroy(&rtree, NULL); - return 0; -} - -/* **************************************************************************** - * Does nothing for now. 
- * - * @param type Type to browse for. - * @param callback Callback to call when something particular happens. - * @return When the module successfully started browsing the servicetype. - * ****************************************************************************/ -int -pico_dns_sd_browse_service( const char *type, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - IGNORE_PARAMETER(type); - IGNORE_PARAMETER(callback); - IGNORE_PARAMETER(arg); - return 0; -} - -/* **************************************************************************** - * Add a key-value pair the a key-value pair vector. - * - * @param vector Vector to add the pair to. - * @param key Key of the pair, cannot be NULL. - * @param value Value of the pair, can be NULL, empty ("") or filled ("qkejq") - * @return Returns 0 when the pair is added successfully, something else on - * failure. - * ****************************************************************************/ -int -pico_dns_sd_kv_vector_add( kv_vector *vector, char *key, char *value ) -{ - key_value_pair_t *kv_pair = NULL; - key_value_pair_t **new_pairs = NULL; - uint16_t i = 0; - - /* Check params */ - if (!vector || !key || !(kv_pair = pico_dns_sd_kv_create(key, value))) { - pico_err = PICO_ERR_EINVAL; - pico_dns_sd_kv_delete(&kv_pair); - return -1; - } - - /* Provide enough space for the new pair pointers */ - if (!(new_pairs = PICO_ZALLOC(sizeof(key_value_pair_t *) * - (vector->count + 1u)))) { - pico_err = PICO_ERR_ENOMEM; - pico_dns_sd_kv_delete(&kv_pair); - return -1; - } - - /* Copy previous pairs and add new one */ - for (i = 0; i < vector->count; i++) - new_pairs[i] = vector->pairs[i]; - new_pairs[i] = kv_pair; - - /* Free the previous array */ - if (vector->pairs) - PICO_FREE(vector->pairs); - - vector->pairs = new_pairs; - vector->count++; - - return 0; -} - -/* **************************************************************************** - * Gets a single key-value pair form a Key-Value pair vector 
@ certain index. - * - * @param vector Vector to get KV-pair from. - * @param index Index of the KV-pair. - * @return key_value_pair_t* on success, NULL on failure. - * ****************************************************************************/ -key_value_pair_t * -pico_dns_sd_kv_vector_get( kv_vector *vector, uint16_t index ) -{ - /* Check params */ - if (!vector) - return NULL; - - /* Return record with conditioned index */ - if (index < vector->count) - return vector->pairs[index]; - - return NULL; -} - -/* **************************************************************************** - * Erase all the contents of a key-value pair vector. - * - * @param vector Key-Value pair vector. - * @return 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_dns_sd_kv_vector_erase( kv_vector *vector ) -{ - uint16_t i = 0; - - /* Iterate over each key-value pair */ - for (i = 0; i < vector->count; i++) { - if (pico_dns_sd_kv_delete(&(vector->pairs[i])) < 0) { - dns_sd_dbg("Could not delete key-value pairs from vector"); - return -1; - } - } - PICO_FREE(vector->pairs); - vector->pairs = NULL; - vector->count = 0; - - return 0; -} - -#endif diff --git a/kernel/picotcp/modules/pico_dns_sd.h b/kernel/picotcp/modules/pico_dns_sd.h deleted file mode 100644 index ec9f727..0000000 --- a/kernel/picotcp/modules/pico_dns_sd.h +++ /dev/null @@ -1,91 +0,0 @@ -/* **************************************************************************** - * PicoTCP. Copyright (c) 2014 TASS Belgium NV. Some rights reserved. - * See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - * . 
- * Author: Jelle De Vleeschouwer - * ****************************************************************************/ -#ifndef INCLUDE_PICO_DNS_SD -#define INCLUDE_PICO_DNS_SD - -#include "pico_mdns.h" - -typedef struct -{ - char *key; - char *value; -} key_value_pair_t; - -typedef struct -{ - key_value_pair_t **pairs; - uint16_t count; -} kv_vector; - -#define PICO_DNS_SD_KV_VECTOR_DECLARE(name) \ - kv_vector (name) = {0} - -/* **************************************************************************** - * Just calls pico_mdns_init in it's turn to initialise the mDNS-module. - * See pico_mdns.h for description. - * ****************************************************************************/ -int -pico_dns_sd_init( const char *_hostname, - struct pico_ip4 address, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ); - -/* **************************************************************************** - * Register a DNS-SD service via Multicast DNS on the local network. - * - * @param name Instance Name of the service, f.e. "Printer 2nd Floor". - * @param type ServiceType of the service, f.e. "_http._tcp". - * @param port Port number on which the service runs. - * @param txt_data TXT data to create TXT record with, need kv_vector-type, - * Declare such a type with PICO_DNS_SD_KV_VECTOR_DECLARE(*) & - * add key-value pairs with pico_dns_sd_kv_vector_add(). - * @param ttl TTL - * @param callback Callback-function to call when the service is registered. - * @return - * ****************************************************************************/ -int -pico_dns_sd_register_service( const char *name, - const char *type, - uint16_t port, - kv_vector *txt_data, - uint16_t ttl, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg); - -/* **************************************************************************** - * Does nothing for now. - * - * @param type Type to browse for. 
- * @param callback Callback to call when something particular happens. - * @return When the module successfully started browsing the servicetype. - * ****************************************************************************/ -int -pico_dns_sd_browse_service( const char *type, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ); - -/* **************************************************************************** - * Add a key-value pair the a key-value pair vector. - * - * @param vector Vector to add the pair to. - * @param key Key of the pair, cannot be NULL. - * @param value Value of the pair, can be NULL, empty ("") or filled ("qkejq") - * @return Returns 0 when the pair is added successfully, something else on - * failure. - * ****************************************************************************/ -int -pico_dns_sd_kv_vector_add( kv_vector *vector, char *key, char *value ); - - -#endif /* _INCLUDE_PICO_DNS_SD */ - diff --git a/kernel/picotcp/modules/pico_ethernet.c b/kernel/picotcp/modules/pico_ethernet.c deleted file mode 100644 index 301072b..0000000 --- a/kernel/picotcp/modules/pico_ethernet.c +++ /dev/null @@ -1,448 +0,0 @@ - /********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - Authors: Daniele Lacamera - *********************************************************************/ - -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_icmp4.h" -#include "pico_icmp6.h" -#include "pico_arp.h" -#include "pico_ethernet.h" - -#define IS_LIMITED_BCAST(f) (((struct pico_ipv4_hdr *) f->net_hdr)->dst.addr == PICO_IP4_BCAST) - -#ifdef PICO_SUPPORT_ETH - -const uint8_t PICO_ETHADDR_ALL[6] = { - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}; - -# define PICO_SIZE_MCAST 3 -static const uint8_t PICO_ETHADDR_MCAST[6] = { - 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 -}; - -#ifdef PICO_SUPPORT_IPV6 -# define PICO_SIZE_MCAST6 2 -static const uint8_t PICO_ETHADDR_MCAST6[6] = { - 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 -}; -#endif - -/* DATALINK LEVEL: interface from network to the device - * and vice versa. - */ - -/* The pico_ethernet_receive() function is used by - * those devices supporting ETH in order to push packets up - * into the stack. 
- */ - -/* Queues */ -static struct pico_queue ethernet_in = { - 0 -}; -static struct pico_queue ethernet_out = { - 0 -}; - -int32_t MOCKABLE pico_ethernet_send(struct pico_frame *f); -static int32_t pico_ethernet_receive(struct pico_frame *f); - -static int pico_ethernet_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - return pico_ethernet_send(f); -} - -static int pico_ethernet_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - return (pico_ethernet_receive(f) <= 0); /* 0 on success, which is ret > 0 */ -} - -static struct pico_frame *pico_ethernet_alloc(struct pico_protocol *self, struct pico_device *dev, uint16_t size) -{ - struct pico_frame *f = NULL; - uint32_t overhead = 0; - IGNORE_PARAMETER(self); - - if (dev) - overhead = dev->overhead; - - f = pico_frame_alloc((uint32_t)(overhead + size + PICO_SIZE_ETHHDR)); - if (!f) - return NULL; - - f->dev = dev; - f->datalink_hdr = f->buffer + overhead; - f->net_hdr = f->datalink_hdr + PICO_SIZE_ETHHDR; - /* Stay of the rest, higher levels will take care */ - - return f; -} - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_ethernet = { - .name = "ethernet", - .layer = PICO_LAYER_DATALINK, - .alloc = pico_ethernet_alloc, - .process_in = pico_ethernet_process_in, - .process_out = pico_ethernet_process_out, - .q_in = ðernet_in, - .q_out = ðernet_out, -}; - -static int destination_is_bcast(struct pico_frame *f) -{ - if (!f) - return 0; - - if (IS_IPV6(f)) - return 0; - -#ifdef PICO_SUPPORT_IPV4 - else { - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - return pico_ipv4_is_broadcast(hdr->dst.addr); - } -#else - return 0; -#endif -} - -static int destination_is_mcast(struct pico_frame *f) -{ - int ret = 0; - if (!f) - return 0; - -#ifdef PICO_SUPPORT_IPV6 - if (IS_IPV6(f)) { - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *) f->net_hdr; - ret = pico_ipv6_is_multicast(hdr->dst.addr); - } - 
-#endif -#ifdef PICO_SUPPORT_IPV4 - else { - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - ret = pico_ipv4_is_multicast(hdr->dst.addr); - } -#endif - - return ret; -} - -#ifdef PICO_SUPPORT_IPV4 -static int32_t pico_ipv4_ethernet_receive(struct pico_frame *f) -{ - if (IS_IPV4(f)) { - if (pico_enqueue(pico_proto_ipv4.q_in, f) < 0) { - pico_frame_discard(f); - return -1; - } - } else { - (void)pico_icmp4_param_problem(f, 0); - pico_frame_discard(f); - return -1; - } - - return (int32_t)f->buffer_len; -} -#endif - -#ifdef PICO_SUPPORT_IPV6 -static int32_t pico_ipv6_ethernet_receive(struct pico_frame *f) -{ - if (IS_IPV6(f)) { - if (pico_enqueue(pico_proto_ipv6.q_in, f) < 0) { - pico_frame_discard(f); - return -1; - } - } else { - /* Wrong version for link layer type */ - pico_frame_discard(f); - return -1; - } - - return (int32_t)f->buffer_len; -} -#endif - -static int32_t pico_eth_receive(struct pico_frame *f) -{ - struct pico_eth_hdr *hdr = (struct pico_eth_hdr *) f->datalink_hdr; - f->net_hdr = f->datalink_hdr + sizeof(struct pico_eth_hdr); - -#if (defined PICO_SUPPORT_IPV4) && (defined PICO_SUPPORT_ETH) - if (hdr->proto == PICO_IDETH_ARP) - return pico_arp_receive(f); -#endif - -#if defined (PICO_SUPPORT_IPV4) - if (hdr->proto == PICO_IDETH_IPV4) - return pico_ipv4_ethernet_receive(f); -#endif - -#if defined (PICO_SUPPORT_IPV6) - if (hdr->proto == PICO_IDETH_IPV6) - return pico_ipv6_ethernet_receive(f); -#endif - - pico_frame_discard(f); - return -1; -} - -static void pico_eth_check_bcast(struct pico_frame *f) -{ - struct pico_eth_hdr *hdr = (struct pico_eth_hdr *) f->datalink_hdr; - /* Indicate a link layer broadcast packet */ - if (memcmp(hdr->daddr, PICO_ETHADDR_ALL, PICO_SIZE_ETH) == 0) - f->flags |= PICO_FRAME_FLAG_BCAST; -} - -static int32_t pico_ethernet_receive(struct pico_frame *f) -{ - struct pico_eth_hdr *hdr; - if (!f || !f->dev || !f->datalink_hdr) - { - pico_frame_discard(f); - return -1; - } - - hdr = (struct pico_eth_hdr *) 
f->datalink_hdr; - if ((memcmp(hdr->daddr, f->dev->eth->mac.addr, PICO_SIZE_ETH) != 0) && - (memcmp(hdr->daddr, PICO_ETHADDR_MCAST, PICO_SIZE_MCAST) != 0) && -#ifdef PICO_SUPPORT_IPV6 - (memcmp(hdr->daddr, PICO_ETHADDR_MCAST6, PICO_SIZE_MCAST6) != 0) && -#endif - (memcmp(hdr->daddr, PICO_ETHADDR_ALL, PICO_SIZE_ETH) != 0)) - { - pico_frame_discard(f); - return -1; - } - - pico_eth_check_bcast(f); - return pico_eth_receive(f); -} - -static struct pico_eth *pico_ethernet_mcast_translate(struct pico_frame *f, uint8_t *pico_mcast_mac) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - - /* place 23 lower bits of IP in lower 23 bits of MAC */ - pico_mcast_mac[5] = (long_be(hdr->dst.addr) & 0x000000FFu); - pico_mcast_mac[4] = (uint8_t)((long_be(hdr->dst.addr) & 0x0000FF00u) >> 8u); - pico_mcast_mac[3] = (uint8_t)((long_be(hdr->dst.addr) & 0x007F0000u) >> 16u); - - return (struct pico_eth *)pico_mcast_mac; -} - - -#ifdef PICO_SUPPORT_IPV6 -static struct pico_eth *pico_ethernet_mcast6_translate(struct pico_frame *f, uint8_t *pico_mcast6_mac) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - - /* first 2 octets are 0x33, last four are the last four of dst */ - pico_mcast6_mac[5] = hdr->dst.addr[PICO_SIZE_IP6 - 1]; - pico_mcast6_mac[4] = hdr->dst.addr[PICO_SIZE_IP6 - 2]; - pico_mcast6_mac[3] = hdr->dst.addr[PICO_SIZE_IP6 - 3]; - pico_mcast6_mac[2] = hdr->dst.addr[PICO_SIZE_IP6 - 4]; - - return (struct pico_eth *)pico_mcast6_mac; -} -#endif - -static int pico_ethernet_ipv6_dst(struct pico_frame *f, struct pico_eth *const dstmac) -{ - int retval = -1; - if (!dstmac) - return -1; - - #ifdef PICO_SUPPORT_IPV6 - if (destination_is_mcast(f)) { - uint8_t pico_mcast6_mac[6] = { - 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 - }; - pico_ethernet_mcast6_translate(f, pico_mcast6_mac); - memcpy(dstmac, pico_mcast6_mac, PICO_SIZE_ETH); - retval = 0; - } else { - struct pico_eth *neighbor = pico_ipv6_get_neighbor(f); - if (neighbor) - { - memcpy(dstmac, 
neighbor, PICO_SIZE_ETH); - retval = 0; - } - } - - #else - (void)f; - pico_err = PICO_ERR_EPROTONOSUPPORT; - #endif - return retval; -} - - -/* Ethernet send, first attempt: try our own address. - * Returns 0 if the packet is not for us. - * Returns 1 if the packet is cloned to our own receive queue and the original frame is dicarded. - * */ -static int32_t pico_ethsend_local(struct pico_frame *f, struct pico_eth_hdr *hdr) -{ - if (!hdr) - return 0; - - /* Check own mac */ - if(!memcmp(hdr->daddr, hdr->saddr, PICO_SIZE_ETH)) { - struct pico_frame *clone = pico_frame_copy(f); - dbg("sending out packet destined for our own mac\n"); - if (pico_ethernet_receive(clone) < 0) { - dbg("pico_ethernet_receive() failed\n"); - return 0; - } - pico_frame_discard(f); - return 1; - } - - return 0; -} - -/* Ethernet send, second attempt: try bcast. - * Returns 0 if the packet is not bcast, so it will be handled somewhere else. - * Returns 1 if the packet is handled by the pico_device_broadcast() function and is discarded. - * */ -static int32_t pico_ethsend_bcast(struct pico_frame *f) -{ - if (IS_LIMITED_BCAST(f)) { - return (pico_device_broadcast(f) > 0); // Return 1 on success, ret > 0 - } - - return 0; -} - -/* Ethernet send, third attempt: try unicast. - * If the device driver is busy, we return 0, so the stack won't discard the frame. - * In case of success, we can safely return 1. - */ -static int32_t pico_ethsend_dispatch(struct pico_frame *f) -{ - return (pico_sendto_dev(f) > 0); // Return 1 on success, ret > 0 -} - -/* Checks whether or not there's enough headroom allocated in the frame to - * prepend the Ethernet header. Reallocates if this is not the case. 
*/ -static int eth_check_headroom(struct pico_frame *f) -{ - uint32_t headroom = (uint32_t)(f->net_hdr - f->buffer); - uint32_t grow = (uint32_t)(PICO_SIZE_ETHHDR - headroom); - if (headroom < (uint32_t)PICO_SIZE_ETHHDR) { - return pico_frame_grow_head(f, (uint32_t)(f->buffer_len + grow)); - } - return 0; -} - -/* This function looks for the destination mac address - * in order to send the frame being processed. - */ -int32_t MOCKABLE pico_ethernet_send(struct pico_frame *f) -{ - struct pico_eth dstmac; - uint8_t dstmac_valid = 0; - uint16_t proto = PICO_IDETH_IPV4; - -#ifdef PICO_SUPPORT_IPV6 - /* Step 1: If the frame has an IPv6 packet, - * destination address is taken from the ND tables - */ - if (IS_IPV6(f)) { - if (pico_ethernet_ipv6_dst(f, &dstmac) < 0) - { - /* Enqueue copy of frame in IPv6 ND-module to retry later. Discard - * frame, otherwise we have a duplicate in IPv6-ND */ - pico_ipv6_nd_postpone(f); - return (int32_t)f->len; - } - - dstmac_valid = 1; - proto = PICO_IDETH_IPV6; - } - else -#endif - - /* In case of broadcast (IPV4 only), dst mac is FF:FF:... */ - if (IS_BCAST(f) || destination_is_bcast(f)) - { - memcpy(&dstmac, PICO_ETHADDR_ALL, PICO_SIZE_ETH); - dstmac_valid = 1; - } - - /* In case of multicast, dst mac is translated from the group address */ - else if (destination_is_mcast(f)) { - uint8_t pico_mcast_mac[6] = { - 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 - }; - pico_ethernet_mcast_translate(f, pico_mcast_mac); - memcpy(&dstmac, pico_mcast_mac, PICO_SIZE_ETH); - dstmac_valid = 1; - } - -#if (defined PICO_SUPPORT_IPV4) - else { - struct pico_eth *arp_get; - arp_get = pico_arp_get(f); - if (arp_get) { - memcpy(&dstmac, arp_get, PICO_SIZE_ETH); - dstmac_valid = 1; - } else { - /* Enqueue copy of frame in ARP-module to retry later. Discard - * frame otherwise we have a duplicate */ - pico_arp_postpone(f); - return (int32_t)f->len; - } - } -#endif - - /* This sets destination and source address, then pushes the packet to the device. 
*/ - if (dstmac_valid) { - struct pico_eth_hdr *hdr; - if (!eth_check_headroom(f)) { - hdr = (struct pico_eth_hdr *) f->datalink_hdr; - if ((f->start > f->buffer) && ((f->start - f->buffer) >= PICO_SIZE_ETHHDR)) - { - f->start -= PICO_SIZE_ETHHDR; - f->len += PICO_SIZE_ETHHDR; - f->datalink_hdr = f->start; - hdr = (struct pico_eth_hdr *) f->datalink_hdr; - memcpy(hdr->saddr, f->dev->eth->mac.addr, PICO_SIZE_ETH); - memcpy(hdr->daddr, &dstmac, PICO_SIZE_ETH); - hdr->proto = proto; - } - - if (pico_ethsend_local(f, hdr) || pico_ethsend_bcast(f) || pico_ethsend_dispatch(f)) { - /* one of the above functions has delivered the frame accordingly. - * (returned != 0). It is safe to directly return successfully. - * Lower level queue has frame, so don't discard */ - return (int32_t)f->len; - } - } - } - - /* Failure, frame could not be be enqueued in lower-level layer, safe - * to discard since something clearly went wrong */ - pico_frame_discard(f); - return 0; -} - -#endif /* PICO_SUPPORT_ETH */ - - diff --git a/kernel/picotcp/modules/pico_ethernet.h b/kernel/picotcp/modules/pico_ethernet.h deleted file mode 100644 index e60c467..0000000 --- a/kernel/picotcp/modules/pico_ethernet.h +++ /dev/null @@ -1,18 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - Authors: Daniele Lacamera - *********************************************************************/ - -#ifndef INCLUDE_PICO_ETHERNET -#define INCLUDE_PICO_ETHERNET - -#include "pico_config.h" -#include "pico_frame.h" - -extern struct pico_protocol pico_proto_ethernet; - -#endif /* INCLUDE_PICO_ETHERNET */ diff --git a/kernel/picotcp/modules/pico_fragments.c b/kernel/picotcp/modules/pico_fragments.c deleted file mode 100644 index 81b92ed..0000000 --- a/kernel/picotcp/modules/pico_fragments.c +++ /dev/null @@ -1,573 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Laurens Miers, Daniele Lacamera - *********************************************************************/ - - -#include "pico_config.h" -#ifdef PICO_SUPPORT_IPV6 -#include "pico_ipv6.h" -#include "pico_icmp6.h" -#endif -#ifdef PICO_SUPPORT_IPV4 -#include "pico_ipv4.h" -#include "pico_icmp4.h" -#endif -#include "pico_stack.h" -#include "pico_eth.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_socket.h" -#include "pico_device.h" -#include "pico_tree.h" -#include "pico_constants.h" -#include "pico_fragments.h" - -#ifdef DEBUG_FRAG - #define frag_dbg dbg -#else - #define frag_dbg(...) 
do {} while(0) -#endif - -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) -#define IP6_FRAG_OFF(x) ((x & 0xFFF8u)) -#define IP6_FRAG_MORE(x) ((x & 0x0001)) -#define IP6_FRAG_ID(x) ((uint32_t)(((uint32_t)x->ext.frag.id[0] << 24) + ((uint32_t)x->ext.frag.id[1] << 16) + \ - ((uint32_t)x->ext.frag.id[2] << 8) + (uint32_t)x->ext.frag.id[3])) - -#else -#define IP6_FRAG_OFF(x) (0) -#define IP6_FRAG_MORE(x) (0) -#define IP6_FRAG_ID(x) (0) -#endif - -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) -#define IP4_FRAG_OFF(frag) (((uint32_t)frag & PICO_IPV4_FRAG_MASK) << 3ul) -#define IP4_FRAG_MORE(frag) ((frag & PICO_IPV4_MOREFRAG) ? 1 : 0) -#define IP4_FRAG_ID(hdr) (hdr->id) -#else -#define IP4_FRAG_OFF(frag) (0) -#define IP4_FRAG_MORE(frag) (0) -#define IP4_FRAG_ID(hdr) (0) -#endif - -#define PICO_IPV6_FRAG_TIMEOUT 60000 -#define PICO_IPV4_FRAG_TIMEOUT 15000 - -static void pico_frag_expire(pico_time now, void *arg); -static void pico_fragments_complete(unsigned int bookmark, uint8_t proto, uint8_t net); -static int pico_fragments_check_complete(struct pico_tree *tree, uint8_t proto, uint8_t net); -static int pico_fragments_reassemble(struct pico_tree *tree, unsigned int len, uint8_t proto, uint8_t net); -static int pico_fragments_get_more_flag(struct pico_frame *frame, uint8_t net); -static uint32_t pico_fragments_get_offset(struct pico_frame *frame, uint8_t net); -static void pico_fragments_send_notify(struct pico_frame *first); -static uint16_t pico_fragments_get_header_length(uint8_t net); -static void pico_fragments_empty_tree(struct pico_tree *tree); - -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) -static uint32_t ipv6_cur_frag_id = 0u; -static uint32_t ipv6_fragments_timer = 0u; - -static int pico_ipv6_frag_compare(void *ka, void *kb) -{ - struct pico_frame *a = ka, *b = kb; - if (IP6_FRAG_OFF(a->frag) > IP6_FRAG_OFF(b->frag)) - return 1; - - if (IP6_FRAG_OFF(a->frag) < IP6_FRAG_OFF(b->frag)) - return -1; - - return 
0; -} -static PICO_TREE_DECLARE(ipv6_fragments, pico_ipv6_frag_compare); - -static void pico_ipv6_fragments_complete(unsigned int len, uint8_t proto) -{ - if (pico_fragments_reassemble(&ipv6_fragments, len, proto, PICO_PROTO_IPV6) == 0) - { - pico_timer_cancel(ipv6_fragments_timer); - ipv6_fragments_timer = 0; - } -} - -static void pico_ipv6_frag_timer_on(void) -{ - ipv6_fragments_timer = pico_timer_add(PICO_IPV6_FRAG_TIMEOUT, pico_frag_expire, &ipv6_fragments); - if (!ipv6_fragments_timer) { - frag_dbg("FRAG: Failed to start IPv6 expiration timer\n"); - pico_fragments_empty_tree(&ipv6_fragments); - } -} - -static int pico_ipv6_frag_match(struct pico_frame *a, struct pico_frame *b) -{ - struct pico_ipv6_hdr *ha = NULL, *hb = NULL; - if (!a || !b) - return -1; - - ha = (struct pico_ipv6_hdr *)a->net_hdr; - hb = (struct pico_ipv6_hdr *)b->net_hdr; - if (!ha || !hb) - return -2; - - if (memcmp(ha->src.addr, hb->src.addr, PICO_SIZE_IP6) != 0) - return 1; - - if (memcmp(ha->dst.addr, hb->dst.addr, PICO_SIZE_IP6) != 0) - return 2; - - return 0; -} -#endif - -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) -static uint32_t ipv4_cur_frag_id = 0u; -static uint32_t ipv4_fragments_timer = 0u; - -static int pico_ipv4_frag_compare(void *ka, void *kb) -{ - struct pico_frame *a = ka, *b = kb; - if (IP4_FRAG_OFF(a->frag) > IP4_FRAG_OFF(b->frag)) - return 1; - - if (IP4_FRAG_OFF(a->frag) < IP4_FRAG_OFF(b->frag)) - return -1; - - return 0; -} -static PICO_TREE_DECLARE(ipv4_fragments, pico_ipv4_frag_compare); - -static void pico_ipv4_fragments_complete(unsigned int len, uint8_t proto) -{ - if (pico_fragments_reassemble(&ipv4_fragments, len, proto, PICO_PROTO_IPV4) == 0) - { - pico_timer_cancel(ipv4_fragments_timer); - ipv4_fragments_timer = 0; - } -} - -static void pico_ipv4_frag_timer_on(void) -{ - ipv4_fragments_timer = pico_timer_add( PICO_IPV4_FRAG_TIMEOUT, pico_frag_expire, &ipv4_fragments); - if (!ipv4_fragments_timer) { - frag_dbg("FRAG: Failed to start IPv4 
expiration timer\n"); - pico_fragments_empty_tree(&ipv4_fragments); - } -} - -static int pico_ipv4_frag_match(struct pico_frame *a, struct pico_frame *b) -{ - struct pico_ipv4_hdr *ha, *hb; - if (!a || !b) - return -1; - - ha = (struct pico_ipv4_hdr *)a->net_hdr; - hb = (struct pico_ipv4_hdr *)b->net_hdr; - if (!ha || !hb) - return -2; - - if (memcmp(&(ha->src.addr), &(hb->src.addr), PICO_SIZE_IP4) != 0) - return 1; - - if (memcmp(&(ha->dst.addr), &(hb->dst.addr), PICO_SIZE_IP4) != 0) - return 2; - - return 0; -} -#endif - -static void pico_fragments_complete(unsigned int bookmark, uint8_t proto, uint8_t net) -{ - if (0) {} - -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) - else if (net == PICO_PROTO_IPV4) - { - pico_ipv4_fragments_complete(bookmark, proto); - } -#endif -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) - else if (net == PICO_PROTO_IPV6) - { - pico_ipv6_fragments_complete(bookmark, proto); - } -#endif -} - -static void pico_fragments_empty_tree(struct pico_tree *tree) -{ - struct pico_tree_node *index, *tmp; - - if (!tree) - { - return; - } - - pico_tree_foreach_safe(index, tree, tmp) { - struct pico_frame * old = index->keyValue; - pico_tree_delete(tree, old); - pico_frame_discard(old); - } - -} - -static int pico_fragments_check_complete(struct pico_tree *tree, uint8_t proto, uint8_t net) -{ - struct pico_tree_node *index, *temp; - struct pico_frame *cur; - unsigned int bookmark = 0; - - if (!tree) - return 0; - - pico_tree_foreach_safe(index, tree, temp) { - cur = index->keyValue; - if (cur) { - if (pico_fragments_get_offset(cur, net) != bookmark) - return -1; - - bookmark += cur->transport_len; - if (!pico_fragments_get_more_flag(cur, net)) { - pico_fragments_complete(bookmark, proto, net); - return 0; - } - } - } - return 1; -} - -static void pico_frag_expire(pico_time now, void *arg) -{ - struct pico_tree *tree = (struct pico_tree *) arg; - struct pico_frame *first = NULL; - IGNORE_PARAMETER(now); - - if 
(!tree) - { - frag_dbg("Expired packet but no tree supplied!\n"); - return; - } - - first = pico_tree_first(tree); - - if (!first) { - frag_dbg("Empty tree - not sending notify\n"); - return; - } - - pico_fragments_send_notify(first); - - pico_fragments_empty_tree(tree); -} - -static void pico_fragments_send_notify(struct pico_frame *first) -{ - uint8_t net = 0; - - if (!first) - { - return; - } - - if (0) {} - -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) - else if (IS_IPV4(first)) - { - net = PICO_PROTO_IPV4; - frag_dbg("Packet expired! ID:%hu\n", ipv4_cur_frag_id); - } - -#endif -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) - else if (IS_IPV6(first)) - { - net = PICO_PROTO_IPV6; - frag_dbg("Packet expired! ID:%hu\n", ipv6_cur_frag_id); - } - -#endif - - if (((pico_fragments_get_offset(first, net) == 0) && (pico_frame_dst_is_unicast(first)))) - { - frag_dbg("sending notify\n"); - pico_notify_frag_expired(first); - } - else - { - frag_dbg("Not first packet or not unicast address, not sending notify"); - } -} - -static int pico_fragments_reassemble(struct pico_tree *tree, unsigned int len, uint8_t proto, uint8_t net) -{ - struct pico_tree_node *index, *tmp; - struct pico_frame *f; - uint16_t header_length = 0; - unsigned int bookmark = 0; - struct pico_frame *full = NULL; - struct pico_frame *first = NULL; - - if (!tree) - { - frag_dbg("Cannot reassemble packet, no tree supplied!\n"); - return -1; - } - - first = pico_tree_first(tree); - - if (!first) - { - frag_dbg("Cannot reassemble packet, empty tree supplied!\n"); - return -2; - } - - header_length = pico_fragments_get_header_length(net); - - if (!header_length) - { - return -3; - } - - full = pico_frame_alloc((uint16_t)(header_length + len)); - if (full) { - full->net_hdr = full->buffer; - full->net_len = header_length; - memcpy(full->net_hdr, first->net_hdr, full->net_len); - full->transport_hdr = full->net_hdr + full->net_len; - full->transport_len = (uint16_t)len; - 
full->dev = first->dev; - pico_tree_foreach_safe(index, tree, tmp) { - f = index->keyValue; - memcpy(full->transport_hdr + bookmark, f->transport_hdr, f->transport_len); - bookmark += f->transport_len; - pico_tree_delete(tree, f); - pico_frame_discard(f); - } - if (pico_transport_receive(full, proto) == -1) - { - pico_frame_discard(full); - } - - return 0; - } - - return 1; -} - -static uint16_t pico_fragments_get_header_length(uint8_t net) -{ - if (0) {} - -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) - else if (net == PICO_PROTO_IPV4) - { - return PICO_SIZE_IP4HDR; - } -#endif -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) - else if (net == PICO_PROTO_IPV6) - { - return PICO_SIZE_IP6HDR; - } -#endif - - return 0; -} - -static int pico_fragments_get_more_flag(struct pico_frame *frame, uint8_t net) -{ - if (!frame) - { - frag_dbg("no frame given to determine more flag\n"); - return 0; - } - - if (0) {} - -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) - else if (net == PICO_PROTO_IPV4) - { - return IP4_FRAG_MORE(frame->frag); - } -#endif -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) - else if (net == PICO_PROTO_IPV6) - { - return IP6_FRAG_MORE(frame->frag); - } -#endif - - return 0; -} - -static uint32_t pico_fragments_get_offset(struct pico_frame *frame, uint8_t net) -{ - if (!frame) - { - frag_dbg("no frame given to determine offset\n"); - return 0; - } - - if (0) {} - -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) - else if (net == PICO_PROTO_IPV4) - { - return IP4_FRAG_OFF(frame->frag); - } -#endif -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) - else if (net == PICO_PROTO_IPV6) - { - return IP6_FRAG_OFF(frame->frag); - } -#endif - - return 0; -} - -void pico_ipv6_process_frag(struct pico_ipv6_exthdr *frag, struct pico_frame *f, uint8_t proto) -{ -#if defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_IPV6FRAG) - struct pico_frame *first = NULL; - 
- if (!f || !frag) - { - frag_dbg("Bad arguments provided to pico_ipv6_process_frag\n"); - return; - } - - first = pico_tree_first(&ipv6_fragments); - - if (first) - { - if ((pico_ipv6_frag_match(f, first) == 0 && (IP6_FRAG_ID(frag) == ipv6_cur_frag_id))) { - struct pico_frame *temp = NULL; - - temp = pico_frame_copy(f); - - if (!temp) { - frag_dbg("Could not allocate memory to continue reassembly of IPV6 fragmented packet (id: %hu)\n", ipv6_cur_frag_id); - return; - } - - if (pico_tree_insert(&ipv6_fragments, temp)) { - frag_dbg("FRAG: Could not insert picoframe in tree\n"); - pico_frame_discard(temp); - return; - } - } - } - else - { - struct pico_frame *temp = NULL; - - if (ipv6_cur_frag_id && (IP6_FRAG_ID(frag) == ipv6_cur_frag_id)) { - /* Discard late arrivals, without firing the timer. */ - frag_dbg("discarded late arrival, exp:%hu found:%hu\n", ipv6_cur_frag_id, IP6_FRAG_ID(frag)); - return; - } - - temp = pico_frame_copy(f); - - if (!temp) { - frag_dbg("Could not allocate memory to start reassembly of fragmented packet\n"); - return; - } - - pico_ipv6_frag_timer_on(); - ipv6_cur_frag_id = IP6_FRAG_ID(frag); - frag_dbg("Started new reassembly, ID:%hu\n", ipv6_cur_frag_id); - - if (pico_tree_insert(&ipv6_fragments, temp)) { - frag_dbg("FRAG: Could not insert picoframe in tree\n"); - pico_frame_discard(temp); - return; - } - } - - pico_fragments_check_complete(&ipv6_fragments, proto, PICO_PROTO_IPV6); -#else - IGNORE_PARAMETER(frag); - IGNORE_PARAMETER(f); - IGNORE_PARAMETER(proto); -#endif -} - -void pico_ipv4_process_frag(struct pico_ipv4_hdr *hdr, struct pico_frame *f, uint8_t proto) -{ -#if defined(PICO_SUPPORT_IPV4) && defined(PICO_SUPPORT_IPV4FRAG) - struct pico_frame *first = NULL; - - if (!f || !hdr) - { - frag_dbg("Bad arguments provided to pico_ipv4_process_frag\n"); - return; - } - - first = pico_tree_first(&ipv4_fragments); - - if (first) - { - /* fragments from old packets still in tree, and new first fragment ? 
*/ - if ((IP4_FRAG_ID(hdr) != ipv4_cur_frag_id) && (IP4_FRAG_OFF(f->frag) == 0)) { - pico_fragments_empty_tree(&ipv4_fragments); - - first = NULL; - ipv4_cur_frag_id = 0; - } - - if ((pico_ipv4_frag_match(f, first) == 0 && (IP4_FRAG_ID(hdr) == ipv4_cur_frag_id))) { - struct pico_frame *temp = NULL; - - temp = pico_frame_copy(f); - - if (!temp) { - frag_dbg("Could not allocate memory to continue reassembly of IPV4 fragmented packet (id: %hu)\n", ipv4_cur_frag_id); - return; - } - - if (pico_tree_insert(&ipv4_fragments, temp)) { - frag_dbg("FRAG: Could not insert picoframe in tree\n"); - pico_frame_discard(temp); - return; - } - } - } - else - { - struct pico_frame *temp = NULL; - - if (ipv4_cur_frag_id && (IP4_FRAG_ID(hdr) == ipv4_cur_frag_id)) { - /* Discard late arrivals, without firing the timer */ - return; - } - - temp = pico_frame_copy(f); - - if (!temp) { - frag_dbg("Could not allocate memory to start reassembly fragmented packet\n"); - return; - } - - pico_ipv4_frag_timer_on(); - ipv4_cur_frag_id = IP4_FRAG_ID(hdr); - frag_dbg("Started new reassembly, ID:%hu\n", ipv4_cur_frag_id); - - if (pico_tree_insert(&ipv4_fragments, temp)) { - frag_dbg("FRAG: Could not insert picoframe in tree\n"); - pico_frame_discard(temp); - return; - } - } - - pico_fragments_check_complete(&ipv4_fragments, proto, PICO_PROTO_IPV4); -#else - IGNORE_PARAMETER(hdr); - IGNORE_PARAMETER(f); - IGNORE_PARAMETER(proto); -#endif -} diff --git a/kernel/picotcp/modules/pico_fragments.h b/kernel/picotcp/modules/pico_fragments.h deleted file mode 100644 index e51ec44..0000000 --- a/kernel/picotcp/modules/pico_fragments.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef PICO_FRAGMENTS_H -#define PICO_FRAGMENTS_H -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_addressing.h" -#include "pico_frame.h" - -void pico_ipv6_process_frag(struct pico_ipv6_exthdr *frag, struct pico_frame *f, uint8_t proto); -void pico_ipv4_process_frag(struct pico_ipv4_hdr *hdr, struct pico_frame *f, uint8_t proto); - 
-#endif diff --git a/kernel/picotcp/modules/pico_hotplug_detection.c b/kernel/picotcp/modules/pico_hotplug_detection.c deleted file mode 100644 index 0c362cb..0000000 --- a/kernel/picotcp/modules/pico_hotplug_detection.c +++ /dev/null @@ -1,207 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Frederik Van Slycken - *********************************************************************/ -#include "pico_protocol.h" -#include "pico_hotplug_detection.h" -#include "pico_tree.h" -#include "pico_device.h" - -struct pico_hotplug_device { - struct pico_device *dev; - int prev_state; - struct pico_tree callbacks; - struct pico_tree init_callbacks; /* functions we still need to call for initialization */ -}; - -static uint32_t timer_id = 0; - -static int pico_hotplug_dev_cmp(void *ka, void *kb) -{ - struct pico_hotplug_device *a = ka, *b = kb; - if (a->dev->hash < b->dev->hash) - return -1; - - if (a->dev->hash > b->dev->hash) - return 1; - - return 0; -} - -static int callback_compare(void *ka, void *kb) -{ - if (ka < kb) - return -1; - - if (ka > kb) - return 1; - - return 0; -} - -static PICO_TREE_DECLARE(Hotplug_device_tree, pico_hotplug_dev_cmp); - -static void initial_callbacks(struct pico_hotplug_device *hpdev, int event) -{ - struct pico_tree_node *cb_node = NULL, *cb_safe = NULL; - void (*cb)(struct pico_device *dev, int event); - pico_tree_foreach_safe(cb_node, &(hpdev->init_callbacks), cb_safe) - { - cb = cb_node->keyValue; - cb(hpdev->dev, event); - pico_tree_delete(&hpdev->init_callbacks, cb); - } -} - -static void execute_callbacks(struct pico_hotplug_device *hpdev, int new_state, int event) -{ - struct pico_tree_node *cb_node = NULL, *cb_safe = NULL; - void (*cb)(struct pico_device *dev, int event); - if (new_state != hpdev->prev_state) - { - /* we don't know if one of the 
callbacks might deregister, so be safe */ - pico_tree_foreach_safe(cb_node, &(hpdev->callbacks), cb_safe) - { - cb = cb_node->keyValue; - cb(hpdev->dev, event); - } - hpdev->prev_state = new_state; - } -} - -static void timer_cb(__attribute__((unused)) pico_time t, __attribute__((unused)) void*v) -{ - struct pico_tree_node *node = NULL, *safe = NULL; - int new_state, event; - struct pico_hotplug_device *hpdev = NULL; - - /* we don't know if one of the callbacks might deregister, so be safe */ - pico_tree_foreach_safe(node, &Hotplug_device_tree, safe) - { - hpdev = node->keyValue; - new_state = hpdev->dev->link_state(hpdev->dev); - - if (new_state == 1) { - event = PICO_HOTPLUG_EVENT_UP; - } else { - event = PICO_HOTPLUG_EVENT_DOWN; - } - - initial_callbacks(hpdev, event); - execute_callbacks(hpdev, new_state, event); - } - - timer_id = pico_timer_add(PICO_HOTPLUG_INTERVAL, &timer_cb, NULL); - if (timer_id == 0) { - dbg("HOTPLUG: Failed to start timer\n"); - } -} - -static int ensure_hotplug_timer(void) -{ - if (timer_id == 0) - { - timer_id = pico_timer_add(PICO_HOTPLUG_INTERVAL, &timer_cb, NULL); - if (timer_id == 0) { - dbg("HOTPLUG: Failed to start timer\n"); - return -1; - } - } - - return 0; -} - -static void disable_hotplug_timer(void) -{ - if (timer_id != 0) - { - pico_timer_cancel(timer_id); - timer_id = 0; - } -} - -int pico_hotplug_register(struct pico_device *dev, void (*cb)(struct pico_device *dev, int event)) -{ - struct pico_hotplug_device *hotplug_dev; - struct pico_hotplug_device search = { - .dev = dev - }; - - /* If it does not have a link_state, */ - /* the device does not support hotplug detection */ - if (dev->link_state == NULL) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - } - - hotplug_dev = (struct pico_hotplug_device*)pico_tree_findKey(&Hotplug_device_tree, &search); - if (!hotplug_dev ) - { - hotplug_dev = PICO_ZALLOC(sizeof(struct pico_hotplug_device)); - if (!hotplug_dev) - { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - 
hotplug_dev->dev = dev; - hotplug_dev->prev_state = dev->link_state(hotplug_dev->dev); - hotplug_dev->callbacks.root = &LEAF; - hotplug_dev->callbacks.compare = &callback_compare; - hotplug_dev->init_callbacks.root = &LEAF; - hotplug_dev->init_callbacks.compare = &callback_compare; - if (pico_tree_insert(&Hotplug_device_tree, hotplug_dev)) { - PICO_FREE(hotplug_dev); - return -1; - } - } - - if (pico_tree_insert(&(hotplug_dev->callbacks), cb) == &LEAF) { - PICO_FREE(hotplug_dev); - return -1; - } - - if (pico_tree_insert(&(hotplug_dev->init_callbacks), cb) == &LEAF) { - pico_tree_delete(&(hotplug_dev->callbacks), cb); - PICO_FREE(hotplug_dev); - return -1; - } - - if (ensure_hotplug_timer() < 0) { - pico_hotplug_deregister((struct pico_device *)hotplug_dev, cb); - return -1; - } - - return 0; -} - -int pico_hotplug_deregister(struct pico_device *dev, void (*cb)(struct pico_device *dev, int event)) -{ - struct pico_hotplug_device*hotplug_dev; - struct pico_hotplug_device search = { - .dev = dev - }; - - hotplug_dev = (struct pico_hotplug_device*)pico_tree_findKey(&Hotplug_device_tree, &search); - if (!hotplug_dev) - /* wasn't registered */ - return 0; - - pico_tree_delete(&hotplug_dev->callbacks, cb); - pico_tree_delete(&hotplug_dev->init_callbacks, cb); - if (pico_tree_empty(&hotplug_dev->callbacks)) - { - pico_tree_delete(&Hotplug_device_tree, hotplug_dev); - PICO_FREE(hotplug_dev); - } - - if (pico_tree_empty(&Hotplug_device_tree)) - { - disable_hotplug_timer(); - } - - return 0; -} - diff --git a/kernel/picotcp/modules/pico_hotplug_detection.h b/kernel/picotcp/modules/pico_hotplug_detection.h deleted file mode 100644 index 1ab7d85..0000000 --- a/kernel/picotcp/modules/pico_hotplug_detection.h +++ /dev/null @@ -1,23 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Frederik Van Slycken - *********************************************************************/ -#ifndef INCLUDE_PICO_SUPPORT_HOTPLUG -#define INCLUDE_PICO_SUPPORT_HOTPLUG -#include "pico_stack.h" - -#define PICO_HOTPLUG_EVENT_UP 1 /* link went up */ -#define PICO_HOTPLUG_EVENT_DOWN 2 /* link went down */ - -#define PICO_HOTPLUG_INTERVAL 100 - -/* register your callback to be notified of hotplug events on a certain device. - * Note that each callback will be called at least once, shortly after adding, for initialization. - */ -int pico_hotplug_register(struct pico_device *dev, void (*cb)(struct pico_device *dev, int event)); -int pico_hotplug_deregister(struct pico_device *dev, void (*cb)(struct pico_device *dev, int event)); - -#endif /* _INCLUDE_PICO_SUPPORT_HOTPLUG */ - diff --git a/kernel/picotcp/modules/pico_icmp4.c b/kernel/picotcp/modules/pico_icmp4.c deleted file mode 100644 index 3a8ae17..0000000 --- a/kernel/picotcp/modules/pico_icmp4.c +++ /dev/null @@ -1,434 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - Authors: Daniele Lacamera - *********************************************************************/ - - -#include "pico_icmp4.h" -#include "pico_config.h" -#include "pico_ipv4.h" -#include "pico_eth.h" -#include "pico_device.h" -#include "pico_stack.h" -#include "pico_tree.h" - -/* Queues */ -static struct pico_queue icmp_in = { - 0 -}; -static struct pico_queue icmp_out = { - 0 -}; - - -/* Functions */ - -static int pico_icmp4_checksum(struct pico_frame *f) -{ - struct pico_icmp4_hdr *hdr = (struct pico_icmp4_hdr *) f->transport_hdr; - if (!hdr) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - hdr->crc = 0; - hdr->crc = short_be(pico_checksum(hdr, f->transport_len)); - return 0; -} - -#ifdef PICO_SUPPORT_PING -static void ping_recv_reply(struct pico_frame *f); -#endif - -static int pico_icmp4_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_icmp4_hdr *hdr = (struct pico_icmp4_hdr *) f->transport_hdr; - static int firstpkt = 1; - static uint16_t last_id = 0; - static uint16_t last_seq = 0; - IGNORE_PARAMETER(self); - - if (hdr->type == PICO_ICMP_ECHO) { - hdr->type = PICO_ICMP_ECHOREPLY; - /* outgoing frames require a f->len without the ethernet header len */ - if (f->dev && f->dev->eth) - f->len -= PICO_SIZE_ETHHDR; - - if (!firstpkt && (hdr->hun.ih_idseq.idseq_id == last_id) && (last_seq == hdr->hun.ih_idseq.idseq_seq)) { - /* The network duplicated the echo. Do not reply. 
*/ - pico_frame_discard(f); - return 0; - } - - firstpkt = 0; - last_id = hdr->hun.ih_idseq.idseq_id; - last_seq = hdr->hun.ih_idseq.idseq_seq; - pico_icmp4_checksum(f); - pico_ipv4_rebound(f); - } else if (hdr->type == PICO_ICMP_UNREACH) { - f->net_hdr = f->transport_hdr + PICO_ICMPHDR_UN_SIZE; - pico_ipv4_unreachable(f, hdr->code); - } else if (hdr->type == PICO_ICMP_ECHOREPLY) { -#ifdef PICO_SUPPORT_PING - ping_recv_reply(f); -#endif - pico_frame_discard(f); - } else { - pico_frame_discard(f); - } - - return 0; -} - -static int pico_icmp4_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - IGNORE_PARAMETER(f); - dbg("Called %s\n", __FUNCTION__); - return 0; -} - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_icmp4 = { - .name = "icmp4", - .proto_number = PICO_PROTO_ICMP4, - .layer = PICO_LAYER_TRANSPORT, - .process_in = pico_icmp4_process_in, - .process_out = pico_icmp4_process_out, - .q_in = &icmp_in, - .q_out = &icmp_out, -}; - -static int pico_icmp4_notify(struct pico_frame *f, uint8_t type, uint8_t code) -{ - struct pico_frame *reply; - struct pico_icmp4_hdr *hdr; - struct pico_ipv4_hdr *info; - uint16_t f_tot_len; - - if (f == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - f_tot_len = short_be(((struct pico_ipv4_hdr *)f->net_hdr)->len); - - if (f_tot_len < (sizeof(struct pico_ipv4_hdr))) - return -1; - - /* Truncate tot len to be at most 8 bytes + iphdr */ - if (f_tot_len > (sizeof(struct pico_ipv4_hdr) + 8u)) { - f_tot_len = (sizeof(struct pico_ipv4_hdr) + 8u); - } - - reply = pico_proto_ipv4.alloc(&pico_proto_ipv4, f->dev, (uint16_t) (f_tot_len + PICO_ICMPHDR_UN_SIZE)); - info = (struct pico_ipv4_hdr*)(f->net_hdr); - hdr = (struct pico_icmp4_hdr *) reply->transport_hdr; - hdr->type = type; - hdr->code = code; - hdr->hun.ih_pmtu.ipm_nmtu = short_be(1500); - hdr->hun.ih_pmtu.ipm_void = 0; - reply->transport_len = (uint16_t)(f_tot_len + PICO_ICMPHDR_UN_SIZE); - reply->payload = 
reply->transport_hdr + PICO_ICMPHDR_UN_SIZE; - memcpy(reply->payload, f->net_hdr, f_tot_len); - pico_icmp4_checksum(reply); - pico_ipv4_frame_push(reply, &info->src, PICO_PROTO_ICMP4); - return 0; -} - -int pico_icmp4_port_unreachable(struct pico_frame *f) -{ - /*Parameter check executed in pico_icmp4_notify*/ - return pico_icmp4_notify(f, PICO_ICMP_UNREACH, PICO_ICMP_UNREACH_PORT); -} - -int pico_icmp4_proto_unreachable(struct pico_frame *f) -{ - /*Parameter check executed in pico_icmp4_notify*/ - return pico_icmp4_notify(f, PICO_ICMP_UNREACH, PICO_ICMP_UNREACH_PROTOCOL); -} - -int pico_icmp4_dest_unreachable(struct pico_frame *f) -{ - /*Parameter check executed in pico_icmp4_notify*/ - return pico_icmp4_notify(f, PICO_ICMP_UNREACH, PICO_ICMP_UNREACH_HOST); -} - -int pico_icmp4_ttl_expired(struct pico_frame *f) -{ - /*Parameter check executed in pico_icmp4_notify*/ - return pico_icmp4_notify(f, PICO_ICMP_TIME_EXCEEDED, PICO_ICMP_TIMXCEED_INTRANS); -} - -MOCKABLE int pico_icmp4_frag_expired(struct pico_frame *f) -{ - /*Parameter check executed in pico_icmp4_notify*/ - return pico_icmp4_notify(f, PICO_ICMP_TIME_EXCEEDED, PICO_ICMP_TIMXCEED_REASS); -} - -int pico_icmp4_mtu_exceeded(struct pico_frame *f) -{ - /*Parameter check executed in pico_icmp4_notify*/ - return pico_icmp4_notify(f, PICO_ICMP_UNREACH, PICO_ICMP_UNREACH_NEEDFRAG); -} - -int pico_icmp4_packet_filtered(struct pico_frame *f) -{ - /*Parameter check executed in pico_icmp4_notify*/ - /*Packet Filtered: type 3, code 13 (Communication Administratively Prohibited)*/ - return pico_icmp4_notify(f, PICO_ICMP_UNREACH, PICO_ICMP_UNREACH_FILTER_PROHIB); -} - -int pico_icmp4_param_problem(struct pico_frame *f, uint8_t code) -{ - return pico_icmp4_notify(f, PICO_ICMP_PARAMPROB, code); -} - -/***********************/ -/* Ping implementation */ -/***********************/ -/***********************/ -/***********************/ -/***********************/ - - -#ifdef PICO_SUPPORT_PING - - -struct pico_icmp4_ping_cookie 
-{ - struct pico_ip4 dst; - uint16_t err; - uint16_t id; - uint16_t seq; - uint16_t size; - int count; - pico_time timestamp; - int interval; - int timeout; - void (*cb)(struct pico_icmp4_stats*); -}; - -static int cookie_compare(void *ka, void *kb) -{ - struct pico_icmp4_ping_cookie *a = ka, *b = kb; - if (a->id < b->id) - return -1; - - if (a->id > b->id) - return 1; - - return (a->seq - b->seq); -} - -static PICO_TREE_DECLARE(Pings, cookie_compare); - -static int8_t pico_icmp4_send_echo(struct pico_icmp4_ping_cookie *cookie) -{ - struct pico_frame *echo = NULL; - struct pico_icmp4_hdr *hdr; - struct pico_device *dev = pico_ipv4_source_dev_find(&cookie->dst); - if (!dev) - return -1; - - echo = pico_proto_ipv4.alloc(&pico_proto_ipv4, dev, (uint16_t)(PICO_ICMPHDR_UN_SIZE + cookie->size)); - if (!echo) - return -1; - - hdr = (struct pico_icmp4_hdr *) echo->transport_hdr; - - hdr->type = PICO_ICMP_ECHO; - hdr->code = 0; - hdr->hun.ih_idseq.idseq_id = short_be(cookie->id); - hdr->hun.ih_idseq.idseq_seq = short_be(cookie->seq); - echo->transport_len = (uint16_t)(PICO_ICMPHDR_UN_SIZE + cookie->size); - echo->payload = echo->transport_hdr + PICO_ICMPHDR_UN_SIZE; - echo->payload_len = cookie->size; - /* XXX: Fill payload */ - pico_icmp4_checksum(echo); - pico_ipv4_frame_push(echo, &cookie->dst, PICO_PROTO_ICMP4); - return 0; -} - - -static void ping_timeout(pico_time now, void *arg) -{ - struct pico_icmp4_ping_cookie *cookie = (struct pico_icmp4_ping_cookie *)arg; - IGNORE_PARAMETER(now); - - if(pico_tree_findKey(&Pings, cookie)) { - if (cookie->err == PICO_PING_ERR_PENDING) { - struct pico_icmp4_stats stats; - stats.dst = cookie->dst; - stats.seq = cookie->seq; - stats.time = 0; - stats.size = cookie->size; - stats.err = PICO_PING_ERR_TIMEOUT; - dbg(" ---- Ping timeout!!!\n"); - cookie->cb(&stats); - } - - pico_tree_delete(&Pings, cookie); - PICO_FREE(cookie); - } -} - -static void next_ping(pico_time now, void *arg); -static int send_ping(struct pico_icmp4_ping_cookie 
*cookie) -{ - uint32_t timeout_timer = 0; - struct pico_icmp4_stats stats; - pico_icmp4_send_echo(cookie); - cookie->timestamp = pico_tick; - timeout_timer = pico_timer_add((uint32_t)cookie->timeout, ping_timeout, cookie); - if (!timeout_timer) { - goto fail; - } - if (cookie->seq < (uint16_t)cookie->count) { - if (!pico_timer_add((uint32_t)cookie->interval, next_ping, cookie)) { - pico_timer_cancel(timeout_timer); - goto fail; - } - } - return 0; - -fail: - dbg("ICMP4: Failed to start timer\n"); - cookie->err = PICO_PING_ERR_ABORTED; - stats.err = cookie->err; - cookie->cb(&stats); - pico_tree_delete(&Pings, cookie); - - return -1; -} - -static void next_ping(pico_time now, void *arg) -{ - struct pico_icmp4_ping_cookie *newcookie, *cookie = (struct pico_icmp4_ping_cookie *)arg; - IGNORE_PARAMETER(now); - - if(pico_tree_findKey(&Pings, cookie)) { - if (cookie->err == PICO_PING_ERR_ABORTED) - return; - - if (cookie->seq < (uint16_t)cookie->count) { - newcookie = PICO_ZALLOC(sizeof(struct pico_icmp4_ping_cookie)); - if (!newcookie) - return; - - memcpy(newcookie, cookie, sizeof(struct pico_icmp4_ping_cookie)); - newcookie->seq++; - - if (pico_tree_insert(&Pings, newcookie)) { - dbg("ICMP4: Failed to insert new cookie in tree \n"); - PICO_FREE(newcookie); - return; - } - - if (send_ping(newcookie)) { - dbg("ICMP4: Failed to send ping\n"); - PICO_FREE(newcookie); - } - } - } -} - - -static void ping_recv_reply(struct pico_frame *f) -{ - struct pico_icmp4_ping_cookie test, *cookie; - struct pico_icmp4_hdr *hdr = (struct pico_icmp4_hdr *) f->transport_hdr; - test.id = short_be(hdr->hun.ih_idseq.idseq_id ); - test.seq = short_be(hdr->hun.ih_idseq.idseq_seq); - - cookie = pico_tree_findKey(&Pings, &test); - if (cookie) { - struct pico_icmp4_stats stats; - if (cookie->err == PICO_PING_ERR_ABORTED) - return; - - cookie->err = PICO_PING_ERR_REPLIED; - stats.dst = ((struct pico_ipv4_hdr *)f->net_hdr)->src; - stats.seq = cookie->seq; - stats.size = cookie->size; - stats.time = 
pico_tick - cookie->timestamp; - stats.err = cookie->err; - stats.ttl = ((struct pico_ipv4_hdr *)f->net_hdr)->ttl; - if(cookie->cb != NULL) - cookie->cb(&stats); - } else { - dbg("Reply for seq=%d, not found.\n", test.seq); - } -} - -int pico_icmp4_ping(char *dst, int count, int interval, int timeout, int size, void (*cb)(struct pico_icmp4_stats *)) -{ - static uint16_t next_id = 0x91c0; - struct pico_icmp4_ping_cookie *cookie; - - if((dst == NULL) || (interval == 0) || (timeout == 0) || (count == 0)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - cookie = PICO_ZALLOC(sizeof(struct pico_icmp4_ping_cookie)); - if (!cookie) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if (pico_string_to_ipv4(dst, (uint32_t *)&cookie->dst.addr) < 0) { - pico_err = PICO_ERR_EINVAL; - PICO_FREE(cookie); - return -1; - } - - cookie->seq = 1; - cookie->id = next_id++; - cookie->err = PICO_PING_ERR_PENDING; - cookie->size = (uint16_t)size; - cookie->interval = interval; - cookie->timeout = timeout; - cookie->cb = cb; - cookie->count = count; - - if (pico_tree_insert(&Pings, cookie)) { - dbg("ICMP4: Failed to insert cookie in tree \n"); - PICO_FREE(cookie); - return -1; - } - - if (send_ping(cookie)) { - PICO_FREE(cookie); - return -1; - } - - return cookie->id; - -} - -int pico_icmp4_ping_abort(int id) -{ - struct pico_tree_node *node; - int found = 0; - pico_tree_foreach(node, &Pings) - { - struct pico_icmp4_ping_cookie *ck = - (struct pico_icmp4_ping_cookie *) node->keyValue; - if (ck->id == (uint16_t)id) { - ck->err = PICO_PING_ERR_ABORTED; - found++; - } - } - if (found > 0) - return 0; /* OK if at least one pending ping has been canceled */ - - pico_err = PICO_ERR_ENOENT; - return -1; -} - -#endif diff --git a/kernel/picotcp/modules/pico_icmp4.h b/kernel/picotcp/modules/pico_icmp4.h deleted file mode 100644 index 7a3a5e8..0000000 --- a/kernel/picotcp/modules/pico_icmp4.h +++ /dev/null @@ -1,162 +0,0 @@ 
-/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - *********************************************************************/ -#ifndef INCLUDE_PICO_ICMP4 -#define INCLUDE_PICO_ICMP4 -#include "pico_defines.h" -#include "pico_addressing.h" -#include "pico_protocol.h" - - -extern struct pico_protocol pico_proto_icmp4; - -PACKED_STRUCT_DEF pico_icmp4_hdr { - uint8_t type; - uint8_t code; - uint16_t crc; - - /* hun */ - PACKED_UNION_DEF hun_u { - uint8_t ih_pptr; - struct pico_ip4 ih_gwaddr; - PEDANTIC_STRUCT_DEF ih_idseq_s { - uint16_t idseq_id; - uint16_t idseq_seq; - } ih_idseq; - uint32_t ih_void; - PEDANTIC_STRUCT_DEF ih_pmtu_s { - uint16_t ipm_void; - uint16_t ipm_nmtu; - } ih_pmtu; - PEDANTIC_STRUCT_DEF ih_rta_s { - uint8_t rta_numgw; - uint8_t rta_wpa; - uint16_t rta_lifetime; - } ih_rta; - } hun; - - /* dun */ - PACKED_UNION_DEF dun_u { - PEDANTIC_STRUCT_DEF id_ts_s { - uint32_t ts_otime; - uint32_t ts_rtime; - uint32_t ts_ttime; - } id_ts; - PEDANTIC_STRUCT_DEF id_ip_s { - uint32_t ip_options; - uint32_t ip_data_hi; - uint32_t ip_data_lo; - } id_ip; - PEDANTIC_STRUCT_DEF id_ra_s { - uint32_t ira_addr; - uint32_t ira_pref; - } id_ra; - uint32_t id_mask; - uint8_t id_data[1]; - } dun; -}; - -#define PICO_ICMPHDR_DRY_SIZE 4 -#define PICO_ICMPHDR_UN_SIZE 8u - -#define PICO_ICMP_ECHOREPLY 0 -#define PICO_ICMP_DEST_UNREACH 3 -#define PICO_ICMP_SOURCE_QUENCH 4 -#define PICO_ICMP_REDIRECT 5 -#define PICO_ICMP_ECHO 8 -#define PICO_ICMP_TIME_EXCEEDED 11 -#define PICO_ICMP_PARAMETERPROB 12 -#define PICO_ICMP_TIMESTAMP 13 -#define PICO_ICMP_TIMESTAMPREPLY 14 -#define PICO_ICMP_INFO_REQUEST 15 -#define PICO_ICMP_INFO_REPLY 16 -#define PICO_ICMP_ADDRESS 17 -#define PICO_ICMP_ADDRESSREPLY 18 - - -#define PICO_ICMP_UNREACH 3 -#define PICO_ICMP_SOURCEQUENCH 4 -#define PICO_ICMP_ROUTERADVERT 9 -#define 
PICO_ICMP_ROUTERSOLICIT 10 -#define PICO_ICMP_TIMXCEED 11 -#define PICO_ICMP_PARAMPROB 12 -#define PICO_ICMP_TSTAMP 13 -#define PICO_ICMP_TSTAMPREPLY 14 -#define PICO_ICMP_IREQ 15 -#define PICO_ICMP_IREQREPLY 16 -#define PICO_ICMP_MASKREQ 17 -#define PICO_ICMP_MASKREPLY 18 - -#define PICO_ICMP_MAXTYPE 18 - - -#define PICO_ICMP_UNREACH_NET 0 -#define PICO_ICMP_UNREACH_HOST 1 -#define PICO_ICMP_UNREACH_PROTOCOL 2 -#define PICO_ICMP_UNREACH_PORT 3 -#define PICO_ICMP_UNREACH_NEEDFRAG 4 -#define PICO_ICMP_UNREACH_SRCFAIL 5 -#define PICO_ICMP_UNREACH_NET_UNKNOWN 6 -#define PICO_ICMP_UNREACH_HOST_UNKNOWN 7 -#define PICO_ICMP_UNREACH_ISOLATED 8 -#define PICO_ICMP_UNREACH_NET_PROHIB 9 -#define PICO_ICMP_UNREACH_HOST_PROHIB 10 -#define PICO_ICMP_UNREACH_TOSNET 11 -#define PICO_ICMP_UNREACH_TOSHOST 12 -#define PICO_ICMP_UNREACH_FILTER_PROHIB 13 -#define PICO_ICMP_UNREACH_HOST_PRECEDENCE 14 -#define PICO_ICMP_UNREACH_PRECEDENCE_CUTOFF 15 - - -#define PICO_ICMP_REDIRECT_NET 0 -#define PICO_ICMP_REDIRECT_HOST 1 -#define PICO_ICMP_REDIRECT_TOSNET 2 -#define PICO_ICMP_REDIRECT_TOSHOST 3 - - -#define PICO_ICMP_TIMXCEED_INTRANS 0 -#define PICO_ICMP_TIMXCEED_REASS 1 - - -#define PICO_ICMP_PARAMPROB_OPTABSENT 1 - -#define PICO_SIZE_ICMP4HDR ((sizeof(struct pico_icmp4_hdr))) - -struct pico_icmp4_stats -{ - struct pico_ip4 dst; - unsigned long size; - unsigned long seq; - pico_time time; - unsigned long ttl; - int err; -}; - -int pico_icmp4_port_unreachable(struct pico_frame *f); -int pico_icmp4_proto_unreachable(struct pico_frame *f); -int pico_icmp4_dest_unreachable(struct pico_frame *f); -int pico_icmp4_mtu_exceeded(struct pico_frame *f); -int pico_icmp4_ttl_expired(struct pico_frame *f); -int pico_icmp4_frag_expired(struct pico_frame *f); -int pico_icmp4_ping(char *dst, int count, int interval, int timeout, int size, void (*cb)(struct pico_icmp4_stats *)); -int pico_icmp4_ping_abort(int id); - -#ifdef PICO_SUPPORT_ICMP4 -int pico_icmp4_packet_filtered(struct pico_frame *f); -int 
pico_icmp4_param_problem(struct pico_frame *f, uint8_t code); -#else -# define pico_icmp4_packet_filtered(f) (-1) -# define pico_icmp4_param_problem(f, c) (-1) -#endif /* PICO_SUPPORT_ICMP4 */ - -#define PICO_PING_ERR_REPLIED 0 -#define PICO_PING_ERR_TIMEOUT 1 -#define PICO_PING_ERR_UNREACH 2 -#define PICO_PING_ERR_ABORTED 3 -#define PICO_PING_ERR_PENDING 0xFFFF - -#endif diff --git a/kernel/picotcp/modules/pico_icmp6.c b/kernel/picotcp/modules/pico_icmp6.c deleted file mode 100644 index fdf6a98..0000000 --- a/kernel/picotcp/modules/pico_icmp6.c +++ /dev/null @@ -1,901 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Kristof Roelants, Daniele Lacamera - *********************************************************************/ - -#include "pico_config.h" -#include "pico_icmp6.h" -#include "pico_ipv6_nd.h" -#include "pico_6lowpan.h" -#include "pico_eth.h" -#include "pico_device.h" -#include "pico_stack.h" -#include "pico_tree.h" -#include "pico_socket.h" -#include "pico_mld.h" - -#ifdef DEBUG_ICMP6 - #define icmp6_dbg dbg -#else - #define icmp6_dbg(...) 
do { } while(0) -#endif - -static struct pico_queue icmp6_in; -static struct pico_queue icmp6_out; - -/****************************************************************************** - * Function prototypes - ******************************************************************************/ - -#ifdef PICO_SUPPORT_6LOWPAN -static int pico_6lp_nd_neighbor_solicitation(struct pico_device *dev, struct pico_ip6 *tgt, uint8_t type, struct pico_ip6 *dst); -#endif - -uint16_t pico_icmp6_checksum(struct pico_frame *f) -{ - struct pico_ipv6_hdr *ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - - struct pico_icmp6_hdr *icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - struct pico_ipv6_pseudo_hdr pseudo; - - pseudo.src = ipv6_hdr->src; - pseudo.dst = ipv6_hdr->dst; - pseudo.len = long_be(f->transport_len); - pseudo.nxthdr = PICO_PROTO_ICMP6; - - pseudo.zero[0] = 0; - pseudo.zero[1] = 0; - pseudo.zero[2] = 0; - - return pico_dualbuffer_checksum(&pseudo, sizeof(struct pico_ipv6_pseudo_hdr), icmp6_hdr, f->transport_len); -} - -#ifdef PICO_SUPPORT_PING -static void pico_icmp6_ping_recv_reply(struct pico_frame *f); -#endif - -static int pico_icmp6_send_echoreply(struct pico_frame *echo) -{ - struct pico_frame *reply = NULL; - struct pico_icmp6_hdr *ehdr = NULL, *rhdr = NULL; - struct pico_ip6 src; - struct pico_ip6 dst; - - reply = pico_proto_ipv6.alloc(&pico_proto_ipv6, echo->dev, (uint16_t)(echo->transport_len)); - if (!reply) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - echo->payload = echo->transport_hdr + PICO_ICMP6HDR_ECHO_REQUEST_SIZE; - reply->payload = reply->transport_hdr + PICO_ICMP6HDR_ECHO_REQUEST_SIZE; - reply->payload_len = echo->transport_len; - - ehdr = (struct pico_icmp6_hdr *)echo->transport_hdr; - rhdr = (struct pico_icmp6_hdr *)reply->transport_hdr; - rhdr->type = PICO_ICMP6_ECHO_REPLY; - rhdr->code = 0; - rhdr->msg.info.echo_reply.id = ehdr->msg.info.echo_reply.id; - rhdr->msg.info.echo_reply.seq = ehdr->msg.info.echo_request.seq; - 
memcpy(reply->payload, echo->payload, (uint32_t)(echo->transport_len - PICO_ICMP6HDR_ECHO_REQUEST_SIZE)); - rhdr->crc = 0; - rhdr->crc = short_be(pico_icmp6_checksum(reply)); - /* Get destination and source swapped */ - memcpy(dst.addr, ((struct pico_ipv6_hdr *)echo->net_hdr)->src.addr, PICO_SIZE_IP6); - memcpy(src.addr, ((struct pico_ipv6_hdr *)echo->net_hdr)->dst.addr, PICO_SIZE_IP6); - pico_ipv6_frame_push(reply, &src, &dst, PICO_PROTO_ICMP6, 0); - return 0; -} - -static int pico_icmp6_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_icmp6_hdr *hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - - IGNORE_PARAMETER(self); - - icmp6_dbg("Process IN, type = %d\n", hdr->type); - - switch (hdr->type) - { - case PICO_ICMP6_DEST_UNREACH: - pico_ipv6_unreachable(f, hdr->code); - break; - - case PICO_ICMP6_ECHO_REQUEST: - icmp6_dbg("ICMP6: Received ECHO REQ\n"); - f->transport_len = (uint16_t)(f->len - f->net_len - (uint16_t)(f->net_hdr - f->buffer)); - pico_icmp6_send_echoreply(f); - pico_frame_discard(f); - break; - - case PICO_ICMP6_ECHO_REPLY: -#ifdef PICO_SUPPORT_PING - pico_icmp6_ping_recv_reply(f); -#endif - pico_frame_discard(f); - break; -#if defined(PICO_SUPPORT_MCAST) && defined(PICO_SUPPORT_MLD) - case PICO_MLD_QUERY: - case PICO_MLD_REPORT: - case PICO_MLD_DONE: - case PICO_MLD_REPORTV2: - pico_mld_process_in(f); - break; -#endif - default: - return pico_ipv6_nd_recv(f); /* CAUTION -- Implies: pico_frame_discard in any case, keep in the default! 
*/ - } - return -1; -} - -static int pico_icmp6_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - IGNORE_PARAMETER(f); - return 0; -} - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_icmp6 = { - .name = "icmp6", - .proto_number = PICO_PROTO_ICMP6, - .layer = PICO_LAYER_TRANSPORT, - .process_in = pico_icmp6_process_in, - .process_out = pico_icmp6_process_out, - .q_in = &icmp6_in, - .q_out = &icmp6_out, -}; - -static int pico_icmp6_notify(struct pico_frame *f, uint8_t type, uint8_t code, uint32_t ptr) -{ - struct pico_frame *notice = NULL; - struct pico_ipv6_hdr *ipv6_hdr = NULL; - struct pico_icmp6_hdr *icmp6_hdr = NULL; - uint16_t len = 0; - - if (!f) - return -1; - - ipv6_hdr = (struct pico_ipv6_hdr *)(f->net_hdr); - len = (uint16_t)(short_be(ipv6_hdr->len) + PICO_SIZE_IP6HDR); - switch (type) - { - case PICO_ICMP6_DEST_UNREACH: - /* as much of invoking packet as possible without exceeding the minimum IPv6 MTU */ - if (PICO_SIZE_IP6HDR + PICO_ICMP6HDR_DEST_UNREACH_SIZE + len > PICO_IPV6_MIN_MTU) - len = PICO_IPV6_MIN_MTU - (PICO_SIZE_IP6HDR + PICO_ICMP6HDR_DEST_UNREACH_SIZE); - - notice = pico_proto_ipv6.alloc(&pico_proto_ipv6, f->dev, (uint16_t)(PICO_ICMP6HDR_DEST_UNREACH_SIZE + len)); - if (!notice) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - notice->payload = notice->transport_hdr + PICO_ICMP6HDR_DEST_UNREACH_SIZE; - notice->payload_len = len; - icmp6_hdr = (struct pico_icmp6_hdr *)notice->transport_hdr; - icmp6_hdr->msg.err.dest_unreach.unused = 0; - break; - - case PICO_ICMP6_TIME_EXCEEDED: - /* as much of invoking packet as possible without exceeding the minimum IPv6 MTU */ - if (PICO_SIZE_IP6HDR + PICO_ICMP6HDR_TIME_XCEEDED_SIZE + len > PICO_IPV6_MIN_MTU) - len = PICO_IPV6_MIN_MTU - (PICO_SIZE_IP6HDR + PICO_ICMP6HDR_TIME_XCEEDED_SIZE); - - notice = pico_proto_ipv6.alloc(&pico_proto_ipv6, f->dev, (uint16_t)(PICO_ICMP6HDR_TIME_XCEEDED_SIZE + len)); - if (!notice) { - pico_err = 
PICO_ERR_ENOMEM; - return -1; - } - - notice->payload = notice->transport_hdr + PICO_ICMP6HDR_TIME_XCEEDED_SIZE; - notice->payload_len = len; - icmp6_hdr = (struct pico_icmp6_hdr *)notice->transport_hdr; - icmp6_hdr->msg.err.time_exceeded.unused = 0; - break; - - case PICO_ICMP6_PARAM_PROBLEM: - if (PICO_SIZE_IP6HDR + PICO_ICMP6HDR_PARAM_PROBLEM_SIZE + len > PICO_IPV6_MIN_MTU) - len = PICO_IPV6_MIN_MTU - (PICO_SIZE_IP6HDR + PICO_ICMP6HDR_PARAM_PROBLEM_SIZE); - - notice = pico_proto_ipv6.alloc(&pico_proto_ipv6, f->dev, (uint16_t)(PICO_ICMP6HDR_PARAM_PROBLEM_SIZE + len)); - if (!notice) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - notice->payload = notice->transport_hdr + PICO_ICMP6HDR_PARAM_PROBLEM_SIZE; - notice->payload_len = len; - icmp6_hdr = (struct pico_icmp6_hdr *)notice->transport_hdr; - icmp6_hdr->msg.err.param_problem.ptr = long_be(ptr); - break; - - default: - return -1; - } - - icmp6_hdr->type = type; - icmp6_hdr->code = code; - memcpy(notice->payload, f->net_hdr, notice->payload_len); - /* f->src is set in frame_push, checksum calculated there */ - pico_ipv6_frame_push(notice, NULL, &ipv6_hdr->src, PICO_PROTO_ICMP6, 0); - return 0; -} - -int pico_icmp6_port_unreachable(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_multicast(hdr->dst.addr)) - return 0; - - return pico_icmp6_notify(f, PICO_ICMP6_DEST_UNREACH, PICO_ICMP6_UNREACH_PORT, 0); -} - -int pico_icmp6_proto_unreachable(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_multicast(hdr->dst.addr)) - return 0; - - return pico_icmp6_notify(f, PICO_ICMP6_DEST_UNREACH, PICO_ICMP6_UNREACH_ADDR, 0); -} - -int pico_icmp6_dest_unreachable(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_multicast(hdr->dst.addr)) - return 0; - - return pico_icmp6_notify(f, PICO_ICMP6_DEST_UNREACH, PICO_ICMP6_UNREACH_ADDR, 0); -} - -int 
pico_icmp6_ttl_expired(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_multicast(hdr->dst.addr)) - return 0; - - return pico_icmp6_notify(f, PICO_ICMP6_TIME_EXCEEDED, PICO_ICMP6_TIMXCEED_INTRANS, 0); -} - -int pico_icmp6_pkt_too_big(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_multicast(hdr->dst.addr)) - return 0; - - return pico_icmp6_notify(f, PICO_ICMP6_PKT_TOO_BIG, 0, 0); -} - -#ifdef PICO_SUPPORT_IPFILTER -int pico_icmp6_packet_filtered(struct pico_frame *f) -{ - return pico_icmp6_notify(f, PICO_ICMP6_DEST_UNREACH, PICO_ICMP6_UNREACH_ADMIN, 0); -} -#endif - -int pico_icmp6_parameter_problem(struct pico_frame *f, uint8_t problem, uint32_t ptr) -{ - return pico_icmp6_notify(f, PICO_ICMP6_PARAM_PROBLEM, problem, ptr); -} - -MOCKABLE int pico_icmp6_frag_expired(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_multicast(hdr->dst.addr)) - return 0; - - return pico_icmp6_notify(f, PICO_ICMP6_TIME_EXCEEDED, PICO_ICMP6_TIMXCEED_REASS, 0); -} - -/* Provide a Link-Layer Address Option, either Source (SLLAO) or Destination (DLLAO) */ -static int pico_icmp6_provide_llao(struct pico_icmp6_opt_lladdr *llao, uint8_t type, struct pico_device *dev, struct pico_ip6 *src) -{ -#ifdef PICO_SUPPORT_6LOWPAN - struct pico_6lowpan_info *info = (struct pico_6lowpan_info *)dev->eth; -#endif - IGNORE_PARAMETER(src); - llao->type = type; - - if (!dev->mode && dev->eth) { - memcpy(llao->addr.mac.addr, dev->eth->mac.addr, PICO_SIZE_ETH); - llao->len = 1; - } -#ifdef PICO_SUPPORT_6LOWPAN - else if (PICO_DEV_IS_6LOWPAN(dev) && dev->eth) { - if (src && IID_16(&src->addr[8])) { - memcpy(llao->addr.pan.data, (uint8_t *)&info->addr_short.addr, SIZE_6LOWPAN_SHORT); - memset(llao->addr.pan.data + SIZE_6LOWPAN_SHORT, 0, 4); - llao->len = 1; - } else { - memcpy(llao->addr.pan.data, info->addr_ext.addr, 
SIZE_6LOWPAN_EXT); - memset(llao->addr.pan.data + SIZE_6LOWPAN_EXT, 0, 6); - llao->len = 2; - } - } -#endif - else { - return -1; - } - - return 0; -} - -/* Prepares a ICMP6 neighbor solicitation message */ -static struct pico_frame *pico_icmp6_neigh_sol_prep(struct pico_device *dev, struct pico_ip6 *dst, uint16_t len) -{ - struct pico_icmp6_hdr *icmp = NULL; - struct pico_frame *sol = NULL; - IGNORE_PARAMETER(dev); - - /* Create pico_frame to contain the Neighbor Solicitation */ - sol = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, len); - if (!sol) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - sol->payload = sol->transport_hdr + len; - sol->payload_len = 0; - icmp = (struct pico_icmp6_hdr *)sol->transport_hdr; - icmp->type = PICO_ICMP6_NEIGH_SOL; - icmp->code = 0; - icmp->msg.info.neigh_sol.unused = 0; - icmp->msg.info.neigh_sol.target = *dst; - return sol; -} - -/* RFC 4861 $7.2.2: sending neighbor solicitations */ -int pico_icmp6_neighbor_solicitation(struct pico_device *dev, struct pico_ip6 *tgt, uint8_t type, struct pico_ip6 *dst) -{ - struct pico_ip6 daddr = {{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, 0xff, 0x00, 0x00, 0x00 }}; - struct pico_icmp6_opt_lladdr *llao = NULL; - struct pico_icmp6_hdr *icmp = NULL; - struct pico_frame *sol = NULL; - uint8_t i = 0; - uint16_t len = 0; -#ifndef PICO_SUPPORT_6LOWPAN - IGNORE_PARAMETER(dst); -#endif - - if (pico_ipv6_is_multicast(tgt->addr)) { - return -1; - } -#ifdef PICO_SUPPORT_6LOWPAN - else if (PICO_DEV_IS_6LOWPAN(dev)) { - return pico_6lp_nd_neighbor_solicitation(dev, tgt, type, dst); - } -#endif - else { - /* Determine the size frame needs to be for the Neighbor Solicitation */ - len = PICO_ICMP6HDR_NEIGH_SOL_SIZE; - if (PICO_ICMP6_ND_DAD != type) - len = (uint16_t)(len + 8); - - /* Prepare a neighbor solicitation message */ - sol = pico_icmp6_neigh_sol_prep(dev, tgt, len); - if (sol) { - icmp = (struct pico_icmp6_hdr *)sol->transport_hdr; - - /* Provide SLLAO if it's 
neighbor solicitation for DAD */ - llao = (struct pico_icmp6_opt_lladdr *)(((uint8_t *)&icmp->msg.info.neigh_sol) + sizeof(struct neigh_sol_s)); - if (PICO_ICMP6_ND_DAD != type && pico_icmp6_provide_llao(llao, PICO_ND_OPT_LLADDR_SRC, dev, NULL)) { - pico_frame_discard(sol); - return -1; - } else { - /* Determine destination address */ - if (type == PICO_ICMP6_ND_SOLICITED || type == PICO_ICMP6_ND_DAD) { - for (i = 1; i <= 3; ++i) - daddr.addr[PICO_SIZE_IP6 - i] = tgt->addr[PICO_SIZE_IP6 - i]; - } else { - daddr = *tgt; - } - - sol->dev = dev; - /* f->src is set in frame_push, checksum calculated there */ - pico_ipv6_frame_push(sol, NULL, &daddr, PICO_PROTO_ICMP6, (type == PICO_ICMP6_ND_DAD)); - return 0; - } - } - } - return -1; -} - -#ifdef PICO_SUPPORT_6LOWPAN -/* Provide an Address Registration Option */ -static void pico_6lp_nd_provide_aro(struct pico_icmp6_opt_aro *aro, struct pico_device *dev, uint8_t type) -{ - struct pico_6lowpan_info *info = (struct pico_6lowpan_info *)dev->eth; - aro->type = PICO_ND_OPT_ARO; - aro->len = 2; - aro->status = 0; - if (PICO_ICMP6_ND_DEREGISTER == type) - aro->lifetime = 0; - else - aro->lifetime = short_be(PICO_6LP_ND_DEFAULT_LIFETIME); - memcpy(aro->eui64.addr, info->addr_ext.addr, SIZE_6LOWPAN_EXT); -} - -/* Send an ICMP6 neighbor solicitation according to RFC6775 */ -static int pico_6lp_nd_neighbor_solicitation(struct pico_device *dev, struct pico_ip6 *tgt, uint8_t type, struct pico_ip6 *dst) -{ - uint32_t llao_len = IID_16(&tgt->addr[8]) ? 
8 : 16; - struct pico_icmp6_opt_lladdr *llao = NULL; - struct pico_icmp6_opt_aro *aro = NULL; - struct pico_icmp6_hdr *icmp = NULL; - struct pico_frame *sol = NULL; - uint16_t len = 0; - - /* Determine the size frame needs to be for the Neighbor Solicitation */ - len = (uint16_t)(PICO_ICMP6HDR_NEIGH_SOL_SIZE + llao_len); - if (PICO_ICMP6_ND_DAD == type) - len = (uint16_t)(len + sizeof(struct pico_icmp6_opt_aro)); - - /* Prepare a neighbor solicitation message */ - sol = pico_icmp6_neigh_sol_prep(dev, tgt, len); - if (sol) { - icmp = (struct pico_icmp6_hdr *)sol->transport_hdr; - - /* Provide SLLAO if it's a neighbor solicitation for address registration */ - llao = (struct pico_icmp6_opt_lladdr *)(((uint8_t *)&icmp->msg.info.neigh_sol) + sizeof(struct neigh_sol_s)); - if (pico_icmp6_provide_llao(llao, PICO_ND_OPT_LLADDR_SRC, dev, NULL)) { - pico_frame_discard(sol); - return -1; - } else { - /* Provide ARO when it's a neighbor solicitation for address registration or re-registration */ - aro = (struct pico_icmp6_opt_aro *)(((uint8_t *)&icmp->msg.info.neigh_sol) + sizeof(struct neigh_sol_s) + llao_len); - pico_6lp_nd_provide_aro(aro, dev, type); - - /* RFC6775: The address that is to be registered MUST be the IPv6 source address of the - * NS message. 
*/ - sol->dev = dev; - pico_ipv6_frame_push(sol, tgt, dst, PICO_PROTO_ICMP6, (type == PICO_ICMP6_ND_DAD)); - return 0; - } - } - return -1; -} -#endif - -/* RFC 4861 $7.2.4: sending solicited neighbor advertisements */ -int pico_icmp6_neighbor_advertisement(struct pico_frame *f, struct pico_ip6 *target) -{ - struct pico_frame *adv = NULL; - struct pico_ipv6_hdr *ipv6_hdr = NULL; - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_icmp6_opt_lladdr *opt = NULL; - struct pico_ip6 dst = {{0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}}; - - ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - adv = pico_proto_ipv6.alloc(&pico_proto_ipv6, f->dev, PICO_ICMP6HDR_NEIGH_ADV_SIZE + 8); - if (!adv) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - adv->payload = adv->transport_hdr + PICO_ICMP6HDR_NEIGH_ADV_SIZE + 8; - adv->payload_len = 0; - - icmp6_hdr = (struct pico_icmp6_hdr *)adv->transport_hdr; - icmp6_hdr->type = PICO_ICMP6_NEIGH_ADV; - icmp6_hdr->code = 0; - icmp6_hdr->msg.info.neigh_adv.target = *target; - icmp6_hdr->msg.info.neigh_adv.rsor = long_be(0x60000000); /* !router && solicited && override */ - if (pico_ipv6_is_unspecified(ipv6_hdr->src.addr)) { - /* solicited = clear && dst = all-nodes address (scope link-local) */ - icmp6_hdr->msg.info.neigh_adv.rsor ^= long_be(0x40000000); - } else { - /* solicited = set && dst = source of solicitation */ - dst = ipv6_hdr->src; - } - - /* XXX if the target address is either an anycast address or a unicast - * address for which the node is providing proxy service, or the target - * link-layer Address option is not included, the Override flag SHOULD - * be set to zero. - */ - - /* XXX if the target address is an anycast address, the sender SHOULD delay - * sending a response for a random time between 0 and MAX_ANYCAST_DELAY_TIME seconds. 
- */ - - opt = (struct pico_icmp6_opt_lladdr *)(((uint8_t *)&icmp6_hdr->msg.info.neigh_adv) + sizeof(struct neigh_adv_s)); - opt->type = PICO_ND_OPT_LLADDR_TGT; - opt->len = 1; - memcpy(opt->addr.mac.addr, f->dev->eth->mac.addr, PICO_SIZE_ETH); - - /* f->src is set in frame_push, checksum calculated there */ - pico_ipv6_frame_push(adv, NULL, &dst, PICO_PROTO_ICMP6, 0); - return 0; -} - -/* RFC 4861 $6.3.7: sending router solicitations */ -int pico_icmp6_router_solicitation(struct pico_device *dev, struct pico_ip6 *src, struct pico_ip6 *dst) -{ - struct pico_ip6 daddr = {{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}; - struct pico_icmp6_opt_lladdr *lladdr = NULL; - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_frame *sol = NULL; - uint16_t len = 0; - - len = PICO_ICMP6HDR_ROUTER_SOL_SIZE; - if (!pico_ipv6_is_unspecified(src->addr)) { - len = (uint16_t)(len + 8); -#ifdef PICO_SUPPORT_6LOWPAN - if (PICO_DEV_IS_6LOWPAN(dev)) - len = (uint16_t)(len + 8); - } else if (PICO_DEV_IS_6LOWPAN(dev) && pico_ipv6_is_unspecified(src->addr)) { - return -1; /* RFC6775 (6LoWPAN): An unspecified source address MUST NOT be used in RS messages. 
*/ -#endif - } - - sol = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, len); - if (!sol) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - sol->payload = sol->transport_hdr + len; - sol->payload_len = 0; - - icmp6_hdr = (struct pico_icmp6_hdr *)sol->transport_hdr; - icmp6_hdr->type = PICO_ICMP6_ROUTER_SOL; - icmp6_hdr->code = 0; - - if (!pico_ipv6_is_unspecified(src->addr)) { - lladdr = (struct pico_icmp6_opt_lladdr *)((uint8_t *)&icmp6_hdr->msg.info.router_sol + sizeof(struct router_sol_s)); - if (pico_icmp6_provide_llao(lladdr, PICO_ND_OPT_LLADDR_SRC, dev, NULL)) { - pico_frame_discard(sol); - return -1; - } - } - - sol->dev = dev; - - if (!dev->mode) { - /* f->src is set in frame_push, checksum calculated there */ - pico_ipv6_frame_push(sol, NULL, &daddr, PICO_PROTO_ICMP6, 0); - } -#ifdef PICO_SUPPORT_6LOWPAN - else { - if (dst) - daddr = *dst; - /* Force this frame to be send with the EUI-64-address */ - pico_ipv6_frame_push(sol, src, &daddr, PICO_PROTO_ICMP6, 0); - } -#else - IGNORE_PARAMETER(dst); -#endif - return 0; -} - -#define PICO_RADV_VAL_LIFETIME (long_be(86400)) -#define PICO_RADV_PREF_LIFETIME (long_be(14400)) - -static struct pico_ip6 pico_icmp6_address_to_prefix(struct pico_ip6 addr, struct pico_ip6 nm) -{ - struct pico_ip6 prefix; - uint8_t i = 0; - - for (i = 0; i < PICO_SIZE_IP6; i++) { - prefix.addr[i] = (uint8_t)(addr.addr[i] & nm.addr[i]); - } - - return prefix; -} - -/* RFC 4861: sending router advertisements */ -int pico_icmp6_router_advertisement(struct pico_device *dev, struct pico_ip6 *dst) -{ - struct pico_frame *adv = NULL; - struct pico_ip6 prefix_addr = {{ 0x00 }}; - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_icmp6_opt_lladdr *lladdr; - struct pico_icmp6_opt_prefix *prefix; - struct pico_ipv6_link *global = NULL; - uint16_t len = 0; - uint8_t *nxt_opt; - - len = PICO_ICMP6HDR_ROUTER_ADV_SIZE + PICO_ICMP6_OPT_LLADDR_SIZE + sizeof(struct pico_icmp6_opt_prefix); - - adv = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, len); 
- if (!adv) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - adv->payload = adv->transport_hdr + len; - adv->payload_len = 0; - - icmp6_hdr = (struct pico_icmp6_hdr *)adv->transport_hdr; - icmp6_hdr->type = PICO_ICMP6_ROUTER_ADV; - icmp6_hdr->code = 0; - icmp6_hdr->msg.info.router_adv.life_time = short_be(45); - icmp6_hdr->msg.info.router_adv.hop = 64; - nxt_opt = (uint8_t *)&icmp6_hdr->msg.info.router_adv + sizeof(struct router_adv_s); - - prefix = (struct pico_icmp6_opt_prefix *)nxt_opt; - prefix->type = PICO_ND_OPT_PREFIX; - prefix->len = sizeof(struct pico_icmp6_opt_prefix) >> 3; - prefix->prefix_len = 64; /* Only /64 are forwarded */ - prefix->aac = 1; - prefix->onlink = 1; - prefix->val_lifetime = PICO_RADV_VAL_LIFETIME; - prefix->pref_lifetime = PICO_RADV_PREF_LIFETIME; - /* Find the globally routable prefix of the router-interface */ - if ((global = pico_ipv6_global_get(dev))) { - prefix_addr = pico_icmp6_address_to_prefix(global->address, global->netmask); - memcpy(&prefix->prefix, &prefix_addr, sizeof(struct pico_ip6)); - } - - nxt_opt += (sizeof (struct pico_icmp6_opt_prefix)); - lladdr = (struct pico_icmp6_opt_lladdr *)nxt_opt; - - lladdr->type = PICO_ND_OPT_LLADDR_SRC; - - if (!dev->mode && dev->eth) { - lladdr->len = 1; - memcpy(lladdr->addr.mac.addr, dev->eth->mac.addr, PICO_SIZE_ETH); - } else { - - return -1; - } - - icmp6_hdr->crc = 0; - icmp6_hdr->crc = short_be(pico_icmp6_checksum(adv)); - /* f->src is set in frame_push, checksum calculated there */ - pico_ipv6_frame_push(adv, NULL, dst, PICO_PROTO_ICMP6, 0); - return 0; -} - -/***********************/ -/* Ping implementation */ -/***********************/ - -#ifdef PICO_SUPPORT_PING -struct pico_icmp6_ping_cookie -{ - uint16_t id; - uint16_t seq; - uint16_t size; - uint16_t err; - int count; - int interval; - int timeout; - pico_time timestamp; - struct pico_ip6 dst; - struct pico_device *dev; - void (*cb)(struct pico_icmp6_stats*); -}; - -static int icmp6_cookie_compare(void *ka, void *kb) 
-{ - struct pico_icmp6_ping_cookie *a = ka, *b = kb; - if (a->id < b->id) - return -1; - - if (a->id > b->id) - return 1; - - return (a->seq - b->seq); -} -static PICO_TREE_DECLARE(IPV6Pings, icmp6_cookie_compare); - -static int pico_icmp6_send_echo(struct pico_icmp6_ping_cookie *cookie) -{ - struct pico_frame *echo = NULL; - struct pico_icmp6_hdr *hdr = NULL; - - echo = pico_proto_ipv6.alloc(&pico_proto_ipv6, cookie->dev, (uint16_t)(PICO_ICMP6HDR_ECHO_REQUEST_SIZE + cookie->size)); - if (!echo) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - echo->payload = echo->transport_hdr + PICO_ICMP6HDR_ECHO_REQUEST_SIZE; - echo->payload_len = cookie->size; - - hdr = (struct pico_icmp6_hdr *)echo->transport_hdr; - hdr->type = PICO_ICMP6_ECHO_REQUEST; - hdr->code = 0; - hdr->msg.info.echo_request.id = short_be(cookie->id); - hdr->msg.info.echo_request.seq = short_be(cookie->seq); - /* XXX: Fill payload */ - hdr->crc = 0; - hdr->crc = short_be(pico_icmp6_checksum(echo)); - pico_ipv6_frame_push(echo, NULL, &cookie->dst, PICO_PROTO_ICMP6, 0); - return 0; -} - - -static void pico_icmp6_ping_timeout(pico_time now, void *arg) -{ - struct pico_icmp6_ping_cookie *cookie = NULL; - - IGNORE_PARAMETER(now); - - cookie = (struct pico_icmp6_ping_cookie *)arg; - if (pico_tree_findKey(&IPV6Pings, cookie)) { - if (cookie->err == PICO_PING6_ERR_PENDING) { - struct pico_icmp6_stats stats = { - 0 - }; - stats.dst = cookie->dst; - stats.seq = cookie->seq; - stats.time = 0; - stats.size = cookie->size; - stats.err = PICO_PING6_ERR_TIMEOUT; - dbg(" ---- Ping6 timeout!!!\n"); - if (cookie->cb) - cookie->cb(&stats); - } - - pico_tree_delete(&IPV6Pings, cookie); - PICO_FREE(cookie); - } -} - -static void pico_icmp6_next_ping(pico_time now, void *arg); -static int pico_icmp6_send_ping(struct pico_icmp6_ping_cookie *cookie) -{ - uint32_t interval_timer = 0; - struct pico_icmp6_stats stats; - pico_icmp6_send_echo(cookie); - cookie->timestamp = pico_tick; - interval_timer = 
pico_timer_add((pico_time)(cookie->interval), pico_icmp6_next_ping, cookie); - if (!interval_timer) { - goto fail; - } - if (!pico_timer_add((pico_time)(cookie->timeout), pico_icmp6_ping_timeout, cookie)) { - pico_timer_cancel(interval_timer); - goto fail; - } - return 0; - -fail: - dbg("ICMP6: Failed to start timer\n"); - cookie->err = PICO_PING6_ERR_ABORTED; - stats.err = cookie->err; - cookie->cb(&stats); - pico_tree_delete(&IPV6Pings, cookie); - - return -1; -} - -static void pico_icmp6_next_ping(pico_time now, void *arg) -{ - struct pico_icmp6_ping_cookie *cookie = NULL, *new = NULL; - - IGNORE_PARAMETER(now); - - cookie = (struct pico_icmp6_ping_cookie *)arg; - if (pico_tree_findKey(&IPV6Pings, cookie)) { - if (cookie->err == PICO_PING6_ERR_ABORTED) - return; - - if (cookie->seq < (uint16_t)cookie->count) { - new = PICO_ZALLOC(sizeof(struct pico_icmp6_ping_cookie)); - if (!new) { - pico_err = PICO_ERR_ENOMEM; - return; - } - - memcpy(new, cookie, sizeof(struct pico_icmp6_ping_cookie)); - new->seq++; - - if (pico_tree_insert(&IPV6Pings, new)) { - dbg("ICMP6: Failed to insert new cookie in tree\n"); - PICO_FREE(new); - return; - } - - if (pico_icmp6_send_ping(new)) { - dbg("ICMP6: Failed to send ping\n"); - PICO_FREE(new); - } - } - } -} - -static void pico_icmp6_ping_recv_reply(struct pico_frame *f) -{ - struct pico_icmp6_ping_cookie *cookie = NULL, test = { - 0 - }; - struct pico_icmp6_hdr *hdr = NULL; - - hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - test.id = short_be(hdr->msg.info.echo_reply.id); - test.seq = short_be(hdr->msg.info.echo_reply.seq); - cookie = pico_tree_findKey(&IPV6Pings, &test); - if (cookie) { - struct pico_icmp6_stats stats = { - 0 - }; - if (cookie->err == PICO_PING6_ERR_ABORTED) - return; - - cookie->err = PICO_PING6_ERR_REPLIED; - stats.dst = cookie->dst; - stats.seq = cookie->seq; - stats.size = cookie->size; - stats.time = pico_tick - cookie->timestamp; - stats.err = cookie->err; - stats.ttl = ((struct pico_ipv6_hdr 
*)f->net_hdr)->hop; - if(cookie->cb) - cookie->cb(&stats); - } else { - dbg("Reply for seq=%d, not found.\n", test.seq); - } -} - -int pico_icmp6_ping(char *dst, int count, int interval, int timeout, int size, void (*cb)(struct pico_icmp6_stats *), struct pico_device *dev) -{ - static uint16_t next_id = 0x91c0; - struct pico_icmp6_ping_cookie *cookie = NULL; - - if(!dst || !count || !interval || !timeout) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - cookie = PICO_ZALLOC(sizeof(struct pico_icmp6_ping_cookie)); - if (!cookie) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if (pico_string_to_ipv6(dst, cookie->dst.addr) < 0) { - pico_err = PICO_ERR_EINVAL; - PICO_FREE(cookie); - return -1; - } - - cookie->seq = 1; - cookie->id = next_id++; - cookie->err = PICO_PING6_ERR_PENDING; - cookie->size = (uint16_t)size; - cookie->interval = interval; - cookie->timeout = timeout; - cookie->cb = cb; - cookie->count = count; - cookie->dev = dev; - - if (pico_tree_insert(&IPV6Pings, cookie)) { - dbg("ICMP6: Failed to insert cookie in tree\n"); - PICO_FREE(cookie); - return -1; - } - - if (pico_icmp6_send_ping(cookie)) { - PICO_FREE(cookie); - return -1; - } - return (int)cookie->id; -} - -int pico_icmp6_ping_abort(int id) -{ - struct pico_tree_node *node; - int found = 0; - pico_tree_foreach(node, &IPV6Pings) - { - struct pico_icmp6_ping_cookie *ck = - (struct pico_icmp6_ping_cookie *) node->keyValue; - if (ck->id == (uint16_t)id) { - ck->err = PICO_PING6_ERR_ABORTED; - found++; - } - } - if (found > 0) - return 0; /* OK if at least one pending ping has been canceled */ - - pico_err = PICO_ERR_ENOENT; - return -1; -} - -#endif diff --git a/kernel/picotcp/modules/pico_icmp6.h b/kernel/picotcp/modules/pico_icmp6.h deleted file mode 100644 index b449075..0000000 --- a/kernel/picotcp/modules/pico_icmp6.h +++ /dev/null @@ -1,326 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. 
Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - *********************************************************************/ -#ifndef _INCLUDE_PICO_ICMP6 -#define _INCLUDE_PICO_ICMP6 -#include "pico_addressing.h" -#include "pico_protocol.h" -#include "pico_mld.h" -/* ICMP header sizes */ -#define PICO_ICMP6HDR_DRY_SIZE 4 -#define PICO_ICMP6HDR_ECHO_REQUEST_SIZE 8 -#define PICO_ICMP6HDR_DEST_UNREACH_SIZE 8 -#define PICO_ICMP6HDR_TIME_XCEEDED_SIZE 8 -#define PICO_ICMP6HDR_PARAM_PROBLEM_SIZE 8 -#define PICO_ICMP6HDR_NEIGH_SOL_SIZE 24 -#define PICO_ICMP6HDR_NEIGH_ADV_SIZE 24 -#define PICO_ICMP6HDR_ROUTER_SOL_SIZE 8 -#define PICO_ICMP6HDR_ROUTER_SOL_SIZE_6LP 16 -#define PICO_ICMP6HDR_ROUTER_ADV_SIZE 16 -#define PICO_ICMP6HDR_REDIRECT_SIZE 40 - -/* ICMP types */ -#define PICO_ICMP6_DEST_UNREACH 1 -#define PICO_ICMP6_PKT_TOO_BIG 2 -#define PICO_ICMP6_TIME_EXCEEDED 3 -#define PICO_ICMP6_PARAM_PROBLEM 4 -#define PICO_ICMP6_ECHO_REQUEST 128 -#define PICO_ICMP6_ECHO_REPLY 129 -#define PICO_ICMP6_ROUTER_SOL 133 -#define PICO_ICMP6_ROUTER_ADV 134 -#define PICO_ICMP6_NEIGH_SOL 135 -#define PICO_ICMP6_NEIGH_ADV 136 -#define PICO_ICMP6_REDIRECT 137 - -/* destination unreachable codes */ -#define PICO_ICMP6_UNREACH_NOROUTE 0 -#define PICO_ICMP6_UNREACH_ADMIN 1 -#define PICO_ICMP6_UNREACH_SRCSCOPE 2 -#define PICO_ICMP6_UNREACH_ADDR 3 -#define PICO_ICMP6_UNREACH_PORT 4 -#define PICO_ICMP6_UNREACH_SRCFILTER 5 -#define PICO_ICMP6_UNREACH_REJROUTE 6 - -/* time exceeded codes */ -#define PICO_ICMP6_TIMXCEED_INTRANS 0 -#define PICO_ICMP6_TIMXCEED_REASS 1 - -/* parameter problem codes */ -#define PICO_ICMP6_PARAMPROB_HDRFIELD 0 -#define PICO_ICMP6_PARAMPROB_NXTHDR 1 -#define PICO_ICMP6_PARAMPROB_IPV6OPT 2 - -/* ping error codes */ -#define PICO_PING6_ERR_REPLIED 0 -#define PICO_PING6_ERR_TIMEOUT 1 -#define PICO_PING6_ERR_UNREACH 2 -#define PICO_PING6_ERR_ABORTED 3 -#define PICO_PING6_ERR_PENDING 0xFFFF - -/* ND configuration */ -#define 
PICO_ND_MAX_FRAMES_QUEUED 4 /* max frames queued while awaiting address resolution */ - -/* ND RFC constants */ -#define PICO_ND_MAX_SOLICIT 3 -#define PICO_ND_MAX_NEIGHBOR_ADVERT 3 -#define PICO_ND_DELAY_INCOMPLETE 1000 /* msec */ -#define PICO_ND_DELAY_FIRST_PROBE_TIME 5000 /* msec */ - -/* neighbor discovery options */ -#define PICO_ND_OPT_LLADDR_SRC 1 -#define PICO_ND_OPT_LLADDR_TGT 2 -#define PICO_ND_OPT_PREFIX 3 -#define PICO_ND_OPT_REDIRECT 4 -#define PICO_ND_OPT_MTU 5 -#define PICO_ND_OPT_RDNSS 25 /* RFC 5006 */ -#define PICO_ND_OPT_ARO 33 /* RFC 6775 */ -#define PICO_ND_OPT_6CO 34 /* RFC 6775 */ -#define PICO_ND_OPT_ABRO 35 /* RFC 6775 */ - -/* ND advertisement flags */ -#define PICO_ND_ROUTER 0x80000000 -#define PICO_ND_SOLICITED 0x40000000 -#define PICO_ND_OVERRIDE 0x20000000 -#define IS_ROUTER(x) (long_be(x->msg.info.neigh_adv.rsor) & (PICO_ND_ROUTER)) /* router flag set? */ -#define IS_SOLICITED(x) (long_be(x->msg.info.neigh_adv.rsor) & (PICO_ND_SOLICITED)) /* solicited flag set? */ -#define IS_OVERRIDE(x) (long_be(x->msg.info.neigh_adv.rsor) & (PICO_ND_OVERRIDE)) /* override flag set? 
*/ - -#define PICO_ND_PREFIX_LIFETIME_INF 0xFFFFFFFFu -/* #define PICO_ND_DESTINATION_LRU_TIME 600000u / * msecs (10min) * / */ - -/* custom defines */ -#define PICO_ICMP6_ND_UNICAST 0 -#define PICO_ICMP6_ND_ANYCAST 1 -#define PICO_ICMP6_ND_SOLICITED 2 -#define PICO_ICMP6_ND_DAD 3 -#define PICO_ICMP6_ND_DEREGISTER 4 - -#define PICO_ICMP6_MAX_RTR_SOL_DELAY 1000 - -#define PICO_ICMP6_OPT_LLADDR_SIZE (8) - -/****************************************************************************** - * 6LoWPAN Constants - ******************************************************************************/ - -/* Address registration lifetime */ -#define PICO_6LP_ND_DEFAULT_LIFETIME (120) /* TWO HOURS */ - -extern struct pico_protocol pico_proto_icmp6; - -PACKED_STRUCT_DEF pico_icmp6_hdr { - uint8_t type; - uint8_t code; - uint16_t crc; - - PACKED_UNION_DEF icmp6_msg_u { - /* error messages */ - PACKED_UNION_DEF icmp6_err_u { - PEDANTIC_STRUCT_DEF dest_unreach_s { - uint32_t unused; - } dest_unreach; - PEDANTIC_STRUCT_DEF pkt_too_big_s { - uint32_t mtu; - } pkt_too_big; - PEDANTIC_STRUCT_DEF time_exceeded_s { - uint32_t unused; - } time_exceeded; - PEDANTIC_STRUCT_DEF param_problem_s { - uint32_t ptr; - } param_problem; - } err; - - /* informational messages */ - PACKED_UNION_DEF icmp6_info_u { - PEDANTIC_STRUCT_DEF echo_request_s { - uint16_t id; - uint16_t seq; - } echo_request; - PEDANTIC_STRUCT_DEF echo_reply_s { - uint16_t id; - uint16_t seq; - } echo_reply; - PEDANTIC_STRUCT_DEF router_sol_s { - uint32_t unused; - } router_sol; - PEDANTIC_STRUCT_DEF router_adv_s { - uint8_t hop; - uint8_t mor; - uint16_t life_time; - uint32_t reachable_time; - uint32_t retrans_time; - } router_adv; - PEDANTIC_STRUCT_DEF neigh_sol_s { - uint32_t unused; - struct pico_ip6 target; - } neigh_sol; - PEDANTIC_STRUCT_DEF neigh_adv_s { - uint32_t rsor; - struct pico_ip6 target; - } neigh_adv; - PEDANTIC_STRUCT_DEF redirect_s { - uint32_t reserved; - struct pico_ip6 target; - struct pico_ip6 dest; - } 
redirect; - PEDANTIC_STRUCT_DEF mld_s { - uint16_t max_resp_time; - uint16_t reserved; - struct pico_ip6 mmcast_group; - /*MLDv2*/ - uint8_t reserverd; /* With S and QRV */ - uint8_t QQIC; - uint16_t nbr_src; - struct pico_ip6 src[1]; - } mld; - /* 6LoWPAN Duplicate Address Message */ - PEDANTIC_STRUCT_DEF da_s { - uint8_t status; - uint8_t reserved; - uint16_t lifetime; - struct pico_6lowpan_ext eui64; - struct pico_ip6 addr; - } da; - } info; - } msg; -}; - -PACKED_UNION_DEF pico_hw_addr { - struct pico_eth mac; -#ifdef PICO_SUPPORT_6LOWPAN - union pico_6lowpan_u pan; -#endif /* PICO_SUPPORT_6LOWPAN */ - uint8_t data[8]; -}; - -/****************************************************************************** - * ICMP6 Neighbor Discovery Options - ******************************************************************************/ - -PACKED_STRUCT_DEF pico_icmp6_opt_lladdr -{ - uint8_t type; - uint8_t len; - union pico_hw_addr addr; -}; - -PACKED_STRUCT_DEF pico_icmp6_opt_prefix -{ - uint8_t type; - uint8_t len; - uint8_t prefix_len; - uint8_t res : 6; - uint8_t aac : 1; - uint8_t onlink : 1; - uint32_t val_lifetime; - uint32_t pref_lifetime; - uint32_t reserved; - struct pico_ip6 prefix; -}; - -PACKED_STRUCT_DEF pico_icmp6_opt_mtu -{ - uint8_t type; - uint8_t len; - uint16_t res; - uint32_t mtu; -}; - -PACKED_STRUCT_DEF pico_icmp6_opt_redirect -{ - uint8_t type; - uint8_t len; - uint16_t res0; - uint32_t res1; -}; - -PACKED_STRUCT_DEF pico_icmp6_opt_rdnss -{ - uint8_t type; - uint8_t len; - uint16_t res0; - uint32_t lifetime; - struct pico_ip6 *addr; -}; - -PACKED_STRUCT_DEF pico_icmp6_opt_na -{ - uint8_t type; - uint8_t len; -}; - -/* 6LoWPAN Address Registration Option (ARO) */ -PACKED_STRUCT_DEF pico_icmp6_opt_aro -{ - uint8_t type; - uint8_t len; - uint8_t status; - uint8_t res0; - uint16_t res1; - uint16_t lifetime; - struct pico_6lowpan_ext eui64; -}; - -#define ICMP6_ARO_SUCCES (0u) -#define ICMP6_ARO_DUP (1u) -#define ICMP6_ARO_FULL (2u) - -/* 6LoWPAN Context 
Option (6CO) */ -PACKED_STRUCT_DEF pico_icmp6_opt_6co -{ - uint8_t type; - uint8_t len; - uint8_t clen; - uint8_t id: 4; - uint8_t res: 3; - uint8_t c: 1; - uint16_t lifetime; - uint8_t prefix; -}; - -/* 6LoWPAN Authoritative Border Router Option (ABRO) */ -PACKED_STRUCT_DEF pico_icmp6_opt_abro -{ - uint8_t type; - uint8_t len; - uint16_t version_low; - uint16_t version_high; - uint16_t lifetime; - struct pico_ip6 addr; -}; - -struct pico_icmp6_stats -{ - unsigned long size; - unsigned long seq; - pico_time time; - unsigned long ttl; - int err; - struct pico_ip6 dst; -}; - -int pico_icmp6_ping(char *dst, int count, int interval, int timeout, int size, void (*cb)(struct pico_icmp6_stats *), struct pico_device *dev); -int pico_icmp6_ping_abort(int id); - - -int pico_icmp6_neighbor_solicitation(struct pico_device *dev, struct pico_ip6 *tgt, uint8_t type, struct pico_ip6 *dst); -int pico_icmp6_neighbor_advertisement(struct pico_frame *f, struct pico_ip6 *target); -int pico_icmp6_router_solicitation(struct pico_device *dev, struct pico_ip6 *src, struct pico_ip6 *dst); - -int pico_icmp6_port_unreachable(struct pico_frame *f); -int pico_icmp6_proto_unreachable(struct pico_frame *f); -int pico_icmp6_dest_unreachable(struct pico_frame *f); -int pico_icmp6_ttl_expired(struct pico_frame *f); -int pico_icmp6_packet_filtered(struct pico_frame *f); -int pico_icmp6_parameter_problem(struct pico_frame *f, uint8_t problem, uint32_t ptr); -int pico_icmp6_pkt_too_big(struct pico_frame *f); -int pico_icmp6_frag_expired(struct pico_frame *f); - -uint16_t pico_icmp6_checksum(struct pico_frame *f); -int pico_icmp6_router_advertisement(struct pico_device *dev, struct pico_ip6 *dst); - -#endif diff --git a/kernel/picotcp/modules/pico_igmp.c b/kernel/picotcp/modules/pico_igmp.c deleted file mode 100644 index 49f8358..0000000 --- a/kernel/picotcp/modules/pico_igmp.c +++ /dev/null @@ -1,1170 +0,0 @@ -/********************************************************************* - PicoTCP. 
Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - RFC 1112, 2236, 3376, 3569, 3678, 4607 - - Authors: Kristof Roelants (IGMPv3), Simon Maes, Brecht Van Cauwenberghe - *********************************************************************/ - -#include "pico_stack.h" -#include "pico_ipv4.h" -#include "pico_igmp.h" -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_addressing.h" -#include "pico_frame.h" -#include "pico_tree.h" -#include "pico_device.h" -#include "pico_socket.h" -#include "pico_mcast.h" - -#if defined(PICO_SUPPORT_IGMP) && defined(PICO_SUPPORT_MCAST) - -#ifdef DEBUG_IGMP - #define igmp_dbg dbg -#else - #define igmp_dbg(...) do {} while(0) -#endif - -/* membership states */ -#define IGMP_STATE_NON_MEMBER (0x0) -#define IGMP_STATE_DELAYING_MEMBER (0x1) -#define IGMP_STATE_IDLE_MEMBER (0x2) - -/* events */ -#define IGMP_EVENT_DELETE_GROUP (0x0) -#define IGMP_EVENT_CREATE_GROUP (0x1) -#define IGMP_EVENT_UPDATE_GROUP (0x2) -#define IGMP_EVENT_QUERY_RECV (0x3) -#define IGMP_EVENT_REPORT_RECV (0x4) -#define IGMP_EVENT_TIMER_EXPIRED (0x5) - -/* message types */ -#define IGMP_TYPE_MEM_QUERY (0x11) -#define IGMP_TYPE_MEM_REPORT_V1 (0x12) -#define IGMP_TYPE_MEM_REPORT_V2 (0x16) -#define IGMP_TYPE_LEAVE_GROUP (0x17) -#define IGMP_TYPE_MEM_REPORT_V3 (0x22) - -/* group record types */ -#define IGMP_MODE_IS_INCLUDE (1) -#define IGMP_MODE_IS_EXCLUDE (2) -#define IGMP_CHANGE_TO_INCLUDE_MODE (3) -#define IGMP_CHANGE_TO_EXCLUDE_MODE (4) -#define IGMP_ALLOW_NEW_SOURCES (5) -#define IGMP_BLOCK_OLD_SOURCES (6) - -/* host flag */ -#define IGMP_HOST_LAST (0x1) -#define IGMP_HOST_NOT_LAST (0x0) - -/* list of timers, counters and their default values */ -#define IGMP_ROBUSTNESS (2u) -#define IGMP_QUERY_INTERVAL (125) /* secs */ -#define IGMP_QUERY_RESPONSE_INTERVAL (10u) /* secs */ -#define IGMP_STARTUP_QUERY_INTERVAL (IGMPV3_QUERY_INTERVAL / 4) -#define IGMP_STARTUP_QUERY_COUNT 
(IGMPV3_ROBUSTNESS) -#define IGMP_LAST_MEMBER_QUERY_INTERVAL (1) /* secs */ -#define IGMP_LAST_MEMBER_QUERY_COUNT (IGMPV3_ROBUSTNESS) -#define IGMP_UNSOLICITED_REPORT_INTERVAL (1) /* secs */ -#define IGMP_DEFAULT_MAX_RESPONSE_TIME (100) - -/* custom timers types */ -#define IGMP_TIMER_GROUP_REPORT (1) -#define IGMP_TIMER_V1_QUERIER (2) -#define IGMP_TIMER_V2_QUERIER (3) - -/* IGMP groups */ -#define IGMP_ALL_HOST_GROUP long_be(0xE0000001) /* 224.0.0.1 */ -#define IGMP_ALL_ROUTER_GROUP long_be(0xE0000002) /* 224.0.0.2 */ -#define IGMPV3_ALL_ROUTER_GROUP long_be(0xE0000016) /* 224.0.0.22 */ - -/* misc */ -#define IGMP_TIMER_STOPPED (1) -#define IP_OPTION_ROUTER_ALERT_LEN (4u) -#define IGMP_MAX_GROUPS (32) /* max 255 */ - -PACKED_STRUCT_DEF igmp_message { - uint8_t type; - uint8_t max_resp_time; - uint16_t crc; - uint32_t mcast_group; -}; - -PACKED_STRUCT_DEF igmpv3_query { - uint8_t type; - uint8_t max_resp_time; - uint16_t crc; - uint32_t mcast_group; - uint8_t rsq; - uint8_t qqic; - uint16_t sources; -}; - -PACKED_STRUCT_DEF igmpv3_group_record { - uint8_t type; - uint8_t aux; - uint16_t sources; - uint32_t mcast_group; -}; - -PACKED_STRUCT_DEF igmpv3_report { - uint8_t type; - uint8_t res0; - uint16_t crc; - uint16_t res1; - uint16_t groups; -}; - -struct igmp_timer { - uint8_t type; - uint8_t stopped; - pico_time start; - pico_time delay; - struct pico_ip4 mcast_link; - struct pico_ip4 mcast_group; - struct pico_frame *f; - void (*callback)(struct igmp_timer *t); -}; - -/* queues */ -static struct pico_queue igmp_in = { - 0 -}; -static struct pico_queue igmp_out = { - 0 -}; - -/* finite state machine caller */ -static int pico_igmp_process_event(struct mcast_parameters *p); - -/* state callback prototype */ -typedef int (*callback)(struct mcast_parameters *); - -static inline int igmpt_type_compare(struct igmp_timer *a, struct igmp_timer *b) -{ - if (a->type < b->type) - return -1; - - if (a->type > b->type) - return 1; - - return 0; -} - - -static inline int 
igmpt_group_compare(struct igmp_timer *a, struct igmp_timer *b) -{ - return pico_ipv4_compare(&a->mcast_group, &b->mcast_group); -} - -static inline int igmpt_link_compare(struct igmp_timer *a, struct igmp_timer *b) -{ - return pico_ipv4_compare(&a->mcast_link, &b->mcast_link); -} - -/* redblack trees */ -static int igmp_timer_cmp(void *ka, void *kb) -{ - struct igmp_timer *a = ka, *b = kb; - int cmp = igmpt_type_compare(a, b); - if (cmp) - return cmp; - - cmp = igmpt_group_compare(a, b); - if (cmp) - return cmp; - - return igmpt_link_compare(a, b); - -} -static PICO_TREE_DECLARE(IGMPTimers, igmp_timer_cmp); - -static inline int igmpparm_group_compare(struct mcast_parameters *a, struct mcast_parameters *b) -{ - return pico_ipv4_compare(&a->mcast_group.ip4, &b->mcast_group.ip4); -} - -static inline int igmpparm_link_compare(struct mcast_parameters *a, struct mcast_parameters *b) -{ - return pico_ipv4_compare(&a->mcast_link.ip4, &b->mcast_link.ip4); -} - -static int igmp_parameters_cmp(void *ka, void *kb) -{ - struct mcast_parameters *a = ka, *b = kb; - int cmp = igmpparm_group_compare(a, b); - if (cmp) - return cmp; - - return igmpparm_link_compare(a, b); -} -static PICO_TREE_DECLARE(IGMPParameters, igmp_parameters_cmp); - -static int igmp_sources_cmp(void *ka, void *kb) -{ - struct pico_ip4 *a = ka, *b = kb; - return pico_ipv4_compare(a, b); -} -static PICO_TREE_DECLARE(IGMPAllow, igmp_sources_cmp); -static PICO_TREE_DECLARE(IGMPBlock, igmp_sources_cmp); - -static struct mcast_parameters *pico_igmp_find_parameter(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group) -{ - struct mcast_parameters test = { - 0 - }; - if (!mcast_link || !mcast_group) - return NULL; - - test.mcast_link.ip4 = *mcast_link; - test.mcast_group.ip4 = *mcast_group; - return pico_tree_findKey(&IGMPParameters, &test); -} - -static int pico_igmp_delete_parameter(struct mcast_parameters *p) -{ - if (pico_tree_delete(&IGMPParameters, p)) - PICO_FREE(p); - else - return -1; - - return 0; -} - 
-static void pico_igmp_timer_expired(pico_time now, void *arg) -{ - struct igmp_timer *t = NULL, *timer = NULL, test = { - 0 - }; - - IGNORE_PARAMETER(now); - t = (struct igmp_timer *)arg; - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - igmp_dbg("IGMP: timer expired for %08X link %08X type %u, delay %lu\n", t->mcast_group.addr, t->mcast_link.addr, t->type, t->delay); - timer = pico_tree_findKey(&IGMPTimers, &test); - if (!timer) { - return; - } - - if (timer->stopped == IGMP_TIMER_STOPPED) { - pico_tree_delete(&IGMPTimers, timer); - PICO_FREE(t); - return; - } - - if (timer->start + timer->delay < PICO_TIME_MS()) { - pico_tree_delete(&IGMPTimers, timer); - if (timer->callback) - timer->callback(timer); - - PICO_FREE(timer); - } else { - igmp_dbg("IGMP: restart timer for %08X, delay %lu, new delay %lu\n", t->mcast_group.addr, t->delay, (timer->start + timer->delay) - PICO_TIME_MS()); - if (!pico_timer_add((timer->start + timer->delay) - PICO_TIME_MS(), &pico_igmp_timer_expired, timer)) { - igmp_dbg("IGMP: Failed to start expiration timer\n"); - pico_tree_delete(&IGMPTimers, timer); - PICO_FREE(timer); - } - } - - return; -} - -static int pico_igmp_timer_reset(struct igmp_timer *t) -{ - struct igmp_timer *timer = NULL, test = { - 0 - }; - - igmp_dbg("IGMP: reset timer for %08X, delay %lu\n", t->mcast_group.addr, t->delay); - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = pico_tree_findKey(&IGMPTimers, &test); - if (!timer) - return -1; - - *timer = *t; - timer->start = PICO_TIME_MS(); - return 0; -} - -static int pico_igmp_timer_start(struct igmp_timer *t) -{ - struct igmp_timer *timer = NULL, test = { - 0 - }; - - igmp_dbg("IGMP: start timer for %08X link %08X type %u, delay %lu\n", t->mcast_group.addr, t->mcast_link.addr, t->type, t->delay); - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = 
pico_tree_findKey(&IGMPTimers, &test); - if (timer) - return pico_igmp_timer_reset(t); - - timer = PICO_ZALLOC(sizeof(struct igmp_timer)); - if (!timer) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - *timer = *t; - timer->start = PICO_TIME_MS(); - - if (pico_tree_insert(&IGMPTimers, timer)) { - igmp_dbg("IGMP: Failed to insert timer in tree\n"); - PICO_FREE(timer); - return -1; - } - - if (!pico_timer_add(timer->delay, &pico_igmp_timer_expired, timer)) { - igmp_dbg("IGMP: Failed to start expiration timer\n"); - pico_tree_delete(&IGMPTimers, timer); - PICO_FREE(timer); - return -1; - } - return 0; -} - -static int pico_igmp_timer_stop(struct igmp_timer *t) -{ - struct igmp_timer *timer = NULL, test = { - 0 - }; - - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = pico_tree_findKey(&IGMPTimers, &test); - if (!timer) - return -1; - - igmp_dbg("IGMP: stop timer for %08X, delay %lu\n", timer->mcast_group.addr, timer->delay); - timer->stopped = IGMP_TIMER_STOPPED; - return 0; -} - -static int pico_igmp_timer_is_running(struct igmp_timer *t) -{ - struct igmp_timer *timer = NULL, test = { - 0 - }; - - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = pico_tree_findKey(&IGMPTimers, &test); - if (timer) - return 1; - - return 0; -} - -static struct igmp_timer *pico_igmp_find_timer(uint8_t type, struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group) -{ - struct igmp_timer test = { - 0 - }; - - test.type = type; - test.mcast_link = *mcast_link; - test.mcast_group = *mcast_group; - return pico_tree_findKey(&IGMPTimers, &test); -} - -static void pico_igmp_report_expired(struct igmp_timer *t) -{ - struct mcast_parameters *p = NULL; - - p = pico_igmp_find_parameter(&t->mcast_link, &t->mcast_group); - if (!p) - return; - - p->event = IGMP_EVENT_TIMER_EXPIRED; - pico_igmp_process_event(p); -} - -static void pico_igmp_v2querier_expired(struct igmp_timer *t) -{ - struct 
pico_ipv4_link *link = NULL; - struct pico_tree_node *index = NULL, *_tmp = NULL; - - link = pico_ipv4_link_by_dev(t->f->dev); - if (!link) - return; - - /* When changing compatibility mode, cancel all pending response - * and retransmission timers. - */ - pico_tree_foreach_safe(index, &IGMPTimers, _tmp) - { - ((struct igmp_timer *)index->keyValue)->stopped = IGMP_TIMER_STOPPED; - pico_tree_delete(&IGMPTimers, index->keyValue); - } - igmp_dbg("IGMP: switch to compatibility mode IGMPv3\n"); - link->mcast_compatibility = PICO_IGMPV3; - return; -} - -static int pico_igmp_is_checksum_valid(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = NULL; - uint8_t ihl = 24, datalen = 0; - - hdr = (struct pico_ipv4_hdr *)f->net_hdr; - ihl = (uint8_t)((hdr->vhl & 0x0F) * 4); /* IHL is in 32bit words */ - datalen = (uint8_t)(short_be(hdr->len) - ihl); - - if (short_be(pico_checksum(f->transport_hdr, datalen)) == 0) - return 1; - - igmp_dbg("IGMP: invalid checksum\n"); - return 0; -} - -/* RFC 3376 $7.1 */ -static int pico_igmp_compatibility_mode(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = NULL; - struct pico_ipv4_link *link = NULL; - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct igmp_timer t = { - 0 - }; - uint8_t ihl = 24, datalen = 0; - struct igmp_message *message = NULL; - struct mcast_parameters *p = NULL; - struct pico_ip4 mcast_group = { - 0 - }; - - link = pico_ipv4_link_by_dev(f->dev); - if (!link) - return -1; - - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - ihl = (uint8_t)((hdr->vhl & 0x0F) * 4); /* IHL is in 32bit words */ - datalen = (uint8_t)(short_be(hdr->len) - ihl); - igmp_dbg("IGMP: IHL = %u, LEN = %u, OCTETS = %u\n", ihl, short_be(hdr->len), datalen); - - if (datalen >= 12) { - /* IGMPv3 query */ - t.type = IGMP_TIMER_V2_QUERIER; - if (pico_igmp_timer_is_running(&t)) { /* IGMPv2 querier present timer still running */ - igmp_dbg("Timer is already running\n"); - return -1; - } else { - link->mcast_compatibility = PICO_IGMPV3; - 
igmp_dbg("IGMP Compatibility: v3\n"); - return 0; - } - } else if (datalen == 8) { - struct igmp_message *query = (struct igmp_message *)f->transport_hdr; - /* Check if max_resp_time is set RFC 3376 $7.1 */ - if (query->max_resp_time != 0) { - /* IGMPv2 query */ - /* When changing compatibility mode, cancel all pending response - * and retransmission timers. - */ - pico_tree_foreach_safe(index, &IGMPTimers, _tmp) - { - ((struct igmp_timer *)index->keyValue)->stopped = IGMP_TIMER_STOPPED; - pico_tree_delete(&IGMPTimers, index->keyValue); - } - igmp_dbg("IGMP: switch to compatibility mode IGMPv2\n"); - link->mcast_compatibility = PICO_IGMPV2; - /* Reset the event and state to prevent deadlock */ - message = (struct igmp_message *)f->transport_hdr; - mcast_group.addr = message->mcast_group; - p = pico_igmp_find_parameter(&link->address, &mcast_group); - if(p) { - p->state = IGMP_STATE_NON_MEMBER; - p->event = IGMP_EVENT_CREATE_GROUP; - } - - t.type = IGMP_TIMER_V2_QUERIER; - t.delay = ((IGMP_ROBUSTNESS * link->mcast_last_query_interval) + IGMP_QUERY_RESPONSE_INTERVAL) * 1000; - t.f = f; - t.callback = pico_igmp_v2querier_expired; - /* only one of this type of timer may exist! 
*/ - if (pico_igmp_timer_start(&t) < 0) - return -1; - } else { - /* IGMPv1 query, not supported */ - return -1; - } - } else { - /* invalid query, silently ignored */ - return -1; - } - - return 0; -} - -static struct mcast_parameters *pico_igmp_analyse_packet(struct pico_frame *f) -{ - struct igmp_message *message = NULL; - struct mcast_parameters *p = NULL; - struct pico_ipv4_link *link = NULL; - struct pico_ip4 mcast_group = { - 0 - }; - - link = pico_ipv4_link_by_dev(f->dev); - if (!link) - return NULL; - - /* IGMPv2 and IGMPv3 have a similar structure for the first 8 bytes */ - message = (struct igmp_message *)f->transport_hdr; - mcast_group.addr = message->mcast_group; - p = pico_igmp_find_parameter(&link->address, &mcast_group); - if (!p && mcast_group.addr == 0) { /* general query */ - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - if (!p) - return NULL; - - p->state = IGMP_STATE_NON_MEMBER; - p->mcast_link.ip4 = link->address; - p->mcast_group.ip4 = mcast_group; - if (pico_tree_insert(&IGMPParameters, p)) { - igmp_dbg("IGMP: Failed to insert parameters in tree\n"); - PICO_FREE(p); - return NULL; - } - } else if (!p) { - return NULL; - } - - switch (message->type) { - case IGMP_TYPE_MEM_QUERY: - p->event = IGMP_EVENT_QUERY_RECV; - break; - case IGMP_TYPE_MEM_REPORT_V1: - p->event = IGMP_EVENT_REPORT_RECV; - break; - case IGMP_TYPE_MEM_REPORT_V2: - p->event = IGMP_EVENT_REPORT_RECV; - break; - case IGMP_TYPE_MEM_REPORT_V3: - p->event = IGMP_EVENT_REPORT_RECV; - break; - default: - return NULL; - } - p->max_resp_time = message->max_resp_time; /* if IGMPv3 report this will be 0 (res0 field) */ - p->f = f; - - return p; -} - -static int pico_igmp_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - struct mcast_parameters *p = NULL; - IGNORE_PARAMETER(self); - - if (!pico_igmp_is_checksum_valid(f)) - goto out; - - p = pico_igmp_analyse_packet(f); - if (!p) - goto out; - - if (pico_igmp_compatibility_mode(f) < 0) - goto out; - - return 
pico_igmp_process_event(p); - -out: - pico_frame_discard(f); - return 0; -} - -static int pico_igmp_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - /* packets are directly transferred to the IP layer by calling pico_ipv4_frame_push */ - IGNORE_PARAMETER(self); - IGNORE_PARAMETER(f); - return 0; -} - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_igmp = { - .name = "igmp", - .proto_number = PICO_PROTO_IGMP, - .layer = PICO_LAYER_TRANSPORT, - .process_in = pico_igmp_process_in, - .process_out = pico_igmp_process_out, - .q_in = &igmp_in, - .q_out = &igmp_out, -}; - -int pico_igmp_state_change(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state) -{ - struct mcast_parameters *p = NULL; - - if (!mcast_link || !mcast_group) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (mcast_group->addr == IGMP_ALL_HOST_GROUP) - return 0; - - p = pico_igmp_find_parameter(mcast_link, mcast_group); - if (!p && state == PICO_IGMP_STATE_CREATE) { - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - if (!p) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - p->state = IGMP_STATE_NON_MEMBER; - p->mcast_link.ip4 = *mcast_link; - p->mcast_group.ip4 = *mcast_group; - if (pico_tree_insert(&IGMPParameters, p)) { - igmp_dbg("IGMP: Failed to insert parameters in tree\n"); - PICO_FREE(p); - return -1; - } - - } else if (!p) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - switch (state) { - case PICO_IGMP_STATE_CREATE: - p->event = IGMP_EVENT_CREATE_GROUP; - break; - - case PICO_IGMP_STATE_UPDATE: - p->event = IGMP_EVENT_UPDATE_GROUP; - break; - - case PICO_IGMP_STATE_DELETE: - p->event = IGMP_EVENT_DELETE_GROUP; - break; - - default: - return -1; - } - p->filter_mode = filter_mode; - p->MCASTFilter = _MCASTFilter; - - return pico_igmp_process_event(p); -} - -static int pico_igmp_send_report(struct mcast_parameters *p, struct pico_frame *f) -{ - struct pico_ip4 dst = { 
- 0 - }; - struct pico_ip4 mcast_group = { - 0 - }; - struct pico_ipv4_link *link = NULL; - - link = pico_ipv4_link_get((struct pico_ip4*)&p->mcast_link); - if (!link) - return -1; - - mcast_group = p->mcast_group.ip4; - switch (link->mcast_compatibility) { - case PICO_IGMPV2: - if (p->event == IGMP_EVENT_DELETE_GROUP) - dst.addr = IGMP_ALL_ROUTER_GROUP; - else - dst.addr = mcast_group.addr; - - break; - - case PICO_IGMPV3: - dst.addr = IGMPV3_ALL_ROUTER_GROUP; - break; - - default: - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - } - - igmp_dbg("IGMP: send membership report on group %08X to %08X\n", mcast_group.addr, dst.addr); - pico_ipv4_frame_push(f, &dst, PICO_PROTO_IGMP); - return 0; -} -static int8_t pico_igmpv3_generate_filter(struct mcast_filter_parameters *filter, struct mcast_parameters *p) -{ - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_tree *IGMPFilter = NULL; - struct pico_ipv4_link *link = (struct pico_ipv4_link*) filter->link; - filter->p = (struct mcast_parameters *)p; - filter->allow = &IGMPAllow; - filter->block = &IGMPBlock; - filter->filter = IGMPFilter; - filter->sources = 0; - filter->proto = PICO_IGMPV3; - test.mcast_addr = p->mcast_group; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (!g) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - filter->g = (struct pico_mcast_group *)g; - return pico_mcast_generate_filter(filter, p); -} -static int8_t pico_igmpv3_generate_report(struct mcast_filter_parameters *filter, struct mcast_parameters *p) -{ - struct igmpv3_report *report = NULL; - struct igmpv3_group_record *record = NULL; - struct pico_tree_node *index = NULL; - struct pico_device *dev = NULL; - uint16_t len = 0; - uint16_t i = 0; - len = (uint16_t)(sizeof(struct igmpv3_report) + sizeof(struct igmpv3_group_record) + (filter->sources * sizeof(struct pico_ip4))); - dev = pico_ipv4_link_find((struct pico_ip4 *)&p->mcast_link); - p->f = pico_proto_ipv4.alloc(&pico_proto_ipv4, dev, 
(uint16_t)(IP_OPTION_ROUTER_ALERT_LEN + len)); - p->f->net_len = (uint16_t)(p->f->net_len + IP_OPTION_ROUTER_ALERT_LEN); - p->f->transport_hdr += IP_OPTION_ROUTER_ALERT_LEN; - p->f->transport_len = (uint16_t)(p->f->transport_len - IP_OPTION_ROUTER_ALERT_LEN); - /* p->f->len is correctly set by alloc */ - - report = (struct igmpv3_report *)p->f->transport_hdr; - report->type = IGMP_TYPE_MEM_REPORT_V3; - report->res0 = 0; - report->crc = 0; - report->res1 = 0; - report->groups = short_be(1); - - record = (struct igmpv3_group_record *)(((uint8_t *)report) + sizeof(struct igmpv3_report)); - record->type = filter->record_type; - record->aux = 0; - record->sources = short_be(filter->sources); - record->mcast_group = p->mcast_group.ip4.addr; - if (filter->filter && !pico_tree_empty(filter->filter)) { - uint32_t *source_addr = (uint32_t *)((uint8_t *)record + sizeof(struct igmpv3_group_record)); - i = 0; - pico_tree_foreach(index, filter->filter) - { - source_addr[i] = ((struct pico_ip4 *)index->keyValue)->addr; - i++; - } - } - - if(i != filter->sources) { - return -1; - } - - report->crc = short_be(pico_checksum(report, len)); - return 0; -} -static int8_t pico_igmpv2_generate_report(struct mcast_parameters *p) -{ - struct igmp_message *report = NULL; - uint8_t report_type = IGMP_TYPE_MEM_REPORT_V2; - struct pico_device *dev = NULL; - if (p->event == IGMP_EVENT_DELETE_GROUP) - report_type = IGMP_TYPE_LEAVE_GROUP; - - dev = pico_ipv4_link_find((struct pico_ip4 *)&p->mcast_link); - p->f = pico_proto_ipv4.alloc(&pico_proto_ipv4, dev, IP_OPTION_ROUTER_ALERT_LEN + sizeof(struct igmp_message)); - p->f->net_len = (uint16_t)(p->f->net_len + IP_OPTION_ROUTER_ALERT_LEN); - p->f->transport_hdr += IP_OPTION_ROUTER_ALERT_LEN; - p->f->transport_len = (uint16_t)(p->f->transport_len - IP_OPTION_ROUTER_ALERT_LEN); - /* p->f->len is correctly set by alloc */ - - report = (struct igmp_message *)p->f->transport_hdr; - report->type = report_type; - report->max_resp_time = 
IGMP_DEFAULT_MAX_RESPONSE_TIME; - report->mcast_group = p->mcast_group.ip4.addr; - - report->crc = 0; - report->crc = short_be(pico_checksum(report, sizeof(struct igmp_message))); - return 0; -} -static int8_t pico_igmp_generate_report(struct mcast_parameters *p) -{ - struct mcast_filter_parameters filter; - int8_t result; - - filter.link = (union pico_link *)pico_ipv4_link_get((struct pico_ip4 *) &p->mcast_link); - if (!filter.link) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - switch (filter.link->ipv4.mcast_compatibility) { - case PICO_IGMPV1: - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - - case PICO_IGMPV2: - { - return pico_igmpv2_generate_report(p); - } - case PICO_IGMPV3: - { - result = pico_igmpv3_generate_filter(&filter, p); - if(result < 0) - return -1; - - if(result != MCAST_NO_REPORT) - return pico_igmpv3_generate_report(&filter, p); - } - break; - default: - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; -} - -/* stop timer, send leave if flag set */ -static int stslifs(struct mcast_parameters *p) -{ - struct igmp_timer t = { - 0 - }; - - igmp_dbg("IGMP: event = leave group | action = stop timer, send leave if flag set\n"); - - t.type = IGMP_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip4; - t.mcast_group = p->mcast_group.ip4; - if (pico_igmp_timer_stop(&t) < 0) - return -1; - - if(pico_igmp_generate_report(p) < 0) - return -1; - /* always send leave, even if not last host */ - if (pico_igmp_send_report(p, p->f) < 0) - return -1; - - pico_igmp_delete_parameter(p); - igmp_dbg("IGMP: new state = non-member\n"); - return 0; -} - -/* send report, set flag, start timer */ -static int srsfst(struct mcast_parameters *p) -{ - struct igmp_timer t = { - 0 - }; - struct pico_frame *copy_frame = NULL; - - igmp_dbg("IGMP: event = join group | action = send report, set flag, start timer\n"); - - p->last_host = IGMP_HOST_LAST; - - if (pico_igmp_generate_report(p) < 0) - return -1; - - if (!p->f) - return 0; - - copy_frame = 
pico_frame_copy(p->f); - if (!copy_frame) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if (pico_igmp_send_report(p, copy_frame) < 0) - return -1; - - t.type = IGMP_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip4; - t.mcast_group = p->mcast_group.ip4; - t.delay = (pico_rand() % (IGMP_UNSOLICITED_REPORT_INTERVAL * 10000)); - t.f = p->f; - t.callback = pico_igmp_report_expired; - if (pico_igmp_timer_start(&t) < 0) - return -1; - - p->state = IGMP_STATE_DELAYING_MEMBER; - igmp_dbg("IGMP: new state = delaying member\n"); - return 0; -} - -/* merge report, send report, reset timer (IGMPv3 only) */ -static int mrsrrt(struct mcast_parameters *p) -{ - struct igmp_timer *t = NULL; - struct pico_frame *copy_frame = NULL; - struct pico_ipv4_link *link = NULL; - - igmp_dbg("IGMP: event = update group | action = merge report, send report, reset timer (IGMPv3 only)\n"); - - link = pico_ipv4_link_get((struct pico_ip4 *)&p->mcast_link); - if (!link) - return -1; - - if (link->mcast_compatibility != PICO_IGMPV3) { - igmp_dbg("IGMP: no IGMPv3 compatible router on network\n"); - return -1; - } - - /* XXX: merge with pending report rfc 3376 $5.1 */ - - copy_frame = pico_frame_copy(p->f); - if (!copy_frame) - return -1; - - if (pico_igmp_send_report(p, copy_frame) < 0) - return -1; - - t = pico_igmp_find_timer(IGMP_TIMER_GROUP_REPORT, &p->mcast_link.ip4, &p->mcast_group.ip4); - if (!t) - return -1; - - t->delay = (pico_rand() % (IGMP_UNSOLICITED_REPORT_INTERVAL * 10000)); - pico_igmp_timer_reset(t); - - p->state = IGMP_STATE_DELAYING_MEMBER; - igmp_dbg("IGMP: new state = delaying member\n"); - return 0; -} - -/* send report, start timer (IGMPv3 only) */ -static int srst(struct mcast_parameters *p) -{ - struct igmp_timer t = { - 0 - }; - struct pico_frame *copy_frame = NULL; - struct pico_ipv4_link *link = NULL; - - igmp_dbg("IGMP: event = update group | action = send report, start timer (IGMPv3 only)\n"); - - link = pico_ipv4_link_get(&p->mcast_link.ip4); - if (!link) - 
return -1; - - if (link->mcast_compatibility != PICO_IGMPV3) { - igmp_dbg("IGMP: no IGMPv3 compatible router on network\n"); - return -1; - } - - if (pico_igmp_generate_report(p) < 0) - return -1; - - if (!p->f) - return 0; - - copy_frame = pico_frame_copy(p->f); - if (!copy_frame) - return -1; - - if (pico_igmp_send_report(p, copy_frame) < 0) - return -1; - - t.type = IGMP_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip4; - t.mcast_group = p->mcast_group.ip4; - t.delay = (pico_rand() % (IGMP_UNSOLICITED_REPORT_INTERVAL * 10000)); - t.f = p->f; - t.callback = pico_igmp_report_expired; - if (pico_igmp_timer_start(&t) < 0) - return -1; - - p->state = IGMP_STATE_DELAYING_MEMBER; - igmp_dbg("IGMP: new state = delaying member\n"); - return 0; -} - -/* send leave if flag set */ -static int slifs(struct mcast_parameters *p) -{ - igmp_dbg("IGMP: event = leave group | action = send leave if flag set\n"); - - /* always send leave, even if not last host */ - if(pico_igmp_generate_report(p) < 0) - return -1; - if (pico_igmp_send_report(p, p->f) < 0) - return -1; - - pico_igmp_delete_parameter(p); - igmp_dbg("IGMP: new state = non-member\n"); - return 0; -} - -/* start timer */ -static int st(struct mcast_parameters *p) -{ - struct igmp_timer t = { - 0 - }; - - igmp_dbg("IGMP: event = query received | action = start timer\n"); - - if (pico_igmp_generate_report(p) < 0) { - igmp_dbg("Failed to generate report\n"); - return -1; - } - - if (!p->f) { - igmp_dbg("No pending frame\n"); - return -1; - } - - t.type = IGMP_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip4; - t.mcast_group = p->mcast_group.ip4; - t.delay = (pico_rand() % ((1u + p->max_resp_time) * 100u)); - t.f = p->f; - t.callback = pico_igmp_report_expired; - if (pico_igmp_timer_start(&t) < 0) - return -1; - - p->state = IGMP_STATE_DELAYING_MEMBER; - igmp_dbg("IGMP: new state = delaying member\n"); - return 0; -} - -/* stop timer, clear flag */ -static int stcl(struct mcast_parameters *p) -{ - struct 
igmp_timer t = { - 0 - }; - - igmp_dbg("IGMP: event = report received | action = stop timer, clear flag\n"); - - t.type = IGMP_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip4; - t.mcast_group = p->mcast_group.ip4; - if (pico_igmp_timer_stop(&t) < 0) - return -1; - - p->last_host = IGMP_HOST_NOT_LAST; - p->state = IGMP_STATE_IDLE_MEMBER; - igmp_dbg("IGMP: new state = idle member\n"); - return 0; -} - -/* send report, set flag */ -static int srsf(struct mcast_parameters *p) -{ - igmp_dbg("IGMP: event = timer expired | action = send report, set flag\n"); - - if (pico_igmp_send_report(p, p->f) < 0) - return -1; - - p->state = IGMP_STATE_IDLE_MEMBER; - igmp_dbg("IGMP: new state = idle member\n"); - return 0; -} - -/* reset timer if max response time < current timer */ -static int rtimrtct(struct mcast_parameters *p) -{ - struct igmp_timer *t = NULL; - uint32_t time_to_run = 0; - - igmp_dbg("IGMP: event = query received | action = reset timer if max response time < current timer\n"); - - t = pico_igmp_find_timer(IGMP_TIMER_GROUP_REPORT, &p->mcast_link.ip4, &p->mcast_group.ip4); - if (!t) - return -1; - - time_to_run = (uint32_t)(t->start + t->delay - PICO_TIME_MS()); - if ((p->max_resp_time * 100u) < time_to_run) { /* max_resp_time in units of 1/10 seconds */ - t->delay = pico_rand() % ((1u + p->max_resp_time) * 100u); - pico_igmp_timer_reset(t); - } - - p->state = IGMP_STATE_DELAYING_MEMBER; - igmp_dbg("IGMP: new state = delaying member\n"); - return 0; -} - -static int discard(struct mcast_parameters *p) -{ - igmp_dbg("IGMP: ignore and discard frame\n"); - pico_frame_discard(p->f); - return 0; -} - -/* finite state machine table */ -static const callback host_membership_diagram_table[3][6] = -{ /* event |Delete Group |Create Group |Update Group |Query Received |Report Received |Timer Expired */ -/* state Non-Member */ - { discard, srsfst, srsfst, discard, discard, discard }, -/* state Delaying Member */ { stslifs, mrsrrt, mrsrrt, rtimrtct, stcl, srsf }, -/* 
state Idle Member */ { slifs, srst, srst, st, discard, discard } -}; - -static int pico_igmp_process_event(struct mcast_parameters *p) -{ - struct pico_tree_node *index = NULL; - struct mcast_parameters *_p = NULL; - - igmp_dbg("IGMP: process event on group address %08X\n", p->mcast_group.ip4.addr); - if (p->event == IGMP_EVENT_QUERY_RECV && p->mcast_group.ip4.addr == 0) { /* general query */ - pico_tree_foreach(index, &IGMPParameters) { - _p = index->keyValue; - _p->max_resp_time = p->max_resp_time; - _p->event = IGMP_EVENT_QUERY_RECV; - igmp_dbg("IGMP: for each mcast_group = %08X | state = %u\n", _p->mcast_group.ip4.addr, _p->state); - host_membership_diagram_table[_p->state][_p->event](_p); - } - } else { - igmp_dbg("IGMP: state = %u (0: non-member - 1: delaying member - 2: idle member)\n", p->state); - host_membership_diagram_table[p->state][p->event](p); - } - - return 0; -} - -#else -static struct pico_queue igmp_in = { - 0 -}; -static struct pico_queue igmp_out = { - 0 -}; - -static int pico_igmp_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - IGNORE_PARAMETER(f); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -static int pico_igmp_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - IGNORE_PARAMETER(f); - return -1; -} - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_igmp = { - .name = "igmp", - .proto_number = PICO_PROTO_IGMP, - .layer = PICO_LAYER_TRANSPORT, - .process_in = pico_igmp_process_in, - .process_out = pico_igmp_process_out, - .q_in = &igmp_in, - .q_out = &igmp_out, -}; - -int pico_igmp_state_change(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state) -{ - IGNORE_PARAMETER(mcast_link); - IGNORE_PARAMETER(mcast_group); - IGNORE_PARAMETER(filter_mode); - IGNORE_PARAMETER(_MCASTFilter); - IGNORE_PARAMETER(state); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return 
-1; -} -#endif diff --git a/kernel/picotcp/modules/pico_igmp.h b/kernel/picotcp/modules/pico_igmp.h deleted file mode 100644 index 58aa6a9..0000000 --- a/kernel/picotcp/modules/pico_igmp.h +++ /dev/null @@ -1,26 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Kristof Roelants, Simon Maes, Brecht Van Cauwenberghe - *********************************************************************/ - -#ifndef INCLUDE_PICO_IGMP -#define INCLUDE_PICO_IGMP - -#define PICO_IGMPV1 1 -#define PICO_IGMPV2 2 -#define PICO_IGMPV3 3 - -#define PICO_IGMP_STATE_CREATE 1 -#define PICO_IGMP_STATE_UPDATE 2 -#define PICO_IGMP_STATE_DELETE 3 - -#define PICO_IGMP_QUERY_INTERVAL 125 - -extern struct pico_protocol pico_proto_igmp; - -int pico_igmp_state_change(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state); -#endif /* _INCLUDE_PICO_IGMP */ diff --git a/kernel/picotcp/modules/pico_ipfilter.c b/kernel/picotcp/modules/pico_ipfilter.c deleted file mode 100644 index e28775e..0000000 --- a/kernel/picotcp/modules/pico_ipfilter.c +++ /dev/null @@ -1,464 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Andrei Carp - Simon Maes - *********************************************************************/ - -#include "pico_ipv4.h" -#include "pico_config.h" -#include "pico_icmp4.h" -#include "pico_stack.h" -#include "pico_eth.h" -#include "pico_socket.h" -#include "pico_device.h" -#include "pico_ipfilter.h" -#include "pico_tcp.h" -#include "pico_udp.h" -#include "pico_tree.h" - -/**************** LOCAL MACROS ****************/ -#define MAX_PRIORITY (10) -#define MIN_PRIORITY (-10) - -#ifdef DEBUG_IPF - #define ipf_dbg dbg -#else - #define ipf_dbg(...) do {} while(0) -#endif - -/**************** LOCAL DECLARATIONS ****************/ -struct filter_node; -static int filter_compare(void *filterA, void *filterB); - -/**************** FILTER TREE ****************/ - -struct filter_node { - struct pico_device *fdev; - /* output address */ - uint32_t out_addr; - uint32_t out_addr_netmask; - /* input address */ - uint32_t in_addr; - uint32_t in_addr_netmask; - /* transport */ - uint16_t out_port; - uint16_t in_port; - /* filter details */ - uint8_t proto; - int8_t priority; - uint8_t tos; - uint32_t filter_id; - int (*function_ptr)(struct filter_node *filter, struct pico_frame *f); -}; - -static PICO_TREE_DECLARE(filter_tree, &filter_compare); - -static inline int ipfilter_uint32_cmp(uint32_t a, uint32_t b) -{ - if (a < b) - return -1; - - if (b < a) - return 1; - - return 0; -} - -static inline int ipfilter_uint16_cmp(uint16_t a, uint16_t b) -{ - if (a < b) - return -1; - - if (b < a) - return 1; - - return 0; -} - -static inline int ipfilter_uint8_cmp(uint8_t a, uint8_t b) -{ - if (a < b) - return -1; - - if (b < a) - return 1; - - return 0; -} - -static inline int ipfilter_ptr_cmp(void *a, void *b) -{ - if (a < b) - return -1; - - if (b < a) - return 1; - - return 0; -} - - - -static inline int filter_compare_ports(struct filter_node *a, struct filter_node *b) -{ - int cmp; - cmp = ipfilter_uint16_cmp(a->in_port, b->in_port); - if (cmp) - return cmp; - - cmp = 
ipfilter_uint16_cmp(a->out_port, b->out_port); - return cmp; -} - -static inline int filter_compare_addresses(struct filter_node *a, struct filter_node *b) -{ - int cmp; - /* Compare source address */ - cmp = ipfilter_uint32_cmp((a->in_addr & a->in_addr_netmask), (b->in_addr & b->in_addr_netmask)); - if (cmp) - return cmp; - - /* Compare destination address */ - cmp = ipfilter_uint32_cmp((a->out_addr & a->out_addr_netmask), (b->out_addr & b->out_addr_netmask)); - return cmp; -} - -static inline int filter_compare_proto(struct filter_node *a, struct filter_node *b) -{ - return ipfilter_uint8_cmp(a->proto, b->proto); -} - -static inline int filter_compare_address_port(struct filter_node *a, struct filter_node *b) -{ - int cmp; - cmp = filter_compare_addresses(a, b); - if (cmp) - return cmp; - - return filter_compare_ports(a, b); -} - -static inline int filter_match_packet_dev(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp; - /* 1. Compare devices */ - if (rule->fdev) { - cmp = ipfilter_ptr_cmp(a->fdev, b->fdev); - if (cmp) - return cmp; - } - - return 0; - -} - -static inline int filter_match_packet_proto(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp; - /* 2. Compare protocol */ - if (rule->proto) { - cmp = filter_compare_proto(a, b); - if (cmp) - return cmp; - } - - return 0; - -} -static inline int filter_match_packet_addr_in(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp; - /* 3. 
Compare addresses order: in, out */ - if (rule->in_addr_netmask) { - cmp = ipfilter_uint32_cmp(a->in_addr & rule->in_addr_netmask, b->in_addr & rule->in_addr_netmask); - if (cmp) - return cmp; - } - - return 0; -} -static inline int filter_match_packet_addr_out(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp; - if (rule->out_addr_netmask) { - cmp = ipfilter_uint32_cmp(a->out_addr & rule->out_addr_netmask, b->out_addr & rule->out_addr_netmask); - if (cmp) { - return cmp; - } - } - - return 0; -} -static inline int filter_match_packet_port_in(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp; - /* 4. Compare ports order: in, out */ - if (rule->in_port) { - cmp = ipfilter_uint16_cmp(a->in_port, b->in_port); - if (cmp) - return cmp; - } - - return 0; -} -static inline int filter_match_packet_port_out(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp; - if (rule->out_port) { - cmp = ipfilter_uint16_cmp(a->out_port, b->out_port); - if (cmp) - return cmp; - } - - return 0; -} - -static inline int filter_match_packet_dev_and_proto(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp = filter_match_packet_dev(a, b, rule); - if (cmp) - return cmp; - - return filter_match_packet_proto(a, b, rule); -} - -static inline int filter_match_packet_addr(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp = filter_match_packet_addr_in(a, b, rule); - if (cmp) - return cmp; - - return filter_match_packet_addr_out(a, b, rule); - -} - -static inline int filter_match_packet_port(struct filter_node *a, struct filter_node *b, struct filter_node *rule) -{ - int cmp = filter_match_packet_port_in(a, b, rule); - if (cmp) - return cmp; - - return filter_match_packet_port_out(a, b, rule); -} - -static inline struct filter_node *filter_match_packet_find_rule(struct filter_node *a, struct filter_node *b) -{ - if (!a->filter_id) - 
return b; - - return a; -} - -static inline int filter_match_packet(struct filter_node *a, struct filter_node *b) -{ - struct filter_node *rule; - int cmp = 0; - rule = filter_match_packet_find_rule(a, b); - - cmp = filter_match_packet_dev_and_proto(a, b, rule); - if (cmp) - return cmp; - - cmp = filter_match_packet_addr(a, b, rule); - if (cmp) - return cmp; - - cmp = filter_match_packet_port(a, b, rule); - if (cmp) - return cmp; - - return 0; -} - - -int filter_compare(void *filterA, void *filterB) -{ - - struct filter_node *a = (struct filter_node *)filterA; - struct filter_node *b = (struct filter_node *)filterB; - int cmp = 0; - if (a->filter_id == 0 || b->filter_id == 0) { - return filter_match_packet(a, b); - } - - /* improve the search */ - if(a->filter_id == b->filter_id) - return 0; - - /* 1. Compare devices */ - cmp = ipfilter_ptr_cmp(a->fdev, a->fdev); - if (cmp) - return cmp; - - /* 2. Compare protocol */ - cmp = filter_compare_proto(a, b); - if(cmp) - return cmp; - - /* 3. Compare addresses order: in, out */ - /* 4. 
Compare ports order: in, out */ - cmp = filter_compare_address_port(a, b); - - return cmp; -} - -/**************** FILTER CALLBACKS ****************/ - -static int fp_priority(struct filter_node *filter, struct pico_frame *f) -{ - /* TODO do priority-stuff */ - IGNORE_PARAMETER(filter); - IGNORE_PARAMETER(f); - return 0; -} - -static int fp_reject(struct filter_node *filter, struct pico_frame *f) -{ -/* TODO check first if sender is pico itself or not */ - IGNORE_PARAMETER(filter); - ipf_dbg("ipfilter> reject\n"); - (void)pico_icmp4_packet_filtered(f); - pico_frame_discard(f); - return 1; -} - -static int fp_drop(struct filter_node *filter, struct pico_frame *f) -{ - IGNORE_PARAMETER(filter); - ipf_dbg("ipfilter> drop\n"); - pico_frame_discard(f); - return 1; -} - -struct fp_function { - int (*fn)(struct filter_node *filter, struct pico_frame *f); -}; - - -static const struct fp_function fp_function[FILTER_COUNT] = -{ - {&fp_priority}, - {&fp_reject}, - {&fp_drop} -}; - -static int pico_ipv4_filter_add_validate(int8_t priority, enum filter_action action) -{ - if ( priority > MAX_PRIORITY || priority < MIN_PRIORITY) { - return -1; - } - - if (action >= FILTER_COUNT) { - return -1; - } - - return 0; -} - - -/**************** FILTER API's ****************/ -uint32_t pico_ipv4_filter_add(struct pico_device *dev, uint8_t proto, - struct pico_ip4 *out_addr, struct pico_ip4 *out_addr_netmask, - struct pico_ip4 *in_addr, struct pico_ip4 *in_addr_netmask, - uint16_t out_port, uint16_t in_port, int8_t priority, - uint8_t tos, enum filter_action action) -{ - static uint32_t filter_id = 1u; /* 0 is a special value used in the binary-tree search for packets being processed */ - struct filter_node *new_filter; - - if (pico_ipv4_filter_add_validate(priority, action) < 0) { - pico_err = PICO_ERR_EINVAL; - return 0; - } - - new_filter = PICO_ZALLOC(sizeof(struct filter_node)); - if (!new_filter) { - pico_err = PICO_ERR_ENOMEM; - return 0; - } - - new_filter->fdev = dev; - 
new_filter->proto = proto; - new_filter->out_addr = (!out_addr) ? (0U) : (out_addr->addr); - new_filter->out_addr_netmask = (!out_addr_netmask) ? (0U) : (out_addr_netmask->addr); - new_filter->in_addr = (!in_addr) ? (0U) : (in_addr->addr); - new_filter->in_addr_netmask = (!in_addr_netmask) ? (0U) : (in_addr_netmask->addr); - new_filter->out_port = out_port; - new_filter->in_port = in_port; - new_filter->priority = priority; - new_filter->tos = tos; - new_filter->filter_id = filter_id++; - new_filter->function_ptr = fp_function[action].fn; - - if(pico_tree_insert(&filter_tree, new_filter)) - { - PICO_FREE(new_filter); - filter_id--; - return 0; - } - - return new_filter->filter_id; -} - -int pico_ipv4_filter_del(uint32_t filter_id) -{ - struct filter_node *node = NULL; - struct filter_node dummy = { - 0 - }; - - dummy.filter_id = filter_id; - if((node = pico_tree_delete(&filter_tree, &dummy)) == NULL) - { - ipf_dbg("ipfilter> failed to delete filter :%d\n", filter_id); - return -1; - } - - PICO_FREE(node); - return 0; -} - -static int ipfilter_apply_filter(struct pico_frame *f, struct filter_node *pkt) -{ - struct filter_node *filter_frame = NULL; - filter_frame = pico_tree_findKey(&filter_tree, pkt); - if(filter_frame) - { - filter_frame->function_ptr(filter_frame, f); - return 1; - } - - return 0; -} - -int ipfilter(struct pico_frame *f) -{ - struct filter_node temp; - struct pico_ipv4_hdr *ipv4_hdr = (struct pico_ipv4_hdr *) f->net_hdr; - struct pico_trans *trans; - struct pico_icmp4_hdr *icmp_hdr; - - memset(&temp, 0u, sizeof(struct filter_node)); - - temp.fdev = f->dev; - temp.out_addr = ipv4_hdr->dst.addr; - temp.in_addr = ipv4_hdr->src.addr; - if ((ipv4_hdr->proto == PICO_PROTO_TCP) || (ipv4_hdr->proto == PICO_PROTO_UDP)) { - trans = (struct pico_trans *) f->transport_hdr; - temp.out_port = short_be(trans->dport); - temp.in_port = short_be(trans->sport); - } - else if(ipv4_hdr->proto == PICO_PROTO_ICMP4) { - icmp_hdr = (struct pico_icmp4_hdr *) 
f->transport_hdr; - if(icmp_hdr->type == PICO_ICMP_UNREACH && icmp_hdr->code == PICO_ICMP_UNREACH_FILTER_PROHIB) - return 0; - } - - temp.proto = ipv4_hdr->proto; - temp.priority = f->priority; - temp.tos = ipv4_hdr->tos; - return ipfilter_apply_filter(f, &temp); -} - diff --git a/kernel/picotcp/modules/pico_ipfilter.h b/kernel/picotcp/modules/pico_ipfilter.h deleted file mode 100644 index c966b28..0000000 --- a/kernel/picotcp/modules/pico_ipfilter.h +++ /dev/null @@ -1,29 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Simon Maes - *********************************************************************/ -#ifndef INCLUDE_PICO_IPFILTER -#define INCLUDE_PICO_IPFILTER - -#include "pico_device.h" - -enum filter_action { - FILTER_PRIORITY = 0, - FILTER_REJECT, - FILTER_DROP, - FILTER_COUNT -}; - -uint32_t pico_ipv4_filter_add(struct pico_device *dev, uint8_t proto, - struct pico_ip4 *out_addr, struct pico_ip4 *out_addr_netmask, struct pico_ip4 *in_addr, - struct pico_ip4 *in_addr_netmask, uint16_t out_port, uint16_t in_port, - int8_t priority, uint8_t tos, enum filter_action action); - -int pico_ipv4_filter_del(uint32_t filter_id); - -int ipfilter(struct pico_frame *f); - -#endif /* _INCLUDE_PICO_IPFILTER */ - diff --git a/kernel/picotcp/modules/pico_ipv4.c b/kernel/picotcp/modules/pico_ipv4.c deleted file mode 100644 index fda8178..0000000 --- a/kernel/picotcp/modules/pico_ipv4.c +++ /dev/null @@ -1,1658 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Daniele Lacamera, Markian Yskout - *********************************************************************/ - - -#include "pico_config.h" -#include "pico_ipfilter.h" -#include "pico_ipv4.h" -#include "pico_icmp4.h" -#include "pico_stack.h" -#include "pico_eth.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_socket.h" -#include "pico_device.h" -#include "pico_nat.h" -#include "pico_igmp.h" -#include "pico_tree.h" -#include "pico_aodv.h" -#include "pico_socket_multicast.h" -#include "pico_fragments.h" -#include "pico_ethernet.h" -#include "pico_mcast.h" - -#ifdef PICO_SUPPORT_IPV4 - -#ifdef PICO_SUPPORT_MCAST - -#ifdef DEBUG_MCAST -#define ip_mcast_dbg dbg -#else -#define ip_mcast_dbg(...) do {} while(0) -#endif - -# define PICO_MCAST_ALL_HOSTS long_be(0xE0000001) /* 224.0.0.1 */ -/* Default network interface for multicast transmission */ -static struct pico_ipv4_link *mcast_default_link = NULL; -#endif - -/* Queues */ -static struct pico_queue in = { - 0 -}; -static struct pico_queue out = { - 0 -}; - -/* Functions */ -static int ipv4_route_compare(void *ka, void *kb); -static struct pico_frame *pico_ipv4_alloc(struct pico_protocol *self, struct pico_device *dev, uint16_t size); - - -int pico_ipv4_compare(struct pico_ip4 *a, struct pico_ip4 *b) -{ - if (a->addr < b->addr) - return -1; - - if (a->addr > b->addr) - return 1; - - return 0; -} - -int pico_ipv4_to_string(char *ipbuf, const uint32_t ip) -{ - const unsigned char *addr = (const unsigned char *) &ip; - int i; - - if (!ipbuf) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - for(i = 0; i < 4; i++) - { - if (addr[i] > 99) { - *ipbuf++ = (char)('0' + (addr[i] / 100)); - *ipbuf++ = (char)('0' + ((addr[i] % 100) / 10)); - *ipbuf++ = (char)('0' + ((addr[i] % 100) % 10)); - } else if (addr[i] > 9) { - *ipbuf++ = (char)('0' + (addr[i] / 10)); - *ipbuf++ = (char)('0' + (addr[i] % 10)); - } else { - *ipbuf++ = (char)('0' + addr[i]); - } - - if (i < 3) - *ipbuf++ = '.'; - } - *ipbuf = 
'\0'; - - return 0; -} - -static int pico_string_check_null_args(const char *ipstr, uint32_t *ip) -{ - - if (!ipstr || !ip) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; - -} - -int pico_string_to_ipv4(const char *ipstr, uint32_t *ip) -{ - unsigned char buf[PICO_SIZE_IP4] = { - 0 - }; - int cnt = 0; - char p; - - if (pico_string_check_null_args(ipstr, ip) < 0) - return -1; - - while((p = *ipstr++) != 0 && cnt < PICO_SIZE_IP4) - { - if (pico_is_digit(p)) { - buf[cnt] = (uint8_t)((10 * buf[cnt]) + (p - '0')); - } else if (p == '.') { - cnt++; - } else { - return -1; - } - } - /* Handle short notation */ - if (cnt == 1) { - buf[3] = buf[1]; - buf[1] = 0; - buf[2] = 0; - } else if (cnt == 2) { - buf[3] = buf[2]; - buf[2] = 0; - } else if (cnt != 3) { - /* String could not be parsed, return error */ - return -1; - } - - *ip = long_from(buf); - - return 0; -} - -int pico_ipv4_valid_netmask(uint32_t mask) -{ - int cnt = 0; - int end = 0; - int i; - uint32_t mask_swap = long_be(mask); - - /* - * Swap bytes for convenient parsing - * e.g. 0x..f8ff will become 0xfff8.. 
- * Then, we count the consecutive bits - * - * */ - - for(i = 0; i < 32; i++) { - if ((mask_swap << i) & 0x80000000) { - if (end) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - cnt++; - } else { - end = 1; - } - } - return cnt; -} - -int pico_ipv4_is_unicast(uint32_t address) -{ - const unsigned char *addr = (unsigned char *) &address; - if ((addr[0] & 0xe0) == 0xe0) - return 0; /* multicast */ - - return 1; -} - -int pico_ipv4_is_multicast(uint32_t address) -{ - const unsigned char *addr = (unsigned char *) &address; - if ((addr[0] != 0xff) && ((addr[0] & 0xe0) == 0xe0)) - return 1; /* multicast */ - - return 0; -} - -int pico_ipv4_is_loopback(uint32_t address) -{ - const unsigned char *addr = (unsigned char *) &address; - if (addr[0] == 0x7f) - return 1; - - return 0; -} - -static int pico_ipv4_is_invalid_loopback(uint32_t address, struct pico_device *dev) -{ - return pico_ipv4_is_loopback(address) && ((!dev) || strcmp(dev->name, "loop")); -} - -int pico_ipv4_is_valid_src(uint32_t address, struct pico_device *dev) -{ - if (pico_ipv4_is_broadcast(address)) { - dbg("Source is a broadcast address, discard packet\n"); - return 0; - } else if ( pico_ipv4_is_multicast(address)) { - dbg("Source is a multicast address, discard packet\n"); - return 0; - } else if (pico_ipv4_is_invalid_loopback(address, dev)) { - dbg("Source is a loopback address, discard packet\n"); - return 0; - } else { -#ifdef PICO_SUPPORT_AODV - union pico_address src; - src.ip4.addr = address; - pico_aodv_refresh(&src); -#endif - return 1; - } -} - -static int pico_ipv4_checksum(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - if (!hdr) - return -1; - - hdr->crc = 0; - hdr->crc = short_be(pico_checksum(hdr, f->net_len)); - return 0; -} - - -#ifdef PICO_SUPPORT_CRC -static inline int pico_ipv4_crc_check(struct pico_frame *f) -{ - uint16_t checksum_invalid = 1; - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - - checksum_invalid = 
short_be(pico_checksum(hdr, f->net_len)); - if (checksum_invalid) { - dbg("IP: checksum failed!\n"); - pico_frame_discard(f); - return 0; - } - - return 1; -} -#else -static inline int pico_ipv4_crc_check(struct pico_frame *f) -{ - IGNORE_PARAMETER(f); - return 1; -} -#endif /* PICO_SUPPORT_CRC */ - -static int pico_ipv4_forward(struct pico_frame *f); -#ifdef PICO_SUPPORT_MCAST -static int pico_ipv4_mcast_filter(struct pico_frame *f); -#endif - -static int ipv4_link_compare(void *ka, void *kb) -{ - struct pico_ipv4_link *a = ka, *b = kb; - int cmp = pico_ipv4_compare(&a->address, &b->address); - if (cmp) - return cmp; - - /* zero can be assigned multiple times (e.g. for DHCP) */ - if (a->dev != NULL && b->dev != NULL && a->address.addr == PICO_IP4_ANY && b->address.addr == PICO_IP4_ANY) { - if (a->dev < b->dev) - return -1; - - if (a->dev > b->dev) - return 1; - } - - return 0; -} - -static PICO_TREE_DECLARE(Tree_dev_link, ipv4_link_compare); - -static int pico_ipv4_process_bcast_in(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; -#ifdef PICO_SUPPORT_UDP - if (pico_ipv4_is_broadcast(hdr->dst.addr) && (hdr->proto == PICO_PROTO_UDP)) { - /* Receiving UDP broadcast datagram */ - f->flags |= PICO_FRAME_FLAG_BCAST; - pico_enqueue(pico_proto_udp.q_in, f); - return 1; - } - -#endif - -#ifdef PICO_SUPPORT_ICMP4 - if (pico_ipv4_is_broadcast(hdr->dst.addr) && (hdr->proto == PICO_PROTO_ICMP4)) { - /* Receiving ICMP4 bcast packet */ - f->flags |= PICO_FRAME_FLAG_BCAST; - pico_enqueue(pico_proto_icmp4.q_in, f); - return 1; - } - -#endif - return 0; -} - -static int pico_ipv4_process_mcast_in(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - if (pico_ipv4_is_multicast(hdr->dst.addr)) { -#ifdef PICO_SUPPORT_IGMP - /* Receiving UDP multicast datagram TODO set f->flags? 
*/ - if (hdr->proto == PICO_PROTO_IGMP) { - ip_mcast_dbg("MCAST: received IGMP message\n"); - pico_transport_receive(f, PICO_PROTO_IGMP); - return 1; - } else if ((pico_ipv4_mcast_filter(f) == 0) && (hdr->proto == PICO_PROTO_UDP)) { - pico_enqueue(pico_proto_udp.q_in, f); - return 1; - } - -#endif - pico_frame_discard(f); - return 1; - } - - return 0; -} - -static int pico_ipv4_process_local_unicast_in(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - struct pico_ipv4_link test = { - .address = {.addr = PICO_IP4_ANY}, .dev = NULL - }; - if (pico_ipv4_link_find(&hdr->dst)) { - if (pico_ipv4_nat_inbound(f, &hdr->dst) == 0) - pico_enqueue(pico_proto_ipv4.q_in, f); /* dst changed, reprocess */ - else - pico_transport_receive(f, hdr->proto); - - return 1; - } else if (pico_tree_findKey(&Tree_dev_link, &test)) { -#ifdef PICO_SUPPORT_UDP - /* address of this device is apparently 0.0.0.0; might be a DHCP packet */ - /* XXX KRO: is obsolete. Broadcast flag is set on outgoing DHCP messages. - * incomming DHCP messages are to be broadcasted. Our current DHCP server - * implementation does not take this flag into account yet though ... */ - pico_enqueue(pico_proto_udp.q_in, f); - return 1; -#endif - } - - return 0; -} - -static void pico_ipv4_process_finally_try_forward(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - if ((pico_ipv4_is_broadcast(hdr->dst.addr)) || ((f->flags & PICO_FRAME_FLAG_BCAST) != 0)) { - /* don't forward broadcast frame, discard! 
*/ - pico_frame_discard(f); - } else if (pico_ipv4_forward(f) != 0) { - pico_frame_discard(f); - /* dbg("Forward failed.\n"); */ - } -} - - - -static int pico_ipv4_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - uint8_t option_len = 0; - int ret = 0; - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - uint16_t max_allowed = (uint16_t) ((int)f->buffer_len - (f->net_hdr - f->buffer) - (int)PICO_SIZE_IP4HDR); - - if (!hdr) - return -1; - - (void)self; - - /* NAT needs transport header information */ - if (((hdr->vhl) & 0x0F) > 5) { - option_len = (uint8_t)(4 * (((hdr->vhl) & 0x0F) - 5)); - } - - f->transport_hdr = ((uint8_t *)f->net_hdr) + PICO_SIZE_IP4HDR + option_len; - f->transport_len = (uint16_t)(short_be(hdr->len) - PICO_SIZE_IP4HDR - option_len); - f->net_len = (uint16_t)(PICO_SIZE_IP4HDR + option_len); -#if defined(PICO_SUPPORT_IPV4FRAG) || defined(PICO_SUPPORT_IPV6FRAG) - f->frag = short_be(hdr->frag); -#endif - - if (f->transport_len > max_allowed) { - pico_frame_discard(f); - return 0; /* Packet is discarded due to unfeasible length */ - } - -#ifdef PICO_SUPPORT_IPFILTER - if (ipfilter(f)) { - /*pico_frame is discarded as result of the filtering*/ - return 0; - } - -#endif - - - /* ret == 1 indicates to continue the function */ - ret = pico_ipv4_crc_check(f); - if (ret < 1) - return ret; - - /* Validate source IP address. 
Discard quietly if invalid */ - if (!pico_ipv4_is_valid_src(hdr->src.addr, f->dev)) { - pico_frame_discard(f); - return 0; - } - -#if defined(PICO_SUPPORT_IPV4FRAG) || defined(PICO_SUPPORT_IPV6FRAG) - if (f->frag & PICO_IPV4_EVIL) { - (void)pico_icmp4_param_problem(f, 0); - pico_frame_discard(f); /* RFC 3514 */ - return 0; - } -#endif - - if ((hdr->vhl & 0x0f) < 5) { - /* RFC 791: IHL minimum value is 5 */ - (void)pico_icmp4_param_problem(f, 0); - pico_frame_discard(f); - return 0; - } - -#if defined(PICO_SUPPORT_IPV4FRAG) || defined(PICO_SUPPORT_IPV6FRAG) - if (f->frag & (PICO_IPV4_MOREFRAG | PICO_IPV4_FRAG_MASK)) - { -#ifdef PICO_SUPPORT_IPV4FRAG - pico_ipv4_process_frag(hdr, f, hdr->proto); - /* Frame can be discarded, frag will handle its own copy */ -#endif - /* We do not support fragmentation, discard quietly */ - pico_frame_discard(f); - return 0; - } -#endif - - if (pico_ipv4_process_bcast_in(f) > 0) - return 0; - - if (pico_ipv4_process_mcast_in(f) > 0) - return 0; - - if (pico_ipv4_process_local_unicast_in(f) > 0) - return 0; - - pico_ipv4_process_finally_try_forward(f); - - return 0; -} - -PICO_TREE_DECLARE(Routes, ipv4_route_compare); - - -static int pico_ipv4_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - f->start = (uint8_t*) f->net_hdr; -#ifdef PICO_SUPPORT_IPFILTER - if (ipfilter(f)) { - /*pico_frame is discarded as result of the filtering*/ - return 0; - } - -#endif - return pico_datalink_send(f); -} - - -static struct pico_frame *pico_ipv4_alloc(struct pico_protocol *self, struct pico_device *dev, uint16_t size) -{ - struct pico_frame *f = NULL; - IGNORE_PARAMETER(self); - - f = pico_proto_ethernet.alloc(&pico_proto_ethernet, dev, (uint16_t)(size + PICO_SIZE_IP4HDR)); - /* TODO: In 6LoWPAN topic branch update to make use of dev->ll_mode */ - - if (!f) - return NULL; - - f->net_len = PICO_SIZE_IP4HDR; - f->transport_hdr = f->net_hdr + PICO_SIZE_IP4HDR; - f->transport_len = (uint16_t)size; - - /* 
Datalink size is accounted for in pico_datalink_send (link layer) */ - f->len = (uint32_t)(size + PICO_SIZE_IP4HDR); - - return f; -} - -static int pico_ipv4_frame_sock_push(struct pico_protocol *self, struct pico_frame *f); - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_ipv4 = { - .name = "ipv4", - .proto_number = PICO_PROTO_IPV4, - .layer = PICO_LAYER_NETWORK, - .alloc = pico_ipv4_alloc, - .process_in = pico_ipv4_process_in, - .process_out = pico_ipv4_process_out, - .push = pico_ipv4_frame_sock_push, - .q_in = &in, - .q_out = &out, -}; - - -static int ipv4_route_compare(void *ka, void *kb) -{ - struct pico_ipv4_route *a = ka, *b = kb; - uint32_t a_nm, b_nm; - int cmp; - - a_nm = long_be(a->netmask.addr); - b_nm = long_be(b->netmask.addr); - - /* Routes are sorted by (host side) netmask len, then by addr, then by metric. */ - if (a_nm < b_nm) - return -1; - - if (b_nm < a_nm) - return 1; - - cmp = pico_ipv4_compare(&a->dest, &b->dest); - if (cmp) - return cmp; - - if (a->metric < b->metric) - return -1; - - if (a->metric > b->metric) - return 1; - - return 0; -} - - -static struct pico_ipv4_route default_bcast_route = { - .dest = {PICO_IP4_BCAST}, - .netmask = {PICO_IP4_BCAST}, - .gateway = { 0 }, - .link = NULL, - .metric = 1000 -}; - -static struct pico_ipv4_route *route_find_default_bcast(void) -{ - return &default_bcast_route; -} - - -static struct pico_ipv4_route *route_find(const struct pico_ip4 *addr) -{ - struct pico_ipv4_route *r; - struct pico_tree_node *index; - - if (addr->addr == PICO_IP4_ANY) { - return NULL; - } - - if (addr->addr != PICO_IP4_BCAST) { - pico_tree_foreach_reverse(index, &Routes) { - r = index->keyValue; - if ((addr->addr & (r->netmask.addr)) == (r->dest.addr)) { - return r; - } - } - return NULL; - } - - return route_find_default_bcast(); -} - -struct pico_ip4 pico_ipv4_route_get_gateway(struct pico_ip4 *addr) -{ - struct pico_ip4 nullip; - struct pico_ipv4_route *route; - nullip.addr = 0U; - - if (!addr) { 
- pico_err = PICO_ERR_EINVAL; - return nullip; - } - - route = route_find(addr); - if (!route) { - pico_err = PICO_ERR_EHOSTUNREACH; - return nullip; - } - else - return route->gateway; -} - -struct pico_ip4 *pico_ipv4_source_find(const struct pico_ip4 *dst) -{ - struct pico_ip4 *myself = NULL; - struct pico_ipv4_route *rt; -#ifdef PICO_SUPPORT_AODV - union pico_address node_address; -#endif - - if (!dst) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - -#ifdef PICO_SUPPORT_AODV - node_address.ip4.addr = dst->addr; - if (dst->addr && pico_ipv4_is_unicast(dst->addr)) - pico_aodv_lookup(&node_address); - -#endif - - rt = route_find(dst); - if (rt && rt->link) { - myself = &rt->link->address; - } else { - pico_err = PICO_ERR_EHOSTUNREACH; - } - - return myself; -} - -struct pico_device *pico_ipv4_source_dev_find(const struct pico_ip4 *dst) -{ - struct pico_device *dev = NULL; - struct pico_ipv4_route *rt; - - if (!dst) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - rt = route_find(dst); - if (rt && rt->link) { - dev = rt->link->dev; - } else { - pico_err = PICO_ERR_EHOSTUNREACH; - } - - return dev; -} - - -#ifdef PICO_SUPPORT_MCAST -/* link - * | - * MCASTGroups - * | | | - * ------------ | ------------ - * | | | - * MCASTSources MCASTSources MCASTSources - * | | | | | | | | | | | | - * S S S S S S S S S S S S - * - * MCASTGroups: RBTree(mcast_group) - * MCASTSources: RBTree(source) - */ -static int ipv4_mcast_groups_cmp(void *ka, void *kb) -{ - struct pico_mcast_group *a = ka, *b = kb; - return pico_ipv4_compare(&a->mcast_addr.ip4, &b->mcast_addr.ip4); -} - -static int ipv4_mcast_sources_cmp(void *ka, void *kb) -{ - struct pico_ip4 *a = ka, *b = kb; - return pico_ipv4_compare(a, b); -} - -static void pico_ipv4_mcast_print_groups(struct pico_ipv4_link *mcast_link) -{ - uint16_t i = 0; - struct pico_mcast_group *g = NULL; - struct pico_ip4 *source = NULL; - struct pico_tree_node *index = NULL, *index2 = NULL; - (void) source; - - 
ip_mcast_dbg("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - ip_mcast_dbg("+ MULTICAST list interface %-16s +\n", mcast_link->dev->name); - ip_mcast_dbg("+---------------------------------------------------------------------------------+\n"); - ip_mcast_dbg("+ nr | interface | host group | reference count | filter mode | source +\n"); - ip_mcast_dbg("+---------------------------------------------------------------------------------+\n"); - - pico_tree_foreach(index, mcast_link->MCASTGroups) { - g = index->keyValue; - ip_mcast_dbg("+ %04d | %16s | %08X | %05u | %u | %8s +\n", i, mcast_link->dev->name, g->mcast_addr.ip4.addr, g->reference_count, g->filter_mode, ""); - pico_tree_foreach(index2, &g->MCASTSources) { - source = index2->keyValue; - ip_mcast_dbg("+ %4s | %16s | %8s | %5s | %s | %08X +\n", "", "", "", "", "", source->addr); - } - i++; - } - ip_mcast_dbg("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); -} - -static int mcast_group_update(struct pico_mcast_group *g, struct pico_tree *MCASTFilter, uint8_t filter_mode) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ip4 *source = NULL; - /* cleanup filter */ - pico_tree_foreach_safe(index, &g->MCASTSources, _tmp) { - source = index->keyValue; - pico_tree_delete(&g->MCASTSources, source); - PICO_FREE(source); - } - /* insert new filter */ - if (MCASTFilter) { - pico_tree_foreach(index, MCASTFilter) { - if (index->keyValue) { - source = PICO_ZALLOC(sizeof(struct pico_ip4)); - if (!source) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - source->addr = ((struct pico_ip4 *)index->keyValue)->addr; - if (pico_tree_insert(&g->MCASTSources, source)) { - dbg("IPv4: Failed to insert source in tree\n"); - PICO_FREE(source); - return -1; - } - } - } - } - - g->filter_mode = filter_mode; - return 0; -} - -int pico_ipv4_mcast_join(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t reference_count, 
uint8_t filter_mode, struct pico_tree *MCASTFilter) -{ - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_ipv4_link *link = NULL; - - if (mcast_link) - link = pico_ipv4_link_get(mcast_link); - - if (!link) - link = mcast_default_link; - - test.mcast_addr.ip4 = *mcast_group; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (g) { - if (reference_count) - g->reference_count++; - -#ifdef PICO_SUPPORT_IGMP - pico_igmp_state_change(mcast_link, mcast_group, filter_mode, MCASTFilter, PICO_IGMP_STATE_UPDATE); -#endif - } else { - g = PICO_ZALLOC(sizeof(struct pico_mcast_group)); - if (!g) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - /* "non-existent" state of filter mode INCLUDE and empty source list */ - g->filter_mode = PICO_IP_MULTICAST_INCLUDE; - g->reference_count = 1; - g->mcast_addr.ip4 = *mcast_group; - g->MCASTSources.root = &LEAF; - g->MCASTSources.compare = ipv4_mcast_sources_cmp; - if (pico_tree_insert(link->MCASTGroups, g)) { - dbg("IPv4: Failed to insert group in tree\n"); - PICO_FREE(g); - return -1; - } - -#ifdef PICO_SUPPORT_IGMP - pico_igmp_state_change(mcast_link, mcast_group, filter_mode, MCASTFilter, PICO_IGMP_STATE_CREATE); -#endif - } - - if (mcast_group_update(g, MCASTFilter, filter_mode) < 0) { - dbg("Error in mcast_group update\n"); - return -1; - } - - pico_ipv4_mcast_print_groups(link); - return 0; -} - -int pico_ipv4_mcast_leave(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *MCASTFilter) -{ - - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_ipv4_link *link = NULL; - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ip4 *source = NULL; - - if (mcast_link) - link = pico_ipv4_link_get(mcast_link); - - if (!link) - link = mcast_default_link; - - if (!link) - return -1; - - test.mcast_addr.ip4 = *mcast_group; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (!g) { - pico_err = PICO_ERR_EINVAL; - return 
-1; - } else { - if (reference_count && (--(g->reference_count) < 1)) { -#ifdef PICO_SUPPORT_IGMP - pico_igmp_state_change(mcast_link, mcast_group, filter_mode, MCASTFilter, PICO_IGMP_STATE_DELETE); -#endif - /* cleanup filter */ - pico_tree_foreach_safe(index, &g->MCASTSources, _tmp) { - source = index->keyValue; - pico_tree_delete(&g->MCASTSources, source); - PICO_FREE(source); - } - pico_tree_delete(link->MCASTGroups, g); - PICO_FREE(g); - } else { -#ifdef PICO_SUPPORT_IGMP - pico_igmp_state_change(mcast_link, mcast_group, filter_mode, MCASTFilter, PICO_IGMP_STATE_UPDATE); -#endif - if (mcast_group_update(g, MCASTFilter, filter_mode) < 0) - return -1; - } - } - - pico_ipv4_mcast_print_groups(link); - return 0; -} - -struct pico_ipv4_link *pico_ipv4_get_default_mcastlink(void) -{ - return mcast_default_link; -} - -static int pico_ipv4_mcast_filter(struct pico_frame *f) -{ - struct pico_ipv4_link *link = NULL; - struct pico_tree_node *index = NULL, *index2 = NULL; - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - - test.mcast_addr.ip4 = hdr->dst; - - pico_tree_foreach(index, &Tree_dev_link) { - link = index->keyValue; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (g) { - if (f->dev == link->dev) { - ip_mcast_dbg("MCAST: IP %08X is group member of current link %s\n", hdr->dst.addr, f->dev->name); - /* perform source filtering */ - switch (g->filter_mode) { - case PICO_IP_MULTICAST_INCLUDE: - pico_tree_foreach(index2, &g->MCASTSources) { - if (hdr->src.addr == ((struct pico_ip4 *)index2->keyValue)->addr) { - ip_mcast_dbg("MCAST: IP %08X in included interface source list\n", hdr->src.addr); - return 0; - } - } - ip_mcast_dbg("MCAST: IP %08X NOT in included interface source list\n", hdr->src.addr); - return -1; - - case PICO_IP_MULTICAST_EXCLUDE: - pico_tree_foreach(index2, &g->MCASTSources) { - if (hdr->src.addr == ((struct pico_ip4 *)index2->keyValue)->addr) { - ip_mcast_dbg("MCAST: 
IP %08X in excluded interface source list\n", hdr->src.addr); - return -1; - } - } - ip_mcast_dbg("MCAST: IP %08X NOT in excluded interface source list\n", hdr->src.addr); - return 0; - - default: - return -1; - } - } else { - ip_mcast_dbg("MCAST: IP %08X is group member of different link %s\n", hdr->dst.addr, link->dev->name); - } - } else { - ip_mcast_dbg("MCAST: IP %08X is not a group member of link %s\n", hdr->dst.addr, f->dev->name); - } - } - return -1; -} - -#else - -int pico_ipv4_mcast_join(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *MCASTFilter) -{ - IGNORE_PARAMETER(mcast_link); - IGNORE_PARAMETER(mcast_group); - IGNORE_PARAMETER(reference_count); - IGNORE_PARAMETER(filter_mode); - IGNORE_PARAMETER(MCASTFilter); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -int pico_ipv4_mcast_leave(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *MCASTFilter) -{ - IGNORE_PARAMETER(mcast_link); - IGNORE_PARAMETER(mcast_group); - IGNORE_PARAMETER(reference_count); - IGNORE_PARAMETER(filter_mode); - IGNORE_PARAMETER(MCASTFilter); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -struct pico_ipv4_link *pico_ipv4_get_default_mcastlink(void) -{ - pico_err = PICO_ERR_EPROTONOSUPPORT; - return NULL; -} -#endif /* PICO_SUPPORT_MCAST */ - -/* #define DEBUG_ROUTE */ -#ifdef DEBUG_ROUTE -void dbg_route(void) -{ - struct pico_ipv4_route *r; - struct pico_tree_node *index; - int count_hosts = 0; - dbg("==== ROUTING TABLE =====\n"); - pico_tree_foreach(index, &Routes) { - r = index->keyValue; - dbg("Route to %08x/%08x, gw %08x, dev: %s, metric: %d\n", r->dest.addr, r->netmask.addr, r->gateway.addr, r->link->dev->name, r->metric); - if (r->netmask.addr == 0xFFFFFFFF) - count_hosts++; - } - dbg("================ total HOST nodes: %d ======\n\n\n", count_hosts); -} -#else -#define dbg_route() do { } while(0) -#endif - 
-int pico_ipv4_frame_push(struct pico_frame *f, struct pico_ip4 *dst, uint8_t proto) -{ - - struct pico_ipv4_route *route; - struct pico_ipv4_link *link; - struct pico_ipv4_hdr *hdr; - uint8_t ttl = PICO_IPV4_DEFAULT_TTL; - uint8_t vhl = 0x45; /* version 4, header length 20 */ - int32_t retval = 0; - static uint16_t ipv4_progressive_id = 0x91c0; -#ifdef PICO_SUPPORT_MCAST - struct pico_tree_node *index; -#endif - - if (!f || !dst) { - pico_err = PICO_ERR_EINVAL; - goto drop; - } - - - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - if (!hdr) { - dbg("IP header error\n"); - pico_err = PICO_ERR_EINVAL; - goto drop; - } - - if (dst->addr == 0) { - dbg("IP destination addr error\n"); - pico_err = PICO_ERR_EINVAL; - goto drop; - } - - route = route_find(dst); - if (!route) { - /* dbg("Route to %08x not found.\n", long_be(dst->addr)); */ - - - pico_err = PICO_ERR_EHOSTUNREACH; - goto drop; - } else { - link = route->link; -#ifdef PICO_SUPPORT_MCAST - if (pico_ipv4_is_multicast(dst->addr)) { /* if multicast */ - switch (proto) { - case PICO_PROTO_UDP: - if (pico_udp_get_mc_ttl(f->sock, &ttl) < 0) - ttl = PICO_IP_DEFAULT_MULTICAST_TTL; - - break; -#ifdef PICO_SUPPORT_IGMP - case PICO_PROTO_IGMP: - vhl = 0x46; /* header length 24 */ - ttl = 1; - /* router alert (RFC 2113) */ - hdr->options[0] = 0x94; - hdr->options[1] = 0x04; - hdr->options[2] = 0x00; - hdr->options[3] = 0x00; - if (f->dev && link->dev != f->dev) { /* default link is not requested link */ - pico_tree_foreach(index, &Tree_dev_link) { - link = index->keyValue; - if (link->dev == f->dev) - break; - } - } - - break; -#endif - default: - ttl = PICO_IPV4_DEFAULT_TTL; - } - } - -#endif - } - - hdr->vhl = vhl; - hdr->len = short_be((uint16_t)(f->transport_len + f->net_len)); - hdr->id = short_be(ipv4_progressive_id); - - if ( -#ifdef PICO_SUPPORT_IPV4FRAG - (0 == (f->frag & PICO_IPV4_MOREFRAG)) && -#endif - 1 ) - ipv4_progressive_id++; - - if (f->send_ttl > 0) { - ttl = f->send_ttl; - } - - hdr->dst.addr = dst->addr; 
- hdr->src.addr = link->address.addr; - hdr->ttl = ttl; - hdr->tos = f->send_tos; - hdr->proto = proto; - hdr->frag = short_be(PICO_IPV4_DONTFRAG); - -#ifdef PICO_SUPPORT_IPV4FRAG -# ifdef PICO_SUPPORT_UDP - if (proto == PICO_PROTO_UDP) { - /* first fragment, can not use transport_len to calculate IP length */ - if (f->transport_hdr != f->payload) - hdr->len = short_be((uint16_t)(f->payload_len + sizeof(struct pico_udp_hdr) + f->net_len)); - - /* set fragmentation flags and offset calculated in socket layer */ - hdr->frag = short_be(f->frag); - } - - if (proto == PICO_PROTO_ICMP4) - { - hdr->frag = short_be(f->frag); - } - -# endif -#endif /* PICO_SUPPORT_IPV4FRAG */ - pico_ipv4_checksum(f); - - if (f->sock && f->sock->dev) { - /* if the socket has its device set, use that (currently used for DHCP) */ - f->dev = f->sock->dev; - } else { - f->dev = link->dev; - if (f->sock) - f->sock->dev = f->dev; - } - -#ifdef PICO_SUPPORT_MCAST - if (pico_ipv4_is_multicast(hdr->dst.addr)) { - struct pico_frame *cpy; - /* Sending UDP multicast datagram, am I member? 
If so, loopback copy */ - if ((proto != PICO_PROTO_IGMP) && (pico_ipv4_mcast_filter(f) == 0)) { - ip_mcast_dbg("MCAST: sender is member of group, loopback copy\n"); - cpy = pico_frame_copy(f); - if (!cpy) { - pico_err = PICO_ERR_ENOMEM; - ip_mcast_dbg("MCAST: Failed to copy frame\n"); - goto drop; - } - - retval = pico_enqueue(&in, cpy); - if (retval <= 0) - pico_frame_discard(cpy); - } - } - -#endif - -/* #ifdef PICO_SUPPORT_AODV */ -#if 0 - { - union pico_address node_address; - node_address.ip4.addr = hdr->dst.addr; - if(hdr->dst.addr && pico_ipv4_is_unicast(hdr->dst.addr)) - pico_aodv_lookup(&node_address); - } -#endif - - if (pico_ipv4_link_get(&hdr->dst)) { - /* it's our own IP */ - retval = pico_enqueue(&in, f); - if (retval > 0) - return retval; - } else{ - /* TODO: Check if there are members subscribed here */ - retval = pico_enqueue(&out, f); - if (retval > 0) - return retval; - } - -drop: - pico_frame_discard(f); - return -1; -} - - -static int pico_ipv4_frame_sock_push(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_ip4 *dst; - struct pico_remote_endpoint *remote_endpoint = (struct pico_remote_endpoint *) f->info; - IGNORE_PARAMETER(self); - - if (!f->sock) { - pico_frame_discard(f); - return -1; - } - - if (remote_endpoint) { - dst = &remote_endpoint->remote_addr.ip4; - } else { - dst = &f->sock->remote_addr.ip4; - } - - return pico_ipv4_frame_push(f, dst, (uint8_t)f->sock->proto->proto_number); -} - - -int MOCKABLE pico_ipv4_route_add(struct pico_ip4 address, struct pico_ip4 netmask, struct pico_ip4 gateway, int metric, struct pico_ipv4_link *link) -{ - struct pico_ipv4_route test, *new; - test.dest.addr = address.addr; - test.netmask.addr = netmask.addr; - test.metric = (uint32_t)metric; - - if (pico_tree_findKey(&Routes, &test)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - new = PICO_ZALLOC(sizeof(struct pico_ipv4_route)); - if (!new) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - new->dest.addr = address.addr; - 
new->netmask.addr = netmask.addr; - new->gateway.addr = gateway.addr; - new->metric = (uint32_t)metric; - if (gateway.addr == 0) { - /* No gateway provided, use the link */ - new->link = link; - } else { - struct pico_ipv4_route *r = route_find(&gateway); - if (!r ) { /* Specified Gateway is unreachable */ - pico_err = PICO_ERR_EHOSTUNREACH; - PICO_FREE(new); - return -1; - } - - if (r->gateway.addr) { /* Specified Gateway is not a neighbor */ - pico_err = PICO_ERR_ENETUNREACH; - PICO_FREE(new); - return -1; - } - - new->link = r->link; - } - - if (!new->link) { - pico_err = PICO_ERR_EINVAL; - PICO_FREE(new); - return -1; - } - - if (pico_tree_insert(&Routes, new)) { - dbg("IPv4: Failed to insert route in tree\n"); - PICO_FREE(new); - return -1; - } - - dbg_route(); - return 0; -} - -int pico_ipv4_route_del(struct pico_ip4 address, struct pico_ip4 netmask, int metric) -{ - struct pico_ipv4_route test, *found; - - test.dest.addr = address.addr; - test.netmask.addr = netmask.addr; - test.metric = (uint32_t)metric; - - found = pico_tree_findKey(&Routes, &test); - if (found) { - - pico_tree_delete(&Routes, found); - PICO_FREE(found); - - dbg_route(); - return 0; - } - - pico_err = PICO_ERR_EINVAL; - return -1; -} - - -int pico_ipv4_link_add(struct pico_device *dev, struct pico_ip4 address, struct pico_ip4 netmask) -{ - struct pico_ipv4_link test, *new; - struct pico_ip4 network, gateway; - char ipstr[30]; - - if (!dev) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - test.address.addr = address.addr; - test.netmask.addr = netmask.addr; - test.dev = dev; - /** XXX: Valid netmask / unicast address test **/ - - if (pico_tree_findKey(&Tree_dev_link, &test)) { - pico_err = PICO_ERR_EADDRINUSE; - return -1; - } - - /** XXX: Check for network already in use (e.g. 
trying to assign 10.0.0.1/24 where 10.1.0.1/8 is in use) **/ - new = PICO_ZALLOC(sizeof(struct pico_ipv4_link)); - if (!new) { - dbg("IPv4: Out of memory!\n"); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - new->address.addr = address.addr; - new->netmask.addr = netmask.addr; - new->dev = dev; -#ifdef PICO_SUPPORT_MCAST - new->MCASTGroups = PICO_ZALLOC(sizeof(struct pico_tree)); - if (!new->MCASTGroups) { - PICO_FREE(new); - dbg("IPv4: Out of memory!\n"); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - new->MCASTGroups->root = &LEAF; - new->MCASTGroups->compare = ipv4_mcast_groups_cmp; -#ifdef PICO_SUPPORT_IGMP - new->mcast_compatibility = PICO_IGMPV3; /* default RFC 3376 $7.2.1 */ - new->mcast_last_query_interval = PICO_IGMP_QUERY_INTERVAL; -#endif -#endif - - if (pico_tree_insert(&Tree_dev_link, new)) { - dbg("IPv4: Failed to insert link in tree\n"); -#ifdef PICO_SUPPORT_MCAST - PICO_FREE(new->MCASTGroups); -#endif - PICO_FREE(new); - return -1; - } - -#ifdef PICO_SUPPORT_MCAST - do { - struct pico_ip4 mcast_all_hosts, mcast_addr, mcast_nm, mcast_gw; - if (!mcast_default_link) { - mcast_addr.addr = long_be(0xE0000000); /* 224.0.0.0 */ - mcast_nm.addr = long_be(0xF0000000); /* 15.0.0.0 */ - mcast_gw.addr = long_be(0x00000000); - mcast_default_link = new; - pico_ipv4_route_add(mcast_addr, mcast_nm, mcast_gw, 1, new); - } - - mcast_all_hosts.addr = PICO_MCAST_ALL_HOSTS; - pico_ipv4_mcast_join(&address, &mcast_all_hosts, 1, PICO_IP_MULTICAST_EXCLUDE, NULL); - } while(0); -#endif - - network.addr = address.addr & netmask.addr; - gateway.addr = 0U; - pico_ipv4_route_add(network, netmask, gateway, 1, new); - pico_ipv4_to_string(ipstr, new->address.addr); - dbg("Assigned ipv4 %s to device %s\n", ipstr, new->dev->name); - if (default_bcast_route.link == NULL) - default_bcast_route.link = new; - - return 0; -} - -static int pico_ipv4_cleanup_routes(struct pico_ipv4_link *link) -{ - struct pico_tree_node *index = NULL, *tmp = NULL; - struct pico_ipv4_route *route = 
NULL; - - pico_tree_foreach_safe(index, &Routes, tmp) { - route = index->keyValue; - if (link == route->link) - pico_ipv4_route_del(route->dest, route->netmask, (int)route->metric); - } - return 0; -} - -void MOCKABLE pico_ipv4_route_set_bcast_link(struct pico_ipv4_link *link) -{ - if (link) - default_bcast_route.link = link; -} - -int pico_ipv4_link_del(struct pico_device *dev, struct pico_ip4 address) -{ - struct pico_ipv4_link test, *found; - - if (!dev) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - test.address.addr = address.addr; - test.dev = dev; - found = pico_tree_findKey(&Tree_dev_link, &test); - if (!found) { - pico_err = PICO_ERR_ENXIO; - return -1; - } - -#ifdef PICO_SUPPORT_MCAST - do { - struct pico_ip4 mcast_all_hosts, mcast_addr, mcast_nm; - struct pico_mcast_group *g = NULL; - struct pico_tree_node *index, *_tmp; - if (found == mcast_default_link) { - mcast_addr.addr = long_be(0xE0000000); /* 224.0.0.0 */ - mcast_nm.addr = long_be(0xF0000000); /* 15.0.0.0 */ - mcast_default_link = NULL; - pico_ipv4_route_del(mcast_addr, mcast_nm, 1); - } - - mcast_all_hosts.addr = PICO_MCAST_ALL_HOSTS; - pico_ipv4_mcast_leave(&address, &mcast_all_hosts, 1, PICO_IP_MULTICAST_EXCLUDE, NULL); - pico_tree_foreach_safe(index, found->MCASTGroups, _tmp) { - g = index->keyValue; - pico_tree_delete(found->MCASTGroups, g); - PICO_FREE(g); - } - } while(0); - PICO_FREE(found->MCASTGroups); -#endif - - pico_ipv4_cleanup_routes(found); - pico_tree_delete(&Tree_dev_link, found); - if (default_bcast_route.link == found) - default_bcast_route.link = NULL; - - PICO_FREE(found); - - return 0; -} - - -struct pico_ipv4_link *pico_ipv4_link_get(struct pico_ip4 *address) -{ - struct pico_ipv4_link test = { - 0 - }, *found = NULL; - test.address.addr = address->addr; - - found = pico_tree_findKey(&Tree_dev_link, &test); - if (!found) - return NULL; - else - return found; -} - -struct pico_ipv4_link *MOCKABLE pico_ipv4_link_by_dev(struct pico_device *dev) -{ - struct 
pico_tree_node *index = NULL; - struct pico_ipv4_link *link = NULL; - - pico_tree_foreach(index, &Tree_dev_link) { - link = index->keyValue; - if (link->dev == dev) - return link; - } - return NULL; -} - -struct pico_ipv4_link *pico_ipv4_link_by_dev_next(struct pico_device *dev, struct pico_ipv4_link *last) -{ - struct pico_tree_node *index = NULL; - struct pico_ipv4_link *link = NULL; - int valid = 0; - - if (last == NULL) - valid = 1; - - pico_tree_foreach(index, &Tree_dev_link) { - link = index->keyValue; - if (link->dev == dev) { - if (last == link) - valid = 1; - else if (valid > 0) - return link; - } - } - return NULL; -} - -struct pico_device *MOCKABLE pico_ipv4_link_find(struct pico_ip4 *address) -{ - struct pico_ipv4_link test, *found; - if (!address) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - test.dev = NULL; - test.address.addr = address->addr; - found = pico_tree_findKey(&Tree_dev_link, &test); - if (!found) { - pico_err = PICO_ERR_ENXIO; - return NULL; - } - - return found->dev; -} - - -static int pico_ipv4_rebound_large(struct pico_frame *f) -{ -#ifdef PICO_SUPPORT_IPV4FRAG - uint16_t total_payload_written = 0; - uint32_t len = f->transport_len; - struct pico_frame *fr; - struct pico_ip4 dst; - struct pico_ipv4_hdr *hdr; - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - dst.addr = hdr->src.addr; - - while(total_payload_written < len) { - uint32_t space = (uint32_t)len - total_payload_written; - if (space > PICO_IPV4_MAXPAYLOAD) - space = PICO_IPV4_MAXPAYLOAD; - - fr = pico_ipv4_alloc(&pico_proto_ipv4, NULL, (uint16_t)space); - if (!fr) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if (space + total_payload_written < len) - { - fr->frag |= PICO_IPV4_MOREFRAG; - } - else - { - fr->frag &= PICO_IPV4_FRAG_MASK; - } - - fr->frag = (((total_payload_written) >> 3u) & 0xffffu) | fr->frag; - - memcpy(fr->transport_hdr, f->transport_hdr + total_payload_written, fr->transport_len); - if (pico_ipv4_frame_push(fr, &dst, hdr->proto) > 0) { - 
total_payload_written = (uint16_t)((uint16_t)fr->transport_len + total_payload_written); - } else { - /* No need to discard frame here, pico_ipv4_frame_push() already did that */ - break; - } - } /* while() */ - return (int)total_payload_written; -#else - (void)f; - return -1; -#endif -} - -int pico_ipv4_rebound(struct pico_frame *f) -{ - struct pico_ip4 dst; - struct pico_ipv4_hdr *hdr; - if (!f) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - if (!hdr) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - dst.addr = hdr->src.addr; - if (f->transport_len > PICO_IPV4_MAXPAYLOAD) { - return pico_ipv4_rebound_large(f); - } - - return pico_ipv4_frame_push(f, &dst, hdr->proto); -} - -static int pico_ipv4_pre_forward_checks(struct pico_frame *f) -{ - static uint16_t last_id = 0; - static uint16_t last_proto = 0; - static struct pico_ip4 last_src = { - 0 - }; - static struct pico_ip4 last_dst = { - 0 - }; - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *)f->net_hdr; - - /* Decrease TTL, check if expired */ - hdr->ttl = (uint8_t)(hdr->ttl - 1); - if (hdr->ttl < 1) { - pico_notify_ttl_expired(f); - dbg(" ------------------- TTL EXPIRED\n"); - return -1; - } - - /* HACK: increase crc to compensate decreased TTL */ - hdr->crc++; - - /* If source is local, discard anyway (packets bouncing back and forth) */ - if (pico_ipv4_link_get(&hdr->src)) - return -1; - - /* If this was the last forwarded packet, silently discard to prevent duplications */ - if ((last_src.addr == hdr->src.addr) && (last_id == hdr->id) - && (last_dst.addr == hdr->dst.addr) && (last_proto == hdr->proto)) { - return -1; - } else { - last_src.addr = hdr->src.addr; - last_dst.addr = hdr->dst.addr; - last_id = hdr->id; - last_proto = hdr->proto; - } - - return 0; -} - -static int pico_ipv4_forward_check_dev(struct pico_frame *f) -{ - if (f->dev->eth != NULL) - f->len -= PICO_SIZE_ETHHDR; - - if (f->len > f->dev->mtu) { - pico_notify_pkt_too_big(f); - return 
-1; - } - - return 0; -} - -static int pico_ipv4_forward(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *)f->net_hdr; - struct pico_ipv4_route *rt; - if (!hdr) { - return -1; - } - - rt = route_find(&hdr->dst); - if (!rt) { - pico_notify_dest_unreachable(f); - return -1; - } - - f->dev = rt->link->dev; - - if (pico_ipv4_pre_forward_checks(f) < 0) - return -1; - - pico_ipv4_nat_outbound(f, &rt->link->address); - - f->start = f->net_hdr; - - if (pico_ipv4_forward_check_dev(f) < 0) - return -1; - - pico_datalink_send(f); - return 0; - -} - -int pico_ipv4_is_broadcast(uint32_t addr) -{ - struct pico_ipv4_link *link; - struct pico_tree_node *index; - if (addr == PICO_IP4_BCAST) - return 1; - - pico_tree_foreach(index, &Tree_dev_link) { - link = index->keyValue; - if ((link->address.addr | (~link->netmask.addr)) == addr) - return 1; - } - return 0; -} - -void pico_ipv4_unreachable(struct pico_frame *f, int err) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; -#if defined PICO_SUPPORT_TCP || defined PICO_SUPPORT_UDP - f->transport_hdr = ((uint8_t *)f->net_hdr) + PICO_SIZE_IP4HDR; - pico_transport_error(f, hdr->proto, err); -#endif -} - -int pico_ipv4_cleanup_links(struct pico_device *dev) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ipv4_link *link = NULL; - - pico_tree_foreach_safe(index, &Tree_dev_link, _tmp) { - link = index->keyValue; - if (dev == link->dev) - pico_ipv4_link_del(dev, link->address); - } - return 0; -} - - -#endif diff --git a/kernel/picotcp/modules/pico_ipv4.h b/kernel/picotcp/modules/pico_ipv4.h deleted file mode 100644 index 4ab7b69..0000000 --- a/kernel/picotcp/modules/pico_ipv4.h +++ /dev/null @@ -1,115 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_IPV4 -#define INCLUDE_PICO_IPV4 -#include "pico_addressing.h" -#include "pico_protocol.h" -#include "pico_tree.h" - -#define PICO_IPV4_INADDR_ANY 0x00000000U - -#define PICO_IPV4_MTU (1500u) -#define PICO_SIZE_IP4HDR (uint32_t)((sizeof(struct pico_ipv4_hdr))) -#define PICO_IPV4_MAXPAYLOAD (PICO_IPV4_MTU - PICO_SIZE_IP4HDR) -#define PICO_IPV4_DONTFRAG 0x4000U -#define PICO_IPV4_MOREFRAG 0x2000U -#define PICO_IPV4_EVIL 0x8000U -#define PICO_IPV4_FRAG_MASK 0x1FFFU -#define PICO_IPV4_DEFAULT_TTL 64 -#ifndef MBED - #define PICO_IPV4_FRAG_MAX_SIZE (uint32_t)(63 * 1024) -#else - #define PICO_IPV4_FRAG_MAX_SIZE PICO_DEFAULT_SOCKETQ -#endif - -extern struct pico_protocol pico_proto_ipv4; - -PACKED_STRUCT_DEF pico_ipv4_hdr { - uint8_t vhl; - uint8_t tos; - uint16_t len; - uint16_t id; - uint16_t frag; - uint8_t ttl; - uint8_t proto; - uint16_t crc; - struct pico_ip4 src; - struct pico_ip4 dst; - uint8_t options[]; -}; - -PACKED_STRUCT_DEF pico_ipv4_pseudo_hdr -{ - struct pico_ip4 src; - struct pico_ip4 dst; - uint8_t zeros; - uint8_t proto; - uint16_t len; -}; - -/* Interface: link to device */ -struct pico_mcast_list; - -struct pico_ipv4_link -{ - struct pico_device *dev; - struct pico_ip4 address; - struct pico_ip4 netmask; -#ifdef PICO_SUPPORT_MCAST - struct pico_tree *MCASTGroups; - uint8_t mcast_compatibility; - uint8_t mcast_last_query_interval; -#endif -}; - - -struct pico_ipv4_route -{ - struct pico_ip4 dest; - struct pico_ip4 netmask; - struct pico_ip4 gateway; - struct pico_ipv4_link *link; - uint32_t metric; -}; - -extern struct pico_tree Routes; - - -int pico_ipv4_compare(struct pico_ip4 *a, struct pico_ip4 *b); -int pico_ipv4_to_string(char *ipbuf, const uint32_t ip); -int pico_string_to_ipv4(const char *ipstr, uint32_t *ip); -int pico_ipv4_valid_netmask(uint32_t mask); -int pico_ipv4_is_unicast(uint32_t address); -int pico_ipv4_is_multicast(uint32_t address); -int 
pico_ipv4_is_broadcast(uint32_t addr); -int pico_ipv4_is_loopback(uint32_t addr); -int pico_ipv4_is_valid_src(uint32_t addr, struct pico_device *dev); - -int pico_ipv4_link_add(struct pico_device *dev, struct pico_ip4 address, struct pico_ip4 netmask); -int pico_ipv4_link_del(struct pico_device *dev, struct pico_ip4 address); -int pico_ipv4_rebound(struct pico_frame *f); - -int pico_ipv4_frame_push(struct pico_frame *f, struct pico_ip4 *dst, uint8_t proto); -struct pico_ipv4_link *pico_ipv4_link_get(struct pico_ip4 *address); -struct pico_ipv4_link *pico_ipv4_link_by_dev(struct pico_device *dev); -struct pico_ipv4_link *pico_ipv4_link_by_dev_next(struct pico_device *dev, struct pico_ipv4_link *last); -struct pico_device *pico_ipv4_link_find(struct pico_ip4 *address); -struct pico_ip4 *pico_ipv4_source_find(const struct pico_ip4 *dst); -struct pico_device *pico_ipv4_source_dev_find(const struct pico_ip4 *dst); -int pico_ipv4_route_add(struct pico_ip4 address, struct pico_ip4 netmask, struct pico_ip4 gateway, int metric, struct pico_ipv4_link *link); -int pico_ipv4_route_del(struct pico_ip4 address, struct pico_ip4 netmask, int metric); -struct pico_ip4 pico_ipv4_route_get_gateway(struct pico_ip4 *addr); -void pico_ipv4_route_set_bcast_link(struct pico_ipv4_link *link); -void pico_ipv4_unreachable(struct pico_frame *f, int err); - -int pico_ipv4_mcast_join(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *MCASTFilter); -int pico_ipv4_mcast_leave(struct pico_ip4 *mcast_link, struct pico_ip4 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *MCASTFilter); -struct pico_ipv4_link *pico_ipv4_get_default_mcastlink(void); -int pico_ipv4_cleanup_links(struct pico_device *dev); - -#endif /* _INCLUDE_PICO_IPV4 */ diff --git a/kernel/picotcp/modules/pico_ipv6.c b/kernel/picotcp/modules/pico_ipv6.c deleted file mode 100644 index 922b679..0000000 --- 
a/kernel/picotcp/modules/pico_ipv6.c +++ /dev/null @@ -1,2140 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera, Kristof Roelants - *********************************************************************/ - - -#include "pico_ipv6.h" -#include "pico_icmp6.h" -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_eth.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_socket.h" -#include "pico_device.h" -#include "pico_tree.h" -#include "pico_fragments.h" -#include "pico_ethernet.h" -#include "pico_6lowpan_ll.h" -#include "pico_mld.h" -#include "pico_mcast.h" -#ifdef PICO_SUPPORT_IPV6 - - -#define PICO_IPV6_EXTHDR_OPT_PAD1 0 -#define PICO_IPV6_EXTHDR_OPT_PADN 1 -#define PICO_IPV6_EXTHDR_OPT_SRCADDR 201 - -#define PICO_IPV6_EXTHDR_OPT_ACTION_MASK 0xC0 /* highest-order two bits */ -#define PICO_IPV6_EXTHDR_OPT_ACTION_SKIP 0x00 /* skip and continue processing */ -#define PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD 0x40 /* discard packet */ -#define PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD_SI 0x80 /* discard and send ICMP parameter problem */ -#define PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD_SINM 0xC0 /* discard and send ICMP parameter problem if not multicast */ - -#define PICO_IPV6_MAX_RTR_SOLICITATION_DELAY 1000 -#define PICO_IPV6_DEFAULT_DAD_RETRANS 1 - -#ifdef DEBUG_IPV6 -#define ipv6_dbg dbg -#else -#define ipv6_dbg(...) do { } while(0) -#endif - -#ifdef PICO_SUPPORT_MCAST - -#ifdef DEBUG_MCAST -#define ipv6_mcast_dbg dbg -#else -#define ipv6_mcast_dbg(...) 
do { } while(0) -#endif - -static struct pico_ipv6_link *mcast_default_link_ipv6 = NULL; -#endif -/* queues */ -static struct pico_queue ipv6_in; -static struct pico_queue ipv6_out; - -const uint8_t PICO_IP6_ANY[PICO_SIZE_IP6] = { - 0 -}; -#ifdef PICO_SUPPORT_MCAST -static int pico_ipv6_mcast_filter(struct pico_frame *f); -#endif - - -int pico_ipv6_compare(struct pico_ip6 *a, struct pico_ip6 *b) -{ - uint32_t i; - for (i = 0; i < sizeof(struct pico_ip6); i++) { - if (a->addr[i] < b->addr[i]) - return -1; - - if (a->addr[i] > b->addr[i]) - return 1; - } - return 0; -} - -static int ipv6_link_compare(void *ka, void *kb) -{ - struct pico_ipv6_link *a = ka, *b = kb; - struct pico_ip6 *a_addr, *b_addr; - int ret; - a_addr = &a->address; - b_addr = &b->address; - - ret = pico_ipv6_compare(a_addr, b_addr); - if (ret) - return ret; - - /* zero can be assigned multiple times (e.g. for DHCP) */ - if (a->dev != NULL && b->dev != NULL && !memcmp(a->address.addr, PICO_IP6_ANY, PICO_SIZE_IP6) && !memcmp(b->address.addr, PICO_IP6_ANY, PICO_SIZE_IP6)) { - /* XXX change PICO_IP6_ANY */ - if (a->dev < b->dev) - return -1; - - if (a->dev > b->dev) - return 1; - } - - return 0; -} - -static inline int ipv6_compare_metric(struct pico_ipv6_route *a, struct pico_ipv6_route *b) -{ - if (a->metric < b->metric) - return -1; - - if (a->metric > b->metric) - return 1; - - return 0; -} - -static int ipv6_route_compare(void *ka, void *kb) -{ - struct pico_ipv6_route *a = ka, *b = kb; - int ret; - - /* Routes are sorted by (host side) netmask len, then by addr, then by metric. 
*/ - ret = pico_ipv6_compare(&a->netmask, &b->netmask); - if (ret) - return ret; - - ret = pico_ipv6_compare(&a->dest, &b->dest); - if (ret) - return ret; - - return ipv6_compare_metric(a, b); - -} - -static PICO_TREE_DECLARE(Tree_dev_ip6_link, ipv6_link_compare); -PICO_TREE_DECLARE(IPV6Routes, ipv6_route_compare); -static PICO_TREE_DECLARE(IPV6Links, ipv6_link_compare); - -static char pico_ipv6_dec_to_char(uint8_t u) -{ - if (u < 10) - return (char)('0' + u); - else if (u < 16) - return (char)('a' + (u - 10)); - else - return '0'; -} - -static int pico_ipv6_hex_to_dec(char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - - if (c >= 'a' && c <= 'f') - return 10 + (c - 'a'); - - if (c >= 'A' && c <= 'F') - return 10 + (c - 'A'); - - return 0; -} - -int pico_ipv6_to_string(char *ipbuf, const uint8_t ip[PICO_SIZE_IP6]) -{ - uint8_t dec = 0, i = 0; - - if (!ipbuf || !ip) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* every nibble is one char */ - for (i = 0; i < ((uint8_t)PICO_SIZE_IP6) * 2u; ++i) { - if (i % 4 == 0 && i != 0) - *ipbuf++ = ':'; - - if (i % 2 == 0) { /* upper nibble */ - dec = ip[i / 2] >> 4; - } else { /* lower nibble */ - dec = ip[i / 2] & 0x0F; - } - - *ipbuf++ = pico_ipv6_dec_to_char(dec); - } - *ipbuf = '\0'; - - return 0; -} - -int pico_string_to_ipv6(const char *ipstr, uint8_t *ip) -{ - uint8_t buf[PICO_SIZE_IP6] = { - 0 - }; - uint8_t doublecolon = 0, byte = 0; - char p = 0; - int i = 0, diff = 0, nibble = 0, hex = 0, colons = 0; - int zeros = 0, shift = 0; - - pico_err = PICO_ERR_EINVAL; - if (!ipstr || !ip) - return -1; - - memset(ip, 0, PICO_SIZE_IP6); - - while((p = *ipstr++) != 0) - { - if (pico_is_hex(p) || (p == ':') || *ipstr == '\0') { /* valid signs */ - if (pico_is_hex(p)) { - buf[byte] = (uint8_t)((buf[byte] << 4) + pico_ipv6_hex_to_dec(p)); - if (++nibble % 2 == 0) - ++byte; - } - - if (p == ':' || *ipstr == '\0') { /* account for leftout leading zeros */ - ++hex; - if (p == ':') - ++colons; - - diff = (hex * 4) - 
nibble; - nibble += diff; - switch (diff) { - case 0: - /* 16-bit hex block ok f.e. 1db8 */ - break; - case 1: - /* one zero f.e. db8: byte = 1, buf[byte-1] = 0xdb, buf[byte] = 0x08 */ - buf[byte] |= (uint8_t)(buf[byte - 1] << 4); - buf[byte - 1] >>= 4; - byte++; - break; - case 2: - /* two zeros f.e. b8: byte = 1, buf[byte] = 0x00, buf[byte-1] = 0xb8 */ - buf[byte] = buf[byte - 1]; - buf[byte - 1] = 0x00; - byte++; - break; - case 3: - /* three zeros f.e. 8: byte = 0, buf[byte] = 0x08, buf[byte+1] = 0x00 */ - buf[byte + 1] = buf[byte]; - buf[byte] = 0x00; - byte = (uint8_t)(byte + 2); - break; - case 4: - /* case of :: */ - if (doublecolon && colons != 2) /* catch case x::x::x but not ::x */ - return -1; - else - doublecolon = byte; - - break; - default: - /* case of missing colons f.e. 20011db8 instead of 2001:1db8 */ - return -1; - } - } - } else { - return -1; - } - } - if (colons < 2) /* valid IPv6 has atleast two colons */ - return -1; - - /* account for leftout :: zeros */ - zeros = PICO_SIZE_IP6 - byte; - if (zeros) { - shift = PICO_SIZE_IP6 - zeros - doublecolon; - for (i = shift; i >= 0; --i) { - /* (i-1) as arrays are indexed from 0 onwards */ - if ((doublecolon + (i - 1)) >= 0) - buf[doublecolon + zeros + (i - 1)] = buf[doublecolon + (i - 1)]; - } - memset(&buf[doublecolon], 0, (size_t)zeros); - } - - memcpy(ip, buf, 16); - pico_err = PICO_ERR_NOERR; - return 0; -} - -int pico_ipv6_is_linklocal(const uint8_t addr[PICO_SIZE_IP6]) -{ - /* prefix: fe80::/10 */ - if ((addr[0] == 0xfe) && ((addr[1] >> 6) == 0x02)) - return 1; - - return 0; -} - -int pico_ipv6_is_sitelocal(const uint8_t addr[PICO_SIZE_IP6]) -{ - /* prefix: fec0::/10 */ - if ((addr[0] == 0xfe) && ((addr[1] >> 6) == 0x03)) - return 1; - - return 0; -} - -int pico_ipv6_is_uniquelocal(const uint8_t addr[PICO_SIZE_IP6]) -{ - /* prefix: fc00::/7 */ - if (((addr[0] >> 1) == 0x7e)) - return 1; - - return 0; -} - -int pico_ipv6_is_global(const uint8_t addr[PICO_SIZE_IP6]) -{ - /* prefix: 2000::/3 */ - 
if (((addr[0] >> 5) == 0x01)) - return 1; - - return 0; -} - -int pico_ipv6_is_localhost(const uint8_t addr[PICO_SIZE_IP6]) -{ - const uint8_t localhost[PICO_SIZE_IP6] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 - }; - if (memcmp(addr, localhost, PICO_SIZE_IP6) == 0) - return 1; - - return 0; - -} - -int pico_ipv6_is_unicast(struct pico_ip6 *a) -{ - if (pico_ipv6_is_global(a->addr)) - return 1; - else if (pico_ipv6_is_uniquelocal(a->addr)) - return 1; - else if (pico_ipv6_is_sitelocal(a->addr)) - return 1; - else if (pico_ipv6_is_linklocal(a->addr)) - return 1; - else if (pico_ipv6_is_localhost(a->addr)) - return 1; - else if(pico_ipv6_link_get(a)) - return 1; - else - return 0; - -} - -int pico_ipv6_is_multicast(const uint8_t addr[PICO_SIZE_IP6]) -{ - /* prefix: ff00::/8 */ - if ((addr[0] == 0xff)) - return 1; - - return 0; -} - -int pico_ipv6_is_allhosts_multicast(const uint8_t addr[PICO_SIZE_IP6]) -{ - struct pico_ip6 allhosts = {{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}; - return !memcmp(allhosts.addr, addr, PICO_SIZE_IP6); -} - -int pico_ipv6_is_solicited(const uint8_t addr[PICO_SIZE_IP6]) -{ - struct pico_ip6 solicited_node = {{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0x00, 0x00, 0x00 }}; - return !memcmp(solicited_node.addr, addr, 13); -} - -int pico_ipv6_is_solnode_multicast(const uint8_t addr[PICO_SIZE_IP6], struct pico_device *dev) -{ - struct pico_ipv6_link *link; - if (pico_ipv6_is_multicast(addr) == 0) - return 0; - - link = pico_ipv6_link_by_dev(dev); - while(link) { - if (pico_ipv6_is_linklocal(link->address.addr)) { - int i, match = 0; - for(i = 13; i < 16; i++) { - if (addr[i] == link->address.addr[i]) - ++match; - } - /* Solicitation: last 3 bytes match a local address. 
*/ - if (match == 3) - return 1; - } - - link = pico_ipv6_link_by_dev_next(dev, link); - } - return 0; -} - -int pico_ipv6_is_unspecified(const uint8_t addr[PICO_SIZE_IP6]) -{ - return !memcmp(PICO_IP6_ANY, addr, PICO_SIZE_IP6); -} - -static struct pico_ipv6_route *pico_ipv6_route_find(const struct pico_ip6 *addr) -{ - struct pico_tree_node *index = NULL; - struct pico_ipv6_route *r = NULL; - int i = 0; - if (!pico_ipv6_is_localhost(addr->addr) && (pico_ipv6_is_linklocal(addr->addr) || pico_ipv6_is_sitelocal(addr->addr))) { - return NULL; - } - - pico_tree_foreach_reverse(index, &IPV6Routes) { - r = index->keyValue; - for (i = 0; i < PICO_SIZE_IP6; ++i) { - if ((addr->addr[i] & (r->netmask.addr[i])) != ((r->dest.addr[i]) & (r->netmask.addr[i]))) { - break; - } - - if (i + 1 == PICO_SIZE_IP6) { - return r; - } - } - } - return NULL; -} - -struct pico_ip6 *pico_ipv6_source_find(const struct pico_ip6 *dst) -{ - struct pico_ip6 *myself = NULL; - struct pico_ipv6_route *rt; - - if(!dst) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - rt = pico_ipv6_route_find(dst); - if (rt) { - myself = &rt->link->address; - } else - pico_err = PICO_ERR_EHOSTUNREACH; - - return myself; -} - -struct pico_device *pico_ipv6_source_dev_find(const struct pico_ip6 *dst) -{ - struct pico_device *dev = NULL; - struct pico_ipv6_route *rt; - - if(!dst) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - rt = pico_ipv6_route_find(dst); - if (rt && rt->link) { - dev = rt->link->dev; - } else - pico_err = PICO_ERR_EHOSTUNREACH; - - return dev; -} - -static int pico_ipv6_forward_check_dev(struct pico_frame *f) -{ - if(f->dev->mode == LL_MODE_ETHERNET && f->dev->eth != NULL) - f->len -= PICO_SIZE_ETHHDR; - - if(f->len > f->dev->mtu) { - pico_notify_pkt_too_big(f); - return -1; - } - - return 0; -} - -static int pico_ipv6_pre_forward_checks(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - - /* Decrease HOP count, check if expired */ - hdr->hop = 
(uint8_t)(hdr->hop - 1); - if (hdr->hop < 1) { - pico_notify_ttl_expired(f); - dbg(" ------------------- HOP COUNT EXPIRED\n"); - return -1; - } - - /* If source is local, discard anyway (packets bouncing back and forth) */ - if (pico_ipv6_link_get(&hdr->src)) - return -1; - - if (pico_ipv6_forward_check_dev(f) < 0) - return -1; - - return 0; -} - -static int pico_ipv6_forward(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - struct pico_ipv6_route *rt; - if (!hdr) { - pico_frame_discard(f); - return -1; - } - - rt = pico_ipv6_route_find(&hdr->dst); - if (!rt) { - pico_notify_dest_unreachable(f); - pico_frame_discard(f); - return -1; - } - - f->dev = rt->link->dev; - - if (pico_ipv6_pre_forward_checks(f) < 0) - { - pico_frame_discard(f); - return -1; - } - - f->start = f->net_hdr; - - return pico_datalink_send(f); -} - - -static int pico_ipv6_process_hopbyhop(struct pico_ipv6_exthdr *hbh, struct pico_frame *f) -{ - uint8_t *option = NULL; - uint8_t len = 0, optlen = 0; - uint32_t ptr = sizeof(struct pico_ipv6_hdr); - uint8_t *extensions_start = (uint8_t *)hbh; - uint8_t must_align = 1; - IGNORE_PARAMETER(f); - - option = ((uint8_t *)&hbh->ext.hopbyhop) + sizeof(struct hopbyhop_s); - len = (uint8_t)HBH_LEN(hbh); - ipv6_dbg("IPv6: hop by hop extension header length %u\n", len + 2); - while (len) { - switch (*option) - { - case PICO_IPV6_EXTHDR_OPT_PAD1: - ++option; - --len; - break; - - case PICO_IPV6_EXTHDR_OPT_PADN: - optlen = (uint8_t)((*(option + 1)) + 2); /* plus type and len byte */ - option += optlen; - len = (uint8_t)(len - optlen); - break; - case PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT: - optlen = (uint8_t)((*(option + 1)) + 2); /* plus type and len byte */ - /* MLD package */ - if(*(option + 1) == 2) - must_align = 0; - - option += optlen; - len = (uint8_t)(len - optlen); - break; - default: - /* unknown option */ - optlen = (uint8_t)(*(option + 1) + 2); /* plus type and len byte */ - switch ((*option) & 
PICO_IPV6_EXTHDR_OPT_ACTION_MASK) { - case PICO_IPV6_EXTHDR_OPT_ACTION_SKIP: - break; - case PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD: - return -1; - case PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD_SI: - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_IPV6OPT, ptr + (uint32_t)(option - extensions_start)); - return -1; - case PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD_SINM: - if (!pico_ipv6_is_multicast(((struct pico_ipv6_hdr *)(f->net_hdr))->dst.addr)) - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_IPV6OPT, ptr + (uint32_t)(option - extensions_start)); - - return -1; - } - ipv6_dbg("IPv6: option with type %u and length %u\n", *option, optlen); - option += optlen; - len = (uint8_t)(len - optlen); - } - } - return must_align; -} - - -static int pico_ipv6_process_routing(struct pico_ipv6_exthdr *routing, struct pico_frame *f, uint32_t ptr) -{ - IGNORE_PARAMETER(f); - - if (routing->ext.routing.segleft == 0) - return 0; - - ipv6_dbg("IPv6: routing extension header with len %u\n", routing->ext.routing.len + 2); - switch (routing->ext.routing.routtype) { - case 0x00: - /* deprecated */ - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_HDRFIELD, ptr + 2); - return -1; - case 0x02: - /* routing type for MIPv6: not supported yet */ - break; - default: - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_HDRFIELD, ptr + 2); - return -1; - } - return 0; -} - -#define IP6FRAG_MORE(x) ((x & 0x0001)) - -static int pico_ipv6_process_destopt(struct pico_ipv6_exthdr *destopt, struct pico_frame *f, uint32_t opt_ptr) -{ - uint8_t *option = NULL; - uint8_t len = 0, optlen = 0; - opt_ptr += (uint32_t)(2u); /* Skip Dest_opts header */ - IGNORE_PARAMETER(f); - option = ((uint8_t *)&destopt->ext.destopt) + sizeof(struct destopt_s); - len = (uint8_t)(((destopt->ext.destopt.len + 1) << 3) - 2); /* len in bytes, minus nxthdr and len byte */ - ipv6_dbg("IPv6: destination option extension header length %u\n", len + 2); - while (len) { - optlen = (uint8_t)(*(option + 1) + 2); - switch 
(*option) - { - case PICO_IPV6_EXTHDR_OPT_PAD1: - break; - - case PICO_IPV6_EXTHDR_OPT_PADN: - break; - - case PICO_IPV6_EXTHDR_OPT_SRCADDR: - ipv6_dbg("IPv6: home address option with length %u\n", optlen); - break; - - default: - ipv6_dbg("IPv6: option with type %u and length %u\n", *option, optlen); - switch (*option & PICO_IPV6_EXTHDR_OPT_ACTION_MASK) { - case PICO_IPV6_EXTHDR_OPT_ACTION_SKIP: - break; - case PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD: - return -1; - case PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD_SI: - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_IPV6OPT, opt_ptr); - return -1; - case PICO_IPV6_EXTHDR_OPT_ACTION_DISCARD_SINM: - if (!pico_ipv6_is_multicast(((struct pico_ipv6_hdr *)(f->net_hdr))->dst.addr)) { - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_IPV6OPT, opt_ptr); - } - - return -1; - } - break; - } - opt_ptr += optlen; - option += optlen; - len = (uint8_t)(len - optlen); - } - return 0; -} - -static int pico_ipv6_check_headers_sequence(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - int ptr = sizeof(struct pico_ipv6_hdr); - int cur_nexthdr = 6; /* Starts with nexthdr field in ipv6 pkt */ - uint8_t nxthdr = hdr->nxthdr; - for (;; ) { - uint8_t optlen = *(f->net_hdr + ptr + 1); - switch (nxthdr) { - case PICO_IPV6_EXTHDR_DESTOPT: - case PICO_IPV6_EXTHDR_ROUTING: - case PICO_IPV6_EXTHDR_HOPBYHOP: - case PICO_IPV6_EXTHDR_ESP: - case PICO_IPV6_EXTHDR_AUTH: - optlen = (uint8_t)IPV6_OPTLEN(optlen); - break; - case PICO_IPV6_EXTHDR_FRAG: - optlen = 8; - break; - case PICO_IPV6_EXTHDR_NONE: - return 0; - - case PICO_PROTO_TCP: - case PICO_PROTO_UDP: - case PICO_PROTO_ICMP6: - return 0; - default: - /* Invalid next header */ - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_NXTHDR, (uint32_t)cur_nexthdr); - return -1; - } - cur_nexthdr = ptr; - nxthdr = *(f->net_hdr + ptr); - ptr += optlen; - } -} - -static int pico_ipv6_check_aligned(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = 
(struct pico_ipv6_hdr *)f->net_hdr; - if ((short_be(hdr->len) % 8) != 0) { - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_HDRFIELD, 4); - return -1; - } - - return 0; -} - -static int pico_ipv6_extension_headers(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - uint8_t nxthdr = hdr->nxthdr; - struct pico_ipv6_exthdr *exthdr = NULL, *frag_hdr = NULL; - uint32_t ptr = sizeof(struct pico_ipv6_hdr); - uint16_t cur_optlen; - uint32_t cur_nexthdr = 6; - int must_align = 0; - - f->net_len = sizeof(struct pico_ipv6_hdr); - - if (pico_ipv6_check_headers_sequence(f) < 0) - return -1; - - for (;; ) { - exthdr = (struct pico_ipv6_exthdr *)(f->net_hdr + f->net_len); - cur_optlen = 0; - - switch (nxthdr) { - case PICO_IPV6_EXTHDR_HOPBYHOP: - if (cur_nexthdr != 6) { - /* The Hop-by-Hop Options header, - * when present, must immediately follow the IPv6 header. - */ - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_NXTHDR, cur_nexthdr); - return -1; - } - - cur_optlen = IPV6_OPTLEN(exthdr->ext.hopbyhop.len); - f->net_len = (uint16_t) (f->net_len + cur_optlen); - must_align = pico_ipv6_process_hopbyhop(exthdr, f); - if(must_align < 0) - return -1; - - break; - case PICO_IPV6_EXTHDR_ROUTING: - cur_optlen = IPV6_OPTLEN(exthdr->ext.routing.len); - f->net_len = (uint16_t) (f->net_len + cur_optlen); - if (pico_ipv6_process_routing(exthdr, f, ptr) < 0) - return -1; - - break; - case PICO_IPV6_EXTHDR_FRAG: - cur_optlen = 8u; - f->net_len = (uint16_t) (f->net_len + cur_optlen); - frag_hdr = exthdr; - f->frag = (uint16_t)((frag_hdr->ext.frag.om[0] << 8) + frag_hdr->ext.frag.om[1]); - /* If M-Flag is set, and packet is not 8B aligned, discard and alert */ - if (IP6FRAG_MORE(f->frag) && ((short_be(hdr->len) % 8) != 0)) { - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_HDRFIELD, 4); - return -1; - } - - break; - case PICO_IPV6_EXTHDR_DESTOPT: - cur_optlen = IPV6_OPTLEN(exthdr->ext.destopt.len); - f->net_len = (uint16_t) (f->net_len 
+ cur_optlen); - must_align = 1; - if (pico_ipv6_process_destopt(exthdr, f, ptr) < 0) - return -1; - - break; - case PICO_IPV6_EXTHDR_ESP: - /* not supported, ignored. */ - return 0; - case PICO_IPV6_EXTHDR_AUTH: - /* not supported, ignored */ - return 0; - case PICO_IPV6_EXTHDR_NONE: - /* no next header */ - if (must_align && (pico_ipv6_check_aligned(f) < 0)) - return -1; - - return 0; - - case PICO_PROTO_TCP: - case PICO_PROTO_UDP: - case PICO_PROTO_ICMP6: - if (must_align && (pico_ipv6_check_aligned(f) < 0)) - return -1; - - f->transport_hdr = f->net_hdr + f->net_len; - f->transport_len = (uint16_t)(short_be(hdr->len) - (f->net_len - sizeof(struct pico_ipv6_hdr))); - if (frag_hdr) { -#ifdef PICO_SUPPORT_IPV6FRAG - pico_ipv6_process_frag(frag_hdr, f, nxthdr); -#endif - return -1; - } else { - return nxthdr; - } - - default: - /* Invalid next header */ - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_NXTHDR, cur_nexthdr); - return -1; - } - nxthdr = exthdr->nxthdr; - cur_nexthdr = ptr; - ptr += cur_optlen; - } -} -static int pico_ipv6_process_mcast_in(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *) f->net_hdr; - struct pico_ipv6_exthdr *hbh = NULL; - if (pico_ipv6_is_multicast(hdr->dst.addr)) { -#ifdef PICO_SUPPORT_MCAST - /* Receiving UDP multicast datagram TODO set f->flags? 
*/ - if(hdr->nxthdr == 0) { - hbh = (struct pico_ipv6_exthdr *) (f->transport_hdr); - } - - if (hdr->nxthdr == PICO_PROTO_ICMP6 || (hbh != NULL && hbh->nxthdr == PICO_PROTO_ICMP6)) { - pico_transport_receive(f, PICO_PROTO_ICMP6); - return 1; - } else if ((pico_ipv6_mcast_filter(f) == 0) && (hdr->nxthdr == PICO_PROTO_UDP)) { - pico_enqueue(pico_proto_udp.q_in, f); - return 1; - } - -#else - IGNORE_PARAMETER(hbh); -#endif - pico_frame_discard(f); - return 1; - } - - return 0; -} -static int pico_ipv6_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - int proto = 0; - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - struct pico_ipv6_exthdr *hbh; - IGNORE_PARAMETER(self); - /* forward if not local, except if router alert is set */ - if (pico_ipv6_is_unicast(&hdr->dst) && !pico_ipv6_link_get(&hdr->dst)) { - if(hdr->nxthdr == 0) { - hbh = (struct pico_ipv6_exthdr *) f->transport_hdr; - if(hbh->ext.routing.routtype == 0) - return pico_ipv6_forward(f); - } else - /* not local, try to forward. */ - return pico_ipv6_forward(f); - } - - proto = pico_ipv6_extension_headers(f); - if (proto <= 0) { - pico_frame_discard(f); - return 0; - } - - f->proto = (uint8_t)proto; - ipv6_dbg("IPv6: payload %u net_len %u nxthdr %u\n", short_be(hdr->len), f->net_len, proto); - - if (pico_ipv6_is_unicast(&hdr->dst)) { - pico_transport_receive(f, f->proto); - } else if (pico_ipv6_is_multicast(hdr->dst.addr)) { - /* XXX perform multicast filtering: solicited-node multicast address MUST BE allowed! */ - if (pico_ipv6_process_mcast_in(f) > 0) - return 0; - - pico_transport_receive(f, f->proto); - } - - return 0; -} - -static int pico_ipv6_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - - f->start = (uint8_t*)f->net_hdr; - - return pico_datalink_send(f); -} - -/* allocates an IPv6 packet without extension headers. If extension headers are needed, - * include the len of the extension headers in the size parameter. 
Once a frame acquired - * increment net_len and transport_hdr with the len of the extension headers, decrement - * transport_len with this value. - */ -static struct pico_frame *pico_ipv6_alloc(struct pico_protocol *self, struct pico_device *dev, uint16_t size) -{ - struct pico_frame *f = NULL; - - IGNORE_PARAMETER(self); - - if (0) {} -#ifdef PICO_SUPPORT_6LOWPAN - else if (PICO_DEV_IS_6LOWPAN(dev)) { - f = pico_proto_6lowpan_ll.alloc(&pico_proto_6lowpan_ll, dev, (uint16_t)(size + PICO_SIZE_IP6HDR)); - } -#endif - else { -#ifdef PICO_SUPPORT_ETH - f = pico_proto_ethernet.alloc(&pico_proto_ethernet, dev, (uint16_t)(size + PICO_SIZE_IP6HDR)); -#else - f = pico_frame_alloc(size + PICO_SIZE_IP6HDR + PICO_SIZE_ETHHDR); -#endif - } - - if (!f) - return NULL; - - f->net_len = PICO_SIZE_IP6HDR; - f->transport_hdr = f->net_hdr + PICO_SIZE_IP6HDR; - f->transport_len = (uint16_t)size; - - /* Datalink size is accounted for in pico_datalink_send (link layer) */ - f->len = (uint32_t)(size + PICO_SIZE_IP6HDR); - - return f; -} - -static inline int ipv6_pushed_frame_valid(struct pico_frame *f, struct pico_ip6 *dst) -{ - struct pico_ipv6_hdr *hdr = NULL; - if(!f || !dst) - return -1; - - hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (!hdr) { - dbg("IPv6: IP header error\n"); - return -1; - } - - return 0; -} -int pico_ipv6_is_null_address(struct pico_ip6 *ip6) -{ - struct pico_ip6 null_addr = {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}; - return !memcmp(ip6, &null_addr, sizeof(struct pico_ip6)); -} -#ifdef PICO_SUPPORT_MCAST -/* link - * | - * MCASTGroups - * | | | - * ------------ | ------------ - * | | | - * MCASTSources MCASTSources MCASTSources - * | | | | | | | | | | | | - * S S S S S S S S S S S S - * - * MCASTGroups: RBTree(mcast_group) - * MCASTSources: RBTree(source) - */ -static int ipv6_mcast_groups_cmp(void *ka, void *kb) -{ - struct pico_mcast_group *a = ka, *b = kb; - return 
pico_ipv6_compare(&a->mcast_addr.ip6, &b->mcast_addr.ip6); -} -static int ipv6_mcast_sources_cmp(void *ka, void *kb) -{ - struct pico_ip6 *a = ka, *b = kb; - return pico_ipv6_compare(a, b); -} - -static void pico_ipv6_mcast_print_groups(struct pico_ipv6_link *mcast_link) -{ -#ifdef PICO_DEBUG_MULTICAST - uint16_t i = 0; - struct pico_mcast_group *g = NULL; - struct pico_ip6 *source = NULL; - struct pico_tree_node *index = NULL, *index2 = NULL; - char *ipv6_addr; - (void) source; - ipv6_mcast_dbg("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - ipv6_mcast_dbg("+ MULTICAST list interface %-16s +\n", mcast_link->dev->name); - ipv6_mcast_dbg("+------------------------------------------------------------------------------------------+\n"); - ipv6_mcast_dbg("+ nr | interface | host group | reference count | filter mode | source +\n"); - ipv6_mcast_dbg("+------------------------------------------------------------------------------------------+\n"); - ipv6_addr = PICO_ZALLOC(PICO_IPV6_STRING); - pico_tree_foreach(index, mcast_link->MCASTGroups) { - g = index->keyValue; - pico_ipv6_to_string(ipv6_addr, &g->mcast_addr.addr[0]); - ipv6_mcast_dbg("+ %04d | %16s | %s | %05u | %u | %8s +\n", i, mcast_link->dev->name, ipv6_addr, g->reference_count, g->filter_mode, ""); - pico_tree_foreach(index2, &g->MCASTSources) { - source = index2->keyValue; - pico_ipv6_to_string(ipv6_addr, source->addr); - ipv6_mcast_dbg("+ %4s | %16s | %8s | %5s | %s | %s +\n", "", "", "", "", "", ipv6_addr); - } - i++; - } - PICO_FREE(ipv6_addr); - ipv6_mcast_dbg("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); -#else - IGNORE_PARAMETER(mcast_link); -#endif - -} - -static int mcast_group_update_ipv6(struct pico_mcast_group *g, struct pico_tree *_MCASTFilter, uint8_t filter_mode) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ip6 *source = NULL; - /* cleanup filter */ - 
pico_tree_foreach_safe(index, &g->MCASTSources, _tmp) { - source = index->keyValue; - pico_tree_delete(&g->MCASTSources, source); - PICO_FREE(source); - } - /* insert new filter */ - if (_MCASTFilter) { - pico_tree_foreach(index, _MCASTFilter) { - if (index->keyValue) { - source = PICO_ZALLOC(sizeof(struct pico_ip6)); - if (!source) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - *source = *((struct pico_ip6 *)index->keyValue); - if (pico_tree_insert(&g->MCASTSources, source)) { - ipv6_mcast_dbg("IPv6 MCAST: Failed to insert source in tree\n"); - PICO_FREE(source); - return -1; - } - } - } - } - g->filter_mode = filter_mode; - return 0; -} - -int pico_ipv6_mcast_join(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *_MCASTFilter) -{ - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_ipv6_link *link = NULL; - int res = -1; - if (mcast_link) { - link = pico_ipv6_link_get(mcast_link); - } - - if (!link) { - link = mcast_default_link_ipv6; - } - - test.mcast_addr.ip6 = *mcast_group; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (g) { - if (reference_count) - g->reference_count++; - -#ifdef PICO_SUPPORT_MLD - res = pico_mld_state_change(mcast_link, mcast_group, filter_mode, _MCASTFilter, PICO_MLD_STATE_UPDATE); -#endif - } else { - g = PICO_ZALLOC(sizeof(struct pico_mcast_group)); - if (!g) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - /* "non-existent" state of filter mode INCLUDE and empty source list */ - g->filter_mode = PICO_IP_MULTICAST_INCLUDE; - g->reference_count = 1; - g->mcast_addr.ip6 = *mcast_group; - g->MCASTSources.root = &LEAF; - g->MCASTSources.compare = ipv6_mcast_sources_cmp; - if (pico_tree_insert(link->MCASTGroups, g)) { - ipv6_mcast_dbg("IPv6 MCAST: Failed to insert group in tree\n"); - PICO_FREE(g); - return -1; - } - -#ifdef PICO_SUPPORT_MLD - res = pico_mld_state_change(mcast_link, mcast_group, filter_mode, _MCASTFilter, 
PICO_MLD_STATE_CREATE); -#endif - } - - if (mcast_group_update_ipv6(g, _MCASTFilter, filter_mode) < 0) { - dbg("Error in mcast_group update\n"); - return -1; - } - - pico_ipv6_mcast_print_groups(link); - return res; -} - -int pico_ipv6_mcast_leave(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *_MCASTFilter) -{ - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_ipv6_link *link = NULL; - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ip6 *source = NULL; - int res = -1; - if (mcast_link) - link = pico_ipv6_link_get(mcast_link); - - if (!link) - link = mcast_default_link_ipv6; - - test.mcast_addr.ip6 = *mcast_group; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (!g) { - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - if (reference_count && (--(g->reference_count) < 1)) { -#ifdef PICO_SUPPORT_MLD - res = pico_mld_state_change(mcast_link, mcast_group, filter_mode, _MCASTFilter, PICO_MLD_STATE_DELETE); -#endif - /* cleanup filter */ - pico_tree_foreach_safe(index, &g->MCASTSources, _tmp) { - source = index->keyValue; - pico_tree_delete(&g->MCASTSources, source); - PICO_FREE(source); - } - pico_tree_delete(link->MCASTGroups, g); - PICO_FREE(g); - } else { -#ifdef PICO_SUPPORT_MLD - res = pico_mld_state_change(mcast_link, mcast_group, filter_mode, _MCASTFilter, PICO_MLD_STATE_UPDATE); -#endif - if (mcast_group_update_ipv6(g, _MCASTFilter, filter_mode) < 0) - return -1; - } - } - - pico_ipv6_mcast_print_groups(link); - return res; -} - -struct pico_ipv6_link *pico_ipv6_get_default_mcastlink(void) -{ - return mcast_default_link_ipv6; -} - -static int pico_ipv6_mcast_filter(struct pico_frame *f) -{ - struct pico_ipv6_link *link = NULL; - struct pico_tree_node *index = NULL, *index2 = NULL; - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *) f->net_hdr; -#ifdef PICO_DEBUG_MULTICAST - char 
ipv6_addr[PICO_IPV6_STRING]; -#endif - test.mcast_addr.ip6 = hdr->dst; - - pico_tree_foreach(index, &Tree_dev_ip6_link) { - link = index->keyValue; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (g) { - if (f->dev == link->dev) { -#ifdef PICO_DEBUG_MULTICAST - pico_ipv6_to_string( ipv6_addr, &hdr->dst.addr[0]); - ipv6_mcast_dbg("MCAST: IP %s is group member of current link %s\n", ipv6_addr, f->dev->name); -#endif - /* perform source filtering */ - switch (g->filter_mode) { - case PICO_IP_MULTICAST_INCLUDE: - pico_tree_foreach(index2, &g->MCASTSources) { - if (hdr->src.addr == ((struct pico_ip6 *)index2->keyValue)->addr) { -#ifdef PICO_DEBUG_MULTICAST - pico_ipv6_to_string(ipv6_addr, &hdr->src.addr[0]); - ipv6_mcast_dbg("MCAST: IP %s in included interface source list\n", ipv6_addr); -#endif - return 0; - } - } -#ifdef PICO_DEBUG_MULTICAST - pico_ipv6_to_string(ipv6_addr, &hdr->src.addr[0]); - ipv6_mcast_dbg("MCAST: IP %s NOT in included interface source list\n", ipv6_addr); -#endif - return -1; - - case PICO_IP_MULTICAST_EXCLUDE: - pico_tree_foreach(index2, &g->MCASTSources) { - if (memcmp(hdr->src.addr, (((struct pico_ip6 *)index2->keyValue)->addr), sizeof(struct pico_ip6)) == 0) { -#ifdef PICO_DEBUG_MULTICAST - pico_ipv6_to_string(ipv6_addr, &hdr->src.addr[0]); - ipv6_mcast_dbg("MCAST: IP %s in excluded interface source list\n", ipv6_addr); -#endif - return -1; - } - } -#ifdef PICO_DEBUG_MULTICAST - pico_ipv6_to_string(ipv6_addr, &hdr->src.addr[0]); - ipv6_mcast_dbg("MCAST: IP %s NOT in excluded interface source list\n", ipv6_addr); -#endif - return 0; - - default: - return -1; - } - } else { -#ifdef PICO_DEBUG_MULTICAST - pico_ipv6_to_string(ipv6_addr, &hdr->dst.addr[0]); - ipv6_mcast_dbg("MCAST: IP %s is group member of different link %s\n", ipv6_addr, link->dev->name); -#endif - } - } else { -#ifdef PICO_DEBUG_MULTICAST - pico_ipv6_to_string(ipv6_addr, &hdr->dst.addr[0]); - ipv6_mcast_dbg("MCAST: IP %s is not a group member of link %s\n", ipv6_addr, 
f->dev->name); -#endif - } - } - return -1; -} - -#else - -int pico_ipv6_mcast_join(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *_MCASTFilter) -{ - IGNORE_PARAMETER(mcast_link); - IGNORE_PARAMETER(mcast_group); - IGNORE_PARAMETER(reference_count); - IGNORE_PARAMETER(filter_mode); - IGNORE_PARAMETER(_MCASTFilter); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -int pico_ipv6_mcast_leave(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *_MCASTFilter) -{ - IGNORE_PARAMETER(mcast_link); - IGNORE_PARAMETER(mcast_group); - IGNORE_PARAMETER(reference_count); - IGNORE_PARAMETER(filter_mode); - IGNORE_PARAMETER(_MCASTFilter); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -struct pico_ipv6_link *pico_ipv6_get_default_mcastlink(void) -{ - pico_err = PICO_ERR_EPROTONOSUPPORT; - return NULL; -} -#endif /* PICO_SUPPORT_MCAST */ -static inline struct pico_ipv6_route *ipv6_pushed_frame_checks(struct pico_frame *f, struct pico_ip6 *dst) -{ - struct pico_ipv6_route *route = NULL; - - if (ipv6_pushed_frame_valid(f, dst) < 0) - return NULL; - - if (memcmp(dst->addr, PICO_IP6_ANY, PICO_SIZE_IP6) == 0) { - dbg("IPv6: IP destination address error\n"); - return NULL; - } - - route = pico_ipv6_route_find(dst); - if (!route && !f->dev) { - dbg("IPv6: route not found.\n"); - pico_err = PICO_ERR_EHOSTUNREACH; - return NULL; - } - - return route; -} - -static inline void ipv6_push_hdr_adjust(struct pico_frame *f, struct pico_ipv6_link *link, struct pico_ip6 *src, struct pico_ip6 *dst, uint8_t proto, int is_dad) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_ipv6_hdr *hdr = NULL; - struct pico_ipv6_exthdr *hbh = NULL; - const uint8_t vtf = (uint8_t)long_be(0x60000000); /* version 6, traffic class 0, flow label 0 */ - - hdr = (struct pico_ipv6_hdr *)f->net_hdr; - hdr->vtf = vtf; - hdr->len = 
short_be((uint16_t)(f->transport_len + f->net_len - (uint16_t)sizeof(struct pico_ipv6_hdr))); - hdr->nxthdr = proto; - hdr->hop = f->dev->hostvars.hoplimit; - hdr->dst = *dst; - - if (!src || !pico_ipv6_is_unicast(src)) - /* Address defaults to the link information: src address selection is done via link */ - hdr->src = link->address; - else { - /* Sender protocol is forcing an IPv6 address */ - hdr->src = *src; - } - - if (f->send_ttl) { - hdr->hop = f->send_ttl; - } - - if (f->send_tos) { - hdr->vtf |= ((uint32_t)f->send_tos << 20u); - } - - /* make adjustments to defaults according to proto */ - switch (proto) - { -#ifdef PICO_SUPPORT_MLD - case 0: - { - hbh = (struct pico_ipv6_exthdr *) f->transport_hdr; - switch(hbh->nxthdr) { - case PICO_PROTO_ICMP6: - { - icmp6_hdr = (struct pico_icmp6_hdr *)(f->transport_hdr + sizeof(struct pico_ipv6_exthdr)); - if((icmp6_hdr->type >= PICO_MLD_QUERY && icmp6_hdr->type <= PICO_MLD_DONE) || icmp6_hdr->type == PICO_MLD_REPORTV2) { - hdr->hop = 1; - } - - icmp6_hdr->crc = 0; - icmp6_hdr->crc = short_be(pico_mld_checksum(f)); - break; - } - } - break; - } -#else - IGNORE_PARAMETER(hbh); -#endif - case PICO_PROTO_ICMP6: - { - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - if (icmp6_hdr->type == PICO_ICMP6_NEIGH_SOL || icmp6_hdr->type == PICO_ICMP6_NEIGH_ADV || icmp6_hdr->type == PICO_ICMP6_ROUTER_SOL || icmp6_hdr->type == PICO_ICMP6_ROUTER_ADV) - hdr->hop = 255; - - /* RFC6775 $5.5.1: - * ... An unspecified source address MUST NOT be used in NS messages. 
- */ - if (f->dev->mode == LL_MODE_ETHERNET && (is_dad || link->istentative) && icmp6_hdr->type == PICO_ICMP6_NEIGH_SOL) { - memcpy(hdr->src.addr, PICO_IP6_ANY, PICO_SIZE_IP6); - } - - icmp6_hdr->crc = 0; - icmp6_hdr->crc = short_be(pico_icmp6_checksum(f)); - break; - } -#ifdef PICO_SUPPORT_UDP - case PICO_PROTO_UDP: - { - struct pico_udp_hdr *udp_hdr = (struct pico_udp_hdr *) f->transport_hdr; - udp_hdr->crc = short_be(pico_udp_checksum_ipv6(f)); - break; - } -#endif - - default: - break; - } - -} - -static int ipv6_frame_push_final(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = NULL; - hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if(pico_ipv6_link_get(&hdr->dst)) { - return pico_enqueue(&ipv6_in, f); - } - else { - return pico_enqueue(&ipv6_out, f); - } -} - -struct pico_ipv6_link *pico_ipv6_linklocal_get(struct pico_device *dev); - -int pico_ipv6_frame_push(struct pico_frame *f, struct pico_ip6 *src, struct pico_ip6 *dst, uint8_t proto, int is_dad) -{ - struct pico_ipv6_route *route = NULL; - struct pico_ipv6_link *link = NULL; - - if (dst && (pico_ipv6_is_linklocal(dst->addr) || pico_ipv6_is_multicast(dst->addr) || pico_ipv6_is_sitelocal(dst->addr))) { - if (!f->dev) { - pico_frame_discard(f); - return -1; - } - - if (pico_ipv6_is_sitelocal(dst->addr)) - link = pico_ipv6_sitelocal_get(f->dev); - else - link = pico_ipv6_linklocal_get(f->dev); - - if (link) - goto push_final; - } - - if (pico_ipv6_is_localhost(dst->addr)) { - f->dev = pico_get_device("loop"); - } - - route = ipv6_pushed_frame_checks(f, dst); - if (!route) { - pico_frame_discard(f); - return -1; - } - - link = route->link; - - if (f->sock && f->sock->dev) - f->dev = f->sock->dev; - else { - f->dev = link->dev; - if (f->sock) - f->sock->dev = f->dev; - } - - - #if 0 - if (pico_ipv6_is_multicast(hdr->dst.addr)) { - /* XXX: reimplement loopback */ - } - - #endif - -push_final: - ipv6_push_hdr_adjust(f, link, src, dst, proto, is_dad); - return ipv6_frame_push_final(f); -} - -static int 
pico_ipv6_frame_sock_push(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_ip6 *dst = NULL; - struct pico_remote_endpoint *remote_endpoint = NULL; - - IGNORE_PARAMETER(self); - - if (!f->sock) { - pico_frame_discard(f); - return -1; - } - - remote_endpoint = (struct pico_remote_endpoint *)f->info; - if (remote_endpoint) { - dst = &remote_endpoint->remote_addr.ip6; - } else { - dst = &f->sock->remote_addr.ip6; - } - - return pico_ipv6_frame_push(f, NULL, dst, (uint8_t)f->sock->proto->proto_number, 0); -} - -/* interface: protocol definition */ -struct pico_protocol pico_proto_ipv6 = { - .name = "ipv6", - .proto_number = PICO_PROTO_IPV6, - .layer = PICO_LAYER_NETWORK, - .alloc = pico_ipv6_alloc, - .process_in = pico_ipv6_process_in, - .process_out = pico_ipv6_process_out, - .push = pico_ipv6_frame_sock_push, - .q_in = &ipv6_in, - .q_out = &ipv6_out, -}; - -#ifdef DEBUG_IPV6_ROUTE -static void pico_ipv6_dbg_route(void) -{ - struct pico_ipv6_route *r; - struct pico_tree_node *index; - char ipv6_addr[PICO_IPV6_STRING]; - char netmask_addr[PICO_IPV6_STRING]; - char gateway_addr[PICO_IPV6_STRING]; - - pico_tree_foreach(index, &Routes){ - r = index->keyValue; - pico_ipv6_to_string(ipv6_addr, r->dest.addr); - pico_ipv6_to_string(netmask_addr, r->netmask.addr); - pico_ipv6_to_string(gateway_addr, r->gateway.addr); - dbg("Route to %s/%s, gw %s, dev: %s, metric: %d\n", ipv6_addr, netmask_addr, gateway_addr, r->link->dev->name, r->metric); - } -} -#else -#define pico_ipv6_dbg_route() do { } while(0) -#endif - -static inline struct pico_ipv6_route *ipv6_route_add_link(struct pico_ip6 gateway) -{ - struct pico_ip6 zerogateway = {{0}}; - struct pico_ipv6_route *r = pico_ipv6_route_find(&gateway); - - if (!r) { /* Specified Gateway is unreachable */ - pico_err = PICO_ERR_EHOSTUNREACH; - return NULL; - } - - if (memcmp(r->gateway.addr, zerogateway.addr, PICO_SIZE_IP6) != 0) { /* Specified Gateway is not a neighbor */ - pico_err = PICO_ERR_ENETUNREACH; - return 
NULL; - } - - return r; -} - -struct pico_ipv6_route *pico_ipv6_gateway_by_dev(struct pico_device *dev) -{ - struct pico_ipv6_link *link = pico_ipv6_link_by_dev(dev); - struct pico_ipv6_route *route = NULL; - struct pico_tree_node *node = NULL; - - /* Iterate over the IPv6-routes */ - pico_tree_foreach(node, &IPV6Routes) { - route = (struct pico_ipv6_route *)node->keyValue; - /* If the route is a default router, specified by the gw being set */ - if (!pico_ipv6_is_unspecified(route->gateway.addr) && pico_ipv6_is_unspecified(route->netmask.addr)) { - /* Iterate over device's links */ - while (link) { - /* If link is equal to route's link, router list is not empty */ - if (0 == ipv6_link_compare(link, route->link)) - return route; - link = pico_ipv6_link_by_dev_next(dev, link); - } - } - } - - return NULL; -} - -struct pico_ipv6_route *pico_ipv6_gateway_by_dev_next(struct pico_device *dev, struct pico_ipv6_route *last) -{ - struct pico_ipv6_link *link = NULL; - struct pico_ipv6_route *gw = NULL; - struct pico_tree_node *i = NULL; - int valid = 0; - - if (last == NULL) - valid = 1; - - pico_tree_foreach(i, &IPV6Routes) { - gw = (struct pico_ipv6_route *)i->keyValue; - /* If the route is a default router, specified by the gw being set */ - if (!pico_ipv6_is_unspecified(gw->gateway.addr) && pico_ipv6_is_unspecified(gw->netmask.addr)) { - /* Iterate over device's links */ - link = pico_ipv6_link_by_dev(dev); - while (link) { - /* If link is equal to route's link, routing list is not empty */ - if (0 == ipv6_link_compare(link, gw->link)) { - if (last == gw) { - valid = 1; - } else if (valid) { - return gw; - } - link = pico_ipv6_link_by_dev_next(dev, link); - } - } - } - } - return NULL; -} - -int pico_ipv6_route_add(struct pico_ip6 address, struct pico_ip6 netmask, struct pico_ip6 gateway, int metric, struct pico_ipv6_link *link) -{ - struct pico_ip6 zerogateway = {{0}}; - struct pico_ipv6_route test, *new = NULL; - test.dest = address; - test.netmask = netmask; - 
test.metric = (uint32_t)metric; - if (pico_tree_findKey(&IPV6Routes, &test)) { - /* Route already exists */ - pico_err = PICO_ERR_EINVAL; - return -1; - } - - new = PICO_ZALLOC(sizeof(struct pico_ipv6_route)); - if (!new) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - ipv6_dbg("Adding IPV6 static route\n"); - new->dest = address; - new->netmask = netmask; - new->gateway = gateway; - new->metric = (uint32_t)metric; - if (memcmp(gateway.addr, zerogateway.addr, PICO_SIZE_IP6) == 0) { - /* No gateway provided, use the link */ - new->link = link; - } else { - struct pico_ipv6_route *r = ipv6_route_add_link(gateway); - if (!r) { - if (link) - new->link = link; - else { - PICO_FREE(new); - return -1; - } - } else { - new->link = r->link; - } - } - - if (new->link && (pico_ipv6_is_global(address.addr)) && (!pico_ipv6_is_global(new->link->address.addr))) { - new->link = pico_ipv6_global_get(new->link->dev); - } - - if (!new->link) { - pico_err = PICO_ERR_EINVAL; - PICO_FREE(new); - return -1; - } - - if (pico_tree_insert(&IPV6Routes, new)) { - ipv6_dbg("IPv6: Failed to insert route in tree\n"); - PICO_FREE(new); - return -1; - } - - pico_ipv6_dbg_route(); - return 0; -} - -int pico_ipv6_route_del(struct pico_ip6 address, struct pico_ip6 netmask, struct pico_ip6 gateway, int metric, struct pico_ipv6_link *link) -{ - struct pico_ipv6_route test, *found = NULL; - - IGNORE_PARAMETER(gateway); - - if (!link) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - test.dest = address; - test.netmask = netmask; - test.metric = (uint32_t)metric; - - found = pico_tree_findKey(&IPV6Routes, &test); - if (found) { - pico_tree_delete(&IPV6Routes, found); - PICO_FREE(found); - pico_ipv6_dbg_route(); - return 0; - } - - pico_err = PICO_ERR_EINVAL; - return -1; -} - -void pico_ipv6_router_down(struct pico_ip6 *address) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ipv6_route *route = NULL; - if (!address) - return; - - pico_tree_foreach_safe(index, &IPV6Routes, 
_tmp) - { - route = index->keyValue; - if (pico_ipv6_compare(address, &route->gateway) == 0) - pico_ipv6_route_del(route->dest, route->netmask, route->gateway, (int)route->metric, route->link); - } -} - -#ifndef UNIT_TEST -static void pico_ipv6_nd_dad(pico_time now, void *arg) -{ - struct pico_ip6 *address = (struct pico_ip6 *)arg; - struct pico_ipv6_link *l = NULL; - struct pico_ip6 old_address; - if (!arg) - return; - - IGNORE_PARAMETER(now); - - l = pico_ipv6_link_istentative(address); - if (!l) - return; - - if (pico_device_link_state(l->dev) == 0) { - l->dad_timer = pico_timer_add(100, pico_ipv6_nd_dad, &l->address); - if (!l->dad_timer) { - dbg("IPv6: Failed to start nd_dad timer\n"); - /* TODO does this have disastrous consequences? */ - } - return; - } - - if (l->isduplicate) { - dbg("IPv6: duplicate address.\n"); - old_address = *address; - if (pico_ipv6_is_linklocal(address->addr)) { - address->addr[8] = (uint8_t)((uint8_t)(pico_rand() & 0xff) & (uint8_t)(~0x03)); - address->addr[9] = pico_rand() & 0xff; - address->addr[10] = pico_rand() & 0xff; - address->addr[11] = pico_rand() & 0xff; - address->addr[12] = pico_rand() & 0xff; - address->addr[13] = pico_rand() & 0xff; - address->addr[14] = pico_rand() & 0xff; - address->addr[15] = pico_rand() & 0xff; - pico_ipv6_link_add(l->dev, *address, l->netmask); - } - - pico_ipv6_link_del(l->dev, old_address); - } - else { - if (l->dup_detect_retrans-- == 0) { - dbg("IPv6: DAD verified valid address.\n"); - - l->istentative = 0; - } else { - /* Duplicate Address Detection */ - pico_icmp6_neighbor_solicitation(l->dev, &l->address, PICO_ICMP6_ND_DAD, NULL); - l->dad_timer = pico_timer_add(PICO_ICMP6_MAX_RTR_SOL_DELAY, pico_ipv6_nd_dad, &l->address); - if (!l->dad_timer) { - dbg("IPv6: Failed to start nd_dad timer\n"); - /* TODO does this have disastrous consequences? 
*/ - } - } - } -} -#endif - -static struct pico_ipv6_link *pico_ipv6_do_link_add(struct pico_device *dev, struct pico_ip6 address, struct pico_ip6 netmask) -{ - struct pico_ipv6_link test = { - 0 - }, *new = NULL; - struct pico_ip6 network = {{0}}, gateway = {{0}}; - struct pico_ip6 mcast_addr = {{ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}; - struct pico_ip6 mcast_nm = {{ 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}; - struct pico_ip6 mcast_gw = {{0}}; - struct pico_ip6 all_hosts = {{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}; - int i = 0; - if (!dev) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - test.address = address; - test.dev = dev; - /** XXX: Valid netmask / unicast address test **/ - - if (pico_tree_findKey(&IPV6Links, &test)) { - dbg("IPv6: trying to assign an invalid address (in use)\n"); - pico_err = PICO_ERR_EADDRINUSE; - return NULL; - } - - /** XXX: Check for network already in use (e.g. 
trying to assign 10.0.0.1/24 where 10.1.0.1/8 is in use) **/ - new = PICO_ZALLOC(sizeof(struct pico_ipv6_link)); - if (!new) { - dbg("IPv6: out of memory!\n"); - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - new->address = address; - new->netmask = netmask; - new->dev = dev; - new->istentative = 1; - new->isduplicate = 0; -#ifdef PICO_SUPPORT_MCAST - new->MCASTGroups = PICO_ZALLOC(sizeof(struct pico_tree)); - if (!new->MCASTGroups) { - PICO_FREE(new); - dbg("IPv6: Out of memory!\n"); - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - new->MCASTGroups->root = &LEAF; - new->MCASTGroups->compare = ipv6_mcast_groups_cmp; -#ifdef PICO_SUPPORT_MLD - new->mcast_compatibility = PICO_MLDV2; - new->mcast_last_query_interval = MLD_QUERY_INTERVAL; -#endif -#endif - if (pico_tree_insert(&IPV6Links, new)) { - ipv6_dbg("IPv6: Failed to insert link in tree\n"); -#ifdef PICO_SUPPORT_MCAST - PICO_FREE(new->MCASTGroups); -#endif - PICO_FREE(new); - return NULL; - } - for (i = 0; i < PICO_SIZE_IP6; ++i) { - network.addr[i] = address.addr[i] & netmask.addr[i]; - } -#ifdef PICO_SUPPORT_MCAST - do { - if (!mcast_default_link_ipv6) { - mcast_default_link_ipv6 = new; - pico_ipv6_route_add(mcast_addr, mcast_nm, mcast_gw, 1, new); - } - - pico_ipv6_mcast_join(&address, &all_hosts, 1, PICO_IP_MULTICAST_EXCLUDE, NULL); - } while(0); -#else - IGNORE_PARAMETER(all_hosts); -#endif - pico_ipv6_route_add(network, netmask, gateway, 1, new); -#ifdef PICO_SUPPORT_6LOWPAN - if (!PICO_DEV_IS_6LOWPAN(dev)) -#endif - pico_ipv6_route_add(mcast_addr, mcast_nm, mcast_gw, 1, new); - /* XXX MUST join the all-nodes multicast address on that interface, as well as - * the solicited-node multicast address corresponding to each of the IP - * addresses assigned to the interface. (RFC 4861 $7.2.1) - * XXX RFC6775 (6LoWPAN): There is no need to join the solicited-node multicast address, since - * nobody multicasts NSs in this type of network. A host MUST join the all-nodes multicast - * address. 
*/ -#ifdef PICO_DEBUG_IPV6 - pico_ipv6_to_string(ipstr, new->address.addr); - dbg("Assigned ipv6 %s to device %s\n", ipstr, new->dev->name); -#endif - return new; -} - -struct pico_ipv6_link *pico_ipv6_link_add_no_dad(struct pico_device *dev, struct pico_ip6 address, struct pico_ip6 netmask) -{ - struct pico_ipv6_link *new = pico_ipv6_do_link_add(dev, address, netmask); - if (new) { - new->istentative = 0; - } - return new; -} - -struct pico_ipv6_link *pico_ipv6_link_add(struct pico_device *dev, struct pico_ip6 address, struct pico_ip6 netmask) -{ -#ifdef DEBUG_IPV6 - char ipstr[40] = { - 0 - }; -#endif - /* Try to add the basic link */ - struct pico_ipv6_link *new = pico_ipv6_do_link_add(dev, address, netmask); - if (!new) - return NULL; - - /* Apply DAD */ - new->dup_detect_retrans = PICO_IPV6_DEFAULT_DAD_RETRANS; -#ifndef UNIT_TEST - /* Duplicate Address Detection */ - new->dad_timer = pico_timer_add(100, pico_ipv6_nd_dad, &new->address); - if (!new->dad_timer) { - dbg("IPv6: Failed to start nd_dad timer\n"); - pico_ipv6_link_del(dev, address); - return NULL; - } -#else - new->istentative = 0; -#endif - -#ifdef DEBUG_IPV6 - pico_ipv6_to_string(ipstr, new->address.addr); - dbg("Assigned ipv6 %s to device %s\n", ipstr, new->dev->name); -#endif - return new; -} - -static int pico_ipv6_cleanup_routes(struct pico_ipv6_link *link) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ipv6_route *route = NULL; - - pico_tree_foreach_safe(index, &IPV6Routes, _tmp) - { - route = index->keyValue; - if (link == route->link) - pico_ipv6_route_del(route->dest, route->netmask, route->gateway, (int)route->metric, route->link); - } - return 0; -} - -int pico_ipv6_cleanup_links(struct pico_device *dev) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ipv6_link *link = NULL; - - pico_tree_foreach_safe(index, &IPV6Links, _tmp) - { - link = index->keyValue; - if (dev == link->dev) - pico_ipv6_link_del(dev, link->address); - } - return 0; -} - 
-int pico_ipv6_link_del(struct pico_device *dev, struct pico_ip6 address) -{ - struct pico_ipv6_link test = { - 0 - }, *found = NULL; - - if (!dev) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - test.address = address; - test.dev = dev; - found = pico_tree_findKey(&IPV6Links, &test); - if (!found) { - pico_err = PICO_ERR_ENXIO; - return -1; - } - - pico_ipv6_cleanup_routes(found); - if (found->dad_timer) - pico_timer_cancel(found->dad_timer); - - pico_tree_delete(&IPV6Links, found); - /* XXX MUST leave the solicited-node multicast address corresponding to the address (RFC 4861 $7.2.1) */ - PICO_FREE(found); - return 0; -} - -struct pico_ipv6_link *pico_ipv6_link_istentative(struct pico_ip6 *address) -{ - struct pico_ipv6_link test = { - 0 - }, *found = NULL; - test.address = *address; - - found = pico_tree_findKey(&IPV6Links, &test); - if (!found) - return NULL; - - if (found->istentative) - return found; - - return NULL; -} - -struct pico_ipv6_link *pico_ipv6_link_get(struct pico_ip6 *address) -{ - struct pico_ipv6_link test = { - 0 - }, *found = NULL; - test.address = *address; - found = pico_tree_findKey(&IPV6Links, &test); - if (!found) { - return NULL; - } - - if (found->istentative) { - return NULL; - } - - return found; -} - -struct pico_device *pico_ipv6_link_find(struct pico_ip6 *address) -{ - struct pico_ipv6_link test = { - 0 - }, *found = NULL; - if(!address) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - test.dev = NULL; - test.address = *address; - found = pico_tree_findKey(&IPV6Links, &test); - if (!found) { - pico_err = PICO_ERR_ENXIO; - return NULL; - } - - if (found->istentative) { - return NULL; - } - - return found->dev; -} - -struct pico_ip6 pico_ipv6_route_get_gateway(struct pico_ip6 *addr) -{ - struct pico_ip6 nullip = {{0}}; - struct pico_ipv6_route *route = NULL; - - if (!addr) { - pico_err = PICO_ERR_EINVAL; - return nullip; - } - - route = pico_ipv6_route_find(addr); - if (!route) { - pico_err = PICO_ERR_EHOSTUNREACH; - 
return nullip; - } - else - return route->gateway; -} - - -struct pico_ipv6_link *pico_ipv6_link_by_dev(struct pico_device *dev) -{ - struct pico_tree_node *index = NULL; - struct pico_ipv6_link *link = NULL; - - pico_tree_foreach(index, &IPV6Links) - { - link = index->keyValue; - if (dev == link->dev) - return link; - } - return NULL; -} - -struct pico_ipv6_link *pico_ipv6_link_by_dev_next(struct pico_device *dev, struct pico_ipv6_link *last) -{ - struct pico_tree_node *index = NULL; - struct pico_ipv6_link *link = NULL; - int valid = 0; - - if (last == NULL) - valid = 1; - - pico_tree_foreach(index, &IPV6Links) - { - link = index->keyValue; - if (link->dev == dev) { - if (last == link) - valid = 1; - else if (valid > 0) - return link; - } - } - return NULL; -} - -struct pico_ipv6_link *pico_ipv6_prefix_configured(struct pico_ip6 *prefix) -{ - unsigned int nm64_len = 8; - struct pico_tree_node *index = NULL; - struct pico_ipv6_link *link = NULL; - pico_tree_foreach(index, &IPV6Links) { - link = index->keyValue; - if (memcmp(link->address.addr, prefix->addr, nm64_len) == 0) - return link; - } - return NULL; -} - -struct pico_ipv6_link *pico_ipv6_linklocal_get(struct pico_device *dev) -{ - struct pico_ipv6_link *link = pico_ipv6_link_by_dev(dev); - while (link && !pico_ipv6_is_linklocal(link->address.addr)) { - link = pico_ipv6_link_by_dev_next(dev, link); - } - return link; -} - -struct pico_ipv6_link *pico_ipv6_sitelocal_get(struct pico_device *dev) -{ - struct pico_ipv6_link *link = pico_ipv6_link_by_dev(dev); - while (link && !pico_ipv6_is_sitelocal(link->address.addr)) { - link = pico_ipv6_link_by_dev_next(dev, link); - } - return link; -} - -struct pico_ipv6_link *pico_ipv6_global_get(struct pico_device *dev) -{ - struct pico_ipv6_link *link = pico_ipv6_link_by_dev(dev); - while (link && !pico_ipv6_is_global(link->address.addr)) { - dbg("[0x%02X] - is global: %d - %d\n", link->address.addr[0], pico_ipv6_is_global(link->address.addr), link->address.addr[0] >> 
0x05); - link = pico_ipv6_link_by_dev_next(dev, link); - } - return link; -} - -#define TWO_HOURS ((pico_time)(1000 * 60 * 60 * 2)) - -void pico_ipv6_check_lifetime_expired(pico_time now, void *arg) -{ - struct pico_tree_node *index = NULL, *temp; - struct pico_ipv6_link *link = NULL; -#ifdef PICO_SUPPORT_6LOWPAN - struct pico_ipv6_route *gw = NULL; -#endif - (void)arg; - pico_tree_foreach_safe(index, &IPV6Links, temp) { - link = index->keyValue; - if ((link->expire_time > 0) && (link->expire_time < now)) { - dbg("Warning: IPv6 address has expired.\n"); - pico_ipv6_link_del(link->dev, link->address); - } -#ifdef PICO_SUPPORT_6LOWPAN - else if (PICO_DEV_IS_6LOWPAN(link->dev) && !pico_ipv6_is_linklocal(link->address.addr) && - (link->expire_time > 0) && (int)(link->expire_time - now) < (int)(TWO_HOURS >> 4)) { - /* RFC6775: The host SHOULD unicast one or more RSs to the router well before the - * shortest of the, Router Lifetime, PIO lifetimes and the lifetime of the 6COs. */ - while ((gw = pico_ipv6_gateway_by_dev_next(link->dev, gw))) { - pico_6lp_nd_start_soliciting(link, gw); - } - } -#endif - } - if (!pico_timer_add(1000, pico_ipv6_check_lifetime_expired, NULL)) { - dbg("IPv6: Failed to start check_lifetime timer\n"); - /* TODO No more link lifetime checking now */ - } -} - -int pico_ipv6_lifetime_set(struct pico_ipv6_link *l, pico_time expire) -{ - pico_time now = PICO_TIME_MS(); - if (expire <= now) { - return -1; - } - - if (expire > 0xFFFFFFFE) { - l->expire_time = 0u; - }else if ((expire > (now + TWO_HOURS)) || (expire > l->expire_time)) { - l->expire_time = expire; - } else { - l->expire_time = now + TWO_HOURS; - } - - return 0; -} - -int pico_ipv6_dev_routing_enable(struct pico_device *dev) -{ - dev->hostvars.routing = 1; - return 0; -} - -int pico_ipv6_dev_routing_disable(struct pico_device *dev) -{ - dev->hostvars.routing = 0; - return 0; -} - -void pico_ipv6_unreachable(struct pico_frame *f, uint8_t code) -{ - struct pico_ipv6_hdr *hdr = (struct 
pico_ipv6_hdr *)f->net_hdr; -#if defined PICO_SUPPORT_TCP || defined PICO_SUPPORT_UDP - pico_transport_error(f, hdr->nxthdr, code); -#endif -} - - - -#endif diff --git a/kernel/picotcp/modules/pico_ipv6.h b/kernel/picotcp/modules/pico_ipv6.h deleted file mode 100644 index 56ddf6a..0000000 --- a/kernel/picotcp/modules/pico_ipv6.h +++ /dev/null @@ -1,182 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - *********************************************************************/ -#ifndef _INCLUDE_PICO_IPV6 -#define _INCLUDE_PICO_IPV6 -#include "pico_addressing.h" -#include "pico_protocol.h" -#include "pico_ipv4.h" - -#define PICO_SIZE_IP6HDR ((uint32_t)(sizeof(struct pico_ipv6_hdr))) -#define PICO_IPV6_DEFAULT_HOP 64 -#define PICO_IPV6_MIN_MTU 1280 -#define PICO_IPV6_STRING 46 - -#define PICO_IPV6_EXTHDR_HOPBYHOP 0 -#define PICO_IPV6_EXTHDR_ROUTING 43 -#define PICO_IPV6_EXTHDR_FRAG 44 -#define PICO_IPV6_EXTHDR_ESP 50 -#define PICO_IPV6_EXTHDR_AUTH 51 -#define PICO_IPV6_EXTHDR_NONE 59 -#define PICO_IPV6_EXTHDR_DESTOPT 60 - -#define PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT 5 -#define PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT_DATALEN 2 - -#define HBH_LEN(hbh) ((((hbh->ext.hopbyhop.len + 1) << 3) - 2)) /* len in bytes, minus nxthdr and len byte */ -#define IPV6_OPTLEN(x) ((uint16_t)(((x + 1) << 3))) - -extern const uint8_t PICO_IP6_ANY[PICO_SIZE_IP6]; -extern struct pico_protocol pico_proto_ipv6; -extern struct pico_tree IPV6Routes; - -PACKED_STRUCT_DEF pico_ipv6_hdr { - uint32_t vtf; - uint16_t len; - uint8_t nxthdr; - uint8_t hop; - struct pico_ip6 src; - struct pico_ip6 dst; -}; - -PACKED_STRUCT_DEF pico_ipv6_pseudo_hdr -{ - struct pico_ip6 src; - struct pico_ip6 dst; - uint32_t len; - uint8_t zero[3]; - uint8_t nxthdr; -}; - -struct pico_ipv6_link -{ - struct pico_device *dev; - struct pico_ip6 address; - 
struct pico_ip6 netmask; - uint8_t istentative : 1; - uint8_t isduplicate : 1; - uint32_t dad_timer; - uint16_t dup_detect_retrans; - uint8_t retrans; - pico_time expire_time; -#ifdef PICO_SUPPORT_MCAST - struct pico_tree *MCASTGroups; - uint8_t mcast_compatibility; - uint8_t mcast_last_query_interval; -#endif -}; - -union pico_link { - struct pico_ipv4_link ipv4; - struct pico_ipv6_link ipv6; -}; - -struct pico_ipv6_hbhoption { - uint8_t type; - uint8_t len; -}; -#ifdef PICO_SUPPORT_MCAST -struct pico_ipv6_mcast_group { - uint8_t filter_mode; - uint16_t reference_count; - struct pico_ip6 mcast_addr; - struct pico_tree MCASTSources; -}; -#endif -struct pico_ipv6_destoption { - uint8_t type; - uint8_t len; -}; - -struct pico_ipv6_route -{ - struct pico_ip6 dest; - struct pico_ip6 netmask; - struct pico_ip6 gateway; - pico_time backoff; - uint8_t retrans; - struct pico_ipv6_link *link; - uint32_t metric; -}; - -PACKED_STRUCT_DEF pico_ipv6_exthdr { - uint8_t nxthdr; - - PACKED_UNION_DEF ipv6_ext_u { - PEDANTIC_STRUCT_DEF hopbyhop_s { - uint8_t len; - } hopbyhop; - - PEDANTIC_STRUCT_DEF destopt_s { - uint8_t len; - } destopt; - - PEDANTIC_STRUCT_DEF routing_s { - uint8_t len; - uint8_t routtype; - uint8_t segleft; - } routing; - - PEDANTIC_STRUCT_DEF fragmentation_s { - uint8_t res; - uint8_t om[2]; - uint8_t id[4]; - } frag; - } ext; -}; - -int pico_ipv6_compare(struct pico_ip6 *a, struct pico_ip6 *b); -int pico_string_to_ipv6(const char *ipstr, uint8_t *ip); -int pico_ipv6_to_string(char *ipbuf, const uint8_t ip[PICO_SIZE_IP6]); -int pico_ipv6_is_unicast(struct pico_ip6 *a); -int pico_ipv6_is_multicast(const uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_allhosts_multicast(const uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_solnode_multicast(const uint8_t addr[PICO_SIZE_IP6], struct pico_device *dev); -int pico_ipv6_is_global(const uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_uniquelocal(const uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_sitelocal(const 
uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_linklocal(const uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_solicited(const uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_unspecified(const uint8_t addr[PICO_SIZE_IP6]); -int pico_ipv6_is_localhost(const uint8_t addr[PICO_SIZE_IP6]); - -int pico_ipv6_frame_push(struct pico_frame *f, struct pico_ip6 *src, struct pico_ip6 *dst, uint8_t proto, int is_dad); -int pico_ipv6_route_add(struct pico_ip6 address, struct pico_ip6 netmask, struct pico_ip6 gateway, int metric, struct pico_ipv6_link *link); -int pico_ipv6_route_del(struct pico_ip6 address, struct pico_ip6 netmask, struct pico_ip6 gateway, int metric, struct pico_ipv6_link *link); -void pico_ipv6_unreachable(struct pico_frame *f, uint8_t code); - -struct pico_ipv6_link *pico_ipv6_link_add(struct pico_device *dev, struct pico_ip6 address, struct pico_ip6 netmask); -struct pico_ipv6_link *pico_ipv6_link_add_no_dad(struct pico_device *dev, struct pico_ip6 address, struct pico_ip6 netmask); -int pico_ipv6_link_del(struct pico_device *dev, struct pico_ip6 address); -int pico_ipv6_cleanup_links(struct pico_device *dev); -struct pico_ipv6_link *pico_ipv6_link_istentative(struct pico_ip6 *address); -struct pico_ipv6_link *pico_ipv6_link_get(struct pico_ip6 *address); -struct pico_device *pico_ipv6_link_find(struct pico_ip6 *address); -struct pico_ip6 pico_ipv6_route_get_gateway(struct pico_ip6 *addr); -struct pico_ip6 *pico_ipv6_source_find(const struct pico_ip6 *dst); -struct pico_device *pico_ipv6_source_dev_find(const struct pico_ip6 *dst); -struct pico_ipv6_link *pico_ipv6_link_by_dev(struct pico_device *dev); -struct pico_ipv6_link *pico_ipv6_link_by_dev_next(struct pico_device *dev, struct pico_ipv6_link *last); -struct pico_ipv6_link *pico_ipv6_global_get(struct pico_device *dev); -struct pico_ipv6_link *pico_ipv6_linklocal_get(struct pico_device *dev); -struct pico_ipv6_link *pico_ipv6_sitelocal_get(struct pico_device *dev); -struct pico_ipv6_link 
*pico_ipv6_prefix_configured(struct pico_ip6 *prefix); -struct pico_ipv6_route *pico_ipv6_gateway_by_dev(struct pico_device *dev); -struct pico_ipv6_route *pico_ipv6_gateway_by_dev_next(struct pico_device *dev, struct pico_ipv6_route *last); -int pico_ipv6_lifetime_set(struct pico_ipv6_link *l, pico_time expire); -void pico_ipv6_check_lifetime_expired(pico_time now, void *arg); -int pico_ipv6_dev_routing_enable(struct pico_device *dev); -int pico_ipv6_dev_routing_disable(struct pico_device *dev); -void pico_ipv6_router_down(struct pico_ip6 *address); - -int pico_ipv6_mcast_join(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *_MCASTFilter); -int pico_ipv6_mcast_leave(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t reference_count, uint8_t filter_mode, struct pico_tree *_MCASTFilter); - -struct pico_ipv6_link *pico_ipv6_get_default_mcastlink(void); - -int pico_ipv6_is_null_address(struct pico_ip6 *ip6); -#endif diff --git a/kernel/picotcp/modules/pico_ipv6_nd.c b/kernel/picotcp/modules/pico_ipv6_nd.c deleted file mode 100644 index 270cabd..0000000 --- a/kernel/picotcp/modules/pico_ipv6_nd.c +++ /dev/null @@ -1,1586 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Daniele Lacamera - *********************************************************************/ - -#include "pico_config.h" -#include "pico_tree.h" -#include "pico_icmp6.h" -#include "pico_ipv6.h" -#include "pico_stack.h" -#include "pico_device.h" -#include "pico_eth.h" -#include "pico_addressing.h" -#include "pico_ipv6_nd.h" -#include "pico_ethernet.h" -#include "pico_6lowpan.h" -#include "pico_6lowpan_ll.h" - -#ifdef PICO_SUPPORT_IPV6 - -#ifdef DEBUG_IPV6_ND -#define nd_dbg dbg -#else -#define nd_dbg(...) 
do {} while(0) -#endif - -#define ONE_MINUTE ((pico_time)(1000 * 60)) - -#ifdef PICO_SUPPORT_6LOWPAN - #define MAX_RTR_SOLICITATIONS (3) - #define RTR_SOLICITATION_INTERVAL (10000) - #define MAX_RTR_SOLICITATION_INTERVAL (60000) -#endif - -static struct pico_frame *frames_queued_v6[PICO_ND_MAX_FRAMES_QUEUED] = { - 0 -}; - -enum pico_ipv6_neighbor_state { - PICO_ND_STATE_INCOMPLETE = 0, - PICO_ND_STATE_REACHABLE, - PICO_ND_STATE_STALE, - PICO_ND_STATE_DELAY, - PICO_ND_STATE_PROBE -}; - -struct pico_ipv6_neighbor { - enum pico_ipv6_neighbor_state state; - struct pico_ip6 address; - union pico_hw_addr hwaddr; - struct pico_device *dev; - uint16_t is_router; - uint16_t failure_count; - pico_time expire; -}; - -/****************************************************************************** - * Function prototypes - ******************************************************************************/ - -#ifdef PICO_SUPPORT_6LOWPAN -static void pico_6lp_nd_deregister(struct pico_ipv6_link *); -static void pico_6lp_nd_unreachable_gateway(struct pico_ip6 *a); -static int pico_6lp_nd_neigh_adv_process(struct pico_frame *f); -static int neigh_sol_detect_dad_6lp(struct pico_frame *f); -#endif - -static int pico_ipv6_neighbor_compare(void *ka, void *kb) -{ - struct pico_ipv6_neighbor *a = ka, *b = kb; - return pico_ipv6_compare(&a->address, &b->address); -} -PICO_TREE_DECLARE(NCache, pico_ipv6_neighbor_compare); - -static struct pico_ipv6_neighbor *pico_nd_find_neighbor(struct pico_ip6 *dst) -{ - struct pico_ipv6_neighbor test = { - 0 - }; - - test.address = *dst; - return pico_tree_findKey(&NCache, &test); -} - -static void pico_ipv6_nd_queued_trigger(void) -{ - int i; - struct pico_frame *f; - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) - { - f = frames_queued_v6[i]; - if (f) { - if (pico_datalink_send(f) <= 0) - pico_frame_discard(f); - frames_queued_v6[i] = NULL; - } - } -} - -static void ipv6_duplicate_detected(struct pico_ipv6_link *l) -{ - struct pico_device *dev; - int 
is_ll = pico_ipv6_is_linklocal(l->address.addr); - dev = l->dev; - dbg("IPV6: Duplicate address detected. Removing link.\n"); - pico_ipv6_link_del(l->dev, l->address); -#ifdef PICO_SUPPORT_6LOWPAN - if (PICO_DEV_IS_6LOWPAN(l->dev)) { - pico_6lp_nd_deregister(l); - } -#endif - if (is_ll) - pico_device_ipv6_random_ll(dev); -} - -static struct pico_ipv6_neighbor *pico_nd_add(struct pico_ip6 *addr, struct pico_device *dev) -{ - struct pico_ipv6_neighbor *n; - char address[120]; - /* Create a new NCE */ - n = PICO_ZALLOC(sizeof(struct pico_ipv6_neighbor)); - if (!n) - return NULL; - pico_ipv6_to_string(address, addr->addr); - memcpy(&n->address, addr, sizeof(struct pico_ip6)); - n->dev = dev; - - if (pico_tree_insert(&NCache, n)) { - nd_dbg("IPv6 ND: Failed to insert neigbor in tree\n"); - PICO_FREE(n); - return NULL; - } - - return n; -} - -static void pico_ipv6_nd_unreachable(struct pico_ip6 *a) -{ - int i; - struct pico_frame *f; - struct pico_ipv6_hdr *hdr; - struct pico_ip6 dst; -#ifdef PICO_SUPPORT_6LOWPAN - /* 6LP: Find any 6LoWPAN-hosts for which this address might have been a default gateway. 
- * If such a host found, send a router solicitation again */ - pico_6lp_nd_unreachable_gateway(a); -#endif /* PICO_SUPPORT_6LOWPAN */ - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) - { - f = frames_queued_v6[i]; - if (f) { - hdr = (struct pico_ipv6_hdr *) f->net_hdr; - dst = pico_ipv6_route_get_gateway(&hdr->dst); - if (pico_ipv6_is_unspecified(dst.addr)) - dst = hdr->dst; - - if (memcmp(dst.addr, a->addr, PICO_SIZE_IP6) == 0) { - if (!pico_source_is_local(f)) { - pico_notify_dest_unreachable(f); - } - - pico_frame_discard(f); - frames_queued_v6[i] = NULL; - } - } - } -} - -static void pico_nd_new_expire_time(struct pico_ipv6_neighbor *n) -{ - if (n->state == PICO_ND_STATE_REACHABLE) - n->expire = PICO_TIME_MS() + PICO_ND_REACHABLE_TIME; - else if ((n->state == PICO_ND_STATE_DELAY) || (n->state == PICO_ND_STATE_STALE)) - n->expire = PICO_TIME_MS() + PICO_ND_DELAY_FIRST_PROBE_TIME; - else { - n->expire = n->dev->hostvars.retranstime + PICO_TIME_MS(); - } -} - -static void pico_nd_discover(struct pico_ipv6_neighbor *n) -{ - char IPADDR[64]; - - if (!n) { - return; - } else { - if (n->expire != (pico_time)0) { - return; - } else { - pico_ipv6_to_string(IPADDR, n->address.addr); - /* dbg("Sending NS for %s\n", IPADDR); */ - if (++n->failure_count > PICO_ND_MAX_SOLICIT) - return; - - if (n->state == PICO_ND_STATE_INCOMPLETE) { - pico_icmp6_neighbor_solicitation(n->dev, &n->address, PICO_ICMP6_ND_SOLICITED, &n->address); - } else { - pico_icmp6_neighbor_solicitation(n->dev, &n->address, PICO_ICMP6_ND_UNICAST, &n->address); - } - - pico_nd_new_expire_time(n); - } - } -} - -static struct pico_eth *pico_nd_get_neighbor(struct pico_ip6 *addr, struct pico_ipv6_neighbor *n, struct pico_device *dev) -{ - /* dbg("Finding neighbor %02x:...:%02x, state = %d\n", addr->addr[0], addr->addr[15], n?n->state:-1); */ - - if (!n) { - n = pico_nd_add(addr, dev); - pico_nd_discover(n); - return NULL; - } else { - if (n->state == PICO_ND_STATE_INCOMPLETE) { - return NULL; - } else if 
(n->state == PICO_ND_STATE_STALE) { - n->state = PICO_ND_STATE_DELAY; - pico_nd_new_expire_time(n); - } - - if (n->state != PICO_ND_STATE_REACHABLE) { - pico_nd_discover(n); - } - } - return &n->hwaddr.mac; -} - -static struct pico_eth *pico_nd_get(struct pico_ip6 *address, struct pico_device *dev) -{ - struct pico_ip6 gateway = {{0}}, addr = {{0}}; - - /* should we use gateway, or is dst local (gateway == 0)? */ - gateway = pico_ipv6_route_get_gateway(address); - if (memcmp(gateway.addr, PICO_IP6_ANY, PICO_SIZE_IP6) == 0) - addr = *address; - else - addr = gateway; - - return pico_nd_get_neighbor(&addr, pico_nd_find_neighbor(&addr), dev); -} - -static int nd_options(uint8_t *options, struct pico_icmp6_opt_lladdr *opt, uint8_t expected_opt, int optlen, int len) -{ - uint8_t type = 0; - int found = 0; - - while (optlen > 0) { - type = ((struct pico_icmp6_opt_lladdr *)options)->type; - len = ((struct pico_icmp6_opt_lladdr *)options)->len; - optlen -= len << 3; /* len in units of 8 octets */ - if (len <= 0) - return -1; /* malformed option. */ - - if (type == expected_opt) { - if (found > 0) - return -1; /* malformed option: option is there twice. */ - - memcpy(opt, (struct pico_icmp6_opt_lladdr *)options, sizeof(struct pico_icmp6_opt_lladdr)); - found++; - } - - if (optlen > 0) { - options += len << 3; - } else { /* parsing options: terminated. */ - return found; - } - } - return found; -} - -static int neigh_options(struct pico_frame *f, struct pico_icmp6_opt_lladdr *opt, uint8_t expected_opt) -{ - /* RFC 4861 $7.1.2 + $7.2.5. - * * The contents of any defined options that are not specified to be used - * * with Neighbor Advertisement messages MUST be ignored and the packet - * * processed as normal. The only defined option that may appear is the - * * Target Link-Layer Address option. 
- * */ - struct pico_icmp6_hdr *icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - uint8_t *option = NULL; - int optlen = 0; - int len = 0; - - optlen = f->transport_len - PICO_ICMP6HDR_NEIGH_ADV_SIZE; - if (optlen) - option = ((uint8_t *)&icmp6_hdr->msg.info.neigh_adv) + sizeof(struct neigh_adv_s); - - return nd_options(option, opt, expected_opt, optlen, len); -} - -static size_t pico_hw_addr_len(struct pico_device *dev, struct pico_icmp6_opt_lladdr *opt) -{ - size_t len = PICO_SIZE_ETH; -#ifndef PICO_SUPPORT_6LOWPAN - IGNORE_PARAMETER(dev); - IGNORE_PARAMETER(opt); -#else - if (PICO_DEV_IS_6LOWPAN(dev)) { - if (1 == opt->len) { - len = (size_t)SIZE_6LOWPAN_SHORT; - } else { - len = (size_t)SIZE_6LOWPAN_EXT; - } - } -#endif - return len; -} - -static void pico_ipv6_neighbor_update(struct pico_ipv6_neighbor *n, struct pico_icmp6_opt_lladdr *opt, struct pico_device *dev) -{ - memcpy(n->hwaddr.data, opt->addr.data, pico_hw_addr_len(dev, opt)); -} - -static int pico_ipv6_neighbor_compare_stored(struct pico_ipv6_neighbor *n, struct pico_icmp6_opt_lladdr *opt, struct pico_device *dev) -{ - return memcmp(n->hwaddr.data, opt->addr.data, pico_hw_addr_len(dev, opt)); -} - -static void neigh_adv_reconfirm_router_option(struct pico_ipv6_neighbor *n, unsigned int isRouter) -{ - if (!isRouter && n->is_router) { - pico_ipv6_router_down(&n->address); - } - - if (isRouter) - n->is_router = 1; - else - n->is_router = 0; -} - - -static int neigh_adv_reconfirm_no_tlla(struct pico_ipv6_neighbor *n, struct pico_icmp6_hdr *hdr) -{ - if (IS_SOLICITED(hdr)) { - n->state = PICO_ND_STATE_REACHABLE; - n->failure_count = 0; - pico_ipv6_nd_queued_trigger(); - pico_nd_new_expire_time(n); - return 0; - } - - return -1; -} - - -static int neigh_adv_reconfirm(struct pico_ipv6_neighbor *n, struct pico_icmp6_opt_lladdr *opt, struct pico_icmp6_hdr *hdr, struct pico_device *dev) -{ - - if (IS_SOLICITED(hdr) && !IS_OVERRIDE(hdr) && (pico_ipv6_neighbor_compare_stored(n, opt, dev) == 0)) { - 
n->state = PICO_ND_STATE_REACHABLE; - n->failure_count = 0; - pico_ipv6_nd_queued_trigger(); - pico_nd_new_expire_time(n); - return 0; - } - - if ((n->state == PICO_ND_STATE_REACHABLE) && IS_SOLICITED(hdr) && !IS_OVERRIDE(hdr)) { - n->state = PICO_ND_STATE_STALE; - return 0; - } - - if (IS_SOLICITED(hdr) && IS_OVERRIDE(hdr)) { - pico_ipv6_neighbor_update(n, opt, dev); - n->state = PICO_ND_STATE_REACHABLE; - n->failure_count = 0; - pico_ipv6_nd_queued_trigger(); - pico_nd_new_expire_time(n); - return 0; - } - - if (!IS_SOLICITED(hdr) && IS_OVERRIDE(hdr) && (pico_ipv6_neighbor_compare_stored(n, opt, dev) != 0)) { - pico_ipv6_neighbor_update(n, opt, dev); - n->state = PICO_ND_STATE_STALE; - pico_ipv6_nd_queued_trigger(); - pico_nd_new_expire_time(n); - return 0; - } - - if ((n->state == PICO_ND_STATE_REACHABLE) && (!IS_SOLICITED(hdr)) && (!IS_OVERRIDE(hdr)) && - (pico_ipv6_neighbor_compare_stored(n, opt, dev) != 0)) { - - /* I. If the Override flag is clear and the supplied link-layer address - * differs from that in the cache, then one of two actions takes - * place: - * a. If the state of the entry is REACHABLE, set it to STALE, but - * do not update the entry in any other way. - * b. Otherwise, the received advertisement should be ignored and - * MUST NOT update the cache. 
- */ - n->state = PICO_ND_STATE_STALE; - pico_nd_new_expire_time(n); - return 0; - } - - return -1; -} - -static void neigh_adv_process_incomplete(struct pico_ipv6_neighbor *n, struct pico_frame *f, struct pico_icmp6_opt_lladdr *opt) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - if (!n || !f) { - return; - } else { - if (!(icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr)) - return; - else { - if (IS_SOLICITED(icmp6_hdr)) { - n->state = PICO_ND_STATE_REACHABLE; - n->failure_count = 0; - pico_nd_new_expire_time(n); - } else { - n->state = PICO_ND_STATE_STALE; - } - - if (opt) - pico_ipv6_neighbor_update(n, opt, f->dev); - - pico_ipv6_nd_queued_trigger(); - } - } -} - - -static int neigh_adv_process(struct pico_frame *f) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_ipv6_neighbor *n = NULL; - struct pico_icmp6_opt_lladdr opt = { - 0 - }; - int optres = neigh_options(f, &opt, PICO_ND_OPT_LLADDR_TGT); - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - - if (optres < 0) { /* Malformed packet: option field cannot be processed. 
*/ - return -1; - } - -#ifdef PICO_SUPPORT_6LOWPAN - if (PICO_DEV_IS_6LOWPAN(f->dev)) { - /* 6LoWPAN: parse Address Registration Comfirmation(nothing on success, remove link on failure) */ - pico_6lp_nd_neigh_adv_process(f); - } -#endif - - /* Check if there's a NCE in the cache */ - n = pico_nd_find_neighbor(&icmp6_hdr->msg.info.neigh_adv.target); - if (!n) { - return 0; - } - - if ((optres == 0) || IS_OVERRIDE(icmp6_hdr) || (pico_ipv6_neighbor_compare_stored(n, &opt, f->dev) == 0)) { - neigh_adv_reconfirm_router_option(n, IS_ROUTER(icmp6_hdr)); - } - - if ((optres > 0) && (n->state == PICO_ND_STATE_INCOMPLETE)) { - neigh_adv_process_incomplete(n, f, &opt); - return 0; - } - - if (optres > 0) - return neigh_adv_reconfirm(n, &opt, icmp6_hdr, f->dev); - else - return neigh_adv_reconfirm_no_tlla(n, icmp6_hdr); - -} - -static struct pico_ipv6_neighbor *pico_ipv6_neighbor_from_sol_new(struct pico_ip6 *ip, struct pico_icmp6_opt_lladdr *opt, struct pico_device *dev) -{ - size_t len = pico_hw_addr_len(dev, opt); - struct pico_ipv6_neighbor *n = NULL; - n = pico_nd_add(ip, dev); - if (!n) - return NULL; - - memcpy(n->hwaddr.data, opt->addr.data, len); - memset(n->hwaddr.data + len, 0, sizeof(union pico_hw_addr) - len); - n->state = PICO_ND_STATE_STALE; - pico_ipv6_nd_queued_trigger(); - return n; -} - -static void pico_ipv6_neighbor_from_unsolicited(struct pico_frame *f) -{ - struct pico_ipv6_neighbor *n = NULL; - struct pico_icmp6_opt_lladdr opt = { - 0 - }; - struct pico_ipv6_hdr *ip = (struct pico_ipv6_hdr *)f->net_hdr; - int valid_lladdr = neigh_options(f, &opt, PICO_ND_OPT_LLADDR_SRC); - - if (!pico_ipv6_is_unspecified(ip->src.addr) && (valid_lladdr > 0)) { - n = pico_nd_find_neighbor(&ip->src); - if (!n) { - n = pico_ipv6_neighbor_from_sol_new(&ip->src, &opt, f->dev); - } else if (memcmp(opt.addr.data, n->hwaddr.data, pico_hw_addr_len(f->dev, &opt))) { - pico_ipv6_neighbor_update(n, &opt, f->dev); - n->state = PICO_ND_STATE_STALE; - pico_ipv6_nd_queued_trigger(); - 
pico_nd_new_expire_time(n); - } - - if (!n) - return; - } -} - -static int neigh_sol_detect_dad(struct pico_frame *f) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_ipv6_hdr *ipv6_hdr = NULL; - struct pico_ipv6_link *link = NULL; - ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - - if (!f->dev->mode) { - link = pico_ipv6_link_istentative(&icmp6_hdr->msg.info.neigh_adv.target); - if (link) { - if (pico_ipv6_is_unicast(&ipv6_hdr->src)) - { - /* RFC4862 5.4.3 : sender is performing address resolution, - * our address is not yet valid, discard silently. - */ - dbg("DAD:Sender performing AR\n"); - } - - else if (pico_ipv6_is_unspecified(ipv6_hdr->src.addr) && - !pico_ipv6_is_allhosts_multicast(ipv6_hdr->dst.addr)) - { - /* RFC4862 5.4.3 : sender is performing DaD */ - dbg("DAD:Sender performing DaD\n"); - ipv6_duplicate_detected(link); - } - - return 0; - } - } - - return -1; /* Current link is not tentative */ -} - -static int neigh_sol_process(struct pico_frame *f) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_ipv6_link *link = NULL; - int valid_lladdr; - struct pico_icmp6_opt_lladdr opt = { - 0 - }; - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - - valid_lladdr = neigh_options(f, &opt, PICO_ND_OPT_LLADDR_SRC); - pico_ipv6_neighbor_from_unsolicited(f); - - if (!f->dev->mode && !valid_lladdr && (0 == neigh_sol_detect_dad(f))) - return 0; -#ifdef PICO_SUPPORT_6LOWPAN - else if (PICO_DEV_IS_6LOWPAN(f->dev)) { - nd_dbg("[6LP-ND] Received Address Registration Option\n"); - neigh_sol_detect_dad_6lp(f); - } -#endif - - if (valid_lladdr < 0) - return -1; /* Malformed packet. */ - - link = pico_ipv6_link_get(&icmp6_hdr->msg.info.neigh_adv.target); - if (!link) { /* Not for us. 
*/ - return -1; - } - - pico_icmp6_neighbor_advertisement(f, &icmp6_hdr->msg.info.neigh_adv.target); - return 0; -} - -static int icmp6_initial_checks(struct pico_frame *f) -{ - /* Common "step 0" validation */ - struct pico_ipv6_hdr *ipv6_hdr = NULL; - struct pico_icmp6_hdr *icmp6_hdr = NULL; - - ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - - /* RFC4861 - 7.1.2 : - * - The IP Hop Limit field has a value of 255, i.e., the packet - * could not possibly have been forwarded by a router. - * - ICMP Checksum is valid. - * - ICMP Code is 0. - */ - if (ipv6_hdr->hop != 255 || pico_icmp6_checksum(f) != 0 || icmp6_hdr->code != 0) - return -1; - - return 0; -} - -static int neigh_adv_option_len_validity_check(struct pico_frame *f) -{ - /* Step 4 validation */ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - uint8_t *opt; - int optlen = f->transport_len - PICO_ICMP6HDR_NEIGH_ADV_SIZE; - /* RFC4861 - 7.1.2 : - * - All included options have a length that is greater than zero. - */ - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - opt = ((uint8_t *)&icmp6_hdr->msg.info.neigh_adv) + sizeof(struct neigh_adv_s); - - while(optlen > 0) { - int opt_size = (opt[1] << 3); - if (opt_size == 0) - return -1; - - opt = opt + opt_size; - optlen -= opt_size; - } - return 0; -} - -static int neigh_adv_mcast_validity_check(struct pico_frame *f) -{ - /* Step 3 validation */ - struct pico_ipv6_hdr *ipv6_hdr = NULL; - struct pico_icmp6_hdr *icmp6_hdr = NULL; - /* RFC4861 - 7.1.2 : - * - If the IP Destination Address is a multicast address the - * Solicited flag is zero. 
- */ - ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - if (pico_ipv6_is_multicast(ipv6_hdr->dst.addr) && IS_SOLICITED(icmp6_hdr)) - return -1; - - return neigh_adv_option_len_validity_check(f); -} - -static int neigh_adv_validity_checks(struct pico_frame *f) -{ - /* Step 2 validation */ - /* RFC4861 - 7.1.2: - * - ICMP length (derived from the IP length) is 24 or more octets. - */ - if (f->transport_len < PICO_ICMP6HDR_NEIGH_ADV_SIZE) - return -1; - - return neigh_adv_mcast_validity_check(f); -} - - -static int neigh_sol_mcast_validity_check(struct pico_frame *f) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - if (pico_ipv6_is_solnode_multicast(icmp6_hdr->msg.info.neigh_sol.target.addr, f->dev) == 0) - return -1; - - return 0; -} - -static int neigh_sol_unicast_validity_check(struct pico_frame *f) -{ - struct pico_ipv6_link *link; - struct pico_icmp6_hdr *icmp6_hdr = NULL; - -#ifdef PICO_SUPPORT_6LOWPAN - /* Don't validate target address, the sol is always targeted at 6LBR so - * no possible interface on the 6LBR can have the same address as specified in - * the target */ - if (PICO_DEV_IS_6LOWPAN(f->dev)) - return 0; -#endif - - link = pico_ipv6_link_by_dev(f->dev); - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - while(link) { - /* RFC4861, 7.2.3: - * - * - The Target Address is a "valid" unicast or anycast address - * assigned to the receiving interface [ADDRCONF], - * - The Target Address is a unicast or anycast address for which the - * node is offering proxy service, or - * - The Target Address is a "tentative" address on which Duplicate - * Address Detection is being performed - */ - if (pico_ipv6_compare(&link->address, &icmp6_hdr->msg.info.neigh_sol.target) == 0) - return 0; - - link = pico_ipv6_link_by_dev_next(f->dev, link); - } - return -1; - -} - -static int neigh_sol_validate_unspec(struct pico_frame *f) -{ - /* RFC4861, 7.1.1: 
- * - * - If the IP source address is the unspecified address, the IP - * destination address is a solicited-node multicast address. - * - * - If the IP source address is the unspecified address, there is no - * source link-layer address option in the message. - * - */ - - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)(f->net_hdr); - struct pico_icmp6_opt_lladdr opt = { - 0 - }; - int valid_lladdr = neigh_options(f, &opt, PICO_ND_OPT_LLADDR_SRC); - if (!f->dev->mode && pico_ipv6_is_solnode_multicast(hdr->dst.addr, f->dev) == 0) { - return -1; - } - - if (valid_lladdr) { - return -1; - } - - return 0; -} - -static int neigh_sol_validity_checks(struct pico_frame *f) -{ - /* Step 2 validation */ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)(f->net_hdr); - if (f->transport_len < PICO_ICMP6HDR_NEIGH_ADV_SIZE) - return -1; - - if ((pico_ipv6_is_unspecified(hdr->src.addr)) && (neigh_sol_validate_unspec(f) < 0)) - { - return -1; - } - - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - if (pico_ipv6_is_multicast(icmp6_hdr->msg.info.neigh_adv.target.addr)) { - return neigh_sol_mcast_validity_check(f); - } - - return neigh_sol_unicast_validity_check(f); -} - -static int router_adv_validity_checks(struct pico_frame *f) -{ - /* Step 2 validation */ - if (f->transport_len < PICO_ICMP6HDR_ROUTER_ADV_SIZE) - return -1; - - return 0; -} - -static int neigh_adv_checks(struct pico_frame *f) -{ - /* Step 1 validation */ - if (icmp6_initial_checks(f) < 0) - return -1; - - return neigh_adv_validity_checks(f); -} - -/*MARK*/ -#ifdef PICO_SUPPORT_6LOWPAN -static void pico_6lp_nd_unreachable_gateway(struct pico_ip6 *a) -{ - struct pico_ipv6_route *route = NULL; - struct pico_ipv6_link *local = NULL; - struct pico_tree_node *node = NULL; - struct pico_device *dev = NULL; - - /* RFC6775, 5.3: - * ... HOSTS need to intelligently retransmit RSs when one of its - * default routers becomes unreachable ... 
- */ - pico_tree_foreach(node, &Device_tree) { - if (PICO_DEV_IS_6LOWPAN(dev) && (!dev->hostvars.routing)) { - /* Check if there's a gateway configured */ - route = pico_ipv6_gateway_by_dev(dev); - while (route) { - if (0 == pico_ipv6_compare(&route->gateway, a)) { - local = pico_ipv6_linklocal_get(dev); - pico_6lp_nd_start_soliciting(local, route); - break; - } - route = pico_ipv6_gateway_by_dev_next(dev, route); - } - } - } -} - -static int pico_6lp_nd_validate_sol_aro(struct pico_icmp6_opt_aro *aro) -{ - if (aro->len != 2 || aro->status != 0) - return -1; - return 0; -} - -static int pico_6lp_nd_validate_adv_aro(struct pico_device *dev, struct pico_icmp6_opt_aro *aro, uint8_t *status) -{ - union pico_ll_addr addr, eui; - - /* RFC6775 - 5.5.2 : - * - If the length field is not two, the option is silently ignored. - * - If the EUI-64 field does not match the EUI-64 of the interface, - * the option is silently ignored. - */ - if (aro->len != 2) - return -1; - - /* TODO: Update to abstract address, e.g. 
remove dependency of '.pan' */ - eui.pan.addr._ext = aro->eui64; - eui.pan.mode = AM_6LOWPAN_EXT; - addr.pan.addr._ext = ((struct pico_6lowpan_info *)dev->eth)->addr_ext; - addr.pan.mode = AM_6LOWPAN_EXT; - - if (dev && pico_6lowpan_lls[dev->mode].addr_cmp) { - if (pico_6lowpan_lls[dev->mode].addr_cmp(&addr, &eui)) - return -1; - } else { - return -1; - } - - *status = aro->status; - return 0; -} - -/* Deregisters a link from all default gateways */ -static void pico_6lp_nd_deregister(struct pico_ipv6_link *l) -{ - struct pico_ipv6_route *gw = pico_ipv6_gateway_by_dev(l->dev); - while (gw) { - pico_icmp6_neighbor_solicitation(l->dev, &l->address, PICO_ICMP6_ND_DEREGISTER, &gw->gateway); - gw = pico_ipv6_gateway_by_dev_next(l->dev, gw); - } -} - -/* Retransmits neighbors solicitations with address registration if ARO is not acknowledged */ -static void pico_6lp_nd_register_try(pico_time now, void *arg) -{ - struct pico_ipv6_link *l = arg; - struct pico_ipv6_route *gw = pico_ipv6_gateway_by_dev(l->dev); - IGNORE_PARAMETER(now); - while (gw) { - l->istentative = 1; - pico_icmp6_neighbor_solicitation(l->dev, &l->address, PICO_ICMP6_ND_DAD, &gw->gateway); - gw = pico_ipv6_gateway_by_dev_next(l->dev, gw); - } - pico_timer_add(l->dev->hostvars.retranstime, pico_6lp_nd_register_try, l); -} - -/* Tries to register a link with one or more of its default routers */ -void pico_6lp_nd_register(struct pico_ipv6_link *link) -{ - /* RFC6775: When a host has configured a non-link-local IPv6 address, it registers that - * address with one or more of its default routers using the Address Registration - * Option (ARO) in an NS message. */ - pico_6lp_nd_register_try(PICO_TIME_MS(), link); -} - -/* Check if there are default routers configured. 
If not, sent a router solicitation */ -static void pico_6lp_nd_do_solicit(pico_time now, void *arg) -{ - struct pico_ipv6_route *gw = arg; - struct pico_ip6 *dst = NULL; - IGNORE_PARAMETER(now); - - if (!pico_ipv6_gateway_by_dev(gw->link->dev) && !gw->link->dev->hostvars.routing) { - /* If the solicitation is to be sent unicast */ - if (!pico_ipv6_is_unspecified(gw->gateway.addr) && gw->retrans < MAX_RTR_SOLICITATIONS) - dst = &gw->gateway; - - /* Exponential backoff */ - if (++gw->retrans == MAX_RTR_SOLICITATIONS) { - gw->backoff <<= 1; - if (gw->backoff >= MAX_RTR_SOLICITATION_INTERVAL) - gw->backoff = (pico_time)MAX_RTR_SOLICITATION_INTERVAL; - } - - /* If router list is empty, send router solicitation */ - pico_icmp6_router_solicitation(gw->link->dev, &gw->link->address, dst); - - /* Apply exponential retransmission timer, see RFC6775 5.3 */ - pico_timer_add(gw->backoff, pico_6lp_nd_do_solicit, gw); - nd_dbg("[6LP-ND]$ No default routers configured, soliciting\n"); - } else { - PICO_FREE(gw); - } -} - -/* Start transmitting repetitive router solicitations */ -int pico_6lp_nd_start_soliciting(struct pico_ipv6_link *l, struct pico_ipv6_route *gw) -{ - struct pico_ipv6_route *dummy = PICO_ZALLOC(sizeof(struct pico_ipv6_route)); - struct pico_ip6 *dst = NULL; - - if (dummy) { - if (gw) { // If the router solicitation has to be sent unicast ... - dst = &gw->gateway; // ... the gateway is the destination - memcpy(dummy->gateway.addr, gw->gateway.addr, PICO_SIZE_IP6); // and should be retrievable in the timer event - } - dummy->link = l; // the link that has to be reconfirmed as well. 
- - /* If router list is empty, send router solicitation */ - pico_icmp6_router_solicitation(l->dev, &l->address, dst); - - if (!l->dev->hostvars.routing) { - dummy->retrans = 0; - dummy->backoff = RTR_SOLICITATION_INTERVAL; - if (!pico_timer_add(dummy->backoff, pico_6lp_nd_do_solicit, dummy)) { - PICO_FREE(dummy); - return -1; - } - } else { - PICO_FREE(dummy); - } - return 0; - } - return -1; -} - -/* Validate Neighbor advertisement mesaage */ -static int pico_6lp_nd_neigh_adv_validate(struct pico_frame *f, uint8_t *status) -{ - struct pico_icmp6_hdr *icmp = (struct pico_icmp6_hdr *)f->transport_hdr; - struct pico_icmp6_opt_aro *aro = (struct pico_icmp6_opt_aro *)((uint8_t *)&icmp->msg.info.neigh_adv + sizeof(struct neigh_sol_s)); - struct pico_ipv6_hdr *ip = (struct pico_ipv6_hdr *)f->net_hdr; - - /* 6LP: Target address cannot be MCAST and the Source IP-address cannot be UNSPECIFIED or MCAST */ - if (pico_ipv6_is_multicast(icmp->msg.info.neigh_adv.target.addr) || pico_ipv6_is_unspecified(ip->src.addr) || - pico_ipv6_is_multicast(ip->src.addr)) - return -1; - - return pico_6lp_nd_validate_adv_aro(f->dev, aro, status); -} - -/* Process neighbor advertisement */ -static int pico_6lp_nd_neigh_adv_process(struct pico_frame *f) -{ - struct pico_icmp6_hdr *icmp = (struct pico_icmp6_hdr *)f->transport_hdr; - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - struct pico_ipv6_link *l = NULL; - struct pico_ip6 zero = { - .addr = {0} - }; - uint8_t status = 0; - - if (pico_6lp_nd_neigh_adv_validate(f, &status)) { - return -1; - } else { - l = pico_ipv6_link_get(&icmp->msg.info.neigh_adv.target); - if (l) - l->istentative = 0; - else - return -1; - - /* Globally routable address has been registered @ 6LoWPAN Border Router */ - if (1 == status) { // Duplicate address detected - nd_dbg("[6LP-ND]: Registering routable address failed, removing link...\n"); - ipv6_duplicate_detected(l); - return -1; - } else if (2 == status) { // Router's NCE is full, remove 
router from default router list - pico_ipv6_route_del(zero, zero, hdr->src, 10, l); - pico_6lp_nd_start_soliciting(pico_ipv6_linklocal_get(l->dev), NULL); - } else { // Registration success - nd_dbg("[6LP-ND]: Registering routable address succeeded!\n"); - } - } - return 0; -} - -/* Add a new 6LoWPAN neighbor with lifetime from ARO */ -static struct pico_ipv6_neighbor *pico_nd_add_6lp(struct pico_ip6 naddr, struct pico_icmp6_opt_aro *aro, struct pico_device *dev) -{ - struct pico_ipv6_neighbor *new = NULL; - - if ((new = pico_nd_add(&naddr, dev))) { - new->expire = PICO_TIME_MS() + (pico_time)(ONE_MINUTE * aro->lifetime); - dbg("ARO Lifetime: %d minutes\n", aro->lifetime); - } else { - return NULL; - } - - return new; -} - -/* RFC6775 §6.5.2. Returning Address Registration Errors */ -static int neigh_sol_dad_reply(struct pico_frame *sol, struct pico_icmp6_opt_lladdr *sllao, struct pico_icmp6_opt_aro *aro, uint8_t status) -{ - uint8_t sllao_len = (uint8_t)(sllao->len * 8); - struct pico_icmp6_hdr *icmp = NULL; - struct pico_frame *adv = pico_frame_copy(sol); - struct pico_ip6 ll = {{0xfe,0x80,0,0,0,0,0,0, 0,0,0,0,0,0,0,0}}; - size_t len = pico_hw_addr_len(sol->dev, sllao); - union pico_ll_addr lladdr; - - if (!adv) { - return -1; - } else { - icmp = (struct pico_icmp6_hdr *)adv->transport_hdr; - - /* Set the status of the Address Registration */ - aro->status = status; - if (PICO_DEV_IS_6LOWPAN(sol->dev)) { - memcpy(lladdr.pan.addr.data, aro->eui64.addr, len); - lladdr.pan.mode = (len == SIZE_6LOWPAN_EXT) ? 
AM_6LOWPAN_EXT : AM_6LOWPAN_SHORT; - if (pico_6lowpan_lls[sol->dev->mode].addr_iid) - pico_6lowpan_lls[sol->dev->mode].addr_iid(ll.addr + 8, &lladdr); - } - - /* Remove the SLLAO from the frame */ - memmove(((uint8_t *)&icmp->msg.info.neigh_sol) + sizeof(struct neigh_sol_s), ((uint8_t *)&icmp->msg.info.neigh_sol) + sizeof(struct neigh_sol_s) + sllao_len, (size_t)(aro->len * 8)); - adv->transport_len = (uint16_t)(adv->transport_len - sllao_len); - adv->len = (uint16_t)(adv->len - sllao_len); - - /* I'm a router, and it's always solicited */ - icmp->msg.info.neigh_adv.rsor = 0xE0; - - /* Set the ICMPv6 message type to Neighbor Advertisements */ - icmp->type = PICO_ICMP6_NEIGH_ADV; - icmp->code = 0; - icmp->crc = pico_icmp6_checksum(adv); - - pico_ipv6_frame_push(adv, NULL, &ll, PICO_PROTO_ICMP6, 0); - return 0; - } -} - -/* RFC6775 §6.5.1. Checking for Duplicates */ -static int neigh_sol_detect_dad_6lp(struct pico_frame *f) -{ - struct pico_ipv6_neighbor *n = NULL; - struct pico_icmp6_opt_lladdr *sllao = NULL; - struct pico_icmp6_hdr *icmp = NULL; - struct pico_icmp6_opt_aro *aro = NULL; - size_t len = 0; - - icmp = (struct pico_icmp6_hdr *)f->transport_hdr; - sllao = (struct pico_icmp6_opt_lladdr *)((uint8_t *)&icmp->msg.info.neigh_sol + sizeof(struct neigh_sol_s)); - aro = (struct pico_icmp6_opt_aro *)(((uint8_t *)&icmp->msg.info.neigh_sol) + sizeof(struct neigh_sol_s) + (sllao->len * 8)); - - /* Validate Address Registration Option */ - if (pico_6lp_nd_validate_sol_aro(aro)) - return -1; - - /* See RFC6775 $6.5.1: Checking for duplicates */ - if (!(n = pico_nd_find_neighbor(&icmp->msg.info.neigh_sol.target))) { - /* No dup, add neighbor to cache */ - if (pico_nd_add_6lp(icmp->msg.info.neigh_sol.target, aro, f->dev)) - neigh_sol_dad_reply(f, sllao, aro, ICMP6_ARO_SUCCES); - else /* No dup, but neighbor cache is full */ - neigh_sol_dad_reply(f, sllao, aro, ICMP6_ARO_FULL); - return 0; - } else { - if (!aro->lifetime) { - pico_tree_delete(&NCache, n); - PICO_FREE(n); 
- neigh_sol_dad_reply(f, sllao, aro, ICMP6_ARO_SUCCES); - return 0; - } - /* Check if hwaddr differs */ - len = pico_hw_addr_len(f->dev, sllao); - if (memcmp(sllao->addr.data, n->hwaddr.data, len) == 0) { - n->expire = PICO_TIME_MS() + (pico_time)(ONE_MINUTE * aro->lifetime); - neigh_sol_dad_reply(f, sllao, aro, ICMP6_ARO_DUP); - } - return 0; - } -} - -static int router_options(struct pico_frame *f, struct pico_icmp6_opt_lladdr *opt, uint8_t expected_opt) -{ - /* RFC 4861 $6.1 - * The contents of any defined options that are not specified to be used - * with Router Solicitation messages MUST be ignored and the packet - * processed as normal. The only defined option that may appear is the - * Source Link-Layer Address option. - */ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - uint8_t *options = NULL; - int optlen = 0; - int len = 0; - - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - optlen = f->transport_len - PICO_ICMP6HDR_ROUTER_SOL_SIZE; - if (optlen) - options = ((uint8_t *)&icmp6_hdr->msg.info.router_sol) + sizeof(struct router_sol_s); - - return nd_options(options, opt, expected_opt, optlen, len); -} - -static int router_sol_validity_checks(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)(f->net_hdr); - struct pico_icmp6_opt_lladdr opt = { 0 }; - int sllao_present = 0; - - /* Step 2 validation */ - if (f->transport_len < PICO_ICMP6HDR_ROUTER_SOL_SIZE_6LP) - return -1; - - /* RFC4861, 6.1.1: - * - If the IP source address is the unspecified address, there is no - * source link-layer address option in the message. - */ - /* Check for SLLAO if the IP source address is UNSPECIFIED */ - sllao_present = router_options(f, &opt, PICO_ND_OPT_LLADDR_SRC); - if (pico_ipv6_is_unspecified(hdr->src.addr)) { - /* Frame is not valid when SLLAO is present if IP6-SRC is UNSPEC. 
*/ - if (sllao_present) { - return -1; - } - } else { - /* Frame is not valid when no SLLAO if present if there's a IP6-SRC */ - if (sllao_present <= 0) { - return -1; - } - } - - return 0; -} - -static int router_sol_checks(struct pico_frame *f) -{ - /* Step 1 validation */ - if (icmp6_initial_checks(f) < 0) - return -1; - - return router_sol_validity_checks(f); -} - -static int router_sol_process(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = NULL; - - /* Determine if i'm a 6LBR, if i'm not, can't do anything with a router solicitation */ - if (!f->dev->hostvars.routing) - return -1; - - nd_dbg("[6LBR]: Processing router solicitation...\n"); - - /* Router solicitation message validation */ - if (router_sol_checks(f) < 0) - return -1; - - /* Maybe create a tentative NCE? No, will do it later */ - - /* Send a router advertisement via unicast to requesting host */ - hdr = (struct pico_ipv6_hdr *)f->net_hdr; - return pico_icmp6_router_advertisement(f->dev, &hdr->src); -} - -#endif /* PICO_SUPPORT_6LOWPAN */ - -static int pico_nd_router_sol_recv(struct pico_frame *f) -{ -#ifdef PICO_SUPPORT_6LOWPAN - /* 6LoWPAN: reply on explicit router solicitations via unicast */ - if (PICO_DEV_IS_6LOWPAN(f->dev)) - return router_sol_process(f); -#endif - - pico_ipv6_neighbor_from_unsolicited(f); - /* Host only: router solicitation is discarded. 
*/ - return 0; -} -static int radv_process(struct pico_frame *f) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - uint8_t *nxtopt, *opt_start; - struct pico_ipv6_link *link; - uint32_t pref_lifetime = 0; - struct pico_ipv6_hdr *hdr; - struct pico_ip6 zero = { - .addr = {0} - }; - int optlen; -#ifdef PICO_SUPPORT_6LOWPAN - int sllao = 0; -#endif - - hdr = (struct pico_ipv6_hdr *)f->net_hdr; - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - optlen = f->transport_len - PICO_ICMP6HDR_ROUTER_ADV_SIZE; - opt_start = ((uint8_t *)&icmp6_hdr->msg.info.router_adv) + sizeof(struct router_adv_s); - nxtopt = opt_start; - - while (optlen > 0) { - uint8_t *type = (uint8_t *)nxtopt; - switch (*type) { - case PICO_ND_OPT_PREFIX: - { - pico_time now = PICO_TIME_MS(); - struct pico_icmp6_opt_prefix *prefix = - (struct pico_icmp6_opt_prefix *) nxtopt; - /* RFC4862 5.5.3 */ - /* a) If the Autonomous flag is not set, silently ignore the Prefix - * Information option. - */ - if (prefix->aac == 0) - goto ignore_opt_prefix; - - /* b) If the prefix is the link-local prefix, silently ignore the - * Prefix Information option - */ - if (pico_ipv6_is_linklocal(prefix->prefix.addr)) - goto ignore_opt_prefix; - - /* c) If the preferred lifetime is greater than the valid lifetime, - * silently ignore the Prefix Information option - */ - pref_lifetime = long_be(prefix->pref_lifetime); - if (pref_lifetime > long_be(prefix->val_lifetime)) - goto ignore_opt_prefix; - -#ifdef PICO_SUPPORT_6LOWPAN - /* RFC6775 (6LoWPAN): Should the host erroneously receive a PIO with the L (on-link) - * flag set, then that PIO MUST be ignored. 
- */ - if (PICO_DEV_IS_6LOWPAN(f->dev) && prefix->onlink) - goto ignore_opt_prefix; -#endif - - if (prefix->val_lifetime == 0) - goto ignore_opt_prefix; - - if (prefix->prefix_len != 64) { - return -1; - } - - /* Refresh lifetime of a prefix */ - link = pico_ipv6_prefix_configured(&prefix->prefix); - if (link) { - pico_ipv6_lifetime_set(link, now + (1000 * (pico_time)(long_be(prefix->val_lifetime)))); - goto ignore_opt_prefix; - } - - /* Configure a an non linklocal IPv6 address */ - link = pico_ipv6_link_add_local(f->dev, &prefix->prefix); - if (link) { - pico_ipv6_lifetime_set(link, now + (1000 * (pico_time)(long_be(prefix->val_lifetime)))); - /* Add a default gateway to the default routers list with source of RADV */ - pico_ipv6_route_add(zero, zero, hdr->src, 10, link); -#ifdef PICO_SUPPORT_6LOWPAN - if (PICO_DEV_IS_6LOWPAN(f->dev)) { - pico_6lp_nd_register(link); - } -#endif - } - -ignore_opt_prefix: - optlen -= (prefix->len << 3); - nxtopt += (prefix->len << 3); - } - break; - case PICO_ND_OPT_LLADDR_SRC: - { - struct pico_icmp6_opt_lladdr *lladdr_src = - (struct pico_icmp6_opt_lladdr *) nxtopt; -#ifdef PICO_SUPPORT_6LOWPAN - sllao = 1; // RFC6775 (6LoWPAN): An SLLAO MUST be included in the RA. 
-#endif - optlen -= (lladdr_src->len << 3); - nxtopt += (lladdr_src->len << 3); - } - break; - case PICO_ND_OPT_MTU: - { - struct pico_icmp6_opt_mtu *mtu = - (struct pico_icmp6_opt_mtu *) nxtopt; - /* Skip this */ - optlen -= (mtu->len << 3); - nxtopt += (mtu->len << 3); - } - break; - case PICO_ND_OPT_REDIRECT: - { - struct pico_icmp6_opt_redirect *redirect = - (struct pico_icmp6_opt_redirect *) nxtopt; - /* Skip this */ - optlen -= (redirect->len << 3); - nxtopt += (redirect->len << 3); - - } - break; - case PICO_ND_OPT_RDNSS: - { - struct pico_icmp6_opt_rdnss *rdnss = - (struct pico_icmp6_opt_rdnss *) nxtopt; - /* Skip this */ - optlen -= (rdnss->len << 3); - nxtopt += (rdnss->len << 3); - } - break; -#ifdef PICO_SUPPORT_6LOWPAN - case PICO_ND_OPT_6CO: - { - struct pico_icmp6_opt_6co *co = (struct pico_icmp6_opt_6co *)nxtopt; -#ifdef PICO_6LOWPAN_IPHC_ENABLED - if (PICO_DEV_IS_6LOWPAN(f->dev)) { - struct pico_ip6 prefix; - memcpy(prefix.addr, (uint8_t *)&co->prefix, (size_t)(co->len - 1) << 3); - ctx_update(prefix, co->id, co->clen, co->lifetime, co->c, f->dev); - } -#endif - optlen -= (co->len << 3); - nxtopt += (co->len << 3); - } - break; - case PICO_ND_OPT_ABRO: - { - struct pico_icmp6_opt_abro *abro = (struct pico_icmp6_opt_abro *)nxtopt; - /* TODO: Process */ - optlen -= (abro->len << 3); - nxtopt += (abro->len << 3); - } - break; -#endif - default: - pico_icmp6_parameter_problem(f, PICO_ICMP6_PARAMPROB_IPV6OPT, - (uint32_t)sizeof(struct pico_ipv6_hdr) + (uint32_t)PICO_ICMP6HDR_ROUTER_ADV_SIZE + (uint32_t)(nxtopt - opt_start)); - return -1; - } - } -#ifdef PICO_SUPPORT_6LOWPAN - if (PICO_DEV_IS_6LOWPAN(f->dev) && !sllao) { - return -1; - } -#endif - if (icmp6_hdr->msg.info.router_adv.retrans_time != 0u) { - f->dev->hostvars.retranstime = long_be(icmp6_hdr->msg.info.router_adv.retrans_time); - } - - return 0; -} - - -static int pico_nd_router_adv_recv(struct pico_frame *f) -{ - if (icmp6_initial_checks(f) < 0) - return -1; - - if 
(router_adv_validity_checks(f) < 0) - return -1; - - pico_ipv6_neighbor_from_unsolicited(f); - return radv_process(f); -} - -static int pico_nd_neigh_sol_recv(struct pico_frame *f) -{ - if (icmp6_initial_checks(f) < 0) - return -1; - - if (neigh_sol_validity_checks(f) < 0) - return -1; - - return neigh_sol_process(f); -} - -static int pico_nd_neigh_adv_recv(struct pico_frame *f) -{ - struct pico_icmp6_hdr *icmp6_hdr = NULL; - struct pico_ipv6_link *link = NULL; - - icmp6_hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - if (neigh_adv_checks(f) < 0) { - return -1; - } - - /* ETH: Target address belongs to a tentative link on this device, DaD detected a dup */ - link = pico_ipv6_link_istentative(&icmp6_hdr->msg.info.neigh_adv.target); - if (link && !link->dev->mode) - ipv6_duplicate_detected(link); - - return neigh_adv_process(f); -} - -static int pico_nd_redirect_recv(struct pico_frame *f) -{ - pico_ipv6_neighbor_from_unsolicited(f); - /* TODO */ - return 0; -} - -static void pico_ipv6_nd_timer_elapsed(pico_time now, struct pico_ipv6_neighbor *n) -{ - (void)now; - switch(n->state) { - case PICO_ND_STATE_INCOMPLETE: - /* intentional fall through */ - case PICO_ND_STATE_PROBE: - if (n->failure_count > PICO_ND_MAX_SOLICIT) { - pico_ipv6_nd_unreachable(&n->address); - pico_tree_delete(&NCache, n); - PICO_FREE(n); - return; - } - - n->expire = 0ull; - pico_nd_discover(n); - break; - - case PICO_ND_STATE_REACHABLE: - n->state = PICO_ND_STATE_STALE; - /* dbg("IPv6_ND: neighbor expired!\n"); */ - return; - - case PICO_ND_STATE_STALE: - break; - - case PICO_ND_STATE_DELAY: - n->expire = 0ull; - n->state = PICO_ND_STATE_PROBE; - break; - default: - dbg("IPv6_ND: neighbor in wrong state!\n"); - } - pico_nd_new_expire_time(n); -} - -static void pico_ipv6_nd_timer_callback(pico_time now, void *arg) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_ipv6_neighbor *n; - - (void)arg; - pico_tree_foreach_safe(index, &NCache, _tmp) - { - n = index->keyValue; - if 
( now > n->expire ) { - pico_ipv6_nd_timer_elapsed(now, n); - } - } - if (!pico_timer_add(200, pico_ipv6_nd_timer_callback, NULL)) { - dbg("IPV6 ND: Failed to start callback timer\n"); - /* TODO no idea what consequences this has */ - } -} - -#define PICO_IPV6_ND_MIN_RADV_INTERVAL (5000) -#define PICO_IPV6_ND_MAX_RADV_INTERVAL (15000) - -static void pico_ipv6_nd_ra_timer_callback(pico_time now, void *arg) -{ - struct pico_tree_node *devindex = NULL; - struct pico_tree_node *rindex = NULL; - struct pico_device *dev; - struct pico_ipv6_route *rt; - struct pico_ip6 nm64 = { {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0 } }; - pico_time next_timer_expire = 0u; - - (void)arg; - (void)now; - pico_tree_foreach(rindex, &IPV6Routes) - { - rt = rindex->keyValue; - if (pico_ipv6_compare(&nm64, &rt->netmask) == 0) { - pico_tree_foreach(devindex, &Device_tree) { - dev = devindex->keyValue; - /* Do not send periodic router advertisements when there aren't 2 interfaces from and to the device can route */ - if ((!pico_ipv6_is_linklocal(rt->dest.addr)) && dev->hostvars.routing && (rt->link) - && (dev != rt->link->dev) && !PICO_DEV_IS_6LOWPAN(dev)) { - pico_icmp6_router_advertisement(dev, &rt->dest); - } - } - } - } - - next_timer_expire = PICO_IPV6_ND_MIN_RADV_INTERVAL + (pico_rand() % (PICO_IPV6_ND_MAX_RADV_INTERVAL - PICO_IPV6_ND_MIN_RADV_INTERVAL)); - if (!pico_timer_add(next_timer_expire, pico_ipv6_nd_ra_timer_callback, NULL)) { - dbg("IPv6 ND: Failed to start callback timer\n"); - /* TODO no idea what consequences this has */ - } -} - -/* Public API */ - -struct pico_eth *pico_ipv6_get_neighbor(struct pico_frame *f) -{ - struct pico_ipv6_hdr *hdr = NULL; - struct pico_ipv6_link *l = NULL; - if (!f) - return NULL; - - hdr = (struct pico_ipv6_hdr *)f->net_hdr; - /* If we are still probing for Duplicate Address, abort now. */ - if (pico_ipv6_link_istentative(&hdr->src)) - return NULL; - - /* address belongs to ourselves? 
*/ - l = pico_ipv6_link_get(&hdr->dst); - if (l && !l->dev->mode) - return &l->dev->eth->mac; - else if (l && PICO_DEV_IS_6LOWPAN(l->dev)) - return (struct pico_eth *)l->dev->eth; - - return pico_nd_get(&hdr->dst, f->dev); -} - -void pico_ipv6_nd_postpone(struct pico_frame *f) -{ - int i; - static int last_enq = -1; - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) - { - if (!frames_queued_v6[i]) { - frames_queued_v6[i] = f; - last_enq = i; - return; - } - } - /* Overwrite the oldest frame in the buffer */ - if (++last_enq >= PICO_ND_MAX_FRAMES_QUEUED) { - last_enq = 0; - } - - if (frames_queued_v6[last_enq]) - pico_frame_discard(frames_queued_v6[last_enq]); - - frames_queued_v6[last_enq] = f; -} - - -int pico_ipv6_nd_recv(struct pico_frame *f) -{ - - struct pico_icmp6_hdr *hdr = (struct pico_icmp6_hdr *)f->transport_hdr; - int ret = -1; - switch(hdr->type) { - case PICO_ICMP6_ROUTER_SOL: - nd_dbg("ICMP6: received ROUTER SOL\n"); - ret = pico_nd_router_sol_recv(f); - break; - - case PICO_ICMP6_ROUTER_ADV: - nd_dbg("ICMP6: received ROUTER ADV\n"); - ret = pico_nd_router_adv_recv(f); - break; - - case PICO_ICMP6_NEIGH_SOL: - nd_dbg("ICMP6: received NEIGH SOL\n"); - ret = pico_nd_neigh_sol_recv(f); - break; - - case PICO_ICMP6_NEIGH_ADV: - nd_dbg("ICMP6: received NEIGH ADV\n"); - ret = pico_nd_neigh_adv_recv(f); - break; - - case PICO_ICMP6_REDIRECT: - ret = pico_nd_redirect_recv(f); - break; - } - pico_frame_discard(f); - return ret; -} - -void pico_ipv6_nd_init(void) -{ - uint32_t timer_cb = 0, ra_timer_cb = 0; - - timer_cb = pico_timer_add(200, pico_ipv6_nd_timer_callback, NULL); - if (!timer_cb) { - nd_dbg("IPv6 ND: Failed to start callback timer\n"); - return; - } - - ra_timer_cb = pico_timer_add(200, pico_ipv6_nd_ra_timer_callback, NULL); - if (!ra_timer_cb) { - nd_dbg("IPv6 ND: Failed to start RA callback timer\n"); - pico_timer_cancel(timer_cb); - return; - } - - if (!pico_timer_add(1000, pico_ipv6_check_lifetime_expired, NULL)) { - nd_dbg("IPv6 ND: Failed to 
start check_lifetime timer\n"); - pico_timer_cancel(timer_cb); - pico_timer_cancel(ra_timer_cb); - return; - } -} - -#endif diff --git a/kernel/picotcp/modules/pico_ipv6_nd.h b/kernel/picotcp/modules/pico_ipv6_nd.h deleted file mode 100644 index 792b229..0000000 --- a/kernel/picotcp/modules/pico_ipv6_nd.h +++ /dev/null @@ -1,36 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - *********************************************************************/ -#ifndef _INCLUDE_PICO_ND -#define _INCLUDE_PICO_ND -#include "pico_frame.h" -#include "pico_ipv6.h" - -/* RFC constants */ -#define PICO_ND_REACHABLE_TIME 30000 /* msec */ -#define PICO_ND_RETRANS_TIMER 1000 /* msec */ - -struct pico_nd_hostvars { - uint8_t routing; - uint8_t hoplimit; - pico_time basetime; - pico_time reachabletime; - pico_time retranstime; -#ifdef PICO_SUPPORT_6LOWPAN - uint8_t lowpan_flags; -#endif -}; - -void pico_ipv6_nd_init(void); -struct pico_eth *pico_ipv6_get_neighbor(struct pico_frame *f); -void pico_ipv6_nd_postpone(struct pico_frame *f); -int pico_ipv6_nd_recv(struct pico_frame *f); - -#ifdef PICO_SUPPORT_6LOWPAN -int pico_6lp_nd_start_soliciting(struct pico_ipv6_link *l, struct pico_ipv6_route *gw); -void pico_6lp_nd_register(struct pico_ipv6_link *link); -#endif - -#endif diff --git a/kernel/picotcp/modules/pico_mcast.c b/kernel/picotcp/modules/pico_mcast.c deleted file mode 100644 index 74b4a01..0000000 --- a/kernel/picotcp/modules/pico_mcast.c +++ /dev/null @@ -1,259 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - This module handles the equalities between the IGMP and the MLD protocol - Authors: Roel Postelmans - *********************************************************************/ - -#include "pico_stack.h" -#include "pico_ipv6.h" -#include "pico_mld.h" -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_addressing.h" -#include "pico_frame.h" -#include "pico_tree.h" -#include "pico_device.h" -#include "pico_socket.h" -#include "pico_icmp6.h" -#include "pico_dns_client.h" -#include "pico_mld.h" -#include "pico_igmp.h" -#include "pico_constants.h" -#include "pico_mcast.h" - -#if (((defined(PICO_SUPPORT_MLD) && defined(PICO_SUPPORT_IPV6)) || defined(PICO_SUPPORT_IGMP)) && defined(PICO_SUPPORT_MCAST)) - -#ifdef DEBUG_MCAST -#define multicast_dbg dbg -#else -#define multicast_dbg(...) do {} while(0) -#endif - -#define MCAST_EVENT_DELETE_GROUP (0x0) -#define MCAST_EVENT_CREATE_GROUP (0x1) -#define MCAST_EVENT_UPDATE_GROUP (0x2) -#define MCAST_EVENT_QUERY_RECV (0x3) -#define MCAST_EVENT_REPORT_RECV (0x4) -#define MCAST_EVENT_TIMER_EXPIRED (0x5) - -#define MCAST_MODE_IS_INCLUDE (1) -#define MCAST_MODE_IS_EXCLUDE (2) -#define MCAST_CHANGE_TO_INCLUDE_MODE (3) -#define MCAST_CHANGE_TO_EXCLUDE_MODE (4) - -#define MCAST_MODE_IS_INCLUDE (1) -#define MCAST_MODE_IS_EXCLUDE (2) -#define MCAST_CHANGE_TO_INCLUDE_MODE (3) -#define MCAST_CHANGE_TO_EXCLUDE_MODE (4) -#define MCAST_ALLOW_NEW_SOURCES (5) -#define MCAST_BLOCK_OLD_SOURCES (6) - -typedef int (*mcast_callback)(struct mcast_filter_parameters *); - -static void pico_mcast_src_filtering_cleanup(struct mcast_filter_parameters*mcast ) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - /* cleanup filters */ - pico_tree_foreach_safe(index, mcast->allow, _tmp) - { - pico_tree_delete(mcast->allow, index->keyValue); - } - pico_tree_foreach_safe(index, mcast->block, _tmp) - { - pico_tree_delete(mcast->block, index->keyValue); - } -} -static int pico_mcast_src_filtering_inc_inc(struct mcast_filter_parameters*mcast ) -{ - 
struct pico_tree_node *index = NULL; - union pico_address *source; - /* all ADD_SOURCE_MEMBERSHIP had an equivalent DROP_SOURCE_MEMBERSHIP */ - if (mcast->p->event == MCAST_EVENT_DELETE_GROUP) { - /* TO_IN (B) */ - mcast->record_type = MCAST_CHANGE_TO_INCLUDE_MODE; - mcast->filter = mcast->allow; - if (mcast->p->MCASTFilter) { - pico_tree_foreach(index, mcast->p->MCASTFilter) /* B */ - { - if (pico_tree_insert(mcast->allow, index->keyValue) == &LEAF) { - multicast_dbg("MCAST: Failed to insert entry in tree\n"); - return -1; - } - mcast->sources++; - } - } /* else { allow stays empty } */ - - return 0; - } - - /* ALLOW (B-A) */ - /* if event is CREATE A will be empty, thus only ALLOW (B-A) has sense */ - if (mcast->p->event == MCAST_EVENT_CREATE_GROUP) /* first ADD_SOURCE_MEMBERSHIP */ - mcast->record_type = MCAST_CHANGE_TO_INCLUDE_MODE; - else - mcast->record_type = MCAST_ALLOW_NEW_SOURCES; - - mcast->filter = mcast->allow; - pico_tree_foreach(index, mcast->p->MCASTFilter) /* B */ - { - if (pico_tree_insert(mcast->allow, index->keyValue) == &LEAF) { - multicast_dbg("MCAST: Failed to insert entry in tree\n"); - return -1; - } - mcast->sources++; - } - pico_tree_foreach(index, &mcast->g->MCASTSources) /* A */ - { - source = pico_tree_findKey(mcast->allow, index->keyValue); - if (source) { - pico_tree_delete(mcast->allow, source); - mcast->sources--; - } - } - if (!pico_tree_empty(mcast->allow)) /* record type is ALLOW */ - return 0; - - /* BLOCK (A-B) */ - mcast->record_type = MCAST_BLOCK_OLD_SOURCES; - mcast->filter = mcast->block; - pico_tree_foreach(index, &mcast->g->MCASTSources) /* A */ - { - if (pico_tree_insert(mcast->block, index->keyValue) == &LEAF) { - multicast_dbg("MCAST: Failed to insert entry in tree\n"); - return -1; - } - mcast->sources++; - } - pico_tree_foreach(index, mcast->p->MCASTFilter) /* B */ - { - source = pico_tree_findKey(mcast->block, index->keyValue); - if (source) { - pico_tree_delete(mcast->block, source); - mcast->sources--; - } - } - 
if (!pico_tree_empty(mcast->block)) /* record type is BLOCK */ - return 0; - - /* ALLOW (B-A) and BLOCK (A-B) are empty: do not send report */ - (mcast->p)->f = NULL; - return MCAST_NO_REPORT; -} - -static int pico_mcast_src_filtering_inc_excl(struct mcast_filter_parameters*mcast ) -{ - struct pico_tree_node *index = NULL; - mcast->record_type = MCAST_CHANGE_TO_EXCLUDE_MODE; - mcast->filter = mcast->block; - pico_tree_foreach(index, mcast->p->MCASTFilter) /* B */ - { - if (pico_tree_insert(mcast->block, index->keyValue) == &LEAF) { - multicast_dbg("MCAST: Failed to insert entry in tree\n"); - return -1; - } - mcast->sources++; - } - return 0; -} -static int pico_mcast_src_filtering_excl_inc(struct mcast_filter_parameters*mcast ) -{ - struct pico_tree_node *index = NULL; - mcast->record_type = MCAST_CHANGE_TO_INCLUDE_MODE; - mcast->filter = mcast->allow; - if (mcast->p->MCASTFilter) { - pico_tree_foreach(index, mcast->p->MCASTFilter) /* B */ - { - if (pico_tree_insert(mcast->allow, index->keyValue) == &LEAF) { - multicast_dbg("MCAST: Failed to insert entry in tree\n"); - return -1; - } - mcast->sources++; - } - } /* else { allow stays empty } */ - - return 0; -} -static int pico_mcast_src_filtering_excl_excl(struct mcast_filter_parameters*mcast ) -{ - struct pico_tree_node *index = NULL; - struct pico_ip6 *source = NULL; - mcast->record_type = MCAST_BLOCK_OLD_SOURCES; - mcast->filter = mcast->block; - pico_tree_foreach(index, mcast->p->MCASTFilter) - { - if (pico_tree_insert(mcast->block, index->keyValue) == &LEAF) { - multicast_dbg("MCAST: Failed to insert entry in tree\n"); - return -1; - } - - mcast->sources++; - } - pico_tree_foreach(index, &mcast->g->MCASTSources) /* A */ - { - source = pico_tree_findKey(mcast->block, index->keyValue); /* B */ - if (source) { - pico_tree_delete(mcast->block, source); - mcast->sources--; - } - } - if (!pico_tree_empty(mcast->block)) /* record type is BLOCK */ - return 0; - - /* ALLOW (A-B) */ - mcast->record_type = 
MCAST_ALLOW_NEW_SOURCES; - mcast->filter = mcast->allow; - pico_tree_foreach(index, &mcast->g->MCASTSources) - { - if (pico_tree_insert(mcast->allow, index->keyValue) == &LEAF) { - multicast_dbg("MCAST: Failed to insert entry in tree\n"); - return -1; - } - mcast->sources++; - } - pico_tree_foreach(index, mcast->p->MCASTFilter) /* B */ - { - source = pico_tree_findKey(mcast->allow, index->keyValue); /* A */ - if (source) { - pico_tree_delete(mcast->allow, source); - mcast->sources--; - } - } - if (!pico_tree_empty(mcast->allow)) /* record type is ALLOW */ - return 0; - - /* BLOCK (B-A) and ALLOW (A-B) are empty: do not send report */ - mcast->p->f = NULL; - return MCAST_NO_REPORT; -} -static const mcast_callback mcast_filter_state[2][2] = -{ - { pico_mcast_src_filtering_excl_excl, pico_mcast_src_filtering_excl_inc}, - { pico_mcast_src_filtering_inc_excl, pico_mcast_src_filtering_inc_inc } -}; -int8_t pico_mcast_generate_filter(struct mcast_filter_parameters *filter, struct mcast_parameters *p) -{ - int ret = -1; - /* "non-existent" state of filter mode INCLUDE and empty source list */ - if (p->event == MCAST_EVENT_DELETE_GROUP) { - p->filter_mode = PICO_IP_MULTICAST_INCLUDE; - p->MCASTFilter = NULL; - } - - if (p->event == MCAST_EVENT_QUERY_RECV) - return 0; - - pico_mcast_src_filtering_cleanup(filter); - - if(filter->g->filter_mode <= PICO_IP_MULTICAST_INCLUDE ) - { - if(p->filter_mode <= PICO_IP_MULTICAST_INCLUDE) - { - ret = mcast_filter_state[filter->g->filter_mode][p->filter_mode](filter); - } - } - - return (int8_t) ret; -} -#endif diff --git a/kernel/picotcp/modules/pico_mcast.h b/kernel/picotcp/modules/pico_mcast.h deleted file mode 100644 index dc9146d..0000000 --- a/kernel/picotcp/modules/pico_mcast.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef INCLUDE_PICO_MCAST -#define INCLUDE_PICO_MCAST - -#define MCAST_MODE_IS_INCLUDE (1) -#define MCAST_MODE_IS_EXCLUDE (2) -#define MCAST_CHANGE_TO_INCLUDE_MODE (3) -#define MCAST_CHANGE_TO_EXCLUDE_MODE (4) -#define 
MCAST_ALLOW_NEW_SOURCES (5) -#define MCAST_BLOCK_OLD_SOURCES (6) -#define MCAST_EVENT_DELETE_GROUP (0x0) -#define MCAST_EVENT_CREATE_GROUP (0x1) -#define MCAST_EVENT_UPDATE_GROUP (0x2) -#define MCAST_EVENT_QUERY_RECV (0x3) -#define MCAST_EVENT_REPORT_RECV (0x4) -#define MCAST_EVENT_TIMER_EXPIRED (0x5) -#define MCAST_NO_REPORT (1) - -PACKED_STRUCT_DEF mcast_parameters { - uint8_t event; - uint8_t state; - uint8_t general_query; - uint8_t filter_mode; - uint8_t last_host; - uint16_t max_resp_time; - union pico_address mcast_link; - union pico_address mcast_group; - struct pico_tree *MCASTFilter; - struct pico_frame *f; -}; - -PACKED_STRUCT_DEF pico_mcast_group { - uint8_t filter_mode; - uint16_t reference_count; - union pico_address mcast_addr; - struct pico_tree MCASTSources; -}; - -PACKED_STRUCT_DEF mcast_filter_parameters { - struct mcast_parameters *p; - struct pico_tree *allow; - struct pico_tree *block; - struct pico_tree *filter; - uint16_t sources; - uint8_t proto; - uint8_t record_type; - struct pico_mcast_group *g; - union pico_link *link; -}; - - -extern int8_t pico_mcast_generate_filter(struct mcast_filter_parameters *filter, struct mcast_parameters *p); - -#endif diff --git a/kernel/picotcp/modules/pico_mdns.c b/kernel/picotcp/modules/pico_mdns.c deleted file mode 100644 index 2d6cf07..0000000 --- a/kernel/picotcp/modules/pico_mdns.c +++ /dev/null @@ -1,3687 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2014-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - . 
- Author: Toon Stegen, Jelle De Vleeschouwer - *********************************************************************/ -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_tree.h" -#include "pico_mdns.h" - -#ifdef PICO_SUPPORT_MDNS - -/* --- Debugging --- */ -#ifdef DEBUG_MDNS -#define mdns_dbg dbg -#else -#define mdns_dbg(...) do {} while(0) -#endif - -#define PICO_MDNS_QUERY_TIMEOUT (10000) /* Ten seconds */ -#define PICO_MDNS_RR_TTL_TICK (1000) /* One second */ - -/* mDNS MTU size */ -#define PICO_MDNS_MAXBUF (1400u) - -/* --- Cookie flags --- */ -#define PICO_MDNS_PACKET_TYPE_ANNOUNCEMENT (0x01u) -#define PICO_MDNS_PACKET_TYPE_ANSWER (0x02u) -#define PICO_MDNS_PACKET_TYPE_QUERY (0x04u) -#define PICO_MDNS_PACKET_TYPE_PROBE (0x08u) -#define PICO_MDNS_PACKET_TYPE_QUERY_ANY (0x00u) -/* --- Cookie status --- */ -#define PICO_MDNS_COOKIE_STATUS_ACTIVE (0xffu) -#define PICO_MDNS_COOKIE_STATUS_INACTIVE (0x00u) -#define PICO_MDNS_COOKIE_STATUS_CANCELLED (0x77u) -#define PICO_MDNS_COOKIE_TIMEOUT (10u) - -#define PICO_MDNS_SECTION_ANSWERS (0) -#define PICO_MDNS_SECTION_AUTHORITIES (1) -#define PICO_MDNS_SETCTIO_ADDITIONALS (2) - -#define PICO_MDNS_CTREE_DESTROY(rtree) \ - pico_tree_destroy((rtree), pico_mdns_cookie_delete); - -/* --- Question flags --- */ -#define PICO_MDNS_QUESTION_FLAG_PROBE (0x01u) -#define PICO_MDNS_QUESTION_FLAG_NO_PROBE (0x00u) -#define PICO_MDNS_QUESTION_FLAG_UNICAST_RES (0x02u) -#define PICO_MDNS_QUESTION_FLAG_MULTICAST_RES (0x00u) - -#define IS_QUESTION_PROBE_FLAG_SET(x) \ - (((x) & PICO_MDNS_QUESTION_FLAG_PROBE) ? (1) : (0)) -#define IS_QUESTION_UNICAST_FLAG_SET(x) \ - (((x) & PICO_MDNS_QUESTION_FLAG_UNICAST_RES) ? (1) : (0)) -#define IS_QUESTION_MULTICAST_FLAG_SET(x) \ - (((x) & PICO_MDNS_QUESTION_FLAG_UNICAST_RES) ? 
(0) : (1)) - -/* Resource Record flags */ -#define PICO_MDNS_RECORD_ADDITIONAL (0x08u) -#define PICO_MDNS_RECORD_SEND_UNICAST (0x10u) -#define PICO_MDNS_RECORD_CURRENTLY_PROBING (0x20u) -#define PICO_MDNS_RECORD_PROBED (0x40u) -#define PICO_MDNS_RECORD_CLAIMED (0x80u) - -#define IS_SHARED_RECORD(x) \ - ((x)->flags & PICO_MDNS_RECORD_SHARED) -#define IS_UNIQUE_RECORD(x) \ - (!((x)->flags & PICO_MDNS_RECORD_SHARED)) -#define IS_RECORD_PROBING(x) \ - ((x)->flags & PICO_MDNS_RECORD_CURRENTLY_PROBING) -#define IS_UNICAST_REQUESTED(x) \ - ((x)->flags & PICO_MDNS_RECORD_SEND_UNICAST) -#define IS_RECORD_VERIFIED(x) \ - ((x)->flags & PICO_MDNS_RECORD_PROBED) -#define IS_RECORD_CLAIMED(x) \ - ((x)->flags & PICO_MDNS_RECORD_CLAIMED) - -/* Set and clear flags */ -#define PICO_MDNS_SET_FLAG(x, b) (x = ((x) | (uint8_t)(b))) -#define PICO_MDNS_CLR_FLAG(x, b) (x = (uint8_t)(((x) & (~((uint8_t)(b)))))) - -/* Set and clear MSB of BE short */ -#define PICO_MDNS_SET_MSB(x) (x = x | (uint16_t)(0x8000u)) -#define PICO_MDNS_CLR_MSB(x) (x = x & (uint16_t)(0x7fffu)) -#define PICO_MDNS_SET_MSB_BE(x) (x = x | (uint16_t)(short_be(0x8000u))) -#define PICO_MDNS_CLR_MSB_BE(x) (x = x & (uint16_t)(short_be(0x7fffu))) -#define PICO_MDNS_IS_MSB_SET(x) ((x & 0x8000u) ? 
1 : 0) - -/* **************************************************************************** - * mDNS cookie - * *****************************************************************************/ -struct pico_mdns_cookie -{ - pico_dns_qtree qtree; /* Question tree */ - pico_mdns_rtree antree; /* Answer tree */ - pico_mdns_rtree artree; /* Additional record tree */ - uint8_t count; /* Times to send the query */ - uint8_t type; /* QUERY/ANNOUNCE/PROBE/ANSWER */ - uint8_t status; /* Active status */ - uint8_t timeout; /* Timeout counter */ - uint32_t send_timer; /* For sending events */ - void (*callback)(pico_mdns_rtree *, - char *, - void *); /* Callback */ - void *arg; /* Argument to pass to callback */ -}; - -/* MARK: TREES & GLOBAL VARIABLES */ - -/* MDNS Communication variables */ -static struct pico_socket *mdns_sock_ipv4 = NULL; -static uint16_t mdns_port = 5353u; -static struct pico_ip4 inaddr_any = { - 0 -}; - -/* **************************************************************************** - * Hostname for this machine, only 1 hostname can be set. - * Following RFC6267: 15.4 Recommendation - * *****************************************************************************/ -static char *_hostname = NULL; - -static void (*init_callback)(pico_mdns_rtree *, char *, void *) = 0; - -/* **************************************************************************** - * Compares 2 mDNS records by name and type only - * - * @param a mDNS record A - * @param b mDNS record B - * @return 0 when name and type of records are equal, returns difference when - * they're not. 
- * ****************************************************************************/ -static int -pico_mdns_record_cmp_name_type( void *a, void *b ) -{ - struct pico_mdns_record *_a = NULL, *_b = NULL; - - /* Check params */ - if (!(_a = (struct pico_mdns_record *)a) || - !(_b = (struct pico_mdns_record *)b)) { - pico_err = PICO_ERR_EINVAL; - return -1; /* Don't want a wrong result when NULL-pointers are passed */ - } - - return pico_dns_record_cmp_name_type(_a->record, _b->record); -} - -/* **************************************************************************** - * Compares 2 mDNS records by type, name AND rdata for a truly unique result - * - * @param ra mDNS record A - * @param rb mDNS record B - * @return 0 when records are equal, returns difference when they're not. - * ****************************************************************************/ -int -pico_mdns_record_cmp( void *a, void *b ) -{ - /* Check params */ - if (!a || !b) { - if (!a && !b) - return 0; - - pico_err = PICO_ERR_EINVAL; - return -1; /* Don't want a wrong result when NULL-pointers are passed */ - } - - return pico_dns_record_cmp((void*)(((struct pico_mdns_record *)a)->record), - (void*)(((struct pico_mdns_record *)b)->record)); -} - -/* **************************************************************************** - * Compares 2 mDNS cookies again each other. Only compares questions since a - * only a cookie query will be added to the tree. And there shouldn't be 2 - * different cookies with the same questions in the tree. - * - * @param ka mDNS cookie A - * @param kb mDNS cookie B - * @return 0 when cookies are equal, returns difference when they're not. 
- * ****************************************************************************/ -static int -pico_mdns_cookie_cmp( void *ka, void *kb ) -{ - struct pico_mdns_cookie *a = (struct pico_mdns_cookie *)ka; - struct pico_mdns_cookie *b = (struct pico_mdns_cookie *)kb; - struct pico_dns_question *qa = NULL, *qb = 0; - struct pico_tree_node *na = NULL, *nb = 0; - uint16_t ca = 0, cb = 0; - int ret = 0; - - /* Check params */ - if (!a || !b) { - pico_err = PICO_ERR_EINVAL; - return -1; /* Don't want a wrong result when NULL-pointers are passed */ - } - - /* Start comparing the questions */ - for (na = pico_tree_firstNode(a->qtree.root), - nb = pico_tree_firstNode(b->qtree.root); - (na != &LEAF) && (nb != &LEAF); - na = pico_tree_next(na), - nb = pico_tree_next(nb)) { - qa = na->keyValue; - qb = nb->keyValue; - if ((qa) && (qb) && (ret = pico_dns_question_cmp(qa, qb))) - return ret; - } - /* Check for lengths difference */ - ca = pico_tree_count(&(a->qtree)); - cb = pico_tree_count(&(b->qtree)); - if (ca != cb) - return (int)((int)ca - (int)cb); - - /* Cookies contain same questions, shouldn't happen */ - return 0; -} - -/* - * Hash to identify mDNS timers with - */ -static uint32_t mdns_hash = 0; - -/* - * mDNS specific timer creation, to identify if timers are - * created by mDNS module - */ -static uint32_t -pico_mdns_timer_add(pico_time expire, - void (*timer)(pico_time, void *), - void *arg) -{ - return pico_timer_add_hashed(expire, timer, arg, mdns_hash); -} - -#if PICO_MDNS_ALLOW_CACHING == 1 -/* Cache records from mDNS peers on the network */ -static PICO_TREE_DECLARE(Cache, &pico_mdns_record_cmp); -#endif - -/* My records for which I want to have the authority */ -static PICO_TREE_DECLARE(MyRecords, &pico_mdns_record_cmp_name_type); - -/* Cookie-tree */ -static PICO_TREE_DECLARE(Cookies, &pico_mdns_cookie_cmp); - -/* **************************************************************************** - * MARK: PROTOTYPES */ -static int -pico_mdns_getrecord_generic( const 
char *url, uint16_t type, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg); - -static void -pico_mdns_send_probe_packet( pico_time now, void *arg ); - -static int -pico_mdns_reclaim( pico_mdns_rtree record_tree, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ); -/* EOF PROTOTYPES - * ****************************************************************************/ - -/* MARK: v MDNS NAMES */ - -#define IS_NUM(c) (((c) >= '0') && ((c) <= '9')) -/* **************************************************************************** - * Tries to convert the characters after '-' to a numeric value. - * - * @param opening Pointer to dash index. - * @param closing Pointer to end of label. - * @return Numeric value of suffix on success - * ****************************************************************************/ -static inline uint16_t -pico_mdns_suffix_to_uint16( char *opening, char *closing) -{ - uint16_t n = 0; - char *i = 0; - - /* Check params */ - if (!opening || !closing || - ((closing - opening) > 5) || - ((closing - opening) < 0)) - return 0; - - for (i = (char *)(opening + 1); i < closing; i++) { - if (!IS_NUM(*i)) - return 0; - - n = (uint16_t)((n * 10) + (*i - '0')); - } - return n; -} - -#define iterate_first_label_name_reverse(iterator, name) \ - for ((iterator) = \ - (*name < (char)63) ? ((char *)(name + *name)) : (name); \ - (iterator) > (name); \ - (iterator)--) - -/* **************************************************************************** - * Checks whether there is already a conflict-suffix already present in the - * first label of a name or not. - * - * @param name Name in DNS name notation you want to check for a suffix. - * @param o_i Pointer-pointer, will get filled with location to '-'-char. - * @param c_i Pointer-pointer, will get filled with end of label. - * @return Returns value of the suffix, when it's present, 0 when no correct - * suffix is present. 
- * ****************************************************************************/ -static uint16_t -pico_mdns_is_suffix_present( char name[], - char **o_i, - char **c_i ) -{ - char *i = NULL; - uint16_t n = 0; - - *o_i = NULL; /* Clear out indexes */ - *c_i = NULL; - - /* Find the end of label. */ - *c_i = (name + *name + 1); - - iterate_first_label_name_reverse(i, name) { - /* Find the last dash */ - if ((*c_i) && (i < *c_i) && *i == '-') { - *o_i = i; - break; - } - } - - /* Convert the string suffix to a number */ - if (!(n = pico_mdns_suffix_to_uint16(*o_i, *c_i))) { - *o_i = NULL; - *c_i = NULL; - } - - return n; -} - -/* **************************************************************************** - * Manual string to uint16_t conversion. - * - * @param n Numeric value you want to convert. - * @param s String to convert to - * @return void - * ****************************************************************************/ -static void pico_itoa( uint16_t n, char s[] ) -{ - int i = 0, j = 0; - char c = 0; - - /* Get char values */ - do { - s[i++] = (char)(n % 10 + '0'); - } while ((n /= 10) > 0); - - /* Reverse the string */ - for (i = 0, j = (int)(pico_dns_strlen(s) - 1); i < j; i++, j--) { - c = s[i]; - s[i] = s[j]; - s[j] = c; - } -} - -/* **************************************************************************** - * Generates a new name by appending a conflict resolution suffix to the first - * label of an FQDN. - * - * @param rname Name you want to append the suffix to - * @return Newly created FQDN with suffix appended to first label. 
- * ****************************************************************************/ -static char * -pico_mdns_resolve_name_conflict( char rname[] ) -{ - char *new_rname = NULL; - char suffix[5] = { - 0 - }, nsuffix[5] = { - 0 - }, copy_offset = 0; - char *o_i = NULL, *c_i = NULL; - uint16_t new_len = (uint16_t)(pico_dns_strlen(rname) + 1); - uint8_t nslen = 0, slen = 0, ns = 0; - - /* Check params */ - if (pico_dns_check_namelen(new_len)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Check whether a conflict-suffix is already present in the name */ - if ((ns = (uint8_t)pico_mdns_is_suffix_present(rname, &o_i, &c_i))) { - pico_itoa(ns, suffix); - pico_itoa(++ns, nsuffix); - slen = (uint8_t)pico_dns_strlen(suffix); - nslen = (uint8_t)pico_dns_strlen(nsuffix); - new_len = (uint16_t)(new_len + nslen - slen); - } else { - /* If no suffix is present */ - c_i = (o_i = rname + *rname) + 1; - new_len = (uint16_t)(new_len + 2u); - memcpy((void *)nsuffix, "-2\0", (size_t)3); - } - - /* Provide space for the new name */ - if (!(new_rname = PICO_ZALLOC(new_len))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Assemble the new name again */ - copy_offset = (char)((o_i - rname + 1)); - memcpy(new_rname, rname, (size_t)(copy_offset)); - strcpy(new_rname + copy_offset, nsuffix); - strcpy(new_rname + copy_offset + pico_dns_strlen(nsuffix), c_i); - /* Set the first length-byte */ - new_rname[0] = (char)(new_rname[0] + new_len - pico_dns_strlen(rname) - 1); - return new_rname; -} - -/* MARK: ^ MDNS NAMES */ -/* MARK: v MDNS QUESTIONS */ - -/* **************************************************************************** - * Creates a standalone mDNS Question with a given name and type. - * - * @param url DNS question name in URL format. Will be converted to DNS - * name notation format. - * @param len Will be filled with the total length of the DNS question. - * @param proto Protocol for which you want to create a question. 
Can be - * either PICO_PROTO_IPV4 or PICO_PROTO_IPV6. - * @param qtype DNS type of the question to be. - * @param flags With the flags you can specify if the question should be - * a QU-question rather than a QM-question - * @param reverse When this is true, a reverse resolution name will be gene- - * from the URL - * @return Returns pointer to the created mDNS Question on success, NULL on - * failure. - * ****************************************************************************/ -static struct pico_dns_question * -pico_mdns_question_create( const char *url, - uint16_t *len, - uint8_t proto, - uint16_t qtype, - uint8_t flags, - uint8_t reverse ) -{ - uint16_t qclass = PICO_DNS_CLASS_IN; - - /* Set the MSB of the qclass field according to the mDNS format */ - if (IS_QUESTION_UNICAST_FLAG_SET(flags)) - PICO_MDNS_SET_MSB(qclass); - - /* Fill in the question suffix */ - if (IS_QUESTION_PROBE_FLAG_SET(flags)) - qtype = PICO_DNS_TYPE_ANY; - - /* Create a question as you would with plain DNS */ - return pico_dns_question_create(url, len, proto, qtype, qclass, reverse); -} - -/* MARK: ^ MDNS QUESTIONS */ -/* MARK: v MDNS RECORDS */ - -/* **************************************************************************** - * Just makes a hardcopy from a single mDNS resource record. 
- * - * @param record mDNS record you want to create a copy from - * @return Pointer to copied mDNS resource record - * ****************************************************************************/ -static struct pico_mdns_record * -pico_mdns_record_copy( struct pico_mdns_record *record ) -{ - struct pico_mdns_record *copy = NULL; - - /* Check params */ - if (!record) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Provide space for the copy */ - if (!(copy = PICO_ZALLOC(sizeof(struct pico_mdns_record)))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Copy the DNS record */ - if (!(copy->record = pico_dns_record_copy(record->record))) { - PICO_FREE(copy); - return NULL; - } - - /* Copy the fields */ - copy->current_ttl = record->current_ttl; - copy->flags = record->flags; - copy->claim_id = record->claim_id; - - return copy; -} - -/* **************************************************************************** - * Looks for multiple mDNS records in a tree with the same name. - * - * @param tree Tree in which you want to search. - * @param name Name you want to search for. - * @return Tree with found hits, can possibly be empty - * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_rtree_find_name( pico_mdns_rtree *tree, - const char *name, - uint8_t copy ) -{ - PICO_MDNS_RTREE_DECLARE(hits); - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - - /* Check params */ - if (!name || !tree) { - pico_err = PICO_ERR_EINVAL; - return hits; - } - - /* Iterate over tree */ - pico_tree_foreach(node, tree) { - record = node->keyValue; - if (record && strcasecmp(record->record->rname, name) == 0) { - if (copy) - record = pico_mdns_record_copy(record); - - if (record) - if (pico_tree_insert(&hits, record) != NULL) - /* either key was already in there, or couldn't be inserted. 
*/ - /* Only delete record if it was copied */ - if (copy) - pico_mdns_record_delete((void **)&record); - } - } - - return hits; -} - -/* **************************************************************************** - * Looks for (possibly) multiple mDNS records in a tree with the same name and - * type. - * - * @param tree Tree in which you want to search. - * @param name Name you want to search for. - * @param rtype DNS type you want to search for. - * @return Tree with found hits, can possibly be empty. - * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_rtree_find_name_type( pico_mdns_rtree *tree, - char *name, - uint16_t rtype, - uint8_t copy ) -{ - PICO_MDNS_RTREE_DECLARE(hits); - - struct pico_dns_record_suffix test_dns_suffix = { - 0, 1, 0, 0 - }; - struct pico_dns_record test_dns_record = { - 0 - }; - struct pico_mdns_record test = { - 0 - }; - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - test_dns_record.rsuffix = &test_dns_suffix; - test.record = &test_dns_record; - - /* Check params */ - if (!name || !tree) { - pico_err = PICO_ERR_EINVAL; - return hits; - } - - test.record->rname = name; - test.record->rsuffix->rtype = short_be(rtype); - - /* Iterate over the tree */ - pico_tree_foreach(node, tree) { - record = node->keyValue; - if ((record) && (0 == pico_mdns_record_cmp_name_type(record, &test))) { - if (copy) - record = pico_mdns_record_copy(record); - - if (record){ - if (pico_tree_insert(&hits, record) != NULL) { - /* either key was already in there, or couldn't be inserted. */ - /* Only delete record if it was copied */ - if (copy) - pico_mdns_record_delete((void **)&record); - } - } - } - } - - return hits; -} - -/* **************************************************************************** - * Deletes multiple mDNS records in a tree with the same name. - * - * @param tree Tree from which you want to delete records by name. 
- * @param name Name of records you want to delete from the tree. - * @return 0 on success, something else on failure. - * ****************************************************************************/ -static int -pico_mdns_rtree_del_name( pico_mdns_rtree *tree, - const char *name ) -{ - struct pico_tree_node *node = NULL, *safe = NULL; - struct pico_mdns_record *record = NULL; - - /* Check params */ - if (!name || !tree) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Iterate over tree */ - pico_tree_foreach_safe(node, tree, safe) { - record = node->keyValue; - if (record && strcasecmp(record->record->rname, name) == 0) { - record = pico_tree_delete(tree, record); - pico_mdns_record_delete((void **)&record); - } - } - - return 0; -} - -/* **************************************************************************** - * Deletes (possibly) multiple mDNS records from a tree with same name and - * type. - * - * @param tree Tree from which you want to delete records by name and type. - * @param name Name of records you want to delete. - * @param type DNS type of records you want to delete. - * @return 0 on success, something else on failure. 
- * ****************************************************************************/ -#if PICO_MDNS_ALLOW_CACHING == 1 -static int -pico_mdns_rtree_del_name_type( pico_mdns_rtree *tree, - char *name, - uint16_t type ) -{ - struct pico_tree_node *node = NULL, *next = NULL; - struct pico_mdns_record *record = NULL; - struct pico_dns_record_suffix test_dns_suffix = { - 0, 1, 0, 0 - }; - struct pico_dns_record test_dns_record = { - 0 - }; - struct pico_mdns_record test = { - 0 - }; - - test_dns_record.rsuffix = &test_dns_suffix; - test.record = &test_dns_record; - - /* Check params */ - if (!name || !tree) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - test.record->rname = name; - test.record->rsuffix->rtype = short_be(type); - - /* Iterate over the tree */ - pico_tree_foreach_safe(node, tree, next) { - record = node->keyValue; - if ((record) && (0 == pico_mdns_record_cmp_name_type(record, &test))) { - record = pico_tree_delete(tree, record); - pico_mdns_record_delete((void **)&record); - } - } - - return 0; -} -#endif - -/* **************************************************************************** - * Makes a hardcopy from a single mDNS resource record, but sets a new name - * for the copy. - * - * @param record mDNS record you want to copy. - * @param new_rname New name you want to set the name of the record to. - * @return Pointer to the copy on success, NULL-pointer on failure. 
- * ****************************************************************************/ -static struct pico_mdns_record * -pico_mdns_record_copy_with_new_name( struct pico_mdns_record *record, - const char *new_rname ) -{ - struct pico_mdns_record *copy = NULL; - uint16_t slen = (uint16_t)(pico_dns_strlen(new_rname) + 1u); - - /* Check params */ - if (!new_rname || pico_dns_check_namelen(slen)) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Copy the record */ - if (!(copy = pico_mdns_record_copy(record))) - return NULL; - - /* Provide a new string */ - PICO_FREE(copy->record->rname); - if (!(copy->record->rname = PICO_ZALLOC(slen))) { - pico_err = PICO_ERR_ENOMEM; - pico_mdns_record_delete((void **)©); - return NULL; - } - - memcpy((void *)(copy->record->rname), new_rname, slen); - copy->record->rname_length = slen; - - return copy; -} - -/* **************************************************************************** - * Generates (copies) new records from conflicting ones with another name. - * deletes - * - * @param conflict_records mDNS record tree that contains conflicting records - * @param conflict_name Name for which the conflict occurred. This is to be - * able to delete the conflicting records from the tree - * @param new_name To generate new records from the conflicting ones, - * with this new name. - * @return A mDNS record tree that contains all the newly generated records. 
- * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_generate_new_records( pico_mdns_rtree *conflict_records, - char *conflict_name, - char *new_name ) -{ - PICO_MDNS_RTREE_DECLARE(new_records); - struct pico_tree_node *node = NULL, *next = NULL; - struct pico_mdns_record *record = NULL, *new_record = NULL; - - /* Delete all the conflicting records from MyRecords */ - if (pico_mdns_rtree_del_name(&MyRecords, conflict_name)) - return new_records; - - pico_tree_foreach_safe(node, conflict_records, next) { - record = node->keyValue; - if (record && strcasecmp(record->record->rname, conflict_name) == 0) { - /* Create a new record */ - new_record = pico_mdns_record_copy_with_new_name(record, new_name); - if (!new_record) { - mdns_dbg("Could not create new non-conflicting record!\n"); - return new_records; - } - - new_record->flags &= (uint8_t)(~(PICO_MDNS_RECORD_PROBED | - PICO_MDNS_RECORD_SHARED | - PICO_MDNS_RECORD_CURRENTLY_PROBING)); - - /* Add the record to the new tree */ - if (pico_tree_insert(&new_records, new_record)) { - mdns_dbg("Could not add new non-conflicting record to the tree!\n"); - pico_mdns_record_delete((void **)&new_record); - return new_records; - } - - /* Delete the old conflicting record */ - record = pico_tree_delete(conflict_records, record); - if (pico_mdns_record_delete((void **)&record)) { - mdns_dbg("Could not delete old conflict record from tree!\n"); - return new_records; - } - } - } - - return new_records; -} - -/* **************************************************************************** - * When hosts observe an unsolicited record, no cookie is currently active - * for that, so it has to check in MyRecords if no conflict occurred for a - * record it has already registered. When this occurs the conflict should be - * resolved as with a normal cookie, just without the cookie. - * - * @param record mDNS record for which the conflict occurred. 
- * @param rname DNS name for which the conflict occurred in DNS name notation. - * @return 0 when the resolving is applied successfully, 1 otherwise. - * ****************************************************************************/ -static int -pico_mdns_record_resolve_conflict( struct pico_mdns_record *record, - char *rname ) -{ - int retval; - PICO_MDNS_RTREE_DECLARE(new_records); - struct pico_mdns_record *copy = NULL; - char *new_name = NULL; - - /* Check params */ - if (!record || !rname || IS_SHARED_RECORD(record)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Step 2: Create a new name depending on current name */ - if (!(new_name = pico_mdns_resolve_name_conflict(rname))) - return -1; - - copy = pico_mdns_record_copy_with_new_name(record, new_name); - PICO_FREE(new_name); - if (copy){ - if (pico_tree_insert(&new_records, copy)) { - mdns_dbg("MDNS: Failed to insert copy in tree\n"); - pico_mdns_record_delete((void **)©); - return -1; - } - } - - /* Step 3: delete conflicting record from my records */ - pico_tree_delete(&MyRecords, record); - pico_mdns_record_delete((void **)&record); - - /* Step 4: Try to reclaim the newly created records */ - retval = pico_mdns_reclaim(new_records, init_callback, NULL); - pico_tree_destroy(&new_records, NULL); - return retval; -} - -/* **************************************************************************** - * Determines if my_record is lexicographically later than peer_record, returns - * positive value when this is the case. Check happens by comparing rtype first - * and then rdata as prescribed by RFC6762. - * - * @param my_record Record this hosts want to claim. - * @param peer_record Record the peer host wants to claim (the enemy!) 
- * @return positive value when my record is lexicographically later - * ****************************************************************************/ -static int -pico_mdns_record_am_i_lexi_later( struct pico_mdns_record *my_record, - struct pico_mdns_record *peer_record) -{ - struct pico_dns_record *my = NULL, *peer = NULL; - uint16_t mclass = 0, pclass = 0, mtype = 0, ptype = 0; - int dif = 0; - - /* Check params */ - if (!my_record || !peer_record || - !(my = my_record->record) || !(peer = peer_record->record)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* - * First compare the record class (excluding cache-flush bit described in - * section 10.2) - * The numerically greater class wins - */ - mclass = PICO_MDNS_CLR_MSB_BE(my->rsuffix->rclass); - pclass = PICO_MDNS_CLR_MSB_BE(peer->rsuffix->rclass); - if ((dif = (int)((int)mclass - (int)pclass))) { - return dif; - } - - /* Second, compare the rrtypes */ - mtype = (my->rsuffix->rtype); - ptype = (peer->rsuffix->rtype); - if ((dif = (int)((int)mtype - (int)ptype))) { - return dif; - } - - /* Third compare binary content of rdata (no regard for meaning or structure) */ - - /* When using name compression, names MUST be uncompressed before comparison. See secion 8.2 in RFC6762 - This is already the case, but we won't check for it here. - The current execution stack to get here is: - > pico_mdns_handle_data_as_answers_generic - > > pico_dns_record_decompress - > > pico_mdns_handle_single_authority - > > > pico_mdns_cookie_apply_spt - > > > > pico_mdns_record_am_i_lexi_later - - Make sure pico_dns_record_decompress is executed before pico_mdns_record_am_i_lexi_later gets called, if problems ever arise with this function. 
- */ - - /* Then compare rdata */ - return pico_dns_rdata_cmp(my->rdata, peer->rdata, - short_be(my->rsuffix->rdlength), - short_be(peer->rsuffix->rdlength), PICO_DNS_CASE_SENSITIVE); -} - -/* **************************************************************************** - * Deletes a single mDNS resource record. - * - * @param record Void-pointer to mDNS Resource Record. Can be used with pico_- - * tree-destroy. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_mdns_record_delete( void **record ) -{ - struct pico_mdns_record **rr = (struct pico_mdns_record **)record; - - /* Check params */ - if (!rr || !(*rr)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Delete DNS record contained */ - if (((*rr)->record)) { - pico_dns_record_delete((void **)&((*rr)->record)); - } - - /* Delete the record itself */ - PICO_FREE(*rr); - *record = NULL; - - return 0; -} - -/* **************************************************************************** - * Creates a single standalone mDNS resource record with given name, type and - * data. - * - * @param url DNS rrecord name in URL format. Will be converted to DNS - * name notation format. - * @param _rdata Memory buffer with data to insert in the resource record. If - * data of record should contain a DNS name, the name in the - * data buffer needs to be in URL-format. - * @param datalen The exact length in bytes of the _rdata-buffer. If data of - * record should contain a DNS name, datalen needs to be - * pico_dns_strlen(_rdata). - * @param len Will be filled with the total length of the DNS rrecord. - * @param rtype DNS type of the resource record to be. - * @param rclass DNS class of the resource record to be. - * @param rttl DNS ttl of the resource record to be. - * @param flags You can specify if the mDNS record should be a shared record - * rather than a unique record. 
- * @return Pointer to newly created mDNS resource record. - * ****************************************************************************/ -struct pico_mdns_record * -pico_mdns_record_create( const char *url, - void *_rdata, - uint16_t datalen, - uint16_t rtype, - uint32_t rttl, - uint8_t flags ) -{ - struct pico_mdns_record *record = NULL; - uint16_t len = 0; - uint16_t cl = 0; - - /* Check params */ - if (!url || !_rdata) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } /* Block 1, 2 paths */ - - /* Provide space for the new mDNS resource record */ - if (!(record = PICO_ZALLOC(sizeof(struct pico_mdns_record)))) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } /* Block 2, 1 path */ - else { - /* Try to create the actual DNS record */ - if (!(record->record = pico_dns_record_create(url, _rdata, datalen, - &len, rtype, - PICO_DNS_CLASS_IN, rttl))) { - mdns_dbg("Could not create DNS record for mDNS!\n"); - PICO_FREE(record); - return NULL; - } /* Block 3, 2 paths */ - } /* Block 4, Block 3 = 2 paths */ - /* Block 5, (Block 4 + Block 2) * Block 1 = 6 paths */ - - /* Initialise fields */ - record->current_ttl = rttl; - - /* Set the MSB of the DNS class if it's a unique record */ - if (!((flags) & PICO_MDNS_RECORD_SHARED)) { - cl = record->record->rsuffix->rclass; - record->record->rsuffix->rclass = PICO_MDNS_SET_MSB_BE(cl); - } /* Block 6, 2 paths */ - /* Block 7, Block 6 * Block 5 * Block 1 = 12 paths */ - - record->flags = flags; - record->claim_id = 0; - - return record; -} - -/* MARK: ^ MDNS RECORDS */ -/* MARK: v MDNS COOKIES */ - -/* **************************************************************************** - * Deletes a single mDNS packet cookie and frees memory. - * - * @param cookie Void-pointer to mDNS cookie, allow to be used with pico_tree- - * destroy. - * @return Returns 0 on success, something else on failure. 
- * ****************************************************************************/ -static int -pico_mdns_cookie_delete( void **ptr ) -{ - struct pico_mdns_cookie **c = (struct pico_mdns_cookie **)ptr; - - /* Check params */ - if (!c || !(*c)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Destroy the vectors contained */ - PICO_DNS_QTREE_DESTROY(&((*c)->qtree)); - PICO_MDNS_RTREE_DESTROY(&((*c)->antree)); - PICO_MDNS_RTREE_DESTROY(&((*c)->artree)); - - /* Delete the cookie itself */ - PICO_FREE(*c); - *c = NULL; - - return 0; -} - -/* **************************************************************************** - * Creates a single standalone mDNS cookie - * - * @param qtree DNS questions you want to insert in the cookie. - * @param antree mDNS answers/authority records you want to add to cookie. - * @param artree mDNS additional records you want to add to cookie. - * @param count Times you want to send the cookie as a packet on the wire. - * @param type Type of packet you want to create from the cookie. - * @param callback Callback when the host receives responses for the cookie. - * @return Pointer to newly create cookie, NULL on failure. 
- * ****************************************************************************/ -static struct pico_mdns_cookie * -pico_mdns_cookie_create( pico_dns_qtree qtree, - pico_mdns_rtree antree, - pico_mdns_rtree artree, - uint8_t count, - uint8_t type, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - struct pico_mdns_cookie *cookie = NULL; /* Packet cookie to send */ - - /* Provide space for the mDNS packet cookie */ - cookie = PICO_ZALLOC(sizeof(struct pico_mdns_cookie)); - if (!cookie) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Fill in the fields */ - cookie->qtree = qtree; - cookie->antree = antree; - cookie->artree = artree; - cookie->count = count; - cookie->type = type; - cookie->status = PICO_MDNS_COOKIE_STATUS_INACTIVE; - cookie->timeout = PICO_MDNS_COOKIE_TIMEOUT; - cookie->callback = callback; - cookie->arg = arg; - return cookie; -} - -/* **************************************************************************** - * Apply Simultaneous Probe Tiebreakin (S.P.T.) on a probe-cookie. - * See RFC6762: 8.2. Simultaneous Probe Tiebreaking - * - * @param cookie Cookie which contains the record which is simult. probed. - * @param answer Authority record received from peer which is simult. probed. - * @return 0 when SPT is applied correctly, -1 otherwise. 
- * ****************************************************************************/ -static int -pico_mdns_cookie_apply_spt( struct pico_mdns_cookie *cookie, - struct pico_dns_record *answer) -{ - struct pico_mdns_record *my_record = NULL; - struct pico_mdns_record peer_record; - - /* Check params */ - if ((!cookie) || !answer || (cookie->type != PICO_MDNS_PACKET_TYPE_PROBE)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - cookie->status = PICO_MDNS_COOKIE_STATUS_INACTIVE; - - /* Implement Simultaneous Probe Tiebreaking */ - peer_record.record = answer; - my_record = pico_tree_findKey(&MyRecords, &peer_record); - if (!my_record || !IS_RECORD_PROBING(my_record)) { - mdns_dbg("This is weird! My record magically removed...\n"); - return -1; - } - - if (pico_mdns_record_am_i_lexi_later(my_record, &peer_record) > 0) { - mdns_dbg("My record is lexicographically later! Yay!\n"); - cookie->status = PICO_MDNS_COOKIE_STATUS_ACTIVE; - } else { - pico_timer_cancel(cookie->send_timer); - cookie->timeout = PICO_MDNS_COOKIE_TIMEOUT; - cookie->count = PICO_MDNS_PROBE_COUNT; - cookie->send_timer = pico_mdns_timer_add(1000, pico_mdns_send_probe_packet, - cookie); - if (!cookie->send_timer) { - mdns_dbg("cookie_apply_spt: failed to start timer\n"); - return -1; - } - mdns_dbg("Probing postponed by one second because of S.P.T.\n"); - } - - return 0; -} - -static int -pico_mdns_cookie_del_questions( struct pico_mdns_cookie *cookie, - char *rname ) -{ - uint16_t qc = 0; - - /* Step 1: Remove question with that name from cookie */ - pico_dns_qtree_del_name(&(cookie->qtree), rname); - cookie->antree.root = &LEAF; - - /* Check if there are no questions left, cancel events if so and delete */ - if (!(qc = pico_tree_count(&(cookie->qtree)))) { - pico_timer_cancel(cookie->send_timer); - cookie = pico_tree_delete(&Cookies, cookie); - pico_mdns_cookie_delete((void **)&cookie); - } - - return 0; -} - -/* **************************************************************************** - * Applies 
conflict resolution mechanism to a cookie, when a conflict occurs - * for a name which is present in the cookie. - * - * @param cookie Cookie on which you want to apply the conflict resolution- - * mechanism. - * @param rname Name for which the conflict occurred. A new non-conflicting - * name will be generated from this string. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -static int -pico_mdns_cookie_resolve_conflict( struct pico_mdns_cookie *cookie, - char *rname ) -{ - struct pico_tree_node *node = NULL; - struct pico_dns_question *question = NULL; - PICO_MDNS_RTREE_DECLARE(new_records); - PICO_MDNS_RTREE_DECLARE(antree); - char *new_name = NULL; - void (*callback)(pico_mdns_rtree *, char *, void *); - void *arg = NULL; - int retval; - - /* Check params */ - if ((!cookie) || !rname || (cookie->type != PICO_MDNS_PACKET_TYPE_PROBE)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Convert rname to url */ - mdns_dbg("CONFLICT for probe query with name '%s' occurred!\n", rname); - - /* Store some information about a cookie for later on */ - antree = cookie->antree; - callback = cookie->callback; - arg = cookie->arg; - - /* Find the first question in the cookie with the name for which - * the conflict occured. When found, generate a new name. - * - * DNS conflict is case-insensitive. However, we want to keep the original - * capitalisation for the new probe. */ - pico_tree_foreach(node, &(cookie->qtree)) { - question = (struct pico_dns_question *)node->keyValue; - if ((question) && (strcasecmp(question->qname, rname) == 0)) { - /* Create a new name depending on current name */ - new_name = pico_mdns_resolve_name_conflict(question->qname); - - /* Step 1: Check if the new name succeeded, if not: error. 
*/ - if (!new_name) { - /* Delete questions from cookie even if generating a new name failed */ - pico_mdns_cookie_del_questions(cookie, rname); - return -1; - } - - break; - } - } - - /* Step 2: Remove questions with this name from the cookie */ - pico_mdns_cookie_del_questions(cookie, rname); - - /* Step 3: Create records with new name for the records with that name */ - new_records = pico_mdns_generate_new_records(&antree, rname, new_name); - PICO_FREE(new_name); - - /* Step 4: Try to reclaim the newly created records */ - retval = pico_mdns_reclaim(new_records, callback, arg); - pico_tree_destroy(&new_records, NULL); - return retval; -} - -/* **************************************************************************** - * Find a query cookie that contains a question for a specific name. - * - * @param name Name of question you want to look for. - * @return Pointer to cookie in tree when one is found, NULL on failure. - * ****************************************************************************/ -static struct pico_mdns_cookie * -pico_mdns_ctree_find_cookie( const char *name, uint8_t type ) -{ - struct pico_mdns_cookie *cookie = NULL; - struct pico_tree_node *node = NULL; - - /* Check params */ - if (!name) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Find the cookie in the tree wherein the question is present */ - pico_tree_foreach(node, &Cookies) { - if ((cookie = node->keyValue) && - pico_dns_qtree_find_name(&(cookie->qtree), name)) { - if (type == PICO_MDNS_PACKET_TYPE_QUERY_ANY) - return cookie; - else if (cookie->type == type) - return cookie; - } - } - - return NULL; -} - -/* MARK: ^ MDNS COOKIES */ -/* MARK: v MY RECORDS */ - -/* **************************************************************************** - * Adds records contained in records-tree to MyRecords. Suppresses adding of - * duplicates. - * - * @param records Tree with records to add to 'MyRecords'. - * @param reclaim If the records contained in records are claimed again. 
- * @return 0 on success, something else on failure. - * ****************************************************************************/ -static int -pico_mdns_my_records_add( pico_mdns_rtree *records, uint8_t reclaim ) -{ - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - static uint8_t claim_id_count = 0; - - if (!reclaim) { - ++claim_id_count; - } - - /* Iterate over record vector */ - pico_tree_foreach(node, records) { - record = node->keyValue; - if (record) { - /* Set probed flag if record is a shared record */ - if (IS_SHARED_RECORD(record)) { - PICO_MDNS_SET_FLAG(record->flags, PICO_MDNS_RECORD_PROBED); - } - - /* If record is not claimed again, set new claim-ID */ - if (!reclaim) { - record->claim_id = claim_id_count; - } - - if (pico_tree_insert(&MyRecords, record) == &LEAF) { - mdns_dbg("MDNS: Failed to insert record in tree\n"); - return -1; - } - } - } - return 0; -} - -/* **************************************************************************** - * Generates a tree of all My Records for which the probe flag has already - * been set, and for which the CLAIMED flag has NOT been set. - * Copies the records from MyRecords into a new tree. - * - * @return Tree with all records in MyRecords with the PROBED-flag set. 
- * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_my_records_find_probed( void ) -{ - PICO_MDNS_RTREE_DECLARE(probed); - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL, *copy = NULL; - - /* Iterate over MyRecords */ - pico_tree_foreach(node, &MyRecords) { - record = node->keyValue; - - /* IS_RECORD_VERIFIED() checks the PICO_MDNS_RECORD_PROBED flag */ - if (record && IS_RECORD_VERIFIED(record) && !IS_RECORD_CLAIMED(record)) { - copy = pico_mdns_record_copy(record); - if (copy && pico_tree_insert(&probed, copy)) { - pico_mdns_record_delete((void **)©); - } - } - } - - return probed; -} - -/* **************************************************************************** - * Generates a tree of all My Records for which the PROBED-flag has not yet - * been set. Copies the record from MyRecords into a new tree. - * - * @return Tree with all records in MyRecords with the PROBED-flag not set. - * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_my_records_find_to_probe( void ) -{ - PICO_MDNS_RTREE_DECLARE(to_probe); - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL, *copy = NULL; - - pico_tree_foreach(node, &MyRecords) { - record = node->keyValue; - /* Check if probed flag is not set of a record */ - if (record && - IS_UNIQUE_RECORD(record) && - !IS_RECORD_VERIFIED(record) && - !IS_RECORD_PROBING(record)) { - /* Set record to currently being probed status */ - record->flags |= PICO_MDNS_RECORD_CURRENTLY_PROBING; - copy = pico_mdns_record_copy(record); - if (copy && pico_tree_insert(&to_probe, copy)) - pico_mdns_record_delete((void **)©); - } - } - return to_probe; -} - -/* **************************************************************************** - * Checks whether all MyRecords with a certain claim ID are claimed or not. 
- * - * @param claim_id Claim ID of the records to check for already been probed. - * @param reg_records Tree in which all MyRecords with claim ID are inserted. - * @return 1 when all MyRecords with claim ID are probed, 0 when they're not. - * ****************************************************************************/ -static uint8_t -pico_mdns_my_records_claimed_id( uint8_t claim_id, - pico_mdns_rtree *reg_records ) -{ - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - - /* Initialise the iterator for iterating over my records */ - pico_tree_foreach(node, &MyRecords) { - record = node->keyValue; - if (record && record->claim_id == claim_id) { - if (IS_RECORD_VERIFIED(record)) { - if (pico_tree_insert(reg_records, record) == &LEAF) { - mdns_dbg("MDNS: Failed to insert record in tree\n"); - return 0; - } - } else { - return 0; - } - } - } - - return 1; -} - -/* **************************************************************************** - * Marks mDNS resource records in the tree as registered. Checks MyRecords for - * for other records with the same claim ID. If all records with the same - * claim ID as the records in the tree are claimed, - * the callback will get called. - * - * @param rtree Tree with mDNS records that are registered. - * @param callback Callback will get called when all records are registered. - * @return Returns 0 when everything went smooth, something else otherwise. 
- * ****************************************************************************/ -static int -pico_mdns_my_records_claimed( pico_mdns_rtree rtree, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - PICO_MDNS_RTREE_DECLARE(claimed_records); - struct pico_mdns_record *record = NULL, *myrecord = NULL; - struct pico_tree_node *node = NULL; - uint8_t claim_id = 0; - - /* Iterate over records and set the PROBED flag */ - pico_tree_foreach(node, &rtree) { - if ((record = node->keyValue)) { - if (!claim_id) { - claim_id = record->claim_id; - } - } - - if ((myrecord = pico_tree_findKey(&MyRecords, record))) { - PICO_MDNS_SET_FLAG(myrecord->flags, PICO_MDNS_RECORD_CLAIMED); - } - } - - /* If all_claimed is still true */ - if (pico_mdns_my_records_claimed_id(claim_id, &claimed_records)) { - callback(&claimed_records, _hostname, arg); - } - - pico_tree_destroy(&claimed_records, NULL); - - mdns_dbg(">>>>>> DONE - CLAIM SESSION: %d\n", claim_id); - - return 0; -} - -/* **************************************************************************** - * Makes sure the cache flush bit is set of the records which are probed, and - * set the corresponding MyRecords from 'being probed' to - * 'has been probed'-state. - * - * @param records mDNS records which are probed. 
- * ****************************************************************************/ -static void -pico_mdns_my_records_probed( pico_mdns_rtree *records ) -{ - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL, *found = NULL; - - pico_tree_foreach(node, records) { - if ((record = node->keyValue)) { - /* Set the cache flush bit again */ - PICO_MDNS_SET_MSB_BE(record->record->rsuffix->rclass); - if ((found = pico_tree_findKey(&MyRecords, record))) { - if (IS_HOSTNAME_RECORD(found)) { - if (_hostname) { - PICO_FREE(_hostname); - } - - _hostname = pico_dns_qname_to_url(found->record->rname); - } - - PICO_MDNS_CLR_FLAG(found->flags, PICO_MDNS_RECORD_CURRENTLY_PROBING); - PICO_MDNS_SET_FLAG(found->flags, PICO_MDNS_RECORD_PROBED); - } else{ - mdns_dbg("Could not find my corresponding record...\n"); - } - } - } -} - -/* MARK: ^ MY RECORDS */ -/* MARK: v CACHE COHERENCY */ -#if PICO_MDNS_ALLOW_CACHING == 1 -/* **************************************************************************** - * Updates TTL of a cache entry. - * - * @param record Record of which you want to update the TTL of - * @param ttl TTL you want to update the TTL of the record to. 
- * @return void - * ****************************************************************************/ -static inline void -pico_mdns_cache_update_ttl( struct pico_mdns_record *record, - uint32_t ttl ) -{ - if(ttl > 0) { - /* Update the TTL's */ - record->record->rsuffix->rttl = long_be(ttl); - record->current_ttl = ttl; - } else { - /* TTL 0 means delete from cache but we need to wait one second */ - record->record->rsuffix->rttl = long_be(1u); - record->current_ttl = 1u; - } -} - -static int -pico_mdns_cache_flush_name( char *name, struct pico_dns_record_suffix *suffix ) -{ - /* Check if cache flush bit is set */ - if (PICO_MDNS_IS_MSB_SET(short_be(suffix->rclass))) { - mdns_dbg("FLUSH - Cache flush bit was set, triggered flush.\n"); - if (pico_mdns_rtree_del_name_type(&Cache, name, short_be(suffix->rtype))) { - mdns_dbg("Could not flush records from cache!\n"); - return -1; - } - } - - return 0; -} - -/* **************************************************************************** - * Adds a mDNS record to the cache. - * - * @param record mDNS record to add to the Cache. - * @return 0 when entry successfully added, something else when it all went ho- - * rribly wrong... - * ****************************************************************************/ -static int -pico_mdns_cache_add( struct pico_mdns_record *record ) -{ - struct pico_dns_record_suffix *suffix = NULL; - char *name = NULL; - uint32_t rttl = 0; - - /* Check params */ - if (!record) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - /* 2 paths */ - - suffix = record->record->rsuffix; - name = record->record->rname; - rttl = long_be(suffix->rttl); - - if (pico_mdns_cache_flush_name(name, suffix)) { - return -1; - } - /* 4 paths */ - - /* Check if the TTL is not 0*/ - if (!rttl) { - return -1; - } else { - /* Set current TTL to the original TTL before inserting */ - record->current_ttl = rttl; - - if (pico_tree_insert(&Cache, record) != NULL) - return -1; - - mdns_dbg("RR cached. 
TICK TACK TICK TACK...\n"); - - return 0; - } - /* 12 paths */ -} - -/* **************************************************************************** - * Add a copy of an mDNS resource record to the cache tree. Checks whether the - * entry is already present in the Cache or not. - * - * @param record Record to add to the Cache-tree - * @return 0 on grrrreat success, something else on awkward failure. - * ****************************************************************************/ -static int -pico_mdns_cache_add_record( struct pico_mdns_record *record ) -{ - struct pico_mdns_record *found = NULL, *copy = NULL; - uint32_t rttl = 0; - - /* Check params */ - if (!record) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* See if the record is already contained in the cache */ - if ((found = pico_tree_findKey(&Cache, record))) { - rttl = long_be(record->record->rsuffix->rttl); - pico_mdns_cache_update_ttl(found, rttl); - } else if ((copy = pico_mdns_record_copy(record))) { - if (pico_mdns_cache_add(copy)) { - pico_mdns_record_delete((void **)©); - return -1; - } - } else - return -1; - - return 0; -} - -#if PICO_MDNS_CONTINUOUS_REFRESH == 1 -/* **************************************************************************** - * Determine if the current TTL is at a refreshing point. - * - * @param original Original TTL to calculate refreshing points - * @param current Current TTL to check. - * @return 1 when Current TTL is at refresh point. 0 when it's not. - * ****************************************************************************/ -static int -pico_mdns_ttl_at_refresh_time( uint32_t original, - uint32_t current ) -{ - uint32_t rnd = 0; - rnd = pico_rand() % 3; - - if (((original - current == - ((original * (80 + rnd)) / 100)) ? 1 : 0) || - ((original - current == - ((original * (85 + rnd)) / 100)) ? 1 : 0) || - ((original - current == - ((original * (90 + rnd)) / 100)) ? 1 : 0) || - ((original - current == - ((original * (95 + rnd)) / 100)) ? 
1 : 0)) - return 1; - else - return 0; -} -#endif - -/* **************************************************************************** - * Utility function to update the TTL of cache entries and check for expired - * ones. When continuous refreshing is enabled the records will be reconfirmed - * @ 80%, 85%, 90% and 95% of their original TTL. - * ****************************************************************************/ -static void -pico_mdns_cache_check_expiries( void ) -{ - struct pico_tree_node *node = NULL, *next = NULL; - struct pico_mdns_record *record = NULL; -#if PICO_MDNS_CONTINUOUS_REFRESH == 1 - uint32_t current = 0, original = 0; - uint16_t type 0; - char *url = NULL; -#endif - - /* Check for expired cache records */ - pico_tree_foreach_safe(node, &Cache, next) { - if ((record = node->keyValue)) { - /* Update current ttl and delete when TTL is 0*/ - if ((--(record->current_ttl)) == 0) { - record = pico_tree_delete(&Cache, record); - pico_mdns_record_delete((void **)&record); - } - -#if PICO_MDNS_CONTINUOUS_REFRESH == 1 - /* Determine original and current ttl */ - original = long_be(record->record->rsuffix->rttl); - current = record->current_ttl; - - /* Cache refresh at 80 or 85/90/95% of TTL + 2% rnd */ - if (pico_mdns_ttl_at_refresh_time(original, current)) { - url = pico_dns_qname_to_url(record->record->rname); - type = short_be(record->record->rsuffix->rtype) - pico_mdns_getrecord_generic(url, type, NULL, NULL); - PICO_FREE(url); - } - -#endif - } - } -} -#endif /* PICO_MDNS_ALLOW_CACHING */ - -/* **************************************************************************** - * Utility function to update the TTL of cookies and check for expired - * ones. Deletes the expired ones as well. 
- * ****************************************************************************/ -static void -pico_mdns_cookies_check_timeouts( void ) -{ - struct pico_tree_node *node = NULL, *next = NULL; - struct pico_mdns_cookie *cookie = NULL; - - pico_tree_foreach_safe(node, &Cookies, next) { - if ((cookie = node->keyValue) && --(cookie->timeout) == 0) { - /* Call callback to allow error checking */ - if (cookie->callback) { - cookie->callback(NULL, NULL, cookie->arg); - } - - /* Delete cookie */ - cookie = pico_tree_delete(&Cookies, cookie); - pico_mdns_cookie_delete((void **)&cookie); - - /* If the request was for a reconfirmation of a record, - flush the corresponding record after the timeout */ - } - } -} - -/* **************************************************************************** - * Global mDNS module tick-function, central point where all the timing is - * handled. - * - * @param now Ignore - * @param _arg Ignore - * ****************************************************************************/ -static void -pico_mdns_tick( pico_time now, void *_arg ) -{ - IGNORE_PARAMETER(now); - IGNORE_PARAMETER(_arg); - -#if PICO_MDNS_ALLOW_CACHING == 1 - /* Update the cache */ - pico_mdns_cache_check_expiries(); -#endif - - /* Update the cookies */ - pico_mdns_cookies_check_timeouts(); - - /* Schedule new tick */ - if (!pico_mdns_timer_add(PICO_MDNS_RR_TTL_TICK, pico_mdns_tick, NULL)) { - mdns_dbg("MDNS: Failed to start tick timer\n"); - /* TODO Not ticking anymore, what to do? */ - } -} - -/* MARK: v MDNS PACKET UTILITIES */ - -/* **************************************************************************** - * Sends a Multicast packet on the wire to the mDNS destination port. - * - * @param packet Packet buffer in memory - * @param len Size of the packet in bytes - * @return 0 When the packet is passed successfully on to the lower layers of - * picoTCP. Doesn't mean the packet is successfully sent on the wire. 
- * ****************************************************************************/ -static int -pico_mdns_send_packet( pico_dns_packet *packet, uint16_t len ) -{ - /* TODO: why only ipv4 support? */ - struct pico_ip4 dst4; - - /* Set the destination address to the mDNS multicast-address */ - pico_string_to_ipv4(PICO_MDNS_DEST_ADDR4, &dst4.addr); - - /* Send packet to IPv4 socket */ - return pico_socket_sendto(mdns_sock_ipv4, packet, (int)len, &dst4, - short_be(mdns_port)); -} - -/* **************************************************************************** - * Sends a Unicast packet on the wire to the mDNS destination port of specific - * peer in the network - * - * @param packet Packet buffer in memory - * @param len Size of the packet in bytes - * @param peer Peer in the network you want to send the packet to. - * @return 0 When the packet is passed successfully on to the lower layers of - * picoTCP. Doesn't mean the packet is successfully send on the wire. - * ****************************************************************************/ -static int -pico_mdns_send_packet_unicast( pico_dns_packet *packet, - uint16_t len, - struct pico_ip4 peer ) -{ - /* Send packet to IPv4 socket */ - return pico_socket_sendto(mdns_sock_ipv4, packet, (int)len, &peer, - short_be(mdns_port)); -} - - -/* **************************************************************************** - * Send DNS records as answers to a peer via unicast - * - * @param unicast_tree Tree with DNS records to send as answers. - * @param peer Peer IPv4-address - * @return 0 when the packet is properly send, something else otherwise. 
- * ****************************************************************************/ -static int -pico_mdns_unicast_reply( pico_dns_rtree *unicast_tree, - pico_dns_rtree *artree, - struct pico_ip4 peer ) -{ - union pico_address *local_addr = NULL; - pico_dns_packet *packet = NULL; - uint16_t len = 0; - - if (pico_tree_count(unicast_tree) > 0) { - /* Create response DNS packet */ - packet = pico_dns_answer_create(unicast_tree, NULL, artree, &len); - if (!packet || !len) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - packet->id = 0; - - /* Check if source address is on the local link */ - local_addr = (union pico_address *) pico_ipv4_source_find(&peer); - if (!local_addr) { - mdns_dbg("Peer not on same link!\n"); - /* Forced response via multicast */ - - /* RFC6762: 18.6: In both multicast query and response messages, - the RD bit SHOULD be zero on transmission. In - pico_dns_fill_packet_header, the RD bit is set to - PICO_DNS_RD_IS_DESIRED, which is defined to be 1 */ - packet->rd = PICO_DNS_RD_NO_DESIRE; - - - if (pico_mdns_send_packet(packet, len) != (int)len) { - mdns_dbg("Could not send multicast response!\n"); - return -1; - } - } else { - /* Send the packet via unicast */ - if (pico_mdns_send_packet_unicast(packet, len, peer) != (int)len) { - mdns_dbg("Could not send unicast response!\n"); - return -1; - } - - mdns_dbg("Unicast response sent successfully!\n"); - } - - PICO_FREE(packet); - } - - return 0; -} - -/* **************************************************************************** - * Send DNS records as answers to mDNS peers via multicast - * - * @param multicast_tree Tree with DNS records to send as answers. - * @return 0 when the packet is properly send, something else otherwise. 
- * ****************************************************************************/ -static int -pico_mdns_multicast_reply( pico_dns_rtree *multicast_tree, - pico_dns_rtree *artree ) -{ - pico_dns_packet *packet = NULL; - uint16_t len = 0; - - /* If there are any multicast records */ - if (pico_tree_count(multicast_tree) > 0) { - /* Create response DNS packet */ - packet = pico_dns_answer_create(multicast_tree, NULL, artree, &len); - if (!packet || len == 0) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - packet->id = 0; - - /* RFC6762: 18.6: In both multicast query and response messages, - the RD bit SHOULD be zero on transmission. - In pico_dns_fill_packet_header, the RD bit is set to - PICO_DNS_RD_IS_DESIRED, which is defined to be 1 */ - packet->rd = PICO_DNS_RD_NO_DESIRE; - - /* Send the packet via multicast */ - if (pico_mdns_send_packet(packet, len) != (int)len) { - mdns_dbg("Could not send multicast response!\n"); - return -1; - } - - mdns_dbg("Multicast response sent successfully!\n"); - - PICO_FREE(packet); - } - - return 0; -} - -/* MARK: ^ MDNS PACKET UTILITIES */ -/* MARK: ASYNCHRONOUS MDNS RECEPTION */ - -/* **************************************************************************** - * Merges 2 pico_trees with each other. - * - * @param dest Destination tree to merge the other tree in. - * @param src Source tree to get the node from to insert into the dest-tree. - * @return Returns 0 when properly merged, or not.. 
- * ****************************************************************************/ -static int -pico_tree_merge( struct pico_tree *dest, struct pico_tree *src ) -{ - struct pico_tree_node *node = NULL; - - /* Check params */ - if (!dest || !src) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Insert source nodes */ - pico_tree_foreach(node, src) { - if (node->keyValue) { - if (pico_tree_insert(dest, node->keyValue) == &LEAF) { - mdns_dbg("MDNS: Failed to insert record in tree\n"); - return -1; - } - } - } - - return 0; -} - -/* **************************************************************************** - * Populates an mDNS record tree with answers from MyRecords depending on name - * , qtype and qclass. - * - * @param name Name of records to look for in MyRecords - * @param qtype Type of records to look for in MyRecords - * @param qclass Whether the answer should be sent via unicast or not. - * @return mDNS record tree with possible answers from MyRecords - * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_populate_antree( char *name, uint16_t qtype, uint16_t qclass ) -{ - PICO_MDNS_RTREE_DECLARE(antree); - struct pico_tree_node *node = NULL, *next; - struct pico_mdns_record *record = NULL; - - /* Create an answer record vector */ - if (PICO_DNS_TYPE_ANY == qtype) - antree = pico_mdns_rtree_find_name(&MyRecords, name, 1); - else - antree = pico_mdns_rtree_find_name_type(&MyRecords, name, qtype, 1); - - /* Remove answers which aren't successfully registered yet */ - pico_tree_foreach_safe(node, &antree, next) { - if ((record = node->keyValue) && !IS_RECORD_VERIFIED(record)) { - pico_tree_delete(&antree, record); - } - } - - /* Check if question is a QU-question */ - if (PICO_MDNS_IS_MSB_SET(qclass)) { - /* Set all the flags of the answer accordingly */ - pico_tree_foreach(node, &antree) { - if ((record = node->keyValue)) - PICO_MDNS_SET_FLAG(record->flags, - PICO_MDNS_RECORD_SEND_UNICAST); - } 
- } - - return antree; -} - -/* **************************************************************************** - * Handles a single received question. - * - * @param question DNS question to parse and handle. - * @param packet Received packet in which the DNS question was present. - * @return mDNS record tree with possible answer to the question. Can possibly - * be empty. - * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_handle_single_question( struct pico_dns_question *question, - pico_dns_packet *packet ) -{ - struct pico_mdns_cookie *cookie = NULL; - PICO_MDNS_RTREE_DECLARE(antree); - char *qname_original = NULL; - uint16_t qtype = 0, qclass = 0; - - /* Check params */ - if (!question || !packet) { - pico_err = PICO_ERR_EINVAL; - return antree; - } - - /* Decompress single DNS question */ - qname_original = pico_dns_question_decompress(question, packet); - mdns_dbg("Question RCVD for '%s'\n", question->qname); - - /* Find currently active query cookie */ - if ((cookie = pico_mdns_ctree_find_cookie(question->qname, - PICO_MDNS_PACKET_TYPE_QUERY))) { - mdns_dbg("Query cookie found for question, suppress duplicate.\n"); - cookie->status = PICO_MDNS_COOKIE_STATUS_CANCELLED; - } else { - qtype = short_be(question->qsuffix->qtype); - qclass = short_be(question->qsuffix->qclass); - antree = pico_mdns_populate_antree(question->qname, qtype, qclass); - } - - PICO_FREE(question->qname); - question->qname = qname_original; - return antree; -} - -/* **************************************************************************** - * When a query-cookie is found for a RCVD answer, the cookie should be - * handled accordingly. This function does that. - * - * @param cookie Cookie that contains the question for the RCVD answer. - * @param answer RCVD answer to handle cookie with - * @return Returns 0 when handling went OK, something else when it didn't. 
- * ****************************************************************************/ -static int -pico_mdns_handle_cookie_with_answer( struct pico_mdns_cookie *cookie, - struct pico_mdns_record *answer ) -{ - PICO_MDNS_RTREE_DECLARE(antree); - uint8_t type = 0, status = 0; - - /* Check params */ - if (!cookie || !answer) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - type = cookie->type; - status = cookie->status; - if (PICO_MDNS_COOKIE_STATUS_ACTIVE == status) { - if (PICO_MDNS_PACKET_TYPE_PROBE == type) { - /* Conflict occurred, resolve it! */ - pico_mdns_cookie_resolve_conflict(cookie, answer->record->rname); - } else if (PICO_MDNS_PACKET_TYPE_QUERY == type) { - if (cookie->callback) { - /* RCVD Answer on query, callback with answer. Callback is - * responsible for aggregating all the received answers. */ - if (pico_tree_insert(&antree, answer) == &LEAF) { - mdns_dbg("MDNS: Failed to insert answer in tree\n"); - return -1; - } - cookie->callback(&antree, NULL, cookie->arg); - } - } else { /* Don't handle answer cookies with answer */ - } - } - - return 0; -} - -/* **************************************************************************** - * Handles a single received answer record. - * - * @param answer Answer mDNS record. - * @return 0 when answer is properly handled, something else when it's not. 
- * ****************************************************************************/ -static int -pico_mdns_handle_single_answer( struct pico_mdns_record *answer ) -{ - struct pico_mdns_cookie *found = NULL; - struct pico_mdns_record *record = NULL; - - mdns_dbg("Answer RCVD for '%s'\n", answer->record->rname); - - /* Find currently active query cookie */ - found = pico_mdns_ctree_find_cookie(answer->record->rname, - PICO_MDNS_PACKET_TYPE_QUERY_ANY); - if (found && pico_mdns_handle_cookie_with_answer(found, answer)) { - mdns_dbg("Could not handle found cookie correctly!\n"); - return -1; - } else { - mdns_dbg("RCVD an unsolicited record!\n"); - if ((record = pico_tree_findKey(&MyRecords, answer)) && - !IS_RECORD_PROBING(record)) - return pico_mdns_record_resolve_conflict(record, - answer->record->rname); - } - - return 0; -} - -/* **************************************************************************** - * Handles a single received authority record. - * - * @param answer Authority mDNS record. - * @return 0 when authority is properly handled. -1 when it's not. - * ****************************************************************************/ -static int -pico_mdns_handle_single_authority( struct pico_mdns_record *answer ) -{ - struct pico_mdns_cookie *found = NULL; - char *name = NULL; - - name = answer->record->rname; - mdns_dbg("Authority RCVD for '%s'\n", name); - - /* Find currently active probe cookie */ - if ((found = pico_mdns_ctree_find_cookie(name, PICO_MDNS_PACKET_TYPE_PROBE)) - && PICO_MDNS_COOKIE_STATUS_ACTIVE == found->status) { - mdns_dbg("Simultaneous Probing occurred, went tiebreaking...\n"); - if (pico_mdns_cookie_apply_spt(found, answer->record) < 0) { - mdns_dbg("Could not apply S.P.T. to cookie!\n"); - return -1; - } - } - - return 0; -} - -/* **************************************************************************** - * Handles a single received additional [Temporarily unused] - * - * @param answer Additional mDNS record. 
- * @return 0 - * ****************************************************************************/ -static int -pico_mdns_handle_single_additional( struct pico_mdns_record *answer ) -{ - /* Don't need this for now ... */ - IGNORE_PARAMETER(answer); - return 0; -} - -/* **************************************************************************** - * Handles a flat chunk of memory as if it were all questions in it. - * Generates a tree with responses if there are any questions for records for - * which host has the authority to answer. - * - * @param ptr Pointer-Pointer to location of question section of packet. - * Will point to right after the question section on return. - * @param qdcount Amount of questions contained in the packet - * @param packet DNS packet where the questions are present. - * @return Tree with possible responses on the questions. - * ****************************************************************************/ -static pico_mdns_rtree -pico_mdns_handle_data_as_questions ( uint8_t **ptr, - uint16_t qdcount, - pico_dns_packet *packet ) -{ - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_dns_question question; - uint16_t i = 0; - - /* Check params */ - if ((!ptr) || !packet || !(*ptr)) { - pico_err = PICO_ERR_EINVAL; - return antree; - } - - for (i = 0; i < qdcount; i++) { - /* Set qname of the question to the correct location */ - question.qname = (char *)(*ptr); - - /* Set qsuffix of the question to the correct location */ - question.qsuffix = (struct pico_dns_question_suffix *) - (question.qname + pico_dns_namelen_comp(question.qname) + 1); - - /* Handle a single question and merge the returned tree */ - rtree = pico_mdns_handle_single_question(&question, packet); - pico_tree_merge(&antree, &rtree); - pico_tree_destroy(&rtree, NULL); - - /* Move to next question */ - *ptr = (uint8_t *)question.qsuffix + - sizeof(struct pico_dns_question_suffix); - } - if (pico_tree_count(&antree) == 0) { - mdns_dbg("No 'MyRecords' 
found that corresponds with this query.\n"); - } - - return antree; -} - -static int -pico_mdns_handle_data_as_answers_generic( uint8_t **ptr, - uint16_t count, - pico_dns_packet *packet, - uint8_t type ) -{ - struct pico_mdns_record mdns_answer = { - .record = NULL, .current_ttl = 0, - .flags = 0, .claim_id = 0 - }; - struct pico_dns_record answer; - char *orname = NULL; - uint16_t i = 0; - - /* Check params */ - if ((!ptr) || !packet || !(*ptr)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* TODO: When receiving multiple authoritative answers, */ - /* they should be sorted in lexicographical order */ - /* (just like in pico_mdns_record_am_i_lexi_later) */ - - for (i = 0; i < count; i++) { - /* Set rname of the record to the correct location */ - answer.rname = (char *)(*ptr); - - /* Set rsuffix of the record to the correct location */ - answer.rsuffix = (struct pico_dns_record_suffix *) - (answer.rname + - pico_dns_namelen_comp(answer.rname) + 1u); - - /* Set rdata of the record to the correct location */ - answer.rdata = (uint8_t *) answer.rsuffix + - sizeof(struct pico_dns_record_suffix); - - /* Make an mDNS record from the DNS answer */ - orname = pico_dns_record_decompress(&answer, packet); - mdns_answer.record = &answer; - mdns_answer.record->rname_length = (uint16_t)(pico_dns_strlen(answer.rname) + 1u); - - /* Handle a single aswer */ - switch (type) { - case 1: - pico_mdns_handle_single_authority(&mdns_answer); - break; - case 2: - pico_mdns_handle_single_additional(&mdns_answer); - break; - default: - pico_mdns_handle_single_answer(&mdns_answer); -#if PICO_MDNS_ALLOW_CACHING == 1 - pico_mdns_cache_add_record(&mdns_answer); -#endif - break; - } - - /* Free decompressed name and mDNS record */ - PICO_FREE(mdns_answer.record->rname); - answer.rname = orname; - - /* Move to next record */ - *ptr = (uint8_t *) answer.rdata + short_be(answer.rsuffix->rdlength); - } - return 0; -} - -/* 
**************************************************************************** - * Splits an mDNS record tree into two DNS record tree, one to send via - * unicast, one to send via multicast. - * - * @param answers mDNS record tree to split up - * @param unicast_tree DNS record tree with unicast answers. - * @param multicast_tree DNS record tee with multicast answers. - * @return 0 when the tree is properly split up. - * ****************************************************************************/ -static int -pico_mdns_sort_unicast_multicast( pico_mdns_rtree *answers, - pico_dns_rtree *unicast_tree, - pico_dns_rtree *multicast_tree ) -{ - struct pico_mdns_record *record = NULL; - struct pico_tree_node *node = NULL; - - /* Check params */ - if (!answers || !unicast_tree || !multicast_tree) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - pico_tree_foreach(node, answers) { - record = node->keyValue; - if ((record = node->keyValue)) { - if (IS_UNICAST_REQUESTED(record)) { - if (record->record){ - if (pico_tree_insert(unicast_tree, record->record) == &LEAF) { - mdns_dbg("MDNS: Failed to instert unicast record in tree\n"); - return -1; - } - } - } else { - if (record->record){ - if (pico_tree_insert(multicast_tree, record->record) == &LEAF) { - mdns_dbg("MDNS: Failed to instert multicast record in tree\n"); - return -1; - } - } - } - } - } - - return 0; -} - -static uint16_t -pico_mdns_nsec_highest_type( pico_mdns_rtree *rtree ) -{ - struct pico_tree_node *node = NULL, *next = NULL; - struct pico_mdns_record *record = NULL; - uint16_t highest_type = 0, type = 0; - - pico_tree_foreach_safe(node, rtree, next) { - if ((record = node->keyValue)) { - if (IS_SHARED_RECORD(record)) - pico_tree_delete(rtree, record); - - type = short_be(record->record->rsuffix->rtype); - highest_type = (type > highest_type) ? 
(type) : (highest_type); - } - } - - return highest_type; -} - -static void -pico_mdns_nsec_gen_bitmap( uint8_t *ptr, pico_mdns_rtree *rtree ) -{ - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - uint16_t type = 0; - - pico_tree_foreach(node, rtree) { - if ((record = node->keyValue)) { - type = short_be(record->record->rsuffix->rtype); - *(ptr + 1 + (type / 8)) = (uint8_t)(0x80 >> (type % 8)); - } - } -} - -/* **************************************************************************** - * Generates an NSEC record for a specific name. Looks in MyRecords for unique - * records with given name and generates the NSEC bitmap from them. - * - * @param name Name of the records you want to generate a bitmap for. - * @return Pointer to newly created NSEC record on success, NULL on failure. - * ****************************************************************************/ -static struct pico_mdns_record * -pico_mdns_gen_nsec_record( char *name ) -{ - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *record = NULL; - uint16_t highest_type = 0, rdlen = 0; - uint8_t bitmap_len = 0, *rdata = NULL, *ptr = NULL; - char *url = NULL; - - if (!name) { /* Check params */ - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - /* Determine the highest type of my unique records with this name */ - rtree = pico_mdns_rtree_find_name(&MyRecords, name, 0); - highest_type = pico_mdns_nsec_highest_type(&rtree); - - /* Determine the bimap_len */ - bitmap_len = (uint8_t)(highest_type / 8); - bitmap_len = (uint8_t)(bitmap_len + ((highest_type % 8) ? 
(1) : (0))); - - /* Provide rdata */ - rdlen = (uint16_t)(pico_dns_strlen(name) + 3u + bitmap_len); - if (!(rdata = PICO_ZALLOC((size_t)rdlen))) { - pico_err = PICO_ERR_ENOMEM; - pico_tree_destroy(&rtree, NULL); - return NULL; - } - - /* Set the next domain name */ - strcpy((char *)rdata, name); - /* Set the bitmap length */ - *(ptr = (uint8_t *)(rdata + pico_dns_strlen(name) + 2)) = bitmap_len; - /* Generate the bitmap */ - pico_mdns_nsec_gen_bitmap(ptr, &rtree); - pico_tree_destroy(&rtree, NULL); - - /* Generate the actual mDNS NSEC record */ - if (!(url = pico_dns_qname_to_url(name))) { - PICO_FREE(rdata); - return NULL; - } - - record = pico_mdns_record_create(url, (void *)rdata, rdlen, - PICO_DNS_TYPE_NSEC, - PICO_MDNS_SERVICE_TTL, - PICO_MDNS_RECORD_UNIQUE); - PICO_FREE(rdata); - PICO_FREE(url); - return record; -} - -/* **************************************************************************** - * Checks in additionals if there is an NSEC record already present with given - * name. If there's not, a new NSEC records will be generated and added to the - * additional tree. - * - * @param artree mDNS record-tree containing additional records. - * @param name Name to check for. - * @return 0 when NSEC is present in additional, whether it was already present - * or a new one is generated doesn't matter. 
- * ****************************************************************************/ -static int -pico_mdns_additionals_add_nsec( pico_mdns_rtree *artree, - char *name ) -{ - struct pico_mdns_record *record = NULL, *nsec = NULL; - struct pico_tree_node *node = NULL; - uint16_t type = 0; - - /* Check params */ - if (!artree || !name) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Check if there is a NSEC already for this name */ - pico_tree_foreach(node, artree) { - if (node != &LEAF && (record = node->keyValue)) { - type = short_be(record->record->rsuffix->rtype); - if ((PICO_DNS_TYPE_NSEC == type) && 0 == strcasecmp(record->record->rname, name)) { - return 0; - } - } - } - - /* If there is none present generate one for given name */ - if ((nsec = pico_mdns_gen_nsec_record(name))) { - if (pico_tree_insert(artree, nsec)) { - pico_mdns_record_delete((void **)nsec); - return -1; - } - } - - return 0; -} - -/* **************************************************************************** - * Adds hostname records to the additional records - * - * @param artree mDNS record-tree containing additional records. - * @return 0 when hostname records are added successfully to additionals. Rets - * something else on failure. 
- * ****************************************************************************/ -static int -pico_mdns_additionals_add_host( pico_mdns_rtree *artree ) -{ - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL, *copy = NULL; - - pico_tree_foreach(node, &MyRecords) { - record = node->keyValue; - if (record) { - if (IS_HOSTNAME_RECORD(record) && IS_RECORD_VERIFIED(record)) { - copy = pico_mdns_record_copy(record); - if (copy && pico_tree_insert(artree, copy)) - pico_mdns_record_delete((void **)©); - } - } - } - - return 0; -} /* Satic path count: 4 */ - -static void -pico_rtree_add_copy( pico_mdns_rtree *tree, struct pico_mdns_record *record ) -{ - struct pico_mdns_record *copy = NULL; - - if (!tree || !record) { - pico_err = PICO_ERR_EINVAL; - return; - } - - if ((copy = pico_mdns_record_copy(record))) { - if (pico_tree_insert(tree, copy)) - pico_mdns_record_delete((void **)©); - } -} - -/* **************************************************************************** - * When a service is found, additional records should be generated and - * added to either the answer section or the additional sections. 
- * This happens here - * - * @param antree mDNS record tree with answers to send - * @param artree mDNS record tree with additionals to send - * @param srv_record Found SRV record in the answers - * @return 0 When additional records are properly generated - * ****************************************************************************/ -static int -pico_mdns_gather_service_meta( pico_mdns_rtree *antree, - pico_mdns_rtree *artree, - struct pico_mdns_record *srv_record ) -{ - struct pico_mdns_record *ptr_record = NULL, *meta_record = NULL; - char *sin = NULL, *service = NULL; - uint32_t ttl = 0; - - /* Generate proper service instance name and service */ - sin = pico_dns_qname_to_url(srv_record->record->rname); // May be leaking - - if (!antree || !artree || !sin) { - pico_err = PICO_ERR_EINVAL; - PICO_FREE(sin); - return -1; - } else { - /* Add hostname records */ - pico_mdns_additionals_add_host(artree); - - service = sin + pico_dns_first_label_length(sin) + 1u; - ttl = long_be(srv_record->record->rsuffix->rttl); - - /* Generate PTR records */ - ptr_record = pico_mdns_record_create(service, (void *)sin, - (uint16_t)strlen(sin), - PICO_DNS_TYPE_PTR, - ttl, PICO_MDNS_RECORD_SHARED); - /* Meta DNS-SD record */ - meta_record = pico_mdns_record_create("_services._dns-sd._udp.local", - (void *)service, - (uint16_t)strlen(service), - PICO_DNS_TYPE_PTR, - ttl, PICO_MDNS_RECORD_SHARED); - PICO_FREE(sin); // Free allocated memory - if (!meta_record || !ptr_record) { - mdns_dbg("Could not generate META or PTR records!\n"); - pico_mdns_record_delete((void **)&ptr_record); - pico_mdns_record_delete((void **)&meta_record); - return -1; - } - - ptr_record->flags |= (PICO_MDNS_RECORD_PROBED | - PICO_MDNS_RECORD_CLAIMED); - meta_record->flags |= (PICO_MDNS_RECORD_PROBED | - PICO_MDNS_RECORD_CLAIMED); - - /* Add copies to the answer tree */ - pico_rtree_add_copy(antree, meta_record); - pico_rtree_add_copy(antree, ptr_record); - - /* Insert the created service record in MyRecords, 
alread in, destroy */ - if (pico_tree_insert(&MyRecords, meta_record)) { - mdns_dbg("MDNS: Failed to insert meta record in tree\n"); - pico_mdns_record_delete((void **)&meta_record); - pico_mdns_record_delete((void **)&ptr_record); - return -1; - } - - if (pico_tree_insert(&MyRecords, ptr_record)) { - mdns_dbg("MDNS: Failed to insert ptr record in tree\n"); - pico_mdns_record_delete((void **)&ptr_record); - pico_tree_delete(&MyRecords, meta_record); - pico_mdns_record_delete((void **)&meta_record); - } - } - return 0; -} /* Static path count: 9 */ - -/* **************************************************************************** - * Gathers additional records for a to send response. Checks for services and - * whether or not there should be NSEC records added to the additional section - * - * @param antree mDNS record tree with answers to send - * @param artree mDNS record tree with additionals to send - * @return Returns 0 when additionals are properly generated and added - * ****************************************************************************/ -static int -pico_mdns_gather_additionals( pico_mdns_rtree *antree, - pico_mdns_rtree *artree ) -{ - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - int ret = 0; - - /* Check params */ - if (!antree || !artree) { - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* Look for SRV records in the tree */ - pico_tree_foreach(node, antree) { - if ((record = node->keyValue) && - short_be(record->record->rsuffix->rtype) == PICO_DNS_TYPE_SRV && - (ret = pico_mdns_gather_service_meta(antree, artree, record))) - return ret; - } - - /* Look for unique records in the tree to generate NSEC records */ - pico_tree_foreach(node, antree) { - if ((record = node->keyValue) && IS_UNIQUE_RECORD(record) && - (ret = pico_mdns_additionals_add_nsec(artree, - record->record->rname))) - return ret; - } - - /* Look for unique records in the additionals to generate NSEC records*/ - pico_tree_foreach(node, 
artree) { - if ((record = node->keyValue) && IS_UNIQUE_RECORD(record) && - (ret = pico_mdns_additionals_add_nsec(artree, - record->record->rname))) - return ret; - } - } - - return 0; -} /* Static path count: 9 */ - -/* **************************************************************************** - * Sends mDNS records to either multicast peer via unicast to a single peer. - * - * @param antree Tree with mDNS records to send as answers - * @param peer IPv4-address of peer who this host has RCVD a packet. - * @return 0 when answers are properly handled, something else otherwise. - * ****************************************************************************/ -static int -pico_mdns_reply( pico_mdns_rtree *antree, struct pico_ip4 peer ) -{ - PICO_DNS_RTREE_DECLARE(antree_m); - PICO_DNS_RTREE_DECLARE(antree_u); - PICO_MDNS_RTREE_DECLARE(artree); - PICO_DNS_RTREE_DECLARE(artree_dummy); - PICO_DNS_RTREE_DECLARE(artree_dns); - - /* Try to gather additionals for the to send response */ - if (pico_mdns_gather_additionals(antree, &artree)) { - mdns_dbg("Could not gather additionals properly!\n"); - return -1; - } - - /* Sort the answers into multicast and unicast answers */ - pico_mdns_sort_unicast_multicast(antree, &antree_u, &antree_m); - - /* Convert the mDNS additional tree to a DNS additional tree to send with - * the the unicast AND the multicast response */ - pico_mdns_sort_unicast_multicast(&artree, &artree_dummy, &artree_dns); - - /* Send response via unicast */ - if (pico_mdns_unicast_reply(&antree_u, &artree_dns, peer)) { - mdns_dbg("Could not sent reply via unicast!\n"); - return -1; - } - - /* Send response via multicast */ - if (pico_mdns_multicast_reply(&antree_m, &artree_dns)) { - mdns_dbg("Could not sent reply via multicast!\n"); - return -1; - } - - pico_tree_destroy(&antree_m, NULL); - pico_tree_destroy(&antree_u, NULL); - pico_tree_destroy(&artree_dummy, NULL); - pico_tree_destroy(&artree_dns, NULL); - PICO_MDNS_RTREE_DESTROY(&artree); - - return 0; -} - 
-/* **************************************************************************** - * Parses DNS records from a plain chunk of data and looks for them in the - * answer tree. If they're found, they will be removed from the tree. - * - * @param rtree Tree to look in for known answers - * @param packet DNS packet in which to look for known answers - * @param ancount Amount of answers in the DNS packet - * @param data Answer section of the DNS packet as a flat chunk of memory. - * @return 0 K.A.S. could be properly applied, something else when not. - * ****************************************************************************/ -static int -pico_mdns_apply_k_a_s( pico_mdns_rtree *rtree, - pico_dns_packet *packet, - uint16_t ancount, - uint8_t **data ) -{ - struct pico_tree_node *node = NULL, *next = NULL; - struct pico_mdns_record *record = NULL, ka = { - 0 - }; - struct pico_dns_record answer = { - 0 - }; - uint16_t i = 0; - - /* Check params */ - if ((!data) || !rtree || !packet || !(*data)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - for (i = 0; i < ancount; i++) { - /* Set rname of the record to the correct location */ - answer.rname = (char *)(*data); - - /* Set rsuffix of the record to the correct location */ - answer.rsuffix = (struct pico_dns_record_suffix *) - (answer.rname + pico_dns_namelen_comp(answer.rname) + 1u); - - /* Set rdata of the record to the correct location */ - answer.rdata = (uint8_t *) answer.rsuffix + - sizeof(struct pico_dns_record_suffix); - - pico_dns_record_decompress(&answer, packet); - ka.record = &answer; - - /* If the answer is in the record vector */ - pico_tree_foreach_safe(node, rtree, next) { - if ((record = node->keyValue)) { - if (pico_mdns_record_cmp(record, &ka) == 0) - record = pico_tree_delete(rtree, record); - } - } - PICO_FREE(ka.record->rname); - ka.record = NULL; - - /* Move to next record */ - *data = (uint8_t *) answer.rdata + short_be(answer.rsuffix->rdlength); - } - return 0; -} - -/* 
**************************************************************************** - * Handles a single incoming query packet. Applies Known Answer Suppression - * after handling as well. - * - * @param packet Received packet - * @param peer IPv4 address of the peer who sent the received packet. - * @return Returns 0 when the query packet is properly handled. - * ****************************************************************************/ -static int -pico_mdns_handle_query_packet( pico_dns_packet *packet, struct pico_ip4 peer ) -{ - PICO_MDNS_RTREE_DECLARE(antree); - uint16_t qdcount = 0, ancount = 0; - uint8_t *data = NULL; - - /* Move to the data section of the packet */ - data = (uint8_t *)packet + sizeof(struct pico_dns_header); - - /* Generate a list of answers */ - qdcount = short_be(packet->qdcount); - antree = pico_mdns_handle_data_as_questions(&data, qdcount, packet); - if (pico_tree_count(&antree) == 0) { - mdns_dbg("No records found that correspond with this query!\n"); - return 0; - } - - /* Apply Known Answer Suppression */ - ancount = short_be(packet->ancount); - if (pico_mdns_apply_k_a_s(&antree, packet, ancount, &data)) { - mdns_dbg("Could not apply known answer suppression!\n"); - return -1; - } - - /* Try to reply with the left-over answers */ - pico_mdns_reply(&antree, peer); - PICO_MDNS_RTREE_DESTROY(&antree); - - return 0; -} - -/* **************************************************************************** - * Handles a single incoming probe packet. Checks for Simultaneous Probe - * Tiebreaking as well. - * - * @param packet Received probe packet. - * @param peer IPv4 address of the peer who sent the probe packet. - * @return Returns 0 when the probe packet is properly handled. 
- * ****************************************************************************/ -static int -pico_mdns_handle_probe_packet( pico_dns_packet *packet, struct pico_ip4 peer ) -{ - PICO_MDNS_RTREE_DECLARE(antree); - uint16_t qdcount = 0, nscount = 0; - uint8_t *data = NULL; - - /* Move to the data section of the packet */ - data = (uint8_t *)packet + sizeof(struct pico_dns_header); - - /* Generate a list of answers */ - qdcount = short_be(packet->qdcount); - antree = pico_mdns_handle_data_as_questions(&data, qdcount, packet); - - /* Check for Simultaneous Probe Tiebreaking */ - nscount = short_be(packet->nscount); - pico_mdns_handle_data_as_answers_generic(&data, nscount, packet, 1); - - /* Try to reply with the answers */ - if (pico_tree_count(&antree) != 0) { - int retval = pico_mdns_reply(&antree, peer); - PICO_MDNS_RTREE_DESTROY(&antree); - return retval; - } - - return 0; -} - -/* **************************************************************************** - * Handles a single incoming answer packet. - * - * @param packet Received answer packet. - * @return Returns 0 when the response packet is properly handled. - * ****************************************************************************/ -static int -pico_mdns_handle_response_packet( pico_dns_packet *packet ) -{ - uint8_t *data = NULL; - uint16_t ancount = 0; - - /* Move to the data section of the packet */ - data = (uint8_t *)packet + sizeof(struct pico_dns_header); - - /* Generate a list of answers */ - ancount = short_be(packet->ancount); - if (pico_mdns_handle_data_as_answers_generic(&data, ancount, packet, 0)) { - mdns_dbg("Could not handle data as answers\n"); - return -1; - } - - return 0; -} - -/* **************************************************************************** - * Parses an incoming packet and handles it according to the type of the - * packet. Packet type determination happens in this function. 
- * - * @param buf Memory buffer containing the received packet - * @param buflen Length in bytes of the memory buffer - * @param peer IPv4 address of the peer who sent the received packet. - * @return 0 when the packet is properly handled. Something else when it's not - * ****************************************************************************/ -static int -pico_mdns_recv( void *buf, int buflen, struct pico_ip4 peer ) -{ - pico_dns_packet *packet = (pico_dns_packet *) buf; - uint16_t qdcount = short_be(packet->qdcount); - uint16_t ancount = short_be(packet->ancount); - uint16_t authcount = short_be(packet->nscount); - uint16_t addcount = short_be(packet->arcount); - - /* RFC6762: */ - /* 18.3: Messages received with an opcode other than zero MUST be silently */ - /* ignored. */ - /* 18.11: messages received with non-zero Response Codes MUST be silently */ - /* ignored */ - if(packet->opcode == 0 && packet->rcode == 0) { - mdns_dbg(">>>>>>> QDcount: %u, ANcount: %u, NScount: %u, ARcount: %u\n", - qdcount, ancount, authcount, addcount); - - IGNORE_PARAMETER(buflen); - IGNORE_PARAMETER(addcount); - - /* DNS PACKET TYPE DETERMINATION */ - if ((qdcount > 0)) { - if (authcount > 0) { - mdns_dbg(">>>>>>> RCVD a mDNS probe query:\n"); - /* Packet is probe query */ - if (pico_mdns_handle_probe_packet(packet, peer) < 0) { - mdns_dbg("Could not handle mDNS probe query!\n"); - return -1; - } - } else { - mdns_dbg(">>>>>>> RCVD a plain mDNS query:\n"); - /* Packet is a plain query */ - if (pico_mdns_handle_query_packet(packet, peer) < 0) { - mdns_dbg("Could not handle plain DNS query!\n"); - return -1; - } - } - } else { - if (ancount > 0) { - mdns_dbg(">>>>>>> RCVD a mDNS response:\n"); - /* Packet is a response */ - if (pico_mdns_handle_response_packet(packet) < 0) { - mdns_dbg("Could not handle DNS response!\n"); - return -1; - } - } else { - /* Something went wrong here... 
*/ - mdns_dbg("RCVD Packet contains no questions or answers...\n"); - return -1; - } - } - } - - return 0; -} - -/* **************************************************************************** - * picoTCP callback for UDP IPv4 Socket events - * - * @param ev Determination of the occurred event - * @param s Socket on which the event occurred - * ****************************************************************************/ -static void -pico_mdns_event4( uint16_t ev, struct pico_socket *s ) -{ - char *recvbuf = NULL; - struct pico_ip4 peer = { - 0 - }; - int pico_read = 0; - uint16_t port = 0; - - /* process read event, data available */ - if (ev == PICO_SOCK_EV_RD) { - mdns_dbg("\n>>>>>>> READ EVENT! <<<<<<<\n"); - recvbuf = PICO_ZALLOC(PICO_MDNS_MAXBUF); - if (!recvbuf) { - pico_err = PICO_ERR_ENOMEM; - return; - } - - /* Receive while data is available in socket buffer */ - while((pico_read = pico_socket_recvfrom(s, recvbuf, PICO_MDNS_MAXBUF, - &peer, &port)) > 0) { - /* Handle the MDNS data received */ - pico_mdns_recv(recvbuf, pico_read, peer); - } - PICO_FREE(recvbuf); - mdns_dbg(">>>>>>>>>>>>>><<<<<<<<<<<<<\n\n"); - } else - mdns_dbg("Socket Error received. Bailing out.\n"); -} - -/* MARK: ADDRESS RESOLUTION */ - -/* **************************************************************************** - * Send a mDNS query packet on the wire. This is scheduled with a pico_timer- - * event. 
- * - * @param now Ignore - * @param arg Void-pointer to query-cookie - * ****************************************************************************/ -static void -pico_mdns_send_query_packet( pico_time now, void *arg ) -{ - struct pico_mdns_cookie *cookie = (struct pico_mdns_cookie *)arg; - pico_dns_qtree *questions = NULL; - pico_dns_packet *packet = NULL; - uint16_t len = 0; - - IGNORE_PARAMETER(now); - - /* Parse in the cookie */ - if (!cookie || cookie->type != PICO_MDNS_PACKET_TYPE_QUERY) - return; - - /* Create DNS query packet */ - questions = &(cookie->qtree); - if (!(packet = pico_dns_query_create(questions, NULL, NULL, NULL, &len))) { - mdns_dbg("Could not create query packet!\n"); - return; - } - - packet->id = 0; - - /* RFC6762: 18.6: In both multicast query and response messages, - the RD bit SHOULD be zero on transmission. In pico_dns_fill_packet_header, - the RD bit is set to PICO_DNS_RD_IS_DESIRED, which is defined to be 1 */ - packet->rd = PICO_DNS_RD_NO_DESIRE; - - if (cookie->status != PICO_MDNS_COOKIE_STATUS_CANCELLED) { - cookie->status = PICO_MDNS_COOKIE_STATUS_ACTIVE; - if(pico_mdns_send_packet(packet, len) != (int)len) { - mdns_dbg("Send error occurred!\n"); - return; - } - - mdns_dbg("DONE - Sent query.\n"); - } else { - mdns_dbg("DONE - Duplicate query suppressed.\n"); - pico_timer_cancel(cookie->send_timer); - /* Remove cookie from Cookies */ - cookie = pico_tree_delete(&Cookies, cookie); - pico_mdns_cookie_delete((void **)&cookie); - } - - PICO_FREE(packet); -} - -/* **************************************************************************** - * Generates a mDNS query packet and schedules a sending on the wire. - * - * @param url URL for the name of the question contained in the query - * @param type DNS type of the question contained in the query - * @param callback Callback to call when a response on this query is RCVD. 
- * @return 0 When the query is successfully generated and scheduled for sending - * ****************************************************************************/ -static int -pico_mdns_getrecord_generic( const char *url, uint16_t type, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg) -{ - struct pico_mdns_cookie *cookie = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - struct pico_dns_question *q = NULL; - uint16_t l = 0; - - /* Create a single question and add it to the tree */ - q = pico_mdns_question_create(url, &l, PICO_PROTO_IPV4, type, 0, 0); - if (!q) { - mdns_dbg("question_create returned NULL!\n"); - return -1; - } - - if (pico_tree_insert(&qtree, q)) { - mdns_dbg("inserting query into tree failed!\n"); - pico_dns_question_delete((void **)&q); - return -1; - } - - - /* Create a mDNS cookie to send */ - if (!(cookie = pico_mdns_cookie_create(qtree, antree, artree, 1, - PICO_MDNS_PACKET_TYPE_QUERY, - callback, arg))) { - PICO_DNS_QTREE_DESTROY(&qtree); - mdns_dbg("cookie_create returned NULL!\n"); - return -1; - } - - /* Add cookie to Cookies to be able to find it afterwards */ - if(pico_tree_insert(&Cookies, cookie) ){ - mdns_dbg("inserting cookie into tree failed!\n"); - PICO_DNS_QTREE_DESTROY(&qtree); - pico_mdns_cookie_delete((void **)&cookie); - return -1; - } - - /* Create new pico_timer-event to send packet */ - if (!pico_mdns_timer_add((pico_rand() % 120) + 20, pico_mdns_send_query_packet, - (void *)cookie)) { - mdns_dbg("MDNS: Failed to start send_query_packet timer\n"); - pico_tree_delete(&Cookies, cookie); - pico_mdns_cookie_delete((void**)&cookie); - pico_dns_question_delete((void**)&q); - return -1; - } - - return 0; -} - -/* **************************************************************************** - * API-call to query a record with a certain URL and type. First checks the - * Cache for this record. 
If no cache-entry is found, a query will be sent on - * the wire for this record. - * - * @param url URL to query for. - * @param type DNS type to query for. - * @param callback Callback to call when records are found for the query. - * @return 0 when query is correctly parsed, something else on failure. - * ****************************************************************************/ -int -pico_mdns_getrecord( const char *url, uint16_t type, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ -#if PICO_MDNS_ALLOW_CACHING == 1 - PICO_MDNS_RTREE_DECLARE(cache_hits); - char *name = NULL; -#endif - - /* Check params */ - if (!url) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* First, try to find records in the cache */ -#if PICO_MDNS_ALLOW_CACHING == 1 - name = pico_dns_url_to_qname(url); - cache_hits = pico_mdns_rtree_find_name_type(&Cache, name, type, 0); - PICO_FREE(name); - if (pico_tree_count(&cache_hits) > 0) { - mdns_dbg("CACHE HIT! Passed cache records to callback.\n"); - callback(&cache_hits, NULL, arg); - } else { -#endif - mdns_dbg("CACHE MISS! Trying to resolve URL '%s'...\n", url); - return pico_mdns_getrecord_generic(url, type, callback, arg); -#if PICO_MDNS_ALLOW_CACHING == 1 -} -return 0; -#endif -} - -/* MARK: PROBING & ANNOUNCING */ - -/* **************************************************************************** - * Function to create an announcement from an mDNS cookie and send it on the - * wire. 
- * - * @param now Ignore - * @param arg Void-pointer to mDNS announcement cookie - * ***************************************************************************/ -static void -pico_mdns_send_announcement_packet( pico_time now, void *arg ) -{ - struct pico_mdns_cookie *cookie = (struct pico_mdns_cookie *)arg; - - /* Check params */ - IGNORE_PARAMETER(now); - if (!cookie) { - return; - } - - cookie->status = PICO_MDNS_COOKIE_STATUS_ACTIVE; - if (cookie->count > 0) { - /* Send the announcement on the wire */ - pico_mdns_reply(&(cookie->antree), inaddr_any); - mdns_dbg("DONE - Sent announcement!\n"); - - /* The Multicast DNS responder MUST send at least two unsolicited - responses, one second apart. To provide increased robustness - against packet loss, a responder MAY send up to eight unsolicited - responses, provided that the interval between unsolicited - responses increases by at least a factor of two with - every response sent. - */ - --(cookie->count); - if (cookie->count == 0) { - cookie->status = PICO_MDNS_COOKIE_STATUS_INACTIVE; - - /* Update the states of the records */ - pico_mdns_my_records_claimed(cookie->antree, - cookie->callback, - cookie->arg); - - /* Try to delete the cookie */ - pico_tree_delete(&Cookies, cookie); - pico_mdns_cookie_delete((void **)&cookie); - } - else{ - /* - A responder MAY send up to eight unsolicited responses, - provided that the interval between unsolicited responses increases - by at least a factor of two with every response sent. - Starting at 1 second. - So we bithsift to get our powers of two and we multiply by 1000 to - get our miliseconds. 
- */ - if (!pico_mdns_timer_add((pico_time)((1 << (PICO_MDNS_ANNOUNCEMENT_COUNT - cookie->count - 1)) - * 1000), pico_mdns_send_announcement_packet, cookie)) { - mdns_dbg("MDNS: Failed to start send_announcement_packet timer\n"); - /* TODO no idea what the consequences of this are */ - - } - } - } -} - -/* **************************************************************************** - * Announces all 'my records' which passed the probing-step or just shared - * records. - * - * @param callback Gets called when all records in the cookie are announced. - * @return 0 When the host successfully started announcing. - * ****************************************************************************/ -static int -pico_mdns_announce( void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - struct pico_mdns_cookie *announcement_cookie = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - - /* Check params */ - if (!callback) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - IGNORE_PARAMETER(arg); - - /* Find out which resource records can be announced */ - antree = pico_mdns_my_records_find_probed(); - if (pico_tree_count(&antree) == 0) { - return 0; - } - - /* Create a mDNS packet cookie */ - if (!(announcement_cookie = pico_mdns_cookie_create(qtree, antree, artree, - PICO_MDNS_ANNOUNCEMENT_COUNT, - PICO_MDNS_PACKET_TYPE_ANNOUNCEMENT, - callback, arg))) { - mdns_dbg("cookie_create returned NULL!\n"); - PICO_MDNS_RTREE_DESTROY(&antree); - return -1; - } - - /* Send a first unsolicited announcement */ - pico_mdns_send_announcement_packet(0, announcement_cookie); - mdns_dbg("DONE - Started announcing.\n"); - - return 0; -} - -/* **************************************************************************** - * Makes sure the cache flush bit of the to probe records is cleared, and - * generates a DNS record tree to insert in the Authority Section of the DNS - * packet - * - * @param records mDNS records 
to probe. - * @return DNS record tree to with actual DNS records to insert in Authority - * Section of probe packet. - * ****************************************************************************/ -static pico_dns_rtree -pico_mdns_gen_probe_auths( pico_mdns_rtree *records ) -{ - PICO_DNS_RTREE_DECLARE(nstree); - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - - pico_tree_foreach(node, records) { - if ((record = node->keyValue) && record->record) { - /* Clear the cache flush bit for authority records in probes */ - PICO_MDNS_CLR_MSB_BE(record->record->rsuffix->rclass); - /* Only the actual DNS records is required */ - if (pico_tree_insert(&nstree, record->record) == &LEAF) { - mdns_dbg("MDNS: Failed to insert record in tree\n"); - break; - } - } - } - - return nstree; -} - -/* **************************************************************************** - * Function to create a probe from an mDNS cookie and send it on the wire. - * - * @param now Ignore - * @param arg Void-pointer to mDNS probe cookie - * ****************************************************************************/ -static void -pico_mdns_send_probe_packet( pico_time now, void *arg ) -{ - struct pico_mdns_cookie *cookie = (struct pico_mdns_cookie *)arg; - pico_dns_packet *packet = NULL; - PICO_DNS_RTREE_DECLARE(nstree); - uint16_t len = 0; - - /* Check params */ - IGNORE_PARAMETER(now); - /* if (!cookie || (cookie->type == PICO_MDNS_COOKIE_STATUS_INACTIVE)) { */ - if (!cookie || (cookie->type != PICO_MDNS_PACKET_TYPE_PROBE)) { - pico_err = PICO_ERR_EINVAL; - return; - } else { - /* Set the cookie to the active state */ - cookie->status = PICO_MDNS_COOKIE_STATUS_ACTIVE; - if (cookie->count > 0) { - --(cookie->count); - - /* Generate authority records */ - nstree = pico_mdns_gen_probe_auths(&(cookie->antree)); - - /* Create an mDNS answer */ - if (!(packet = pico_dns_query_create(&(cookie->qtree), NULL, - &nstree, NULL, &len))) { - PICO_DNS_RTREE_DESTROY(&nstree); - 
mdns_dbg("Could not create probe packet!\n"); - return; - } - - pico_tree_destroy(&nstree, NULL); - - /* RFC6762: 18.1 */ - packet->id = 0; - - /* RFC6762: 18.6: In both multicast query and response messages, - the RD bit SHOULD be zero on transmission. - In pico_dns_fill_packet_header, the RD bit is set to - PICO_DNS_RD_IS_DESIRED, which is defined to be 1 */ - packet->rd = PICO_DNS_RD_NO_DESIRE; - - /* Send the mDNS answer unsolicited via multicast */ - if(pico_mdns_send_packet(packet, len) != (int)len) { - mdns_dbg("Send error occurred!\n"); - return; - } - - PICO_FREE(packet); - - mdns_dbg("DONE - Sent probe!\n"); - - /* Probes should be sent with a delay in between of 250 ms */ - if (PICO_MDNS_COOKIE_STATUS_ACTIVE == cookie->status ) { - cookie->send_timer = pico_mdns_timer_add(250, - pico_mdns_send_probe_packet, - (void *)cookie); - if (!cookie->send_timer) { - mdns_dbg("MDNS: Failed to start send_probe_packet timer\n"); - /* TODO no idea what the consequences of this are */ - return; - } - } - } else { - mdns_dbg("DONE - Probing.\n"); - - pico_mdns_my_records_probed(&(cookie->antree)); - - /* Start announcing */ - cookie->count = PICO_MDNS_ANNOUNCEMENT_COUNT; - cookie->type = PICO_MDNS_PACKET_TYPE_ANNOUNCEMENT; - pico_mdns_send_announcement_packet(0, (void*) cookie); - } - } -} /* Static path count: 10 */ - -/* **************************************************************************** - * Adds a new probe question to the probe cookie questions, if a probe question - * for a new is already present in the question-tree, it will not be generated - * and inserted again - * - * @param qtree Probe question tree - * @param name Name for which the function has to create a probe question - * @return 0 when the probe question is already present or added successfully. 
- * ****************************************************************************/ -static int -pico_mdns_add_probe_question( pico_dns_qtree *qtree, - char *name ) -{ - struct pico_dns_question *new = NULL; - char *url = NULL; - uint16_t qlen = 0; - uint8_t flags = PICO_MDNS_QUESTION_FLAG_PROBE; - -#if PICO_MDNS_PROBE_UNICAST == 1 - flags |= PICO_MDNS_QUESTION_FLAG_UNICAST_RES; -#endif - - /* Convert name to URL and try to create a new probe question */ - if (!(url = pico_dns_qname_to_url(name))) - return -1; - - mdns_dbg("Probe question for URL: %s\n", url); - if (!(new = pico_mdns_question_create(url, &qlen, PICO_PROTO_IPV4, - PICO_DNS_TYPE_ANY, flags, 0))) { - PICO_FREE(url); - return -1; - } - - PICO_FREE(url); - - /* Try to find an existing question in the vector */ - if (pico_tree_insert(qtree, new)) - pico_dns_question_delete((void **)&new); - - return 0; -} - -/* **************************************************************************** - * Find any of my record that need to be probed and try to probe them. - * - * @param callback Callback to call when all records are properly registered - * @return When host successfully started probing. 
- * ****************************************************************************/ -static int pico_mdns_probe( void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - struct pico_mdns_cookie *cookie = NULL; - struct pico_mdns_record *record = NULL; - struct pico_tree_node *node = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - - /* Check params */ - if (!callback) { - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* Find my records that need to pass the probing step first - * All records that don't have their PROBED flag set and - * are not being probed at hte moment are added to the tree - */ - antree = pico_mdns_my_records_find_to_probe(); - - /* Create probe questions for the records to be probed */ - pico_tree_foreach(node, &antree) { - if ((record = node->keyValue)) { - pico_mdns_add_probe_question(&qtree, record->record->rname); - } - } - - /* Create a mDNS packet to send */ - cookie = pico_mdns_cookie_create(qtree, antree, artree, - PICO_MDNS_PROBE_COUNT, - PICO_MDNS_PACKET_TYPE_PROBE, - callback, arg); - if (!cookie) { - mdns_dbg("Cookie_create returned NULL @ probe()!\n"); - PICO_DNS_QTREE_DESTROY(&qtree); - PICO_MDNS_RTREE_DESTROY(&antree); - return -1; - } - - /* Add the probe cookie to the cookie tree */ - if (pico_tree_insert(&Cookies, cookie)) { - pico_mdns_cookie_delete((void **)&cookie); - return -1; - } - - /* RFC6762: 8.1. Probing */ - /* When ready to send its Multicast DNS probe packet(s) the host should */ - /* first wait for a short random delay time, uniformly distributed in */ - /* the range 0-250 ms. 
*/ - cookie->send_timer = pico_mdns_timer_add(pico_rand() % 250, - pico_mdns_send_probe_packet, - (void *)cookie); - if (!cookie->send_timer) { - mdns_dbg("MDNS: Failed to start send_probe_packet timer\n"); - pico_tree_delete(&Cookies, cookie); - pico_mdns_cookie_delete((void**)&cookie); - return -1; - } - - mdns_dbg("DONE - Started probing.\n"); - } - return 0; -} /* Static path count: 9 */ - -/* MARK: API functions */ - -/* **************************************************************************** - * Claim or reclaim all the mDNS records contain in a tree in one single call - * - * @param rtree mDNS record tree with records to claim - * @param reclaim Whether or not the records in tree should be reclaimed. - * @param callback Callback to call when all records are properly registered - * @return 0 When claiming didn't horribly fail. - * ****************************************************************************/ -static int -pico_mdns_claim_generic( pico_mdns_rtree rtree, - uint8_t reclaim, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - /* Check if arguments are passed correctly */ - if (!callback) { - mdns_dbg("NULL pointers passed to 'pico_mdns_claim()'!\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Check if module is initialised */ - if (!mdns_sock_ipv4) { - mdns_dbg("Socket not initialised, did you call 'pico_mdns_init()'?\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* 1.) Appending records to 'my records' */ - pico_mdns_my_records_add(&rtree, reclaim); - - /* 2a.) Try to probe any records */ - pico_mdns_probe(callback, arg); - - /* 2b.) Try to announce any records */ - pico_mdns_announce(callback, arg); - - return 0; -} - -/* **************************************************************************** - * Claim all different mDNS records in a tree in a single API-call. All records - * in tree are called in a single new claim-session. 
- * - * @param rtree mDNS record tree with records to claim - * @param callback Callback to call when all record are properly claimed. - * @return 0 When claiming didn't horribly fail. - * ****************************************************************************/ -int -pico_mdns_claim( pico_mdns_rtree rtree, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - return pico_mdns_claim_generic(rtree, PICO_MDNS_NO_RECLAIM, callback, arg); -} - -/* **************************************************************************** - * Reclaim records when a conflict occurred, claim-session will stay the same - * as the session in which the conflict occurred. - * - * @param rtree mDNS record tree with records to claim - * @param callback Callback to call when all record are properly claimed. - * @return 0 When claiming didn't horribly fail. - * ****************************************************************************/ -static int -pico_mdns_reclaim( pico_mdns_rtree rtree, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - return pico_mdns_claim_generic(rtree, PICO_MDNS_RECLAIM, callback, arg); -} - -/* **************************************************************************** - * Tries to claim a hostname for this machine. Claims automatically a - * unique A record with the IPv4-address of this host. - * The hostname won't be set directly when this functions returns, - * but only if the claiming of the unique record succeeded. - * Init-callback will be called when the hostname-record is successfully - * registered. - * - * @param url URL to set the hostname to. - * @param arg Argument to pass to the init-callback. - * @return 0 when the host started registering the hostname-record successfully, - * Returns something else when it didn't succeeded. 
- * ****************************************************************************/ -int -pico_mdns_tryclaim_hostname( const char *url, void *arg ) -{ - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *record = NULL; - - /* Check if module is initialised */ - if (!mdns_sock_ipv4) { - mdns_dbg("mDNS socket not initialised, did you call 'pico_mdns_init()'?\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* Create an A record for hostname */ - record = pico_mdns_record_create(url, - &(mdns_sock_ipv4->local_addr.ip4.addr), - PICO_SIZE_IP4, PICO_DNS_TYPE_A, - PICO_MDNS_DEFAULT_TTL, - (PICO_MDNS_RECORD_UNIQUE | - PICO_MDNS_RECORD_HOSTNAME)); - if (!record) { - mdns_dbg("Could not create A record for hostname %s!\n", - strerror(pico_err)); - return -1; - } - - /* TODO: Create IPv6 record */ - /* TODO: Create a reverse resolution record */ - - /* Try to claim the record */ - if (pico_tree_insert(&rtree, record)) { - pico_mdns_record_delete((void **)&record); - return -1; - } - - if (pico_mdns_claim(rtree, init_callback, arg)) { - mdns_dbg("Could not claim record for hostname %s!\n", url); - PICO_MDNS_RTREE_DESTROY(&rtree); - return -1; - } - - pico_tree_destroy(&rtree, NULL); - } - return 0; -} /* Static path count: 9 */ - -/* **************************************************************************** - * Get the hostname for this machine. - * - * @return Returns the hostname for this machine when the module is initialised - * Returns NULL when the module is not initialised. 
- * ****************************************************************************/ -const char * -pico_mdns_get_hostname( void ) -{ - /* Check if module is initialised */ - if (!mdns_sock_ipv4) { - mdns_dbg("mDNS socket not initialised, did you call 'pico_mdns_init()'?\n"); - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - return (const char *)_hostname; -} - -static void -pico_mdns_cleanup( void ) -{ - /* Delete socket if it was previously opened */ - if (mdns_sock_ipv4) { - pico_socket_del(mdns_sock_ipv4); - } - - /* Clear out every memory structure used by mDNS */ -#if PICO_MDNS_ALLOW_CACHING == 1 - PICO_MDNS_RTREE_DESTROY(&Cache); -#endif /* PICO_MDNS_ALLOW_CACHING */ - PICO_MDNS_RTREE_DESTROY(&MyRecords); - PICO_MDNS_CTREE_DESTROY(&Cookies); - - /* Cancel every timer */ - pico_timer_cancel_hashed(mdns_hash); -} - -/* **************************************************************************** - * Initialises the entire mDNS-module and sets the hostname for this machine. - * Sets up the global mDNS socket properly and calls callback when succeeded. - * Only when the module is properly initialised records can be registered on - * the module. - * - * @param hostname_url URL to set the hostname to. - * @param address IPv4-address of this host to bind to. - * @param callback Callback to call when the hostname is registered and - * also the global mDNS module callback. Gets called when - * Passive conflicts occur, so changes in records can be - * tracked in this callback. - * @param arg Argument to pass to the init-callback. - * @return 0 when the module is properly initialised and the host started regis- - * tering the hostname. Returns something else went the host failed - * initialising the module or registering the hostname. 
- * ****************************************************************************/ -int -pico_mdns_init( const char *hostname, - struct pico_ip4 address, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ) -{ - struct pico_ip_mreq mreq4; - uint16_t proto4 = PICO_PROTO_IPV4, port = 0, loop = 0, ttl = 255; - - /* Initialise port */ - port = short_be(mdns_port); - - /* Check callback parameter */ - if(!callback || !hostname) { - mdns_dbg("No callback function supplied!\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Clear out all the memory structure's and delete socket if it was - * already opened before */ - pico_mdns_cleanup(); - - /* Create a hash to identify mDNS timers with */ - mdns_hash = pico_hash(hostname, (uint32_t)strlen(hostname)); - - /* Open global IPv4 mDNS socket */ - mdns_sock_ipv4 = pico_socket_open(proto4, PICO_PROTO_UDP, &pico_mdns_event4); - if(!mdns_sock_ipv4) { - mdns_dbg("pico_socket_open returned NULL-ptr...\n"); - return -1; - } - - /* Convert the mDNS IPv4 destination address to struct */ - if(pico_string_to_ipv4(PICO_MDNS_DEST_ADDR4, &mreq4.mcast_group_addr.ip4.addr)) { - mdns_dbg("String to IPv4 error\n"); - return -1; - } - - /* Receive data on any network interface */ - mreq4.mcast_link_addr.ip4 = inaddr_any; - - /* Don't want the multicast data to be looped back to the host */ - if(pico_socket_setoption(mdns_sock_ipv4, PICO_IP_MULTICAST_LOOP, &loop)) { - mdns_dbg("socket_setoption PICO_IP_MULTICAST_LOOP failed\n"); - return -1; - } - - /* Tell the stack we're interested in this particular multicast group */ - if(pico_socket_setoption(mdns_sock_ipv4, PICO_IP_ADD_MEMBERSHIP, &mreq4)) { - mdns_dbg("socket_setoption PICO_IP_ADD_MEMBERSHIP failed\n"); - return -1; - } - - /* RFC6762: - * 11. Source Address Check - * All Multicast DNS responses (including responses sent via unicast) - * SHOULD be sent with IP TTL set to 255. 
- */ - if(pico_socket_setoption(mdns_sock_ipv4, PICO_IP_MULTICAST_TTL, &ttl)) { - mdns_dbg("socket_setoption PICO_IP_MULTICAST_TTL failed\n"); - return -1; - } - - /* Bind to mDNS port */ - if (pico_socket_bind(mdns_sock_ipv4, (void *)&address, &port)) { - mdns_dbg("Bind error!\n"); - return -1; - } - - /* Set the global init callback variable */ - init_callback = callback; - if (!pico_mdns_timer_add(PICO_MDNS_RR_TTL_TICK, pico_mdns_tick, NULL)) { - mdns_dbg("MDNS: Failed to start tick timer\n"); - return -1; - } - - /* Set the hostname eventually */ - return pico_mdns_tryclaim_hostname(hostname, arg); -} - -#endif /* PICO_SUPPORT_MDNS */ diff --git a/kernel/picotcp/modules/pico_mdns.h b/kernel/picotcp/modules/pico_mdns.h deleted file mode 100644 index 54c8ede..0000000 --- a/kernel/picotcp/modules/pico_mdns.h +++ /dev/null @@ -1,206 +0,0 @@ -/* **************************************************************************** - * PicoTCP. Copyright (c) 2014 TASS Belgium NV. Some rights reserved. - * See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - * . - * Author: Toon Stegen, Jelle De Vleeschouwer - * ****************************************************************************/ -#ifndef INCLUDE_PICO_MDNS -#define INCLUDE_PICO_MDNS - -#include "pico_dns_common.h" -#include "pico_tree.h" -#include "pico_ipv4.h" - -/* ********************************* CONFIG ***********************************/ -#define PICO_MDNS_PROBE_UNICAST 1 /* Probe queries as QU-questions */ -#define PICO_MDNS_CONTINUOUS_REFRESH 0 /* Continuously update cache */ -#define PICO_MDNS_ALLOW_CACHING 1 /* Enable caching on this host */ -#define PICO_MDNS_DEFAULT_TTL 120 /* Default TTL of mDNS records */ -#define PICO_MDNS_SERVICE_TTL 120 /* Default TTL of SRV/TXT/PTR/NSEC */ -#define PICO_MDNS_PROBE_COUNT 3 -/* Amount of probes to send: - RFC6762: 8.1. Probing: - 250 ms after the first query, the host should send a second; then, - 250 ms after that, a third. 
If, by 250 ms after the third probe, no - conflicting Multicast DNS responses have been received, the host may - move to the next step, announcing. - */ - -#define PICO_MDNS_ANNOUNCEMENT_COUNT 3 -/* Amount of announcements to send: (we've opted for 1 extra for robustness) - RFC6762: 8.3. Announcing: - The Multicast DNS responder MUST send at least two unsolicited - responses, one second apart. To provide increased robustness against - packet loss, a responder MAY send up to eight unsolicited responses, - provided that the interval between unsolicited responses increases by - at least a factor of two with every response sent. - */ -/* ****************************************************************************/ - -#define PICO_MDNS_DEST_ADDR4 "224.0.0.251" - -/* To make mDNS records unique or shared records */ -#define PICO_MDNS_RECORD_UNIQUE 0x00u -#define PICO_MDNS_RECORD_SHARED 0x01u - -/* To indicate if we reclaim or not */ -#define PICO_MDNS_RECLAIM 1 -#define PICO_MDNS_NO_RECLAIM 0 - -/* Flag to check for when records are returned, to determine the hostname */ -#define PICO_MDNS_RECORD_HOSTNAME 0x02u -#define IS_HOSTNAME_RECORD(x) \ - (((x)->flags) & PICO_MDNS_RECORD_HOSTNAME) ? (1) : (0) - -/* --- MDNS resource record --- */ -struct pico_mdns_record -{ - struct pico_dns_record *record; /* DNS Resource Record */ - uint32_t current_ttl; /* Current TTL */ - uint8_t flags; /* Resource Record flags */ - uint8_t claim_id; /* Claim ID number */ -}; - -/* **************************************************************************** - * Compares 2 mDNS records by type, name AND rdata for a truly unique result - * - * @param ra mDNS record A - * @param rb mDNS record B - * @return 0 when records are equal, returns difference when they're not. 
- * ****************************************************************************/ -int -pico_mdns_record_cmp( void *a, void *b ); - -/* **************************************************************************** - * Deletes a single mDNS resource record. - * - * @param record Void-pointer to mDNS Resource Record. Can be used with pico_- - * tree-destroy. - * @return Returns 0 on success, something else on failure. - * ****************************************************************************/ -int -pico_mdns_record_delete( void **record ); - -/* **************************************************************************** - * Creates a single standalone mDNS resource record with given name, type and - * data to register on the network. - * - * @param url DNS rrecord name in URL format. Will be converted to DNS - * name notation format. - * @param _rdata Memory buffer with data to insert in the resource record. If - * data of record should contain a DNS name, the name in the - * databuffer needs to be in URL-format. - * @param datalen The exact length in bytes of the _rdata-buffer. If data of - * record should contain a DNS name, datalen needs to be - * pico_dns_strlen(_rdata). - * @param rtype DNS type of the resource record to be. - * @param rclass DNS class of the resource record to be. - * @param rttl DNS ttl of the resource record to be. - * @param flags You can specify if the mDNS record should be a shared record - * rather than a unique record. - * @return Pointer to newly created mDNS resource record. 
- * ****************************************************************************/ -struct pico_mdns_record * -pico_mdns_record_create( const char *url, - void *_rdata, - uint16_t datalen, - uint16_t rtype, - uint32_t rttl, - uint8_t flags ); - - - -/* **************************************************************************** - * Definition of DNS record tree - * ****************************************************************************/ -typedef struct pico_tree pico_mdns_rtree; -#define PICO_MDNS_RTREE_DECLARE(name) \ - pico_mdns_rtree (name) = {&LEAF, pico_mdns_record_cmp} -#define PICO_MDNS_RTREE_DESTROY(rtree) \ - pico_tree_destroy((rtree), pico_mdns_record_delete) -#define PICO_MDNS_RTREE_ADD(tree, record) \ - pico_tree_insert((tree), (record)) - -/* **************************************************************************** - * API-call to query a record with a certain URL and type. First checks the - * Cache for this record. If no cache-entry is found, a query will be sent on - * the wire for this record. - * - * @param url URL to query for. - * @param type DNS type top query for. - * @param callback Callback to call when records are found for the query. - * @return 0 when query is correctly parsed, something else on failure. - * ****************************************************************************/ -int -pico_mdns_getrecord( const char *url, uint16_t type, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ); - -/* **************************************************************************** - * Claim all different mDNS records in a tree in a single API-call. All records - * in tree are called in a single new claim-session. - * - * @param rtree mDNS record tree with records to claim - * @param callback Callback to call when all record are properly claimed. - * @return 0 When claiming didn't horribly fail. 
- * ****************************************************************************/ -int -pico_mdns_claim( pico_mdns_rtree record_tree, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ); - -/* **************************************************************************** - * Tries to claim a hostname for this machine. Claims automatically a - * unique A record with the IPv4-address of this host. - * The hostname won't be set directly when this functions returns, - * but only if the claiming of the unique record succeeded. - * Init-callback will be called when the hostname-record is successfully - * registered. - * - * @param url URL to set the hostname to. - * @param arg Argument to pass to the init-callback. - * @return 0 when the host started registering the hostname-record successfully, - * Returns something else when it didn't succeeded. - * ****************************************************************************/ -int -pico_mdns_tryclaim_hostname( const char *url, void *arg ); - -/* **************************************************************************** - * Get the current hostname for this machine. - * - * @return Returns the hostname for this machine when the module is initialised - * Returns NULL when the module is not initialised. - * ****************************************************************************/ -const char * -pico_mdns_get_hostname( void ); - -/* **************************************************************************** - * Initialises the entire mDNS-module and sets the hostname for this machine. - * Sets up the global mDNS socket properly and calls callback when succeeded. - * Only when the module is properly initialised records can be registered on - * the module. - * - * @param hostname URL to set the hostname to. - * @param address IPv4-address of this host to bind to. - * @param callback Callback to call when the hostname is registered and - * also the global mDNS module callback. 
Gets called when - * Passive conflicts occur, so changes in records can be - * tracked in this callback. - * @param arg Argument to pass to the init-callback. - * @return 0 when the module is properly initialised and the host started regis- - * tering the hostname. Returns something else went the host failed - * initialising the module or registering the hostname. - * ****************************************************************************/ -int -pico_mdns_init( const char *hostname, - struct pico_ip4 address, - void (*callback)(pico_mdns_rtree *, - char *, - void *), - void *arg ); - -#endif /* _INCLUDE_PICO_MDNS */ diff --git a/kernel/picotcp/modules/pico_mld.c b/kernel/picotcp/modules/pico_mld.c deleted file mode 100644 index c362acf..0000000 --- a/kernel/picotcp/modules/pico_mld.c +++ /dev/null @@ -1,1165 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - RFC 2710 3019 3590 3810 4604 6636 - - Authors: Roel Postelmans - *********************************************************************/ - -#include "pico_stack.h" -#include "pico_ipv6.h" -#include "pico_mld.h" -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_addressing.h" -#include "pico_frame.h" -#include "pico_tree.h" -#include "pico_device.h" -#include "pico_socket.h" -#include "pico_icmp6.h" -#include "pico_dns_client.h" -#include "pico_mld.h" -#include "pico_constants.h" -#include "pico_mcast.h" - -#if defined(PICO_SUPPORT_MLD) && defined(PICO_SUPPORT_IPV6) && defined(PICO_SUPPORT_MCAST) - -#ifdef DEBUG_MLD -#define mld_dbg dbg -#else -#define mld_dbg(...) 
do {} while(0) -#endif - -/* MLD groups */ -#define MLD_ALL_HOST_GROUP "FF01:0:0:0:0:0:0:1" -#define MLD_ALL_ROUTER_GROUP "FF01:0:0:0:0:0:0:2" -#define MLDV2_ALL_ROUTER_GROUP "FF02:0:0:0:0:0:0:16" -#define MLD_ROUTER_ALERT_LEN (8) - -static uint8_t pico_mld_flag = 0; - -PACKED_STRUCT_DEF mld_message { - uint8_t type; - uint8_t code; - uint16_t crc; - uint16_t max_resp_delay; - uint16_t reserved; - struct pico_ip6 mcast_group; -}; -PACKED_STRUCT_DEF mldv2_group_record { - uint8_t type; - uint8_t aux; - uint16_t nbr_src; - struct pico_ip6 mcast_group; - struct pico_ip6 src[1]; -}; -PACKED_STRUCT_DEF mldv2_report { - uint8_t type; - uint8_t res; - uint16_t crc; - uint16_t res1; - uint16_t nbr_gr; - struct mldv2_group_record record[1]; -}; -PACKED_STRUCT_DEF mldv2_query { - uint8_t type; - uint8_t code; - uint16_t crc; - uint16_t max_resp_delay; - uint16_t res; - struct pico_ip6 mcast_group; - uint8_t rsq; - uint8_t qqic; - uint16_t nbr_src; - struct pico_ip6 source_addr[1]; -}; -typedef int (*mld_callback)(struct mcast_parameters *); -static int pico_mld_process_event(struct mcast_parameters *p); -static struct mcast_parameters *pico_mld_find_parameter(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group); - -static uint8_t *pico_mld_fill_hopbyhop(struct pico_ipv6_hbhoption *hbh) -{ - uint8_t *p; - if(hbh == NULL) - return NULL; - - hbh->type = PICO_PROTO_ICMP6; - hbh->len = 0; - /* ROUTER ALERT, RFC2711 */ - p = (uint8_t *)hbh + sizeof(struct pico_ipv6_hbhoption); - *(p++) = PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT; - *(p++) = PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT_DATALEN; - *(p++) = 0; - *(p++) = 0; - /* PadN allignment with N=2 */ - *(p++) = 1; - *(p++) = 0; /* N-2 */ - return p; -} -static int pico_mld_check_hopbyhop(struct pico_ipv6_hbhoption *hbh) -{ - uint8_t options[8] = { - PICO_PROTO_ICMP6, 0, PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT, \ - PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT_DATALEN, 0, 0, 1, 0 - }; - int i; - uint8_t *p; - if(hbh == NULL) - return -1; - - if(hbh->type != 
options[0] || hbh->len != options[1]) - return -1; - - p = (uint8_t *)hbh + sizeof(struct pico_ipv6_hbhoption); - for(i = 0; i < MLD_ROUTER_ALERT_LEN - 2; i++) { - if( *(p + i) != options[i + 2]) - return -1; - } - return 0; -} -static inline int mldt_type_compare(struct mld_timer *a, struct mld_timer *b) -{ - if (a->type < b->type) - return -1; - - if (a->type > b->type) - return 1; - - return 0; -} -static inline int mldt_group_compare(struct mld_timer *a, struct mld_timer *b) -{ - return pico_ipv6_compare(&a->mcast_group, &b->mcast_group); -} - -static inline int mldt_link_compare(struct mld_timer *a, struct mld_timer *b) -{ - return pico_ipv6_compare(&a->mcast_link, &b->mcast_link); -} -static int mld_timer_cmp(void *ka, void *kb) -{ - struct mld_timer *a = ka, *b = kb; - int cmp = mldt_type_compare(a, b); - if (cmp) - return cmp; - - cmp = mldt_group_compare(a, b); - if (cmp) - return cmp; - - return mldt_link_compare(a, b); -} -static void pico_mld_report_expired(struct mld_timer *t) -{ - struct mcast_parameters *p = NULL; - - p = pico_mld_find_parameter(&t->mcast_link, &t->mcast_group); - if (!p) - return; - - p->event = MLD_EVENT_TIMER_EXPIRED; - pico_mld_process_event(p); -} -static PICO_TREE_DECLARE(MLDTimers, mld_timer_cmp); -static void pico_mld_v1querier_expired(struct mld_timer *t) -{ - struct pico_ipv6_link *link = NULL; - struct pico_tree_node *index = NULL, *_tmp = NULL; - - link = pico_ipv6_link_by_dev(t->f->dev); - if (!link) - return; - - /* When changing compatibility mode, cancel all pending response - * and retransmission timers. 
- */ - pico_tree_foreach_safe(index, &MLDTimers, _tmp) - { - ((struct mld_timer *)index->keyValue)->stopped = MLD_TIMER_STOPPED; - pico_tree_delete(&MLDTimers, index->keyValue); - } - mld_dbg("MLD: switch to compatibility mode MLDv2\n"); - link->mcast_compatibility = PICO_MLDV2; - return; -} - - -static inline int mldparm_group_compare(struct mcast_parameters *a, struct mcast_parameters *b) -{ - return pico_ipv6_compare(&a->mcast_group.ip6, &b->mcast_group.ip6); -} -static inline int mldparm_link_compare(struct mcast_parameters *a, struct mcast_parameters *b) -{ - return pico_ipv6_compare(&a->mcast_link.ip6, &b->mcast_link.ip6); -} -static int mcast_parameters_cmp(void *ka, void *kb) -{ - struct mcast_parameters *a = ka, *b = kb; - int cmp = mldparm_group_compare(a, b); - if (cmp) - return cmp; - - return mldparm_link_compare(a, b); -} - -static PICO_TREE_DECLARE(MLDParameters, mcast_parameters_cmp); - -static int pico_mld_delete_parameter(struct mcast_parameters *p) -{ - if (pico_tree_delete(&MLDParameters, p)) - PICO_FREE(p); - else - return -1; - - return 0; -} -static void pico_mld_timer_expired(pico_time now, void *arg) -{ - struct mld_timer *t = NULL, *timer = NULL, test = { - 0 - }; -#ifdef DEBUG_MLD - char ipstr[PICO_IPV6_STRING] = { - 0 - }, grpstr[PICO_IPV6_STRING] = { - 0 - }; -#endif - - IGNORE_PARAMETER(now); - t = (struct mld_timer *)arg; - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; -#ifdef DEBUG_MLD - pico_ipv6_to_string(ipstr, t->mcast_link.addr); - pico_ipv6_to_string(grpstr, t->mcast_group.addr); - mld_dbg("MLD: timer expired for %s link %s type %u, delay %lu\n", grpstr, ipstr, t->type, (uint64_t) t->delay); -#endif - timer = pico_tree_findKey(&MLDTimers, &test); - if (!timer) { - return; - } - - if (timer->stopped == MLD_TIMER_STOPPED) { - pico_tree_delete(&MLDTimers, timer); - PICO_FREE(t); - return; - } - - if (timer->start + timer->delay < PICO_TIME_MS()) { - pico_tree_delete(&MLDTimers, 
timer); - if (timer->mld_callback) - timer->mld_callback(timer); - - PICO_FREE(timer); - } else { -#ifdef DEBUG_MLD - mld_dbg("MLD: restart timer for %s, delay %lu, new delay %lu\n", grpstr, t->delay, (timer->start + timer->delay) - PICO_TIME_MS()); -#endif - if (!pico_timer_add((timer->start + timer->delay) - PICO_TIME_MS(), &pico_mld_timer_expired, timer)) { - mld_dbg("MLD: Failed to start expiration timer\n"); - pico_tree_delete(&MLDTimers, timer); - PICO_FREE(timer); - } - } - - return; -} - -static int pico_mld_timer_reset(struct mld_timer *t) -{ - struct mld_timer *timer = NULL, test = { - 0 - }; -#ifdef DEBUG_MLD - char grpstr[PICO_IPV6_STRING] = { - 0 - }; - pico_ipv6_to_string(grpstr, t->mcast_group.addr); - mld_dbg("MLD: reset timer for %s, delay %lu\n", grpstr, t->delay); -#endif - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = pico_tree_findKey(&MLDTimers, &test); - if (!timer) - return -1; - - *timer = *t; - timer->start = PICO_TIME_MS(); - return 0; -} - -static int pico_mld_timer_start(struct mld_timer *t) -{ - struct mld_timer *timer = NULL, test = { - 0 - }; -#ifdef DEBUG_MLD - char ipstr[PICO_IPV6_STRING] = { - 0 - }, grpstr[PICO_IPV6_STRING] = { - 0 - }; - pico_ipv6_to_string(ipstr, t->mcast_link.addr); - pico_ipv6_to_string(grpstr, t->mcast_group.addr); - mld_dbg("MLD: start timer for %s link %s type %u, delay %lu\n", grpstr, ipstr, t->type, t->delay); -#endif - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = pico_tree_findKey(&MLDTimers, &test); - if (timer) - return pico_mld_timer_reset(t); - - timer = PICO_ZALLOC(sizeof(struct mld_timer)); - if (!timer) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - *timer = *t; - timer->start = PICO_TIME_MS(); - if (pico_tree_insert(&MLDTimers, timer)) { - mld_dbg("MLD: Failed to insert timer into tree\n"); - return -1; - } - - if (!pico_timer_add(timer->delay, &pico_mld_timer_expired, timer)) 
{ - mld_dbg("MLD: Failed to start expiration timer\n"); - pico_tree_delete(&MLDTimers, timer); - PICO_FREE(timer); - return -1; - } - return 0; -} - -static int pico_mld_timer_stop(struct mld_timer *t) -{ - struct mld_timer *timer = NULL, test = { - 0 - }; -#ifdef DEBUG_MLD - char grpstr[PICO_IPV6_STRING] = { - 0 - }; -#endif - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = pico_tree_findKey(&MLDTimers, &test); - if (!timer) - return -1; - -#ifdef DEBUG_MLD - pico_ipv6_to_string(grpstr, timer->mcast_group.addr); - mld_dbg("MLD: stop timer for %s, delay %lu\n", grpstr, timer->delay); -#endif - timer->stopped = MLD_TIMER_STOPPED; - return 0; -} - -static int pico_mld_timer_is_running(struct mld_timer *t) -{ - struct mld_timer *timer = NULL, test = { - 0 - }; - - test.type = t->type; - test.mcast_link = t->mcast_link; - test.mcast_group = t->mcast_group; - timer = pico_tree_findKey(&MLDTimers, &test); - if (timer) - return 1; - - return 0; -} - -static struct mld_timer *pico_mld_find_timer(uint8_t type, struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group) -{ - struct mld_timer test = { - 0 - }; - - test.type = type; - test.mcast_link = *mcast_link; - test.mcast_group = *mcast_group; - return pico_tree_findKey(&MLDTimers, &test); -} - -static int mld_sources_cmp(void *ka, void *kb) -{ - struct pico_ip6 *a = ka, *b = kb; - return pico_ipv6_compare(a, b); -} - -static PICO_TREE_DECLARE(MLDAllow, mld_sources_cmp); -static PICO_TREE_DECLARE(MLDBlock, mld_sources_cmp); - -static struct mcast_parameters *pico_mld_find_parameter(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group) -{ - struct mcast_parameters test = { - 0 - }; - if (!mcast_link || !mcast_group) - return NULL; - - test.mcast_link.ip6 = *mcast_link; - test.mcast_group.ip6 = *mcast_group; - return pico_tree_findKey(&MLDParameters, &test); -} -static int pico_mld_is_checksum_valid(struct pico_frame *f) -{ - if( pico_icmp6_checksum(f) == 0) - 
return 1; - - mld_dbg("ICMP6 (MLD) : invalid checksum\n"); - return 0; -} -uint16_t pico_mld_checksum(struct pico_frame *f) -{ - struct pico_ipv6_pseudo_hdr pseudo; - struct pico_ipv6_hdr *ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - struct mldv2_report *icmp6_hdr = (struct mldv2_report *)(f->transport_hdr + MLD_ROUTER_ALERT_LEN); - uint16_t len = (uint16_t) (f->transport_len - MLD_ROUTER_ALERT_LEN); - - pseudo.src = ipv6_hdr->src; - pseudo.dst = ipv6_hdr->dst; - pseudo.len = long_be(len); - pseudo.nxthdr = PICO_PROTO_ICMP6; - - pseudo.zero[0] = 0; - pseudo.zero[1] = 0; - pseudo.zero[2] = 0; - return pico_dualbuffer_checksum(&pseudo, sizeof(struct pico_ipv6_pseudo_hdr), icmp6_hdr, len); -} -/* RFC 3810 $8 */ -static int pico_mld_compatibility_mode(struct pico_frame *f) -{ - struct pico_ipv6_link *link = NULL; - struct mld_timer t = { - 0 - }; - uint16_t datalen; - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_icmp6_hdr *hdr = (struct pico_icmp6_hdr *) (f->transport_hdr + MLD_ROUTER_ALERT_LEN); - struct mcast_parameters *p = NULL; - struct pico_ip6 mcast_group = {{ - 0 - }}; - struct mld_message *mld_report = (struct mld_message *) hdr; - - link = pico_ipv6_link_by_dev(f->dev); - if (!link) - return -1; - - datalen = (uint16_t)(f->buffer_len - PICO_SIZE_IP6HDR - MLD_ROUTER_ALERT_LEN); - if (f->dev->eth) { - datalen = (uint16_t)(datalen - PICO_SIZE_ETHHDR); - } - - if( datalen >= 28) { - /* MLDv2 */ - t.type = MLD_TIMER_V2_QUERIER; - if (pico_mld_timer_is_running(&t)) { /* MLDv1 querier present timer still running */ - mld_dbg("Timer is already running\n"); - return -1; - } else { - link->mcast_compatibility = PICO_MLDV2; - mld_dbg("MLD Compatibility: v2\n"); - return 0; - } - } else if( datalen == 24) { - pico_tree_foreach_safe(index, &MLDTimers, _tmp) - { - ((struct mld_timer *)index->keyValue)->stopped = MLD_TIMER_STOPPED; - pico_tree_delete(&MLDTimers, index->keyValue); - } - mld_dbg("MLD: switch to compatibility mode MLDv1\n"); - 
link->mcast_compatibility = PICO_MLDV1; - - /* Reset states to prevent deadlock */ - mcast_group = mld_report->mcast_group; - p = pico_mld_find_parameter(&link->address, &mcast_group); - if(p) { - p->state = MLD_STATE_NON_LISTENER; - p->event = MLD_EVENT_START_LISTENING; - } - - t.type = MLD_TIMER_V1_QUERIER; - t.delay = (pico_time) ((MLD_ROBUSTNESS * link->mcast_last_query_interval) + MLD_QUERY_RESPONSE_INTERVAL) * 1000; - t.f = f; - t.mld_callback = pico_mld_v1querier_expired; - if (pico_mld_timer_start(&t) < 0) - return -1; - } else { - /* invalid query, silently ignored */ - return -1; - } - - return 0; -} - -int pico_mld_state_change(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state) -{ - struct mcast_parameters *p = NULL; - struct pico_ip6 ipv6; - - pico_string_to_ipv6(MLD_ALL_HOST_GROUP, &ipv6.addr[0]); - - if (!memcmp(&mcast_group->addr, &ipv6, sizeof(struct pico_ip6))) - return 0; - - p = pico_mld_find_parameter(mcast_link, mcast_group); - if (!p && state == PICO_MLD_STATE_CREATE) { - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - if (!p) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if (!mcast_link || !mcast_group) { - pico_err = PICO_ERR_EINVAL; - PICO_FREE(p); - return -1; - } - - p->state = MLD_STATE_NON_LISTENER; - p->mcast_link.ip6 = *mcast_link; - p->mcast_group.ip6 = *mcast_group; - if(pico_tree_insert(&MLDParameters, p)){ - PICO_FREE(p); - return -1; - } - } else if (!p) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - switch (state) { - case PICO_MLD_STATE_CREATE: - p->event = MLD_EVENT_START_LISTENING; - break; - - case PICO_MLD_STATE_UPDATE: - p->event = MLD_EVENT_UPDATE_GROUP; - break; - - case PICO_MLD_STATE_DELETE: - p->event = MLD_EVENT_STOP_LISTENING; - break; - default: - return -1; - } - p->filter_mode = filter_mode; - p->MCASTFilter = _MCASTFilter; - return pico_mld_process_event(p); -} -/* finite state machine caller */ -static int 
pico_mld_process_event(struct mcast_parameters *p); - -static struct mcast_parameters *pico_mld_analyse_packet(struct pico_frame *f) -{ - struct pico_icmp6_hdr *hdr = (struct pico_icmp6_hdr *) (f->transport_hdr + MLD_ROUTER_ALERT_LEN); - struct pico_ipv6_hdr *ipv6_hdr = (struct pico_ipv6_hdr *) f->net_hdr; - struct pico_ipv6_link *link = NULL; - struct mcast_parameters *p = NULL; - struct pico_ip6 mcast_group = {{ - 0 - }}; - struct mld_message *mld_report = (struct mld_message *) hdr; - struct pico_ipv6_exthdr *hbh; - - link = pico_ipv6_link_by_dev(f->dev); - if(!link) - return NULL; - - mcast_group = mld_report->mcast_group; - /* Package check */ - if(ipv6_hdr->hop != MLD_HOP_LIMIT) { - mld_dbg("MLD: Hop limit > 1, ignoring frame\n"); - return NULL; - } - - hbh = (struct pico_ipv6_exthdr *) (f->transport_hdr); - if(pico_mld_check_hopbyhop((struct pico_ipv6_hbhoption *)hbh) < 0) { - mld_dbg("MLD: Router Alert option is not set\n"); - return NULL; - } - - if(!pico_ipv6_is_linklocal(ipv6_hdr->src.addr) || pico_ipv6_is_unspecified(ipv6_hdr->src.addr)) { - mld_dbg("MLD Source is invalid link-local address\n"); - return NULL; - } - - /* end package check */ - p = pico_mld_find_parameter(&link->address, &mcast_group); - if(!p) { - mld_dbg("Alloc-ing MLD parameters\n"); - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - if(!p) - return NULL; - - p->state = MLD_STATE_NON_LISTENER; - p->mcast_link.ip6 = link->address; - if (pico_tree_insert(&MLDParameters, p)) { - PICO_FREE(p); - return NULL; - } - } - - mld_dbg("Analyse package, type = %d\n", hdr->type); - switch(hdr->type) { - case PICO_MLD_QUERY: - p->max_resp_time = mld_report->max_resp_delay; - p->event = MLD_EVENT_QUERY_RECV; - break; - case PICO_MLD_REPORT: - p->event = MLD_EVENT_REPORT_RECV; - break; - case PICO_MLD_DONE: - p->event = MLD_EVENT_DONE_RECV; - break; - case PICO_MLD_REPORTV2: - p->event = MLD_EVENT_REPORT_RECV; - break; - default: - return NULL; - } - p->f = f; - return p; -} -int 
pico_mld_process_in(struct pico_frame *f) -{ - struct mcast_parameters *p = NULL; - - if (!pico_mld_is_checksum_valid(f)) - goto out; - - if (pico_mld_compatibility_mode(f) < 0) - goto out; - - if((p = pico_mld_analyse_packet(f)) == NULL) - goto out; - - return pico_mld_process_event(p); -out: - mld_dbg("FRAME DISCARD\n"); - pico_frame_discard(f); - return 0; -} - - - -static int8_t pico_mld_send_done(struct mcast_parameters *p, struct pico_frame *f) -{ - struct mld_message *report = NULL; - uint8_t report_type = PICO_MLD_DONE; - struct pico_device *dev = NULL; - struct pico_ipv6_exthdr *hbh; - struct pico_ip6 dst = {{ 0 }}; -#ifdef DEBUG_MLD - char ipstr[PICO_IPV6_STRING] = { - 0 - }, grpstr[PICO_IPV6_STRING] = { - 0 - }; -#endif - IGNORE_PARAMETER(f); - pico_string_to_ipv6(MLD_ALL_ROUTER_GROUP, &dst.addr[0]); - dev = pico_ipv6_link_find(&p->mcast_link.ip6); - p->f = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, sizeof(struct mld_message) + MLD_ROUTER_ALERT_LEN); - /* p->f->len is correctly set by alloc */ - hbh = (struct pico_ipv6_exthdr *)(p->f->transport_hdr); - report = (struct mld_message *)(pico_mld_fill_hopbyhop((struct pico_ipv6_hbhoption*)hbh)); - if(!report) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - report->type = report_type; - report->max_resp_delay = 0; - report->mcast_group = p->mcast_group.ip6; - - report->crc = 0; - /* Checksum done in ipv6 module, no need to do it twice */ - /* report->crc = short_be(pico_icmp6_checksum(p->f)); */ -#ifdef DEBUG_MLD - pico_ipv6_to_string(ipstr, dst.addr); - pico_ipv6_to_string(grpstr, report->mcast_group.addr); - mld_dbg("MLD: send membership done on group %s to %s\n", grpstr, ipstr); -#endif - pico_ipv6_frame_push(p->f, NULL, &dst, 0, 0); - return 0; -} - -static int pico_mld_send_report(struct mcast_parameters *p, struct pico_frame *f) -{ - struct pico_ip6 dst = {{ - 0 - }}; - struct pico_ip6 mcast_group = {{ - 0 - }}; -#ifdef DEBUG_MLD - char ipstr[PICO_IPV6_STRING] = { - 0 - }, 
grpstr[PICO_IPV6_STRING] = { - 0 - }; -#endif - struct pico_ipv6_link *link = NULL; - link = pico_ipv6_link_get(&p->mcast_link.ip6); - if (!link) - return -1; - - mcast_group = p->mcast_group.ip6; - switch (link->mcast_compatibility) { - case PICO_MLDV1: - if (p->event == MLD_EVENT_STOP_LISTENING) - pico_string_to_ipv6(MLD_ALL_ROUTER_GROUP, &dst.addr[0]); - else - dst = mcast_group; - - break; - case PICO_MLDV2: - pico_string_to_ipv6(MLDV2_ALL_ROUTER_GROUP, &dst.addr[0]); - break; - default: - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - } -#ifdef DEBUG_MLD - pico_ipv6_to_string(ipstr, dst.addr); - pico_ipv6_to_string(grpstr, mcast_group.addr); - mld_dbg("MLD: send membership report on group %s to %s\n", grpstr, ipstr); -#endif - pico_ipv6_frame_push(f, NULL, &dst, 0, 0); - return 0; -} -static int8_t pico_mldv2_generate_report(struct mcast_filter_parameters *filter, struct mcast_parameters *p) -{ - struct mldv2_report *report = NULL; - struct mldv2_group_record *record = NULL; - struct pico_tree_node *index = NULL; - struct pico_ipv6_hbhoption *hbh; - struct pico_device *dev = NULL; - uint16_t len = 0; - uint16_t i = 0; - /* RFC3810 $5.1.10 */ - if(filter->sources > MLD_MAX_SOURCES) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - len = (uint16_t)(sizeof(struct mldv2_report) + sizeof(struct mldv2_group_record) \ - + (filter->sources * sizeof(struct pico_ip6)) + MLD_ROUTER_ALERT_LEN); - len = (uint16_t)(len - sizeof(struct pico_ip6)); - dev = pico_ipv6_link_find(&p->mcast_link.ip6); - p->f = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, len); - /* p->f->len is correctly set by alloc */ - - hbh = (struct pico_ipv6_hbhoption *) p->f->transport_hdr; - report = (struct mldv2_report *)(pico_mld_fill_hopbyhop(hbh)); - report->type = PICO_MLD_REPORTV2; - report->res = 0; - report->crc = 0; - report->res1 = 0; - report->nbr_gr = short_be(1); - - record = &report->record[0]; - record->type = filter->record_type; - record->aux = 0; - record->nbr_src = 
short_be(filter->sources); - record->mcast_group = p->mcast_group.ip6; - if (filter->filter && !pico_tree_empty(filter->filter)) { - i = 0; - pico_tree_foreach(index, filter->filter) - { - record->src[i] = (*(struct pico_ip6 *)index->keyValue); - i++; - } - } - - if(i != filter->sources) { - return -1; - } - - /* Checksum done in ipv6 module, no need to do it twice */ - /* report->crc= short_be(pico_mld_checksum(p->f)); */ - return 0; -} -static int8_t pico_mldv2_generate_filter(struct mcast_filter_parameters *filter, struct mcast_parameters *p) -{ - struct pico_mcast_group *g = NULL, test = { - 0 - }; - struct pico_tree *MLDFilter = NULL; - struct pico_ipv6_link *link = (struct pico_ipv6_link*) filter->link; - filter->p = (struct mcast_parameters *)p; - filter->allow = &MLDAllow; - filter->block = &MLDBlock; - filter->filter = MLDFilter; - filter->sources = 0; - filter->proto = PICO_MLDV2; - test.mcast_addr = p->mcast_group; - g = pico_tree_findKey(link->MCASTGroups, &test); - if (!g) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - filter->g = (struct pico_mcast_group *)g; - return pico_mcast_generate_filter(filter, p); - -} -static int8_t pico_mldv1_generate_report(struct mcast_parameters *p) -{ - struct mld_message *report = NULL; - uint8_t report_type = PICO_MLD_REPORT; - struct pico_ipv6_exthdr *hbh; - struct pico_device *dev = pico_ipv6_link_find(&p->mcast_link.ip6); - p->f = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, sizeof(struct mld_message) + MLD_ROUTER_ALERT_LEN ); - /* p->f->len is correctly set by alloc */ - - hbh = (struct pico_ipv6_exthdr *)(p->f->transport_hdr); - report = (struct mld_message *)(pico_mld_fill_hopbyhop((struct pico_ipv6_hbhoption *)hbh)); - report->type = report_type; - report->max_resp_delay = MLD_DEFAULT_MAX_RESPONSE_TIME; - report->mcast_group = p->mcast_group.ip6; - - report->crc = 0; - /* Checksum done in ipv6 module, no need to do it twice */ - /* report->crc = short_be(pico_icmp6_checksum(p->f)); */ - return 0; -} 
-static int8_t pico_mld_generate_report(struct mcast_parameters *p) -{ - struct mcast_filter_parameters filter; - int8_t result; - filter.link = (union pico_link *)pico_ipv6_link_get(&p->mcast_link.ip6); - if( !filter.link || !pico_ipv6_is_multicast(p->mcast_group.ip6.addr)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - switch (filter.link->ipv6.mcast_compatibility) { - - case PICO_MLDV1: { - return pico_mldv1_generate_report(p); - } - case PICO_MLDV2: { - result = pico_mldv2_generate_filter(&filter, p); - if(result < 0) - return -1; - - if(result != MCAST_NO_REPORT) - return pico_mldv2_generate_report(&filter, p); - } - break; - default: - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; -} -/* stop timer, send done if flag set */ -static int mld_stsdifs(struct mcast_parameters *p) -{ - struct mld_timer t = { - 0 - }; - struct pico_ipv6_link *link = NULL; - struct pico_frame *copy_frame = NULL; - link = pico_ipv6_link_get(&p->mcast_link.ip6); - if (!link) - return -1; - - mld_dbg("MLD: event = stop listening | action = stop timer, send done if flag set\n"); - - t.type = MLD_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip6; - t.mcast_group = p->mcast_group.ip6; - if (pico_mld_timer_stop(&t) < 0) - return -1; - - switch(link->mcast_compatibility) { - case PICO_MLDV2: - if (pico_mld_generate_report(p) < 0) { - return -1; - } - - copy_frame = pico_frame_copy(p->f); - if (!copy_frame) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if (pico_mld_send_report(p, copy_frame) < 0) { - return -1; - } - - break; - case PICO_MLDV1: - /* Send done if flag is set */ - if (pico_mld_flag && pico_mld_send_done(p, p->f) < 0) - return -1; - - break; - } - - pico_mld_delete_parameter(p); - mld_dbg("MLD: new state = Non-Listener\n"); - return 0; -} -/* send report, set flag, start timer */ -static int mld_srsfst(struct mcast_parameters *p) -{ - struct mld_timer t = { - 0 - }; - struct pico_frame *copy_frame = NULL; - mld_dbg("MLD: event = start listening | 
action = send report, set flag, start timer\n"); - - p->last_host = MLD_HOST_LAST; - if (pico_mld_generate_report(p) < 0) - return -1; - - if (!p->f) - return 0; - - copy_frame = pico_frame_copy(p->f); - if (!copy_frame) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if (pico_mld_send_report(p, copy_frame) < 0) - return -1; - - t.type = MLD_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip6; - t.mcast_group = p->mcast_group.ip6; - - t.delay = (pico_rand() % (MLD_UNSOLICITED_REPORT_INTERVAL * 10000)); - t.f = p->f; - t.mld_callback = pico_mld_report_expired; - - if (pico_mld_timer_start(&t) < 0) - return -1; - - pico_mld_flag = 1; - p->state = MLD_STATE_DELAYING_LISTENER; - mld_dbg("MLD: new state = Delaying Listener\n"); - return 0; -} - -/* stop timer, clear flag */ -static int mld_stcl(struct mcast_parameters *p) -{ - struct mld_timer t = { - 0 - }; - - mld_dbg("MLD: event = report received | action = stop timer, clear flag\n"); - - t.type = MLD_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip6; - t.mcast_group = p->mcast_group.ip6; - if (pico_mld_timer_stop(&t) < 0) - return -1; - - pico_mld_flag = 0; - p->last_host = MLD_HOST_NOT_LAST; - p->state = MLD_STATE_IDLE_LISTENER; - mld_dbg("MLD: new state = Idle Listener\n"); - return 0; -} -/* send report, set flag */ -static int mld_srsf(struct mcast_parameters *p) -{ - mld_dbg("MLD: event = timer expired | action = send report, set flag\n"); - - if (pico_mld_send_report(p, p->f) < 0) - return -1; - - pico_mld_flag = 1; - p->state = MLD_STATE_IDLE_LISTENER; - mld_dbg("MLD: new state = Idle Listener\n"); - return 0; -} -/* reset timer if max response time < current timer */ -static int mld_rtimrtct(struct mcast_parameters *p) -{ - struct mld_timer *t = NULL; - uint32_t current_timer = 0; - - mld_dbg("MLD: event = query received | action = reset timer if max response time < current timer\n"); - - t = pico_mld_find_timer(MLD_TIMER_GROUP_REPORT, &p->mcast_link.ip6, &p->mcast_group.ip6); - if (!t) - return 
-1; - - current_timer = (uint32_t)(t->start + t->delay - PICO_TIME_MS()); - if ((p->max_resp_time * 100u) < current_timer) { /* max_resp_time in units of 1/10 seconds */ - t->delay = pico_rand() % ((1u + p->max_resp_time) * 100u); - pico_mld_timer_reset(t); - } - - p->state = MLD_STATE_DELAYING_LISTENER; - mld_dbg("MLD: new state = Delaying Listener\n"); - return 0; -} -/* merge report, send report, reset timer (MLDv2 only) */ -static int mld_mrsrrt(struct mcast_parameters *p) -{ - struct mld_timer *t = NULL; - struct pico_frame *copy_frame = NULL; - struct pico_ipv6_link *link = NULL; - mld_dbg("MLD: event = update group | action = merge report, send report, reset timer (MLDv2 only)\n"); - - link = pico_ipv6_link_get(&p->mcast_link.ip6); - if (!link) - return -1; - - if (link->mcast_compatibility != PICO_MLDV2) { - mld_dbg("MLD: no MLDv3 compatible router on network\n"); - return -1; - } - - /* XXX: merge with pending report rfc 3376 $5.1 */ - - copy_frame = pico_frame_copy(p->f); - if (!copy_frame) - return -1; - - if (pico_mld_send_report(p, copy_frame) < 0) - return -1; - - t = pico_mld_find_timer(MLD_TIMER_GROUP_REPORT, &p->mcast_link.ip6, &p->mcast_group.ip6); - if (!t) - return -1; - - t->delay = (pico_rand() % (MLD_UNSOLICITED_REPORT_INTERVAL * 10000)); - pico_mld_timer_reset(t); - - p->state = MLD_STATE_DELAYING_LISTENER; - mld_dbg("MLD: new state = delaying member\n"); - return 0; -} - -/* send report, start timer (MLDv2 only) */ -static int mld_srst(struct mcast_parameters *p) -{ - struct mld_timer t = { - 0 - }; - struct pico_frame *copy_frame = NULL; - struct pico_ipv6_link *link = NULL; - - mld_dbg("MLD: event = update group | action = send report, start timer (MLDv2 only)\n"); - - link = pico_ipv6_link_get(&p->mcast_link.ip6); - if (!link) - return -1; - - if (link->mcast_compatibility != PICO_MLDV2) { - mld_dbg("MLD: no MLDv2 compatible router on network\n"); - return -1; - } - - if (pico_mld_generate_report(p) < 0) - return -1; - - if (!p->f) - 
return 0; - - copy_frame = pico_frame_copy(p->f); - if (!copy_frame) - return -1; - - if (pico_mld_send_report(p, copy_frame) < 0) - return -1; - - t.type = MLD_TIMER_GROUP_REPORT; - t.mcast_link = p->mcast_link.ip6; - t.mcast_group = p->mcast_group.ip6; - t.delay = (pico_rand() % (MLD_UNSOLICITED_REPORT_INTERVAL * 10000)); - t.f = p->f; - t.mld_callback = pico_mld_report_expired; - - if (pico_mld_timer_start(&t) < 0) - return -1; - - p->state = MLD_STATE_DELAYING_LISTENER; - mld_dbg("MLD: new state = delaying member\n"); - return 0; -} -static int mld_discard(struct mcast_parameters *p) -{ - mld_dbg("MLD: ignore and mld_discard frame\n"); - /* the frame will be discared bij the ipv6 module!!! */ - IGNORE_PARAMETER(p); - return 0; -} - -/* finite state machine table */ -static const mld_callback mld_state_diagram[3][6] = -{ /* event | Stop Listening | Start Listening | Update Group |Query reveive |Report receive |Timer expired */ -/* none listener*/ - { mld_discard, mld_srsfst, mld_srsfst, mld_discard, mld_discard, mld_discard}, -/* idle listener */ { mld_stsdifs, mld_mrsrrt, mld_mrsrrt, mld_rtimrtct, mld_stcl, mld_srsf }, -/* delaying listener */ { mld_rtimrtct, mld_srst, mld_srst, mld_srsf, mld_stsdifs, mld_discard } -}; - -static int pico_mld_process_event(struct mcast_parameters *p) -{ - struct pico_tree_node *index = NULL; - struct mcast_parameters *_p; -#ifdef DEBUG_MLD - char ipv6[PICO_IPV6_STRING]; - pico_ipv6_to_string(ipv6, p->mcast_group.ip6.addr); - mld_dbg("MLD: process event on group address %s\n", ipv6); -#endif - if (p->event == MLD_EVENT_QUERY_RECV && p->general_query) { /* general query */ - pico_tree_foreach(index, &MLDParameters) { - _p = index->keyValue; - _p->max_resp_time = p->max_resp_time; - _p->event = MLD_EVENT_QUERY_RECV; -#ifdef DEBUG_MLD - mld_dbg("MLD: for each mcast_group = %s | state = %u\n", ipv6, _p->state); -#endif - return mld_state_diagram[_p->state][_p->event](_p); - } - } else { - mld_dbg("MLD: state = %u (0: non-listener - 
1: delaying listener - 2: idle listener) event = %u\n", p->state, p->event); - return mld_state_diagram[p->state][p->event](p); - } - - return 0; -} -#else -uint16_t pico_mld_checksum(struct pico_frame *f) -{ - IGNORE_PARAMETER(f); - return 0; -} -int pico_mld_process_in(struct pico_frame *f) -{ - IGNORE_PARAMETER(f); - return -1; -} - -int pico_mld_state_change(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state) -{ - IGNORE_PARAMETER(mcast_link); - IGNORE_PARAMETER(mcast_group); - IGNORE_PARAMETER(filter_mode); - IGNORE_PARAMETER(_MCASTFilter); - IGNORE_PARAMETER(state); - return -1; -} -#endif diff --git a/kernel/picotcp/modules/pico_mld.h b/kernel/picotcp/modules/pico_mld.h deleted file mode 100644 index 4318162..0000000 --- a/kernel/picotcp/modules/pico_mld.h +++ /dev/null @@ -1,119 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012 TASS Belgium NV. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - Authors: Roel Postelmans - *********************************************************************/ - -#ifndef INCLUDE_PICO_MLD -#define INCLUDE_PICO_MLD - -#define PICO_MLDV1 1 -#define PICO_MLDV2 2 - -#define PICO_MLD_QUERY 130 -#define PICO_MLD_REPORT 131 -#define PICO_MLD_DONE 132 -#define PICO_MLD_REPORTV2 143 - -/*RFC 3810 $6.2 */ -#define MLD_HOP_LIMIT 1 - -/* states */ -#define MLD_STATE_NON_LISTENER (0x0) -#define MLD_STATE_DELAYING_LISTENER (0x1) -#define MLD_STATE_IDLE_LISTENER (0x2) - -#define PICO_MLD_STATE_CREATE 1 -#define PICO_MLD_STATE_UPDATE 2 -#define PICO_MLD_STATE_DELETE 3 -/* group record types */ -#define MLD_MODE_IS_INCLUDE (1) -#define MLD_MODE_IS_EXCLUDE (2) -#define MLD_CHANGE_TO_INCLUDE_MODE (3) -#define MLD_CHANGE_TO_EXCLUDE_MODE (4) -#define MLD_ALLOW_NEW_SOURCES (5) -#define MLD_BLOCK_OLD_SOURCES (6) -/* events */ - -#define MLD_EVENT_START_LISTENING (0x1) -#define MLD_EVENT_STOP_LISTENING (0x0) -#define MLD_EVENT_QUERY_RECV (0x3) -#define MLD_EVENT_REPORT_RECV (0x4) -#define MLD_EVENT_TIMER_EXPIRED (0x5) -/*Not needed?*/ -#define MLD_EVENT_DONE_RECV (0x1) - -#define MLD_EVENT_DELETE_GROUP (0x0) -#define MLD_EVENT_CREATE_GROUP (0x1) -#define MLD_EVENT_UPDATE_GROUP (0x2) -#define MLD_EVENT_QUERY_RECV (0x3) -#define MLD_EVENT_REPORT_RECV (0x4) -#define MLD_EVENT_TIMER_EXPIRED (0x5) -/* (default) Variabels for times/counters */ -/* ALL IN SECONDS */ -#define MLD_ROBUSTNESS (2) -#define MLD_QUERY_INTERVAL (125) -#define MLD_QUERY_RESPONSE_INTERVAL (10) -#define MLD_DEFAULT_MAX_RESPONSE_TIME (100) -#define MLD_MULTICAST_LISTENER_INTERVAL (MLD_ROBUSTNESS * MLD_QUERY_INTERVAL) + MLD_QUERY_RESPONSE_INTERVAL -#define MLD_OTHER_QUERIER_PRESENT_INTERVAL (MLD_ROBUSTNESS * MLD_QUERY_INTERVAL) + (0.5 * MLD_QUERY_RESPONSE_INTERVAL) -#define MLD_STARTUP_QUERY_INTERVAL (0.25 * MLD_QUERY_INTERVAL) -#define MLD_STARTUP_QUERY_COUNT MLD_ROBUSTNESS -#define MLD_LAST_LISTENER_QUERY_INTERVAL 1 -#define MLD_LISTENER_QUERY_COUNT MLD_ROBUSTNESS -#define 
MLD_UNSOLICITED_REPORT_INTERVAL 10 - -/* custom timers types */ -#define MLD_TIMER_GROUP_REPORT (1) -#define MLD_TIMER_V1_QUERIER (2) -#define MLD_TIMER_V2_QUERIER (2) - - -/* Who has send the last report message */ -#define MLD_HOST_LAST (0x1) -#define MLD_HOST_NOT_LAST (0x0) - - -#define MLD_TIMER_STOPPED (1) -#define MLD_MAX_SOURCES (89) -extern struct pico_protocol pico_proto_mld; - -struct mld_multicast_address_record { - uint8_t type; - uint8_t aux_len; - uint16_t nbr_src; - struct pico_ip6 multicast; - struct pico_ip6 src[1]; -}; - -struct mld_parameters { - uint8_t event; - uint8_t state; - uint8_t general_query; - uint8_t filter_mode; - uint8_t last_host; - uint16_t max_resp_time; - struct pico_ip6 mcast_link; - struct pico_ip6 mcast_group; - struct pico_tree *MCASTFilter; - struct pico_frame *f; -}; - -struct mld_timer { - uint8_t type; - uint8_t stopped; - pico_time start; - pico_time delay; - struct pico_ip6 mcast_link; - struct pico_ip6 mcast_group; - struct pico_frame *f; - void (*mld_callback)(struct mld_timer *t); -}; - -uint16_t pico_mld_checksum(struct pico_frame *f); -int pico_mld_process_in(struct pico_frame *f); -int pico_mld_state_change(struct pico_ip6 *mcast_link, struct pico_ip6 *mcast_group, uint8_t filter_mode, struct pico_tree *_MCASTFilter, uint8_t state); -#endif /* _INCLUDE_PICO_MLD */ diff --git a/kernel/picotcp/modules/pico_mm.c b/kernel/picotcp/modules/pico_mm.c deleted file mode 100644 index f2553c1..0000000 --- a/kernel/picotcp/modules/pico_mm.c +++ /dev/null @@ -1,1615 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Gustav Janssens, Jonas Van Nieuwenberg, Sam Van Den Berge - *********************************************************************/ - -#include "pico_config.h" -#include "pico_mm.h" -#include "pico_tree.h" -#include "pico_config.h" -#include "pico_protocol.h" /* For pico_err */ - -#ifdef DEBUG_MM -#define DBG_MM(x, args ...) dbg("[%s:%s:%i] "x" \n",__FILE__,__func__,__LINE__ ,##args ) -#define DBG_MM_RED(x, args ...) dbg("\033[31m[%s:%s:%i] "x" \033[0m\n",__FILE__,__func__,__LINE__ ,##args ) -#define DBG_MM_GREEN(x, args ...) dbg("\033[32m[%s:%s:%i] "x" \033[0m\n",__FILE__,__func__,__LINE__ ,##args ) -#define DBG_MM_YELLOW(x, args ...) dbg("\033[33m[%s:%s:%i] "x" \033[0m\n",__FILE__,__func__,__LINE__ ,##args ) -#define DBG_MM_BLUE(x, args ...) dbg("\033[34m[%s:%s:%i] "x" \033[0m\n",__FILE__,__func__,__LINE__ ,##args ) -#else -#define DBG_MM(x, args ...) do {} while(0) -#define DBG_MM_RED(x, args ...) do {} while(0) -#define DBG_MM_GREEN(x, args ...) do {} while(0) -#define DBG_MM_YELLOW(x, args ...) do {} while(0) -#define DBG_MM_BLUE(x, args ...) do {} while(0) -#endif - -/* The memory manager also uses the pico_tree to keep track of all the different slab sizes it has. - * These nodes should be placed in the manager page which is in a different memory region then the nodes - * which are used for the pico stack in general. - * Therefore the following 2 functions are created so that pico_tree can use them to to put these nodes - * into the correct memory regions. - */ -void*pico_mem_page0_zalloc(size_t len); -void pico_mem_page0_free(void*ptr); - - -/* this is a wrapper function for pico_tree_insert. 
The function pointers that are used by pico_tree - * to zalloc/free are modified so that pico_tree will insert the node in another memory region - */ -static void *manager_tree_insert(struct pico_tree*tree, void *key) -{ - return (void*) pico_tree_insert_implementation(tree, key, USE_PICO_PAGE0_ZALLOC); -} - -/* this is a wrapper function for pico_tree_insert. The function pointers that are used by pico_tree - * to zalloc/free are modified so that pico_tree will insert the node in another memory region - */ -static void *manager_tree_delete(struct pico_tree *tree, void *key) -{ - return (void *) pico_tree_delete_implementation(tree, key, USE_PICO_PAGE0_ZALLOC); -} - - -static const uint32_t slab_sizes[] = { - 1200, 1400, 1600 -}; /* Sizes must be from small to big */ -static uint32_t slab_size_statistics[] = { - 0, 0, 0 -}; -static uint32_t slab_size_global = PICO_MEM_DEFAULT_SLAB_SIZE; -/* - typedef struct pico_mem_manager pico_mem_manager; - typedef struct pico_mem_manager_extra pico_mem_manager_extra; - typedef struct pico_mem_page pico_mem_page; - typedef struct pico_mem_heap pico_mem_heap; - typedef struct pico_mem_slab pico_mem_slab; - typedef struct pico_mem_heap_block pico_mem_heap_block; - typedef struct pico_mem_slab_block pico_mem_slab_block; - typedef struct pico_mem_slab_node pico_mem_slab_node; - typedef struct pico_mem_block pico_mem_block; - typedef struct pico_tree pico_tree; - typedef struct pico_tree_node pico_tree_node; - typedef union block_internals block_internals; - */ -#define HEAP_BLOCK_NOT_FREE 0xCAFED001 -#define HEAP_BLOCK_FREE 0xCAFED00E - -#define SLAB_BLOCK_TYPE 0 -#define HEAP_BLOCK_TYPE 1 -/* - * page - * <----------------------------------------------------------------------> - * - * - * +------------<------------+----------<-----------+ - * | ^ ^ - * v | | - * +---------+------------+--+----+---------------+-+-----+---------------+ - * | | | | | | | - * | pico_ | | pico_ | | pico_ | | - * | mem_ | ...HEAP... 
| mem_ | slab | mem_ | slab | - * | page | | block | | block | | - * | | | | | | | - * +---------+------------+-------+-----+---------+-------+----------+----+ - * ^ | ^ | - * +-------+ | | | - * | | +-+ | - * +------|-----+ | | - * | | +-----|--------------------+ - * v | v | - * +---------+-----+-------+------+-+-----+-----+--+ - * | | | | | | | - * | pico_ | | pico_ | pico_ | | pico_ | - * | mem_ | ... | tree_ | mem_ | ... | mem_ | - * | manager | | node | slab_ | | slab_ | - * | | | | node | | node | - * +---------+-----+-+-----+-----+--+-----+-----+--+ - * | ^ | ^ | - * | | +---->---+ | - * +-->--+----<-----------<---+ - * - * <-----------------------------------------------> - * manager page - * - * - * +----------------+ - * | | - * | pico_tree_node +-------------------------------------+ - * | (size x) | | - * +--+----------+--+ | - * | | +---------v----------+ - * | | | | - * v v +----> pico_mem_slab_node +----+ - * | | | | | | - * +----<----+ +---->----+ | +--------------------+ | - * | | | | - * | | ^ v - * | | | | - * | | | +--------------------+ | - * +------v---------+ +---------v------+ +----+ <----+ - * | | | | | pico_mem_slab_node | - * | pico_tree_node | | pico_tree_node | +----> +----+ - * | (size x/2) | | (size 2x) | | +--------------------+ | - * +----------------+ +----------------+ | | - * | | - * | | - * ^ v - * ... ... 
- * - */ - -/* Housekeeping memory manager (start of page 0) */ -struct pico_mem_manager -{ - uint32_t size; /* Maximum size in bytes */ - uint32_t used_size; /* Used size in bytes */ - struct pico_tree tree; - struct pico_mem_page*first_page; - struct pico_mem_manager_extra*manager_extra; /* this is a pointer to a page with extra heap space used by the manager */ -}; -/* Housekeeping additionnal memory manager heap pages */ -struct pico_mem_manager_extra -{ - struct pico_mem_manager_extra*next; - uint32_t timestamp; - uint32_t blocks; -}; -/* Housekeeping of every page (start of all pages except the manager pages) */ -struct pico_mem_page -{ - uint32_t slab_size; - uint16_t slabs_max; - uint16_t slabs_free; - uint32_t heap_max_size; - uint32_t heap_max_free_space; - uint32_t timestamp; - struct pico_mem_page*next_page; -}; -/* Housekeeping struct for a heap block (kept per block of memory in heap) */ -struct pico_mem_heap_block -{ - uint32_t size; - /* uint8_t free; */ - uint32_t free; -}; -/* Housekeeping struct for a slab block (kept per block of memory in slabs) */ -struct pico_mem_slab_block -{ - struct pico_mem_page*page; - struct pico_mem_slab_node*slab_node; -}; -union block_internals -{ - struct pico_mem_heap_block heap_block; - struct pico_mem_slab_block slab_block; -}; -struct pico_mem_block -{ - union block_internals internals; /* Union has to be in first place!!! */ - uint8_t type; -}; -/* Used to store the slab objects in the RB-tree */ -struct pico_mem_slab_node -{ - struct pico_mem_block*slab; - struct pico_mem_slab_node*prev; - struct pico_mem_slab_node*next; -}; - -static struct pico_mem_manager*manager = NULL; - -/* - * This compare function will be called by pico_tree.c to compare 2 keyValues (type: struct pico_mem_slab_nodes) - * We want to compare slab_nodes by their size. 
We also want to be able to directly compare an integer, which explains - * the casts from void* to uint32_t*** - */ -static int compare_slab_keys(void*keyA, void*keyB) -{ - /* keyValues are pico_mem_slab_nodes */ - /* We want to compare the sizes */ - /* first element of pico_mem_slab_node: pico_mem_block* slab_block */ - /* first element of pico_mem_block: (slab_block in union): pico_mem_page* page */ - /* first element of pico_mem_page: uint32_t slab_size */ - uint32_t sizeKeyA = ***(uint32_t***) keyA; - uint32_t sizeKeyB = ***(uint32_t***) keyB; - DBG_MM_BLUE("Compare called: sizeA = %i, sizeB = %i", sizeKeyA, sizeKeyB); - if(sizeKeyA == sizeKeyB) - { - return 0; - } - else if(sizeKeyA < sizeKeyB) - { - return 1; - } - else - { - return -1; - } -} - -/* - * Pico_mem_init_page is called to initialize a block of memory pointed to by pico_mem_page* page. - * Slabs of size slabsize are created, and the page, heap and slab housekeeping is initialized. - */ -static void _pico_mem_init_page(struct pico_mem_page*page, size_t slabsize) -{ - uint8_t*byteptr = (uint8_t*) page; - struct pico_mem_block*slab_block; - struct pico_mem_block*heap_block; - struct pico_tree_node*tree_node; - struct pico_mem_slab_node*slab_node; - void*temp; - uint16_t i; - - DBG_MM_YELLOW("Initializing page %p with slabsize %u", page, slabsize); - - page->next_page = manager->first_page; - manager->first_page = page; - page->slab_size = (uint32_t)slabsize; - page->slabs_max = (uint16_t)((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block)) / (slabsize + sizeof(struct pico_mem_block))); - page->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page->slabs_max * (sizeof(struct pico_mem_block) + slabsize))); - if(page->heap_max_size < PICO_MIN_HEAP_SIZE) - { - DBG_MM_BLUE("Not enough heap size available with slabsize %u, allocating one slab to heap.", slabsize); - page->slabs_max--; - /* DBG_MM_BLUE("Heap size 
%u -> %lu",page->heap_max_size, page->heap_max_size + sizeof(pico_mem_slab_block) + slabsize); */ - DBG_MM_BLUE("Heap size %u -> %lu", page->heap_max_size, page->heap_max_size + sizeof(struct pico_mem_block) + slabsize); - page->heap_max_size += (uint32_t)(sizeof(struct pico_mem_block) + slabsize); - } - - page->slabs_free = page->slabs_max; - page->heap_max_free_space = page->heap_max_size; - page->timestamp = 0; - DBG_MM_BLUE("max slab objects = %i, object_size = %i", page->slabs_max, page->slab_size); - DBG_MM_BLUE("Heap size: %i", page->heap_max_size); - byteptr += sizeof(struct pico_mem_page); /* jump over page struct so byteptr points to start of heap */ - - /* Init HEAP at the beginning of the page */ - heap_block = (struct pico_mem_block*) byteptr; - heap_block->type = HEAP_BLOCK_TYPE; - heap_block->internals.heap_block.free = HEAP_BLOCK_FREE; - heap_block->internals.heap_block.size = page->heap_max_free_space; - - byteptr += sizeof(struct pico_mem_block) + heap_block->internals.heap_block.size; - for(i = 0; i < page->slabs_max; i++) - { - slab_block = (struct pico_mem_block*) byteptr; - DBG_MM_BLUE("Slab object %i at %p. 
Start of object data at %p", i, slab_block, (uint8_t*) slab_block + sizeof(struct pico_mem_slab_block)); - slab_block->type = SLAB_BLOCK_TYPE; - slab_block->internals.slab_block.page = page; - - DBG_MM_BLUE("Calling find_node with size %u", **((uint32_t**) slab_block)); - tree_node = pico_tree_findNode(&(manager->tree), &slab_block); - - DBG_MM("Creating slab_node.."); - slab_node = pico_mem_page0_zalloc(sizeof(struct pico_mem_slab_node)); - if(slab_node == NULL) - { - DBG_MM_RED("No more space in the manager heap for the housekeeping of slab %i, and no more space for extra manager pages!", i + 1); - DBG_MM_RED("Debug info:\nUsed size: %u/%u\nmanager_extra = %p", manager->used_size, manager->size, manager->manager_extra); - DBG_MM_RED("This page will be initialized with %u slabs instead of %u slabs", i, page->slabs_max); - page->slabs_max = i; - page->slabs_free = page->slabs_max; - return; - /* exit(1); */ - } - - slab_node->slab = slab_block; - slab_node->prev = NULL; - slab_node->next = NULL; - - slab_block->internals.slab_block.slab_node = slab_node; - - if(tree_node != NULL) - { - struct pico_mem_slab_node*first_node = (struct pico_mem_slab_node*) tree_node->keyValue; - tree_node->keyValue = slab_node; - slab_node->next = first_node; - first_node->prev = slab_node; - } - else - { - /* Insert new slab_node */ - DBG_MM_BLUE("Inserting new slab node in the tree of size %u", slabsize); - /* pico_err_t pico_err_backup = pico_err; */ - /* pico_err = 0; */ - - - /* temp = pico_tree_insert(&manager->tree, slab_node); */ - temp = manager_tree_insert(&manager->tree, slab_node); - - /* IF SLAB_NODE COULDN'T BE INSERTED */ - /* if(pico_err == PICO_ERR_ENOMEM) */ - /* if(temp == &LEAF) */ - if(temp != NULL) - { - DBG_MM_RED("No more space in the manager heap for the housekeeping of slab %i, and no more space for extra manager pages!", i + 1); - DBG_MM_RED("This page will be initialized without slabs."); - pico_mem_page0_free(slab_node); - page->slabs_max = (uint16_t) i; - 
page->slabs_free = page->slabs_max; - /* pico_err = pico_err_backup; */ - return; - } - } - - /* byteptr = (uint8_t*) (slab_block+1); */ - byteptr = (uint8_t*) slab_block; - byteptr += sizeof(struct pico_mem_block); - byteptr += page->slab_size; - } - DBG_MM_GREEN("Initialized page %p with slabsize %u", page, slabsize); -} - -/* - * Initializes the memory by creating a memory manager page and one page with default slab size - * A maximum space of memsize can be occupied by the memory manager at any time - */ -void pico_mem_init(uint32_t memsize) -{ - struct pico_mem_block*first_block; - struct pico_mem_page*page; - uint8_t*startofmanagerheap; - - DBG_MM_YELLOW("Initializing memory with memsize %u", memsize); - if(memsize < PICO_MEM_PAGE_SIZE * 2) - { - /* Not enough memory was provided to initialize a manager page and a data page, return without initializing memory */ - /* Set pico_err to an appropriate value */ - pico_err = PICO_ERR_ENOMEM; - DBG_MM_RED("The memsize provided is too small, memory not initialized!"); - - return; - } - - /* First pico_mem_page is already included in pico_mem_manager. Others are added. */ - /* manager = pico_azalloc(sizeof(pico_mem_manager) + sizeof(pico_mem_page*)*(pages - 1)); //Points to usermanager if one present */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - if( NULL != manager ) - { - manager->size = memsize; - manager->used_size = PICO_MEM_PAGE_SIZE; - manager->first_page = NULL; - manager->manager_extra = NULL; - - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - DBG_MM_BLUE("Manager page is at %p", manager); - - DBG_MM_BLUE("Start of tree: %p, sizeof(pico_tree): %lu", &manager->tree, sizeof(struct pico_tree)); - DBG_MM_BLUE("Root node of tree at %p", manager->tree.root); - - /* Init manager heap. 
Used to store the RB-tree nodes which store pointers to free slab objects */ - startofmanagerheap = (uint8_t*) manager + sizeof(struct pico_mem_manager); /* manager heap is after struct pico_mem_manager */ - DBG_MM_BLUE("Start of manager heap = %p", startofmanagerheap); - first_block = (struct pico_mem_block*) startofmanagerheap; - first_block->type = HEAP_BLOCK_TYPE; - first_block->internals.heap_block.free = HEAP_BLOCK_FREE; - first_block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block); - - /* Initialize the first page only! */ - page = pico_zalloc(PICO_MEM_PAGE_SIZE); - if(page != NULL) - { - manager->used_size += PICO_MEM_PAGE_SIZE; - DBG_MM_BLUE("Page 1 at %p, manager used size = %u", page, manager->used_size); - _pico_mem_init_page(page, PICO_MEM_DEFAULT_SLAB_SIZE); - } - else - { - /* Not enough memory was provided to initialize a manager page and a data page, return without initializing memory */ - /* Set pico_err to an appropriate value */ - pico_err = PICO_ERR_ENOMEM; - /* Free the manager page */ - pico_free(manager); - manager = NULL; - DBG_MM_RED("Not enough space to allocate page 1, memory not initialized!"); - return; - } - - DBG_MM_GREEN("Memory initialized. Returning from pico_mem_init."); - } - else - { - /* Not enough memory was provided to initialize a manager page and a data page, return without initializing memory */ - /* Set pico_err to an appropriate value */ - pico_err = PICO_ERR_ENOMEM; - DBG_MM_RED("Not enough space to allocate manager page, memory not initialized!"); - return; - } -} - -/* - * Deinitializes the memory manager, returning all its memory to the system's control. 
- */ -void pico_mem_deinit() -{ - struct pico_mem_page*next_page; - struct pico_mem_manager_extra*next_manager_page; - - DBG_MM_YELLOW("Pico_mem_deinit called"); - if(manager == NULL) - { - DBG_MM_GREEN("No memory instance initialized, returning"); - } - else - { - while(manager->first_page != NULL) - { - next_page = manager->first_page->next_page; - pico_free(manager->first_page); - manager->first_page = next_page; - } - while(manager->manager_extra != NULL) - { - next_manager_page = manager->manager_extra->next; - pico_free(manager->manager_extra); - manager->manager_extra = next_manager_page; - } - DBG_MM_BLUE("Freeing manager page at %p", manager); - pico_free(manager); - manager = NULL; - slab_size_global = PICO_MEM_DEFAULT_SLAB_SIZE; - DBG_MM_GREEN("Memory manager reset"); - } -} - -/* - * This function is called internally by page0_zalloc if there isn't enough space left in the heap of the initial memory page - * This function allocates heap space in extra manager pages, creating new pages as necessary. 
- */ -static void*_pico_mem_manager_extra_alloc(struct pico_mem_manager_extra*heap_page, size_t len) -{ - struct pico_mem_manager_extra*extra_heap_page; - struct pico_mem_block*heap_block; - struct pico_mem_block*first_block; - struct pico_mem_block*new_block; - uint8_t*startOfData; - uint8_t*byteptr; - uint32_t sizeleft; - - DBG_MM_YELLOW("Searching for a block of len %u in extra manager page %p (%u blocks in use)", len, heap_page, heap_page->blocks); - /* Linearly search for a free heap block */ - - /* heap_block = (pico_mem_block*) (heap_page+1); */ - byteptr = (uint8_t*) heap_page + sizeof(struct pico_mem_manager_extra); - heap_block = (struct pico_mem_block*) byteptr; - - sizeleft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager_extra); - - while(heap_block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE || heap_block->internals.heap_block.size < len) - { - sizeleft -= (uint32_t)sizeof(struct pico_mem_block); - sizeleft -= heap_block->internals.heap_block.size; - /* DBG_MM("Sizeleft=%i", sizeleft); */ - /* byteptr = (uint8_t*) (heap_block+1); */ - byteptr = (uint8_t*) heap_block + sizeof(struct pico_mem_block); - byteptr += heap_block->internals.heap_block.size; - heap_block = (struct pico_mem_block*) byteptr; - if(sizeleft <= sizeof(struct pico_mem_block)) - { - DBG_MM_RED("No more heap space left in the extra manager heap page!"); - if(heap_page->next == NULL) - { - /* TODO: Probably need another function for this */ - DBG_MM_RED("Trying to allocate a new page for extra heap space: space usage %uB/%uB", manager->used_size, manager->size); - if(manager->used_size + PICO_MEM_PAGE_SIZE > manager->size) - { - DBG_MM_RED("No more space left for this page!"); - /* exit(1); */ - return NULL; - } - - extra_heap_page = pico_zalloc(PICO_MEM_PAGE_SIZE); - if(extra_heap_page != NULL) - { - extra_heap_page->blocks = 0; - extra_heap_page->next = NULL; - extra_heap_page->timestamp = 0; - byteptr = (uint8_t*) extra_heap_page + sizeof(struct pico_mem_manager_extra); - 
first_block = (struct pico_mem_block*) byteptr; - first_block->type = HEAP_BLOCK_TYPE; - first_block->internals.heap_block.free = HEAP_BLOCK_FREE; - first_block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager_extra) - sizeof(struct pico_mem_block); - extra_heap_page->next = heap_page; - manager->manager_extra = extra_heap_page; - manager->used_size += PICO_MEM_PAGE_SIZE; - DBG_MM_BLUE("Allocated an extra manager heap page at %p, manager space usage: %uB/%uB", extra_heap_page, manager->used_size, manager->size); - return _pico_mem_manager_extra_alloc(extra_heap_page, len); - } - else - { - /* This should be a dirty crash */ - DBG_MM_RED("Page not allocated even though the max size for the memory manager hasn't been reached yet!"); - /* exit(1); */ - return NULL; - } - } - else - { - DBG_MM_RED("This should never happen: debug information:"); - DBG_MM_RED("manager->manager_extra = %p", manager->manager_extra); - DBG_MM_RED("heap_page = %p", heap_page); - DBG_MM_RED("heap_page->next = %p", heap_page->next); - /* exit(1); */ - return NULL; - } - } - } - heap_page->blocks++; - heap_page->timestamp = 0; - DBG_MM_BLUE("Found free heap block in extra manager page %p at: %p (%u blocks in use)", heap_page, heap_block, heap_page->blocks); - heap_block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - - if(heap_block->internals.heap_block.size == sizeleft - sizeof(struct pico_mem_block)) - { - DBG_MM_BLUE("End of heap, splitting up into a new block"); - heap_block->internals.heap_block.size = (uint32_t)len; - sizeleft = (uint32_t)(sizeleft - (uint32_t)sizeof(struct pico_mem_block) - len); - if(sizeleft > sizeof(struct pico_mem_block)) - { - sizeleft -= (uint32_t)sizeof(struct pico_mem_block); - byteptr = (uint8_t*) heap_block + sizeof(struct pico_mem_block); - byteptr += len; - new_block = (struct pico_mem_block*) byteptr; - new_block->type = HEAP_BLOCK_TYPE; - new_block->internals.heap_block.free = HEAP_BLOCK_FREE; - 
new_block->internals.heap_block.size = sizeleft; - DBG_MM_BLUE("New block: %p, size = %u", new_block, new_block->internals.heap_block.size); - } - else - { - DBG_MM_RED("No more space in extra manager heap page left to initialize a new heap block!"); - DBG_MM_RED("A new page will be allocated when even more space is needed"); - } - } - - startOfData = (uint8_t*) heap_block + sizeof(struct pico_mem_block); - DBG_MM_GREEN("Start of data = %p", startOfData); - - return startOfData; -} - -/* - * Page0 zalloc is called by pico_tree.c so that nodes which contain pointers to the free slab objects are put in the - * manager page. Additional manager pages can be created if necessary. - */ -void*pico_mem_page0_zalloc(size_t len) -{ - struct pico_mem_manager_extra*heap_page; - struct pico_mem_block*heap_block; - struct pico_mem_block*first_block; - struct pico_mem_block*new_block; - uint8_t*startOfData; - uint8_t*byteptr; - uint32_t sizeleft; - - DBG_MM_YELLOW("pico_mem_page0_zalloc(%u) called", len); - - byteptr = (uint8_t*) manager + sizeof(struct pico_mem_manager); - heap_block = (struct pico_mem_block*) byteptr; - - /* If heap_block == NULL then a free block at the end of the list is found. */ - /* Else, if the block is free and the size > len, an available block is also found. 
*/ - sizeleft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager); - /* this would mean that heap_block is never NULL */ - /* while(heap_block != NULL && ( heap_block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE || heap_block->internals.heap_block.size < len)) */ - while(heap_block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE || heap_block->internals.heap_block.size < len) - { - sizeleft -= (uint32_t)sizeof(struct pico_mem_block); - sizeleft -= heap_block->internals.heap_block.size; - /* DBG_MM("Sizeleft=%i", sizeleft); */ - byteptr = (uint8_t*) heap_block + sizeof(struct pico_mem_block); /* byteptr points to start of heap block data */ - byteptr += heap_block->internals.heap_block.size; /* jump over that data to start of next heap_block */ - heap_block = (struct pico_mem_block*) byteptr; - if(sizeleft <= sizeof(struct pico_mem_block)) - { - DBG_MM_RED("No more heap space left in the manager page!"); - if(manager->manager_extra == NULL) - { - DBG_MM_RED("Trying to allocate a new page for extra heap space: space usage: %uB/%uB", manager->used_size, manager->size); - if(manager->used_size + PICO_MEM_PAGE_SIZE > manager->size) - { - DBG_MM_RED("No more space left for this page!"); - /* exit(1); */ - return NULL; - } - - heap_page = pico_zalloc(PICO_MEM_PAGE_SIZE); - if(heap_page != NULL) - { - /* Initialize the new heap page */ - heap_page->blocks = 0; - heap_page->next = NULL; - heap_page->timestamp = 0; - byteptr = (uint8_t*) heap_page + sizeof(struct pico_mem_manager_extra); - first_block = (struct pico_mem_block*) byteptr; - first_block->type = HEAP_BLOCK_TYPE; - first_block->internals.heap_block.free = HEAP_BLOCK_FREE; - first_block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager_extra) - sizeof(struct pico_mem_block); - manager->manager_extra = heap_page; - manager->used_size += PICO_MEM_PAGE_SIZE; - DBG_MM_BLUE("Allocated an extra manager heap page at %p, manager space usage: %uB/%uB", heap_page, manager->used_size, 
manager->size); - return _pico_mem_manager_extra_alloc(heap_page, len); - } - else - { - /* This should be a dirty crash */ - DBG_MM_RED("Page not allocated even though the max size for the memory manager hasn't been reached yet!"); - /* exit(1); */ - return NULL; - } - } - else - { - return _pico_mem_manager_extra_alloc(manager->manager_extra, len); - } - } - } - DBG_MM_BLUE("Found free heap block in manager page at : %p", heap_block); - heap_block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - - if(heap_block->internals.heap_block.size == sizeleft - sizeof(struct pico_mem_block)) - { - sizeleft = (uint32_t)(sizeleft - (uint32_t)sizeof(struct pico_mem_block) - len); - if(sizeleft > sizeof(struct pico_mem_block)) - { - DBG_MM_BLUE("End of heap, splitting up into a new block"); - heap_block->internals.heap_block.size = (uint32_t)len; - sizeleft -= (uint32_t)sizeof(struct pico_mem_block); - byteptr = (uint8_t*) heap_block + sizeof(struct pico_mem_block); - byteptr += len; - new_block = (struct pico_mem_block*) byteptr; - new_block->internals.heap_block.free = HEAP_BLOCK_FREE; - new_block->internals.heap_block.size = sizeleft; - DBG_MM_BLUE("New block: %p, size = %u", new_block, new_block->internals.heap_block.size); - } - else - { - /* DBG_MM_RED("ERROR! No more space in manager heap left to initialise a new heap_block!"); */ - /* exit(1); */ - DBG_MM_RED("No more space in manager heap left to initialize a new heap block!"); - DBG_MM_RED("A new page will be allocated when more space is needed"); - } - } - - startOfData = (uint8_t*) heap_block + sizeof(struct pico_mem_block); - DBG_MM_GREEN("Start of data = %p", startOfData); - - return startOfData; -} - - -/* - * This method will free a given heap block and try to merge it with - * surrounding blocks if they are free. 
- */ -static void _pico_mem_free_and_merge_heap_block(struct pico_mem_page*page, struct pico_mem_block*mem_block) -{ - uint8_t*byteptr; - /* pico_mem_block* prev = NULL; */ - struct pico_mem_block*prev; - struct pico_mem_block*curr; - struct pico_mem_block*next; - - DBG_MM_YELLOW("Freeing heap block %p with size %u in page %p", mem_block, mem_block->internals.heap_block.size, page); - - mem_block->internals.heap_block.free = HEAP_BLOCK_FREE; - - byteptr = (uint8_t*) page + sizeof(struct pico_mem_page); - curr = (struct pico_mem_block*) byteptr; - byteptr = (uint8_t*) curr + sizeof(struct pico_mem_block); - byteptr += curr->internals.heap_block.size; - next = (struct pico_mem_block*) byteptr; - - while(curr->type == HEAP_BLOCK_TYPE && next->type == HEAP_BLOCK_TYPE) - { - DBG_MM("Checking heap block (%s) with size %u at %p", (curr->internals.heap_block.free == HEAP_BLOCK_FREE) ? "free" : "not free", curr->internals.heap_block.size, curr); - if(curr->internals.heap_block.free == HEAP_BLOCK_FREE && next->internals.heap_block.free == HEAP_BLOCK_FREE) - { - DBG_MM_BLUE("Merging blocks with sizes %u and %u", curr->internals.heap_block.size, next->internals.heap_block.size); - curr->internals.heap_block.size += (uint32_t)sizeof(struct pico_mem_block) + next->internals.heap_block.size; - } - - prev = curr; - byteptr = (uint8_t*) curr + sizeof(struct pico_mem_block); - byteptr += curr->internals.heap_block.size; - curr = (struct pico_mem_block*) byteptr; - byteptr = (uint8_t*) curr + sizeof(struct pico_mem_block); - byteptr += curr->internals.heap_block.size; - next = (struct pico_mem_block*) byteptr; - } - DBG_MM("Checking heap block (%s) with size %u at %p", (curr->internals.heap_block.free == HEAP_BLOCK_FREE) ? 
"free" : "not free", curr->internals.heap_block.size, curr); - if(curr->type == HEAP_BLOCK_TYPE && prev->internals.heap_block.free == HEAP_BLOCK_FREE && curr->internals.heap_block.free == HEAP_BLOCK_FREE) - { - DBG_MM_BLUE("Merging blocks with sizes %u and %u", prev->internals.heap_block.size, curr->internals.heap_block.size); - prev->internals.heap_block.size += (uint32_t)sizeof(struct pico_mem_block) + curr->internals.heap_block.size; - } - - DBG_MM_GREEN("Heap block freed and heap space defragmentized"); -} - -/* - * This method will return the max. available contiguous free space in the heap - * from a given page. - */ -static uint32_t _pico_mem_determine_max_free_space(struct pico_mem_page*page) -{ - uint32_t maxfreespace = 0; - uint8_t*byteptr; - struct pico_mem_block*mem_block; - - DBG_MM_YELLOW("Determining new maximum free space in page %p (old free space: %u)", page, page->heap_max_free_space); - - /* pico_mem_block* mem_block = (pico_mem_block*) (page+1); //reset mem_block to first block in the heap */ - byteptr = (uint8_t*) page + sizeof(struct pico_mem_page); - mem_block = (struct pico_mem_block*) byteptr; /* reset mem_block to first block in the heap */ - - /* Determine max free space by iterating trough the list */ - /* while(mem_block != NULL && mem_block->type == HEAP_BLOCK_TYPE) */ - while(mem_block->type == HEAP_BLOCK_TYPE) - { - /* DBG_MM("Memblock %p of size %i is free %i\n",block, block->size, block->free); */ - DBG_MM("Memblock %s (size %u) at %p", (mem_block->internals.heap_block.free == HEAP_BLOCK_FREE) ? 
"not in use" : "in use", mem_block->internals.heap_block.size, mem_block); - if(mem_block->internals.heap_block.free == HEAP_BLOCK_FREE && mem_block->internals.heap_block.size > maxfreespace) - { - maxfreespace = mem_block->internals.heap_block.size; - page->heap_max_free_space = maxfreespace; - } - - byteptr = (uint8_t*) mem_block + sizeof(struct pico_mem_block); - byteptr += mem_block->internals.heap_block.size; - mem_block = (struct pico_mem_block*) byteptr; - } - page->heap_max_free_space = maxfreespace; - DBG_MM_GREEN("New free space: %u", page->heap_max_free_space); - return maxfreespace; -} - -/* - * This method will make a slab object available again by putting it in the RB-tree. - * Slab objects of the same size are stored in a double linked list. One pico_tree_node represents - * all the slab objects of the same size by making the keyvalue of a pico_tree_node point to - * the first element of the linked list. - * An element in this linked list is a struct pico_mem_slab_node. All the elements are also - * stored in the heap of the manager page (page0), or in the heap of extra manager spaces if there isn't enough space. - */ -static void _pico_mem_free_slab_block(struct pico_mem_block*slab_block) -{ - struct pico_mem_slab_node*slab_node; - struct pico_mem_slab_node*first_slab_node; - struct pico_tree_node*tree_node; - void*temp; - - DBG_MM_YELLOW("Freeing slab object"); - - slab_node = pico_mem_page0_zalloc(sizeof(struct pico_mem_slab_node)); - - if(slab_node == NULL) - { - /* Update the page householding without making the slab available again! 
*/ - DBG_MM_RED("No more space in the manager heap and no more space for extra pages!"); - DBG_MM_RED("This slab will be leaked, but the leak will be plugged at the next cleanup, if and when the page is empty"); - slab_block->internals.slab_block.page->slabs_free++; - return; - } - - slab_node->slab = slab_block; - slab_block->internals.slab_block.slab_node = slab_node; - tree_node = pico_tree_findNode(&manager->tree, slab_node); - if(tree_node != NULL) - { - first_slab_node = (struct pico_mem_slab_node*) tree_node->keyValue; - tree_node->keyValue = slab_node; - first_slab_node->prev = slab_node; - slab_node->prev = NULL; - slab_node->next = first_slab_node; - } - else{ - DBG_MM_BLUE("No node found for size %i so calling pico_tree_insert", slab_node->slab->internals.slab_block.page->slab_size); - slab_node->next = NULL; - slab_node->prev = NULL; - /* pico_err_t pico_err_backup = pico_err; */ - /* pico_err = 0; */ - - - /* temp = pico_tree_insert(&manager->tree, slab_node); */ - temp = manager_tree_insert(&manager->tree, slab_node); - - /* if(pico_err == PICO_ERR_ENOMEM) */ - if(temp == &LEAF) - { - DBG_MM_RED("No more space in the manager heap and no more space for extra pages!"); - DBG_MM_RED("This slab will be leaked, but the leak will be plugged at the next cleanup, if and when the page is empty"); - pico_mem_page0_free(slab_node); - /* pico_err = pico_err_backup; */ - slab_block->internals.slab_block.page->slabs_free++; - return; - } - } - - /* Update free slabs in page householding */ - slab_block->internals.slab_block.page->slabs_free++; - DBG_MM_GREEN("Freed slab object, there are now %i free slab objects in the corresponding page", slab_block->internals.slab_block.page->slabs_free); -} - -/* - * This method zero initializes a block of memory pointed to by startOfData, of size len - */ -static void _pico_mem_zero_initialize(void*startOfData, size_t len) -{ - if(startOfData != NULL) - { - DBG_MM_YELLOW("Zero initializing user memory at %p of %u bytes", 
startOfData, len); - memset(startOfData, 0, len); - DBG_MM_GREEN("Zero initialized."); - } - else - { - DBG_MM_RED("Got a NULL pointer to zero initialize!"); - } -} - -/* - * This method will try to find a free heap block of size len in a given page. - */ -static void*_pico_mem_find_heap_block(struct pico_mem_page*page, size_t len) -{ - struct pico_mem_block*mem_block; - struct pico_mem_block*inserted_block; - uint8_t*startOfData; - uint8_t*byteptr; - - DBG_MM_YELLOW("Searching for a heap block of length %u in page %p (largest free block size = %u)", len, page, page->heap_max_free_space); - if(page->heap_max_free_space < len ) - { - DBG_MM_RED("Size %u > max free space %u of the page. This should only happen when this page is newly created, and its heap space is not large enough for the heap length!", len, page->heap_max_free_space); - return NULL; - } - - byteptr = (uint8_t*) page + sizeof(struct pico_mem_page); - mem_block = (struct pico_mem_block*) byteptr; /* Jump over the page struct to the start of the heap */ - - /* If mem_block == NULL then a free block at the end of the list is found. */ - /* Else, if the block is free and the size > len, an available block is also found. */ - /* while(mem_block != NULL && mem_block->type == HEAP_BLOCK_TYPE && ( mem_block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE || mem_block->internals.heap_block.size < len)) */ - while(mem_block->type == HEAP_BLOCK_TYPE && (mem_block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE || mem_block->internals.heap_block.size < len)) - { - /* DBG_MM_RED("Skipping heap block in use at %p of size %i", mem_block, mem_block->size); */ - DBG_MM_BLUE("Skipping heap block %s (size %u) at %p", (mem_block->internals.heap_block.free == HEAP_BLOCK_FREE) ? 
"not in use" : "in use", mem_block->internals.heap_block.size, mem_block); - byteptr = (uint8_t*) mem_block + sizeof(struct pico_mem_block); - byteptr += mem_block->internals.heap_block.size; - mem_block = (struct pico_mem_block*) byteptr; - } - if(mem_block->type == SLAB_BLOCK_TYPE) - { - DBG_MM_RED("No free heap block of contiguous size %u could be found in page %p", len, page); - /* exit(1); */ - return NULL; - } - - DBG_MM_BLUE("Found free heap block of size %u at %p", mem_block->internals.heap_block.size, mem_block); - mem_block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - page->timestamp = 0; - - /* Check to split the block into two smaller blocks */ - if(mem_block->internals.heap_block.size >= (len + sizeof(struct pico_mem_block) + PICO_MEM_MINIMUM_OBJECT_SIZE)) - { - byteptr = (uint8_t*) mem_block + sizeof(struct pico_mem_block); - byteptr += len; - inserted_block = (struct pico_mem_block*) byteptr; - - /* Update newly inserted block */ - inserted_block->type = HEAP_BLOCK_TYPE; - inserted_block->internals.heap_block.free = HEAP_BLOCK_FREE; - inserted_block->internals.heap_block.size = (uint32_t)(mem_block->internals.heap_block.size - (uint32_t)sizeof(struct pico_mem_block) - len); - /* Update block that was split up */ - mem_block->internals.heap_block.size = (uint32_t)len; - DBG_MM_BLUE("Splitting up the block, creating a new block of size %u at %p", inserted_block->internals.heap_block.size, inserted_block); - } - - startOfData = (uint8_t*) mem_block + sizeof(struct pico_mem_block); - - page->heap_max_free_space = _pico_mem_determine_max_free_space(page); - - /* Zero-initialize */ - _pico_mem_zero_initialize(startOfData, len); - DBG_MM_GREEN("Returning %p", startOfData); - return startOfData; -} - -/* - * This method will be called from pico_mem_zalloc. If an appropriate slab object is found, - * it is deleted from the RB tree and a pointer to the start of data in the slab object - * is returned. 
- */ -static void*_pico_mem_find_slab(size_t len) -{ - size_t*lenptr = &len; - size_t**doublelenptr = &lenptr; - struct pico_tree_node*node; - uint8_t *returnVal = NULL; - - DBG_MM_YELLOW("Finding slab with size %u", len); - /* The compare function takes an int*** length */ - node = pico_tree_findNode(&manager->tree, &doublelenptr); - - if(node != NULL) { - /* DBG_MM_BLUE("Found node, size = %d ", ((pico_mem_slab_node*) node->keyValue)->slab->size); */ - struct pico_mem_slab_node*slab_node = node->keyValue; - slab_node->slab->internals.slab_block.page->slabs_free--; - slab_node->slab->internals.slab_block.page->timestamp = 0; - DBG_MM_BLUE("Found node, size = %u at page %p, %u free slabs left in page", slab_node->slab->internals.slab_block.page->slab_size, slab_node->slab->internals.slab_block.page, slab_node->slab->internals.slab_block.page->slabs_free); - if(slab_node->next == NULL) - { - DBG_MM_BLUE("This was the last available slab object. Deleting the tree node now."); - /* if this is the last slab object of this size in the tree, then also delete the tree_node! */ - - - /* pico_tree_delete(&manager->tree, &doublelenptr); */ - manager_tree_delete(&manager->tree, &doublelenptr); - - - } - else - { - /* Remove the pico_mem_slab_node by making the keyvalue of the pico_tree_node point to the next element. */ - slab_node->next->prev = NULL; - node->keyValue = slab_node->next; - } - - returnVal = ((uint8_t*) (slab_node->slab)) + sizeof(struct pico_mem_block); - DBG_MM_BLUE("Start of slab: %p -> start of data : %p", slab_node->slab, returnVal); - /* Update the slab block housekeeping */ - slab_node->slab->internals.slab_block.slab_node = NULL; - /* Zero-initialize */ - _pico_mem_zero_initialize(returnVal, len); - /* Free the struct that was used by the linked list in the RB-tree */ - pico_mem_page0_free(slab_node); - } - - DBG_MM_GREEN("Returning %p", returnVal); - return returnVal; -} - -/* - * This method is called by the picotcp stack to free memory. 
- */ -void pico_mem_free(void*ptr) -{ - struct pico_mem_block*generic_block; - struct pico_mem_page*page; -#ifdef DEBUG_MM - uint16_t i = 0; -#endif - - DBG_MM_YELLOW("Free called on %p", ptr); - - if(ptr == NULL) return; - - generic_block = (struct pico_mem_block*) ptr; - generic_block--; - - if(generic_block->type == SLAB_BLOCK_TYPE) - { - if(generic_block->internals.slab_block.slab_node) - { - DBG_MM_RED("ERROR: Double free on a slab block (recovered)!"); - return; - } - - DBG_MM_BLUE("Request to free a slab block"); - _pico_mem_free_slab_block(generic_block); - } - else if(generic_block->type == HEAP_BLOCK_TYPE) - { - if(generic_block->internals.heap_block.free == HEAP_BLOCK_FREE) - { - DBG_MM_RED("ERROR: Double free on a heap block (recovered)!"); - return; - } - - DBG_MM_BLUE("Request to free a heap block"); - - /* Update the page housekeeping */ - /* Update the housekeeping of the extra manager pages */ - page = manager->first_page; - while(page != NULL) - { - DBG_MM_BLUE("Checking page %i at %p", i++, page); - if(((uint8_t*) page < (uint8_t*) ptr) && ((uint8_t*) ptr < (uint8_t*) page + PICO_MEM_PAGE_SIZE)) - { - /* DBG_MM_RED("page < ptr < page + PICO_MEM_PAGE_SIZE"); */ - /* DBG_MM_RED("%p < %p < %p", (uint8_t*) page, (uint8_t*) ptr, (uint8_t*) page + PICO_MEM_PAGE_SIZE); */ - _pico_mem_free_and_merge_heap_block(page, generic_block); - _pico_mem_determine_max_free_space(page); - break; - } - - page = page->next_page; - } - } - else - { - DBG_MM_RED("ERROR: You tried to free a pointer from which the type ( heap block or slab object ) could not be determined!!"); - } -} - -/************************NEW***************************/ -static void _pico_mem_reset_slab_statistics(void) -{ - slab_size_statistics[0] = 0; - slab_size_statistics[1] = 0; - slab_size_statistics[2] = 0; -} - -static size_t _pico_mem_determine_slab_size(size_t len) -{ - DBG_MM_YELLOW("Determining slab size to use, request for %u bytes", len); - if (len > slab_sizes[1]) - { - 
slab_size_statistics[2]++; - if(slab_size_statistics[2] > 3) - { - _pico_mem_reset_slab_statistics(); - if(slab_size_global != slab_sizes[2]) - { - slab_size_global = slab_sizes[2]; - } - } - - if(slab_size_global != slab_sizes[2]) - { - DBG_MM_RED("Using slab size %u, but we have to use a slab size of %u for the request of %u bytes", slab_size_global, slab_sizes[2], len); - return slab_sizes[2]; - } - } - else if(len > slab_sizes[0]) - { - slab_size_statistics[1]++; - if (slab_size_statistics[1] > 3) - { - _pico_mem_reset_slab_statistics(); - if(slab_size_global != slab_sizes[1]) - { - slab_size_global = slab_sizes[1]; - } - } - - if(len > slab_size_global) - { - DBG_MM_RED("Using slab size %u, but we have to use a slab size of %u for the request of %u bytes", slab_size_global, slab_sizes[1], len); - return slab_sizes[1]; - } - } - else - { - slab_size_statistics[0]++; - if (slab_size_statistics[0] > 3) - { - _pico_mem_reset_slab_statistics(); - if(slab_size_global != slab_sizes[0]) - { - slab_size_global = slab_sizes[0]; - } - } - } - - DBG_MM_GREEN("Using slab size %u", slab_size_global); - return slab_size_global; -} -/************************NEW***************************/ - -/* - * This method will be called by the picotcp stack to allocate new memory. - * If the requested size is bigger than the threshold of a slab object, - * then the manager will try to find an appropriate slab object and return a pointer - * to the beginning of the data in that slab object. - * - * If no slab objects could be found, or the requested size is less then the threshold - * of a slab object, the manager will try to allocate a heap block and return a pointer - * to the beginning of the data of that heap block. - * - * If still no memory could be found, then the manager will check again if the - * requested size is smaller than the threshold of a slab object. - * If so, the manager will try to find a slab object again but now ignoring the threshold. 
- * By doing so, there will be a large amount of internal fragmentation, but at least the - * memory request could be fulfilled. - * - * In any other case, the manager will return NULL. - */ -void*pico_mem_zalloc(size_t len) -{ - struct pico_mem_page*page; - void*returnCandidate; - uint32_t pagenr; - void *ret; - - DBG_MM_YELLOW("===> pico_mem_zalloc(%i) called", len); - len += (len % 4 == 0) ? 0 : 4 - len % 4; - DBG_MM_YELLOW("Aligned size: %i", len); - - if(manager == NULL) - { - DBG_MM_RED("Invalid alloc, a memory manager hasn't been instantiated yet!"); - return NULL; - } - - if(len > PICO_MAX_SLAB_SIZE) - { - DBG_MM_RED("Invalid alloc, the size you requested is larger than the maximum slab size! (%uB>%uB)", len, PICO_MAX_SLAB_SIZE); - return NULL; - } - - /* /////// FIND SLAB OBJECTS ///////// */ - if(len >= PICO_MIN_SLAB_SIZE) - { - /* feed the size into a statistic engine that determines the slabsize to use */ - /* DBG_MM_RED("Placeholder: determine correct slab size to use!"); */ - len = _pico_mem_determine_slab_size(len); - ret = _pico_mem_find_slab(len); - if(ret != NULL) return ret; - - /* No slab object could be found. => Init new page? */ - - DBG_MM_BLUE("No free slab found, trying to create a new page (Used size = %u, max size = %u)", manager->used_size, manager->size); - if(manager->used_size + PICO_MEM_PAGE_SIZE <= manager->size) - { - struct pico_mem_page*newpage = pico_zalloc(PICO_MEM_PAGE_SIZE); - if(newpage != NULL) - { - manager->used_size += PICO_MEM_PAGE_SIZE; - DBG_MM_BLUE("Created new page at %p -> used size = %u", newpage, manager->used_size); - _pico_mem_init_page(newpage, len); - /* Return pointer to first slab in that page */ - return _pico_mem_find_slab(len); /* Find the new slab object! 
*/ - } - else - { - DBG_MM_RED("Not enough space to allocate a new page, even though the max size hasn't been reached yet!"); - return NULL; - } - } - else - { - DBG_MM_RED("Not enough space to allocate a new page!"); - return NULL; - } - } - - /* /////// FIND HEAP BLOCKS ///////// */ - if(len < PICO_MEM_MINIMUM_OBJECT_SIZE) - len = PICO_MEM_MINIMUM_OBJECT_SIZE; - - DBG_MM_BLUE("Searching for heap space of length %u now.", len); - - pagenr = 1; - page = manager->first_page; - - /* The algorithm to find a heap block is based on first fit. */ - /* But when the internal fragmentation is too big, the block is split. */ - while(page != NULL) - { - /* DBG_MM_RED("Max free space in page %i = %i bytes", pagecounter+1, page->heap.max_free_space); */ - DBG_MM_BLUE("Max free space in page %u = %uB (page=%p)", pagenr, page->heap_max_free_space, page); - if(len <= page->heap_max_free_space) - { - return _pico_mem_find_heap_block(page, len); - } - - pagenr++; - page = page->next_page; - } - /* No free heap block could be found, try to alloc a new page */ - DBG_MM_BLUE("No free heap block found, trying to create a new page (Used size = %u, max size = %u)", manager->used_size, manager->size); - if(manager->used_size + PICO_MEM_PAGE_SIZE <= manager->size) - { - struct pico_mem_page*newpage = pico_zalloc(PICO_MEM_PAGE_SIZE); - if(newpage != NULL) - { - manager->used_size += PICO_MEM_PAGE_SIZE; - DBG_MM_BLUE("Created new page at %p -> used size = %u", newpage, manager->used_size); - /* TODO: Careful, if the current slabsize is determined in another way, this needs to change too */ - _pico_mem_init_page(newpage, slab_size_global); - returnCandidate = _pico_mem_find_heap_block(newpage, len); - if(returnCandidate != NULL) - return returnCandidate; - } - else - { - DBG_MM_RED("Not enough space to allocate a new page, even though the max size hasn't been reached yet!"); - return NULL; - } - } - - /* DBG_MM_RED("NO HEAP BLOCK FOUND!"); */ - - /* /////// TRY TO FIND NEW SLAB OBJECT, BUT 
INCREASE SIZE ///////// */ - DBG_MM_RED("TRYING TO FIND FREE SLAB OBJECT WITH DANGER OF LARGE INTERNAL FRAGMENTATION"); - /* TODO: Careful, if the current slabsize is determined in another way, this needs to change too */ - return _pico_mem_find_slab(slab_size_global); -} -/* - * This method frees heap space used in the manager page, or in one of the extra manager pages - */ -void pico_mem_page0_free(void*ptr) -{ - struct pico_mem_block*node = ptr; - struct pico_mem_manager_extra*heap_page; -#ifdef DEBUG_MM - uint16_t i = 0; -#endif - - /* TODO: should be able to merge free neighbouring blocks (??) */ - DBG_MM_YELLOW("page0_free called"); - - node--; - node->internals.heap_block.free = HEAP_BLOCK_FREE; - /* Update the housekeeping of the extra manager pages */ - heap_page = manager->manager_extra; - while(heap_page != NULL) - { - DBG_MM_BLUE("Checking extra heap page %i at %p", i++, heap_page); - if(((uint8_t*) heap_page < (uint8_t*) ptr) && ((uint8_t*) ptr < (uint8_t*) heap_page + PICO_MEM_PAGE_SIZE)) - { - /* DBG_MM_RED("heap_page < ptr < heap_page + PICO_MEM_PAGE_SIZE"); */ - /* DBG_MM_RED("%p < %p < %p", (uint8_t*) heap_page, (uint8_t*) ptr, (uint8_t*) heap_page + PICO_MEM_PAGE_SIZE); */ - heap_page->blocks--; - DBG_MM_BLUE("Updating heap page housekeeping: %u->%u used blocks", heap_page->blocks + 1, heap_page->blocks); - break; - } - - heap_page = heap_page->next; - } - DBG_MM_GREEN("Heap block (located in %s) succesfully freed", (i != -1) ? "main manager page" : "extra manager page"); -} - -/* - * This cleanup function must be called externally at downtime moments. A system timestamp must be passed to the function. - * All pages and extra manager pages will be checked. If they are empty, the timestamp of the page will be updated. If the - * page has been empty for a time longer than PICO_MEM_PAGE_LIFETIME, the page is returned to the system's control, and all - * the housekeeping is updated. 
- */ -void pico_mem_cleanup(uint32_t timestamp) -{ - struct pico_mem_slab_node*slab_node; - struct pico_tree_node*tree_node; - struct pico_mem_block*slab_block; - struct pico_mem_page*next_page; - struct pico_mem_page*prev_page; - struct pico_mem_page*page; - struct pico_mem_manager_extra*heap_page; - struct pico_mem_manager_extra*next; - struct pico_mem_manager_extra*prev_heap_page; - uint8_t*byteptr; - int pagenr = 1; - int i; - - DBG_MM_YELLOW("Starting cleanup with timestamp %u", timestamp); - /* Iterate over all pages */ - page = manager->first_page; - prev_page = NULL; - while(page != NULL) - { - DBG_MM_BLUE("Checking page %i at %p", pagenr, page); - /* Check the timestamp of the page. If it doesn't have one (0), update it with the new timestamp if the page is completely empty. */ - if(page->timestamp == 0) - { - if((page->heap_max_size == page->heap_max_free_space) && (page->slabs_free == page->slabs_max)) - { - DBG_MM_BLUE("Page %i empty, updating timestamp", pagenr); - page->timestamp = timestamp; - } - } - /* If the timestamp is old enough, remove the page and all its slabs. 
This means we have to: */ - /* > Remove all slabs out of the RB tree */ - /* > Update the page list */ - /* > Return the page to the system's control */ - /* > Update manager housekeeping */ - else if(timestamp > page->timestamp) - { - if(timestamp - page->timestamp > PICO_MEM_PAGE_LIFETIME) - { - DBG_MM_BLUE("Page %i is empty and has exceeded the lifetime (%u > lifetime=%u)", pagenr, timestamp - page->timestamp, PICO_MEM_PAGE_LIFETIME); - /* Remove all the slabs out of the RB tree */ - byteptr = (uint8_t*) page + sizeof(struct pico_mem_page); /* byteptr points to the start of the heap (a pico_mem_block), after page housekeeping */ - byteptr += sizeof(struct pico_mem_block); /* jump over pico_mem_block, containing the housekeeping for the heap space */ - byteptr += page->heap_max_size; /* jump over heap space, byteptr now points to the start of the slabs */ - slab_block = (struct pico_mem_block*) byteptr; - slab_node = slab_block->internals.slab_block.slab_node; - /* The corresponding tree_node */ - tree_node = pico_tree_findNode(&manager->tree, slab_node); - for(i = 0; i < page->slabs_max; i++) - { - DBG_MM("Removing slab %i at %p", i, slab_block); - if(slab_node->prev == NULL && slab_node->next == NULL) - { - DBG_MM("This node is the last node in the tree_node, removing tree_node"); - /* slab_node is the last node in the tree leaf, delete it */ - - - /* pico_tree_delete(&manager->tree, slab_node); */ - manager_tree_delete(&manager->tree, slab_node); - - - } - else if(slab_node->prev == NULL) - { - DBG_MM("This node is the first node in the linked list, adjusting tree_node"); - tree_node->keyValue = slab_node->next; - slab_node->next->prev = NULL; - } - else if(slab_node->next == NULL) - { - DBG_MM("This node is the last node in the linked list"); - slab_node->prev->next = NULL; - } - else - { - DBG_MM("This node is neither the first, nor the last node in the list"); - slab_node->prev->next = slab_node->next; - slab_node->next->prev = slab_node->prev; - } - - 
pico_mem_page0_free(slab_node); - byteptr = (uint8_t*) slab_block + sizeof(struct pico_mem_block); /* byteptr points to the start of the slab data, after the housekeeping */ - byteptr += page->slab_size; /* jump over the slab data, byteptr now points to the start of the next slab block */ - slab_block = (struct pico_mem_block*) byteptr; - slab_node = slab_block->internals.slab_block.slab_node; - } - /* Update the page list */ - if(prev_page == NULL) /* prev_page == NULL when pagenr=1, or when previous pages were deleted */ - { - DBG_MM("Updating page list, manager->first_page = page->next_page"); - manager->first_page = page->next_page; - } - else - { - DBG_MM("Updating page list, prev_page->next_page = page->next_page"); - prev_page->next_page = page->next_page; - } - - /* Return the page to the system's control */ - next_page = page->next_page; - DBG_MM("Freeing page, manager used size = %u", manager->used_size); - pico_free(page); - /* Update the manager housekeeping */ - manager->used_size -= PICO_MEM_PAGE_SIZE; - DBG_MM("Freed page, manager used size = %u, down from %u", manager->used_size, manager->used_size + PICO_MEM_PAGE_SIZE); - /* ITERATION */ - page = next_page; - pagenr++; - continue; - } - else - { - DBG_MM_BLUE("Page %i is empty, but has not exceeded the lifetime (%u < lifetime=%u)", pagenr, timestamp - page->timestamp, PICO_MEM_PAGE_LIFETIME); - } - } - else /* timestamp < page->timestamp */ - { - DBG_MM_RED("Page %i is empty, but the system timestamp < page timestamp! 
(%u<%u)", pagenr, timestamp, page->timestamp); - DBG_MM_RED("Updating page %i timestamp!", pagenr); - page->timestamp = timestamp; - } - - pagenr++; - prev_page = page; - page = page->next_page; - } - /* Check all extra manager pages if they are empty */ - heap_page = manager->manager_extra; - prev_heap_page = NULL; - pagenr = 1; - while(heap_page != NULL) - { - DBG_MM_BLUE("Checking extra manager page %i at %p", pagenr, heap_page); - if(heap_page->timestamp == 0) - { - if( heap_page->blocks == 0 ) - { - DBG_MM_BLUE("Extra manager page %i empty, updating timestamp", pagenr); - heap_page->timestamp = timestamp; - } - } - else if(timestamp > heap_page->timestamp) - { - if(timestamp - heap_page->timestamp > PICO_MEM_PAGE_LIFETIME) - { - DBG_MM_BLUE("Extra manager page %i empty and has exceeded the lifetime (%u > lifetime=%u)", pagenr, timestamp - heap_page->timestamp, PICO_MEM_PAGE_LIFETIME); - /* Update the page list */ - if(prev_heap_page == NULL) - { - DBG_MM("Updating page list, manager->manager_extra = heap_page->next"); - manager->manager_extra = heap_page->next; - } - else - { - DBG_MM("Updating page list, prev_heap_page->next = heap_page->next"); - prev_heap_page->next = heap_page->next; - } - - /* Return the page to the system's control */ - next = heap_page->next; - DBG_MM("Freeing page, manager used size = %u", manager->used_size); - pico_free(heap_page); - /* Update the manager housekeeping */ - manager->used_size -= PICO_MEM_PAGE_SIZE; - DBG_MM("Freed page, manager used size = %u, down from %u", manager->used_size, manager->used_size + PICO_MEM_PAGE_SIZE); - /* ITERATION */ - heap_page = next; - pagenr++; - continue; - } - else - { - DBG_MM_BLUE("Page %i is empty, but has not exceeded the lifetime (%u < lifetime=%u)", pagenr, timestamp - heap_page->timestamp, PICO_MEM_PAGE_LIFETIME); - } - } - else - { - DBG_MM_RED("Page %i is empty, but the system timestamp < page timestamp! 
(%u<%u)", pagenr, timestamp, heap_page->timestamp); - DBG_MM_RED("Updating page %i timestamp!", pagenr); - heap_page->timestamp = timestamp; - } - - /* ITERATION */ - pagenr++; - prev_heap_page = heap_page; - heap_page = heap_page->next; - } -} - - - - - - -#ifdef PICO_SUPPORT_MM_PROFILING -/*********************************************************************************************************************** - *********************************************************************************************************************** - MEMORY PROFILING FUNCTIONS - *********************************************************************************************************************** - ***********************************************************************************************************************/ - -static struct pico_mem_manager*manager_profile; - -static void _pico_mem_print_tree(struct pico_tree_node*root) -{ - struct pico_mem_slab_node*iterator; - int j; - - if (root == &LEAF || root == NULL) - { - DBG_MM("No tree nodes at this time.\n"); - return; - } - - iterator = (struct pico_mem_slab_node*) root->keyValue; - DBG_MM("Tree node for size %u:\n", iterator->slab->internals.slab_block.page->slab_size); - j = 0; - while(iterator != NULL) - { - DBG_MM("\tSlab_node %i at %p:\n", j, iterator); - DBG_MM("\t\tPrev:%p\n", iterator->prev); - DBG_MM("\t\tNext:%p\n", iterator->next); - DBG_MM("\t\tSlab:%p\n", iterator->slab); - j++; - iterator = iterator->next; - } - if(root->leftChild != &LEAF && root->leftChild != NULL) - _pico_mem_print_tree(root->leftChild); - - if(root->rightChild != &LEAF && root->rightChild != NULL) - _pico_mem_print_tree(root->rightChild); -} - -void pico_mem_profile_scan_data() -{ - if(manager == NULL) - { - DBG_MM("No memory manager instantiated!\n"); - } - else - { - int manager_pages = 0; - int pages = 0; - int counter = 0; - struct pico_mem_manager_extra*heap_page; - struct pico_mem_page*page; - uint8_t*byteptr; - struct 
pico_mem_block*mem_block; - - DBG_MM("Memory manager: %uB/%uB in use\n", manager->used_size, manager->size); - _pico_mem_print_tree(manager->tree.root); - - /* Iterate over every extra manager page: */ - heap_page = manager->manager_extra; - while(heap_page != NULL) - { - manager_pages++; - DBG_MM("Extra manager page %i:\n\tBlocks in use: %u\n\tTimestamp: %u\n", manager_pages, heap_page->blocks, heap_page->timestamp); - heap_page = heap_page->next; - } - /* Iterate over every page: */ - pages = (manager->used_size / PICO_MEM_PAGE_SIZE) - manager_pages - 1; - page = manager->first_page; - while(page != NULL) - { - counter++; - DBG_MM("Page %i/%i:\n\tSlabsize: %u\n\tSlabs free: %u/%u\n\tTimestamp: %u\n", counter, pages, page->slab_size, page->slabs_free, page->slabs_max, page->timestamp); - byteptr = (uint8_t*) page + sizeof(struct pico_mem_page); - mem_block = (struct pico_mem_block*) byteptr; - DBG_MM("\tHeap:\n"); - while(mem_block->type == HEAP_BLOCK_TYPE) - { - DBG_MM("\t\tBlock: size %u, %s\n", mem_block->internals.heap_block.size, (mem_block->internals.heap_block.free == HEAP_BLOCK_FREE) ? 
"free" : "not free"); - byteptr = (uint8_t*) mem_block + sizeof(struct pico_mem_block); - byteptr += mem_block->internals.heap_block.size; - mem_block = (struct pico_mem_block*) byteptr; - } - page = page->next_page; - } - } -} - -void pico_mem_profile_collect_data(struct profiling_data*profiling_struct) -{ - struct pico_mem_block*mem_block; - uint8_t*byteptr; - - profiling_struct->free_heap_space = 0; - profiling_struct->free_slab_space = 0; - profiling_struct->used_heap_space = 0; - profiling_struct->used_slab_space = 0; - if(manager != NULL) - { - struct pico_mem_page*page = manager->first_page; - while(page != NULL) - { - profiling_struct->free_slab_space += page->slab_size * page->slabs_free; - profiling_struct->used_slab_space += page->slab_size * page->slabs_max; - - byteptr = (uint8_t*) page + sizeof(struct pico_mem_page); - mem_block = (struct pico_mem_block*) byteptr; - - while(mem_block->type == HEAP_BLOCK_TYPE) - { - if(mem_block->internals.heap_block.free == HEAP_BLOCK_FREE) - { - profiling_struct->free_heap_space += mem_block->internals.heap_block.size; - } - else - { - /* dbg("Block: size=%u\n", mem_block->internals.heap_block.size); */ - profiling_struct->used_heap_space += mem_block->internals.heap_block.size; - } - - byteptr += sizeof(struct pico_mem_block) + mem_block->internals.heap_block.size; - mem_block = (struct pico_mem_block*) byteptr; - } - page = page->next_page; - } - } -} - -uint32_t pico_mem_profile_used_size() -{ - if(manager != NULL) - { - return manager->used_size; - } - else - { - return 0; - } -} - -struct pico_mem_manager*pico_mem_profile_manager() -{ - return manager; -} -#endif /* PICO_SUPPORT_MM_PROFILING */ - diff --git a/kernel/picotcp/modules/pico_mm.h b/kernel/picotcp/modules/pico_mm.h deleted file mode 100644 index 93bd102..0000000 --- a/kernel/picotcp/modules/pico_mm.h +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************* - PicoTCP. 
Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Gustav Janssens, Jonas Van Nieuwenberg, Sam Van Den Berge - *********************************************************************/ - - -#ifndef _INCLUDE_PICO_MM -#define _INCLUDE_PICO_MM - -#include "pico_config.h" - -/* - * Memory init function, this will create a memory manager instance - * A memory_manager page will be created, along with one page of memory - * Memory can be asked for via the pico_mem_zalloc function - * More memory will be allocated to the memory manager according to its needs - * A maximum amount of memory of uint32_t memsize can be allocated - */ -void pico_mem_init(uint32_t memsize); -/* - * Memory deinit function, this will free all memory occupied by the current - * memory manager instance. - */ -void pico_mem_deinit(void); -/* - * Zero-initialized malloc function, will reserve a memory segment of length uint32_t len - * This memory will be quickly allocated in a slab of fixed size if possible - * or less optimally in the heap for a small variable size - * The fixed size of the slabs can be changed dynamically via a statistics engine - */ -void*pico_mem_zalloc(size_t len); -/* - * Free function, free a block of memory pointed to by ptr. 
- * Unused memory is only returned to the system's control by pico_mem_cleanup - */ -void pico_mem_free(void*ptr); -/* - * This cleanup function will be provided by the memory manager - * It can be called during processor downtime - * This function will return unused pages to the system's control - * Pages are unused if they no longer contain slabs or heap, and they have been idle for a longer time - */ -void pico_mem_cleanup(uint32_t timestamp); - - - -#ifdef PICO_SUPPORT_MM_PROFILING -/*********************************************************************************************************************** - *********************************************************************************************************************** - MEMORY PROFILING FUNCTIONS - *********************************************************************************************************************** - ***********************************************************************************************************************/ -/* General info struct */ -struct profiling_data -{ - uint32_t free_heap_space; - uint32_t free_slab_space; - uint32_t used_heap_space; - uint32_t used_slab_space; -}; - -/* - * This function fills up a struct with used and free slab and heap space in the memory manager - * The user is responsible for resource managment - */ -void pico_mem_profile_collect_data(struct profiling_data*profiling_page_struct); - -/* - * This function prints the general structure of the memory manager - * Printf in this function can be rerouted to send this data over a serial port, or to write it away to memory - */ -void pico_mem_profile_scan_data(void); - -/* - * This function returns the total size that the manager has received from the system - * This can give an indication of the total system resource commitment, but keep in mind that - * there can be many free blocks in this "used" size - * Together with pico_mem_profile_collect_data, this can give a good estimation of the total - * resource 
commitment - */ -uint32_t pico_mem_profile_used_size(void); - -/* - * This function returns a pointer to page 0, the main memory manager housekeeping (struct pico_mem_manager). - * This can be used to collect data about the memory in user defined functions. - * Use with care! - */ -void*pico_mem_profile_manager(void); - -/* - * paramter manager is a pointer to a struct pico_mem_manager - */ -void pico_mem_init_profiling(void*manager, uint32_t memsize); -#endif /* PICO_SUPPORT_MM_PROFILING */ - -#endif /* _INCLUDE_PICO_MM */ diff --git a/kernel/picotcp/modules/pico_nat.c b/kernel/picotcp/modules/pico_nat.c deleted file mode 100644 index b8f08e0..0000000 --- a/kernel/picotcp/modules/pico_nat.c +++ /dev/null @@ -1,589 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Kristof Roelants, Brecht Van Cauwenberghe, - Simon Maes, Philippe Mariman - *********************************************************************/ - -#include "pico_stack.h" -#include "pico_frame.h" -#include "pico_tcp.h" -#include "pico_udp.h" -#include "pico_ipv4.h" -#include "pico_addressing.h" -#include "pico_nat.h" - -#ifdef PICO_SUPPORT_IPV4 -#ifdef PICO_SUPPORT_NAT - -#ifdef DEBUG_NAT -#define nat_dbg dbg -#else -#define nat_dbg(...) 
do {} while(0) -#endif - -#define PICO_NAT_TIMEWAIT 240000 /* msec (4 mins) */ - -#define PICO_NAT_INBOUND 0 -#define PICO_NAT_OUTBOUND 1 - -struct pico_nat_tuple { - uint8_t proto; - uint16_t conn_active : 11; - uint16_t portforward : 1; - uint16_t rst : 1; - uint16_t syn : 1; - uint16_t fin_in : 1; - uint16_t fin_out : 1; - uint16_t src_port; - uint16_t dst_port; - uint16_t nat_port; - struct pico_ip4 src_addr; - struct pico_ip4 dst_addr; - struct pico_ip4 nat_addr; -}; - -static struct pico_ipv4_link *nat_link = NULL; - -static int nat_cmp_natport(struct pico_nat_tuple *a, struct pico_nat_tuple *b) -{ - - if (a->nat_port < b->nat_port) - return -1; - - if (a->nat_port > b->nat_port) - - return 1; - - return 0; - -} - -static int nat_cmp_srcport(struct pico_nat_tuple *a, struct pico_nat_tuple *b) -{ - - if (a->src_port < b->src_port) - return -1; - - if (a->src_port > b->src_port) - - return 1; - - return 0; - -} - -static int nat_cmp_proto(struct pico_nat_tuple *a, struct pico_nat_tuple *b) -{ - if (a->proto < b->proto) - return -1; - - if (a->proto > b->proto) - return 1; - - return 0; -} - -static int nat_cmp_address(struct pico_nat_tuple *a, struct pico_nat_tuple *b) -{ - return pico_ipv4_compare(&a->src_addr, &b->src_addr); -} - -static int nat_cmp_inbound(void *ka, void *kb) -{ - struct pico_nat_tuple *a = ka, *b = kb; - int cport = nat_cmp_natport(a, b); - if (cport) - return cport; - - return nat_cmp_proto(a, b); -} - - -static int nat_cmp_outbound(void *ka, void *kb) -{ - struct pico_nat_tuple *a = ka, *b = kb; - int caddr, cport; - - caddr = nat_cmp_address(a, b); - if (caddr) - return caddr; - - cport = nat_cmp_srcport(a, b); - - if (cport) - return cport; - - return nat_cmp_proto(a, b); -} - -static PICO_TREE_DECLARE(NATOutbound, nat_cmp_outbound); -static PICO_TREE_DECLARE(NATInbound, nat_cmp_inbound); - -void pico_ipv4_nat_print_table(void) -{ - struct pico_nat_tuple *t = NULL; - struct pico_tree_node *index = NULL; - (void)t; - - 
nat_dbg("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); - nat_dbg("+ NAT table +\n"); - nat_dbg("+------------------------------------------------------------------------------------------------------------------------+\n"); - nat_dbg("+ src_addr | src_port | dst_addr | dst_port | nat_addr | nat_port | proto | conn active | FIN1 | FIN2 | SYN | RST | FORW +\n"); - nat_dbg("+------------------------------------------------------------------------------------------------------------------------+\n"); - - pico_tree_foreach(index, &NATOutbound) - { - t = index->keyValue; - nat_dbg("+ %08X | %05u | %08X | %05u | %08X | %05u | %03u | %03u | %u | %u | %u | %u | %u +\n", - long_be(t->src_addr.addr), t->src_port, long_be(t->dst_addr.addr), t->dst_port, long_be(t->nat_addr.addr), t->nat_port, - t->proto, t->conn_active, t->fin_in, t->fin_out, t->syn, t->rst, t->portforward); - } - nat_dbg("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"); -} - -/* - 2 options: - find on nat_port and proto - find on src_addr, src_port and proto - zero the unused parameters - */ -static struct pico_nat_tuple *pico_ipv4_nat_find_tuple(uint16_t nat_port, struct pico_ip4 *src_addr, uint16_t src_port, uint8_t proto) -{ - struct pico_nat_tuple *found = NULL, test = { - 0 - }; - - test.nat_port = nat_port; - test.src_port = src_port; - test.proto = proto; - if (src_addr) - test.src_addr = *src_addr; - - if (nat_port) - found = pico_tree_findKey(&NATInbound, &test); - else - found = pico_tree_findKey(&NATOutbound, &test); - - if (found) - return found; - else - return NULL; -} - -int pico_ipv4_nat_find(uint16_t nat_port, struct pico_ip4 *src_addr, uint16_t src_port, uint8_t proto) -{ - struct pico_nat_tuple *t = NULL; - - t = pico_ipv4_nat_find_tuple(nat_port, src_addr, src_port, proto); - if (t) - return 1; - else - return 0; -} - -static 
struct pico_nat_tuple *pico_ipv4_nat_add(struct pico_ip4 dst_addr, uint16_t dst_port, struct pico_ip4 src_addr, uint16_t src_port, - struct pico_ip4 nat_addr, uint16_t nat_port, uint8_t proto) -{ - struct pico_nat_tuple *t = PICO_ZALLOC(sizeof(struct pico_nat_tuple)); - if (!t) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - t->dst_addr = dst_addr; - t->dst_port = dst_port; - t->src_addr = src_addr; - t->src_port = src_port; - t->nat_addr = nat_addr; - t->nat_port = nat_port; - t->proto = proto; - t->conn_active = 1; - t->portforward = 0; - t->rst = 0; - t->syn = 0; - t->fin_in = 0; - t->fin_out = 0; - - if (pico_tree_insert(&NATOutbound, t)) { - PICO_FREE(t); - return NULL; - } - - if (pico_tree_insert(&NATInbound, t)) { - pico_tree_delete(&NATOutbound, t); - PICO_FREE(t); - return NULL; - } - - return t; -} - -static int pico_ipv4_nat_del(uint16_t nat_port, uint8_t proto) -{ - struct pico_nat_tuple *t = NULL; - t = pico_ipv4_nat_find_tuple(nat_port, NULL, 0, proto); - if (t) { - pico_tree_delete(&NATOutbound, t); - pico_tree_delete(&NATInbound, t); - PICO_FREE(t); - } - - return 0; -} - -static struct pico_trans *pico_nat_generate_tuple_trans(struct pico_ipv4_hdr *net, struct pico_frame *f) -{ - struct pico_trans *trans = NULL; - switch (net->proto) { - case PICO_PROTO_TCP: - { - struct pico_tcp_hdr *tcp = (struct pico_tcp_hdr *)f->transport_hdr; - trans = (struct pico_trans *)&tcp->trans; - break; - } - case PICO_PROTO_UDP: - { - struct pico_udp_hdr *udp = (struct pico_udp_hdr *)f->transport_hdr; - trans = (struct pico_trans *)&udp->trans; - break; - } - case PICO_PROTO_ICMP4: - /* XXX: implement */ - break; - } - return trans; -} - -static struct pico_nat_tuple *pico_ipv4_nat_generate_tuple(struct pico_frame *f) -{ - struct pico_trans *trans = NULL; - struct pico_ipv4_hdr *net = (struct pico_ipv4_hdr *)f->net_hdr; - uint16_t nport = 0; - uint8_t retry = 32; - - /* generate NAT port */ - do { - uint32_t rand = pico_rand(); - nport = (uint16_t) (rand & 
0xFFFFU); - nport = (uint16_t)((nport % (65535 - 1024)) + 1024U); - nport = short_be(nport); - - if (pico_is_port_free(net->proto, nport, NULL, &pico_proto_ipv4)) - break; - } while (--retry); - - if (!retry) - return NULL; - - trans = pico_nat_generate_tuple_trans(net, f); - if(!trans) - return NULL; - - return pico_ipv4_nat_add(net->dst, trans->dport, net->src, trans->sport, nat_link->address, nport, net->proto); - /* XXX return pico_ipv4_nat_add(nat_link->address, port, net->src, trans->sport, net->proto); */ -} - -static inline void pico_ipv4_nat_set_tcp_flags(struct pico_nat_tuple *t, struct pico_frame *f, uint8_t direction) -{ - struct pico_tcp_hdr *tcp = (struct pico_tcp_hdr *)f->transport_hdr; - if (tcp->flags & PICO_TCP_SYN) - t->syn = 1; - - if (tcp->flags & PICO_TCP_RST) - t->rst = 1; - - if ((tcp->flags & PICO_TCP_FIN) && (direction == PICO_NAT_INBOUND)) - t->fin_in = 1; - - if ((tcp->flags & PICO_TCP_FIN) && (direction == PICO_NAT_OUTBOUND)) - t->fin_out = 1; -} - -static int pico_ipv4_nat_sniff_session(struct pico_nat_tuple *t, struct pico_frame *f, uint8_t direction) -{ - struct pico_ipv4_hdr *net = (struct pico_ipv4_hdr *)f->net_hdr; - - switch (net->proto) { - case PICO_PROTO_TCP: - { - pico_ipv4_nat_set_tcp_flags(t, f, direction); - break; - } - - case PICO_PROTO_UDP: - t->conn_active = 1; - break; - - case PICO_PROTO_ICMP4: - /* XXX: implement */ - break; - - default: - return -1; - } - - return 0; -} - -static void pico_ipv4_nat_table_cleanup(pico_time now, void *_unused) -{ - struct pico_tree_node *index = NULL, *_tmp = NULL; - struct pico_nat_tuple *t = NULL; - IGNORE_PARAMETER(now); - IGNORE_PARAMETER(_unused); - nat_dbg("NAT: before table cleanup:\n"); - pico_ipv4_nat_print_table(); - - pico_tree_foreach_reverse_safe(index, &NATOutbound, _tmp) - { - t = index->keyValue; - switch (t->proto) - { - case PICO_PROTO_TCP: - if (t->portforward) - break; - else if (t->conn_active == 0 || t->conn_active > 360) /* conn active for > 24 hours */ - 
pico_ipv4_nat_del(t->nat_port, t->proto); - else if (t->rst || (t->fin_in && t->fin_out)) - t->conn_active = 0; - else - t->conn_active++; - - break; - - case PICO_PROTO_UDP: - if (t->portforward) - break; - else if (t->conn_active > 1) - pico_ipv4_nat_del(t->nat_port, t->proto); - else - t->conn_active++; - - break; - - case PICO_PROTO_ICMP4: - if (t->conn_active > 1) - pico_ipv4_nat_del(t->nat_port, t->proto); - else - t->conn_active++; - break; - - default: - /* unknown protocol in NAT table, delete when it has existed NAT_TIMEWAIT */ - if (t->conn_active > 1) - pico_ipv4_nat_del(t->nat_port, t->proto); - else - t->conn_active++; - } - } - - nat_dbg("NAT: after table cleanup:\n"); - pico_ipv4_nat_print_table(); - if (!pico_timer_add(PICO_NAT_TIMEWAIT, pico_ipv4_nat_table_cleanup, NULL)) { - nat_dbg("NAT: Failed to start cleanup timer\n"); - /* TODO no more NAT table cleanup now */ - } -} - -int pico_ipv4_port_forward(struct pico_ip4 nat_addr, uint16_t nat_port, struct pico_ip4 src_addr, uint16_t src_port, uint8_t proto, uint8_t flag) -{ - struct pico_nat_tuple *t = NULL; - struct pico_ip4 any_addr = { - 0 - }; - uint16_t any_port = 0; - - switch (flag) - { - case PICO_NAT_PORT_FORWARD_ADD: - t = pico_ipv4_nat_add(any_addr, any_port, src_addr, src_port, nat_addr, nat_port, proto); - if (!t) { - pico_err = PICO_ERR_EAGAIN; - return -1; - } - - t->portforward = 1; - break; - - case PICO_NAT_PORT_FORWARD_DEL: - return pico_ipv4_nat_del(nat_port, proto); - - default: - pico_err = PICO_ERR_EINVAL; - return -1; - } - - pico_ipv4_nat_print_table(); - return 0; -} - -int pico_ipv4_nat_inbound(struct pico_frame *f, struct pico_ip4 *link_addr) -{ - struct pico_nat_tuple *tuple = NULL; - struct pico_trans *trans = NULL; - struct pico_ipv4_hdr *net = (struct pico_ipv4_hdr *)f->net_hdr; - - if (!pico_ipv4_nat_is_enabled(link_addr)) - return -1; - - switch (net->proto) { -#ifdef PICO_SUPPORT_TCP - case PICO_PROTO_TCP: - { - struct pico_tcp_hdr *tcp = (struct pico_tcp_hdr 
*)f->transport_hdr; - trans = (struct pico_trans *)&tcp->trans; - tuple = pico_ipv4_nat_find_tuple(trans->dport, 0, 0, net->proto); - if (!tuple) - return -1; - - /* replace dst IP and dst PORT */ - net->dst = tuple->src_addr; - trans->dport = tuple->src_port; - /* recalculate CRC */ - tcp->crc = 0; - tcp->crc = short_be(pico_tcp_checksum_ipv4(f)); - break; - } -#endif -#ifdef PICO_SUPPORT_UDP - case PICO_PROTO_UDP: - { - struct pico_udp_hdr *udp = (struct pico_udp_hdr *)f->transport_hdr; - trans = (struct pico_trans *)&udp->trans; - tuple = pico_ipv4_nat_find_tuple(trans->dport, 0, 0, net->proto); - if (!tuple) - return -1; - - /* replace dst IP and dst PORT */ - net->dst = tuple->src_addr; - trans->dport = tuple->src_port; - /* recalculate CRC */ - udp->crc = 0; - udp->crc = short_be(pico_udp_checksum_ipv4(f)); - break; - } -#endif - case PICO_PROTO_ICMP4: - /* XXX reimplement */ - break; - - default: - nat_dbg("NAT ERROR: inbound NAT on erroneous protocol\n"); - return -1; - } - - pico_ipv4_nat_sniff_session(tuple, f, PICO_NAT_INBOUND); - net->crc = 0; - net->crc = short_be(pico_checksum(net, f->net_len)); - - nat_dbg("NAT: inbound translation {dst.addr, dport}: {%08X,%u} -> {%08X,%u}\n", - tuple->nat_addr.addr, short_be(tuple->nat_port), tuple->src_addr.addr, short_be(tuple->src_port)); - - return 0; -} - -int pico_ipv4_nat_outbound(struct pico_frame *f, struct pico_ip4 *link_addr) -{ - struct pico_nat_tuple *tuple = NULL; - struct pico_trans *trans = NULL; - struct pico_ipv4_hdr *net = (struct pico_ipv4_hdr *)f->net_hdr; - - if (!pico_ipv4_nat_is_enabled(link_addr)) - return -1; - - switch (net->proto) { -#ifdef PICO_SUPPORT_TCP - case PICO_PROTO_TCP: - { - struct pico_tcp_hdr *tcp = (struct pico_tcp_hdr *)f->transport_hdr; - trans = (struct pico_trans *)&tcp->trans; - tuple = pico_ipv4_nat_find_tuple(0, &net->src, trans->sport, net->proto); - if (!tuple) - tuple = pico_ipv4_nat_generate_tuple(f); - - /* replace src IP and src PORT */ - net->src = 
tuple->nat_addr; - trans->sport = tuple->nat_port; - /* recalculate CRC */ - tcp->crc = 0; - tcp->crc = short_be(pico_tcp_checksum_ipv4(f)); - break; - } -#endif -#ifdef PICO_SUPPORT_UDP - case PICO_PROTO_UDP: - { - struct pico_udp_hdr *udp = (struct pico_udp_hdr *)f->transport_hdr; - trans = (struct pico_trans *)&udp->trans; - tuple = pico_ipv4_nat_find_tuple(0, &net->src, trans->sport, net->proto); - if (!tuple) - tuple = pico_ipv4_nat_generate_tuple(f); - - /* replace src IP and src PORT */ - net->src = tuple->nat_addr; - trans->sport = tuple->nat_port; - /* recalculate CRC */ - udp->crc = 0; - udp->crc = short_be(pico_udp_checksum_ipv4(f)); - break; - } -#endif - case PICO_PROTO_ICMP4: - /* XXX reimplement */ - break; - - default: - nat_dbg("NAT ERROR: outbound NAT on erroneous protocol\n"); - return -1; - } - - pico_ipv4_nat_sniff_session(tuple, f, PICO_NAT_OUTBOUND); - net->crc = 0; - net->crc = short_be(pico_checksum(net, f->net_len)); - - nat_dbg("NAT: outbound translation {src.addr, sport}: {%08X,%u} -> {%08X,%u}\n", - tuple->src_addr.addr, short_be(tuple->src_port), tuple->nat_addr.addr, short_be(tuple->nat_port)); - - return 0; -} - -int pico_ipv4_nat_enable(struct pico_ipv4_link *link) -{ - if (link == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (!pico_timer_add(PICO_NAT_TIMEWAIT, pico_ipv4_nat_table_cleanup, NULL)) { - nat_dbg("NAT: Failed to start cleanup timer\n"); - return -1; - } - - nat_link = link; - - return 0; -} - -int pico_ipv4_nat_disable(void) -{ - nat_link = NULL; - return 0; -} - -int pico_ipv4_nat_is_enabled(struct pico_ip4 *link_addr) -{ - if (!nat_link) - return 0; - - if (nat_link->address.addr != link_addr->addr) - return 0; - - return 1; -} - -#endif -#endif diff --git a/kernel/picotcp/modules/pico_nat.h b/kernel/picotcp/modules/pico_nat.h deleted file mode 100644 index b6d7010..0000000 --- a/kernel/picotcp/modules/pico_nat.h +++ /dev/null @@ -1,90 +0,0 @@ 
-/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Kristof Roelants, Simon Maes, Brecht Van Cauwenberghe - *********************************************************************/ - -#ifndef INCLUDE_PICO_NAT -#define INCLUDE_PICO_NAT -#include "pico_frame.h" - -#define PICO_NAT_PORT_FORWARD_DEL 0 -#define PICO_NAT_PORT_FORWARD_ADD 1 - -#ifdef PICO_SUPPORT_NAT -void pico_ipv4_nat_print_table(void); -int pico_ipv4_nat_find(uint16_t nat_port, struct pico_ip4 *src_addr, uint16_t src_port, uint8_t proto); -int pico_ipv4_port_forward(struct pico_ip4 nat_addr, uint16_t nat_port, struct pico_ip4 src_addr, uint16_t src_port, uint8_t proto, uint8_t flag); - -int pico_ipv4_nat_inbound(struct pico_frame *f, struct pico_ip4 *link_addr); -int pico_ipv4_nat_outbound(struct pico_frame *f, struct pico_ip4 *link_addr); -int pico_ipv4_nat_enable(struct pico_ipv4_link *link); -int pico_ipv4_nat_disable(void); -int pico_ipv4_nat_is_enabled(struct pico_ip4 *link_addr); -#else - -#define pico_ipv4_nat_print_table() do {} while(0) -static inline int pico_ipv4_nat_inbound(struct pico_frame *f, struct pico_ip4 *link_addr) -{ - (void)f; - (void)link_addr; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -static inline int pico_ipv4_nat_outbound(struct pico_frame *f, struct pico_ip4 *link_addr) -{ - (void)f; - (void)link_addr; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -static inline int pico_ipv4_nat_enable(struct pico_ipv4_link *link) -{ - (void)link; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -static inline int pico_ipv4_nat_disable(void) -{ - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -static inline int pico_ipv4_nat_is_enabled(struct pico_ip4 *link_addr) -{ - (void)link_addr; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -static inline int 
pico_ipv4_nat_find(uint16_t nat_port, struct pico_ip4 *src_addr, uint16_t src_port, uint8_t proto) -{ - (void)nat_port; - (void)src_addr; - (void)src_port; - (void)proto; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -static inline int pico_ipv4_port_forward(struct pico_ip4 nat_addr, uint16_t nat_port, struct pico_ip4 src_addr, uint16_t src_port, uint8_t proto, uint8_t flag) -{ - (void)nat_addr; - (void)nat_port; - (void)src_addr; - (void)src_port; - (void)proto; - (void)flag; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} -#endif - -#endif /* _INCLUDE_PICO_NAT */ - diff --git a/kernel/picotcp/modules/pico_olsr.c b/kernel/picotcp/modules/pico_olsr.c deleted file mode 100644 index b606e5b..0000000 --- a/kernel/picotcp/modules/pico_olsr.c +++ /dev/null @@ -1,1167 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera - ********************************************************************/ - -#include "pico_stack.h" -#include "pico_config.h" -#include "pico_device.h" -#include "pico_ipv4.h" -#include "pico_arp.h" -#include "pico_socket.h" -#include "pico_olsr.h" -#ifdef PICO_SUPPORT_OLSR -#define DGRAM_MAX_SIZE (100 - 28) -#define MAX_OLSR_MEM (4 * DGRAM_MAX_SIZE) - -#ifdef DEBUG_OLSR -#define olsr_dbg dbg -#else -#define olsr_dbg(...) do {} while(0) -#endif - -int OOM(void); - - -#define OLSR_HELLO_INTERVAL ((uint32_t)5000) -#define OLSR_TC_INTERVAL ((uint32_t)9000) -#define OLSR_MAXJITTER ((uint32_t)(OLSR_HELLO_INTERVAL >> 2)) -static const struct pico_ip4 HOST_NETMASK = { - 0xffffffff -}; -#ifndef MIN -# define MIN(a, b) (a < b ? 
a : b) -#endif - -#define fresher(a, b) ((a > b) || ((b - a) > 32768)) - - -static uint16_t msg_counter; /* Global message sequence number */ - -/* Objects */ -struct olsr_dev_entry -{ - struct olsr_dev_entry *next; - struct pico_device *dev; - uint16_t pkt_counter; -}; - - -/* OLSR Protocol */ -#define OLSRMSG_HELLO 0xc9 -#define OLSRMSG_MID 0x03 -#define OLSRMSG_TC 0xca - -#define OLSRLINK_SYMMETRIC 0x06 -#define OLSRLINK_UNKNOWN 0x08 -#define OLSRLINK_MPR 0x0a - - -#define OLSR_PORT (short_be((uint16_t)698)) - - -/* Headers */ - -PACKED_STRUCT_DEF olsr_link -{ - uint8_t link_code; - uint8_t reserved; - uint16_t link_msg_size; -}; - -PACKED_STRUCT_DEF olsr_neighbor -{ - uint32_t addr; - uint8_t lq; - uint8_t nlq; - uint16_t reserved; -}; - -PACKED_STRUCT_DEF olsr_hmsg_hello -{ - uint16_t reserved; - uint8_t htime; - uint8_t willingness; -}; - -PACKED_STRUCT_DEF olsr_hmsg_tc -{ - uint16_t ansn; - uint16_t reserved; -}; - - -PACKED_STRUCT_DEF olsrmsg -{ - uint8_t type; - uint8_t vtime; - uint16_t size; - struct pico_ip4 orig; - uint8_t ttl; - uint8_t hop; - uint16_t seq; -}; - -PACKED_STRUCT_DEF olsrhdr -{ - uint16_t len; - uint16_t seq; -}; - - - -/* Globals */ -static struct pico_socket *udpsock = NULL; -uint16_t my_ansn = 0; -static struct olsr_route_entry *Local_interfaces = NULL; -static struct olsr_dev_entry *Local_devices = NULL; - -static struct olsr_dev_entry *olsr_get_deventry(struct pico_device *dev) -{ - struct olsr_dev_entry *cur = Local_devices; - while(cur) { - if (cur->dev == dev) - return cur; - - cur = cur->next; - } - return NULL; -} - -struct olsr_route_entry *olsr_get_ethentry(struct pico_device *vif) -{ - struct olsr_route_entry *cur = Local_interfaces; - while(cur) { - if (cur->iface == vif) - return cur; - - cur = cur->next; - } - return NULL; -} - -static struct olsr_route_entry *get_next_hop(struct olsr_route_entry *dst) -{ - struct olsr_route_entry *hop = dst; - while(hop) { - /* olsr_dbg("Finding next hop to %08x m=%d\n", 
hop->destination.addr, hop->metric); */ - if(hop->metric <= 1) - return hop; - - hop = hop->gateway; - } - return NULL; -} - -static inline void olsr_route_add(struct olsr_route_entry *el) -{ - /* char dest[16],nxdest[16]; */ - struct olsr_route_entry *nexthop; - - if(!el) - return; - - my_ansn++; - - nexthop = get_next_hop(el); - if (el->gateway && nexthop && (nexthop->destination.addr != el->destination.addr)) { - /* 2-hops route or more */ - el->next = el->gateway->children; - el->gateway->children = el; - el->link_type = OLSRLINK_MPR; - olsr_dbg("[OLSR] ----------Adding route to %08x via %08x metric %d\n", el->destination.addr, nexthop->destination.addr, el->metric); - pico_ipv4_route_add(el->destination, HOST_NETMASK, nexthop->destination, (int) el->metric, NULL); - } else if (el->iface) { - /* neighbor */ - struct olsr_route_entry *ei = olsr_get_ethentry(el->iface); - struct pico_ip4 no_gw = { - 0U - }; - if (el->link_type == OLSRLINK_UNKNOWN) - el->link_type = OLSRLINK_SYMMETRIC; - - if (ei) { - el->next = ei->children; - ei->children = el; - } - - olsr_dbg("[OLSR] ----------Adding neighbor %08x iface %s\n", el->destination.addr, el->iface->name); - - pico_ipv4_route_add(el->destination, HOST_NETMASK, no_gw, 1, pico_ipv4_link_by_dev(el->iface)); - } -} - -static inline void olsr_route_del(struct olsr_route_entry *r) -{ - struct olsr_route_entry *cur, *prev = NULL, *lst; - /* olsr_dbg("[OLSR] DELETING route..................\n"); */ - my_ansn++; - if (r->gateway) { - lst = r->gateway->children; - } else if (r->iface) { - lst = olsr_get_ethentry(r->iface); - } else { - lst = Local_interfaces; - } - - cur = lst, prev = NULL; - while(cur) { - if (cur == r) { - /* found */ - if (r->gateway) { - pico_ipv4_route_del(r->destination, HOST_NETMASK, r->metric); - olsr_dbg("[OLSR] Deleting route to %08x \n", r->destination.addr); - if (!prev) - r->gateway->children = r->next; - else - prev->next = r->next; - } - - while (r->children) { - olsr_route_del(r->children); - 
/* Orphans must die. */ - /* PICO_FREE(r->children); */ - } - return; - } - - prev = cur; - cur = cur->next; - } -} - -static struct olsr_route_entry *get_route_by_address(struct olsr_route_entry *lst, uint32_t ip) -{ - struct olsr_route_entry *found; - if(lst) { - if (lst->destination.addr == ip) { - return lst; - } - - /* recursive function, could be dangerous for stack overflow if a lot of routes are available... */ - found = get_route_by_address(lst->children, ip); - if (found) - return found; - - found = get_route_by_address(lst->next, ip); - if (found) - return found; - } - - return NULL; -} - -#define OLSR_C_SHIFT (uint32_t)4 /* 1/16 */ -#define DEFAULT_VTIME 288UL - -static uint8_t seconds2olsr(uint32_t seconds) -{ - uint16_t a, b; - /* olsr_dbg("seconds=%u\n", (uint16_t)seconds); */ - - if (seconds > 32767) - seconds = 32767; - - /* find largest b such as seconds/C >= 2^b */ - for (b = 1; b <= 0x0fu; b++) { - if ((uint16_t)(seconds * 16u) < (1u << b)) { - b--; - break; - } - } - /* olsr_dbg("b=%u", b); */ - /* compute the expression 16*(T/(C*(2^b))-1), which may not be a - integer, and round it up. 
This results in the value for 'a' */ - /* a = (T / ( C * (1u << b) ) ) - 1u; */ - { - uint16_t den = ((uint16_t)(1u << b) >> 4u); - /* olsr_dbg(" den=%u ", den); */ - if (den == 0) - { - /* olsr_dbg("div by 0!\n"); */ - den = 1u; - } - - a = (uint16_t)(((uint16_t)seconds / den) - (uint16_t)1); - } - /* a = a & 0x0Fu; */ - - /* olsr_dbg(" a=%u\n", a); */ - - /* if 'a' is equal to 16: increment 'b' by one, and set 'a' to 0 */ - if (16u == a) { - b++; - a = 0u; - } - - return (uint8_t)((a << 4u) + b); -} - -static uint32_t olsr2seconds(uint8_t olsr) -{ - uint8_t a, b; - uint16_t seconds; - /* olsr_dbg("olsr format: %u -- ", olsr); */ - a = (olsr >> 4) & 0xFu; - b = olsr & 0x0f; - /* olsr_dbg("o2s: a=%u, b=%u\n", a,b); */ - if (b < 4) - seconds = (uint16_t)(((1u << b) + (uint16_t)(((uint16_t)(a << b) >> 4u) & 0xFu)) >> OLSR_C_SHIFT); - else - seconds = (uint16_t)(((1u << b) + (uint16_t)(((uint16_t)(a << (b - 4))) & 0xFu)) >> OLSR_C_SHIFT); - - /* olsr_dbg("o2s: seconds: %u\n", seconds); */ - return seconds; -} - -static void olsr_garbage_collector(struct olsr_route_entry *sublist) -{ - if(!sublist) - return; - - if (sublist->time_left <= 0) { - olsr_route_del(sublist); - PICO_FREE(sublist); - return; - } else { - /* sublist->time_left -= 2u; */ - sublist->time_left -= 8u; - } - - olsr_garbage_collector(sublist->children); - olsr_garbage_collector(sublist->next); -} - -struct olsr_fwd_pkt -{ - void *buf; - uint16_t len; - struct pico_device *pdev; -}; - -static uint32_t buffer_mem_used = 0U; - -static void olsr_process_out(pico_time now, void *arg) -{ - struct olsr_fwd_pkt *p = (struct olsr_fwd_pkt *)arg; - struct pico_ip4 bcast; - struct pico_ipv4_link *addr; - struct olsr_dev_entry *pdev = Local_devices; - struct olsrhdr *ohdr; - (void)now; - - /* Send the thing out */ - ohdr = (struct olsrhdr *)p->buf; - ohdr->len = short_be((uint16_t)p->len); - - if (p->pdev) { - struct olsr_dev_entry *odev = olsr_get_deventry(p->pdev); - if (!odev) { - goto out_free; - } - - addr = 
pico_ipv4_link_by_dev(p->pdev); - if (!addr) - goto out_free; - - ohdr->seq = short_be((uint16_t)(odev->pkt_counter)++); - if (addr->address.addr) - bcast.addr = (addr->netmask.addr & addr->address.addr) | (~addr->netmask.addr); - else - bcast.addr = 0xFFFFFFFFu; - - if ( 0 > pico_socket_sendto(udpsock, p->buf, p->len, &bcast, OLSR_PORT)) { - olsr_dbg("olsr send\n"); - } - } else { - while(pdev) { - ohdr->seq = short_be((uint16_t)(pdev->pkt_counter++)); - addr = pico_ipv4_link_by_dev(pdev->dev); - if (!addr) - continue; - - if (addr->address.addr) - bcast.addr = (addr->netmask.addr & addr->address.addr) | (~addr->netmask.addr); - else - bcast.addr = 0xFFFFFFFFu; - - if ( 0 > pico_socket_sendto(udpsock, p->buf, p->len, &bcast, OLSR_PORT)) { - olsr_dbg("olsr send\n"); - } - - pdev = pdev->next; - } - } - -out_free: - PICO_FREE(p->buf); /* XXX <-- broken? */ - buffer_mem_used -= DGRAM_MAX_SIZE; - PICO_FREE(p); -} - -static void olsr_scheduled_output(uint32_t when, void *buffer, uint16_t size, struct pico_device *pdev) -{ - struct olsr_fwd_pkt *p; - /* olsr_dbg("Scheduling olsr packet, type:%s, size: %x\n", when == OLSR_HELLO_INTERVAL?"HELLO":"TC", size); */ - if ((buffer_mem_used + DGRAM_MAX_SIZE) > MAX_OLSR_MEM) { - PICO_FREE(buffer); - return; - } - - p = PICO_ZALLOC(sizeof(struct olsr_fwd_pkt)); - if (!p) { - OOM(); - PICO_FREE(buffer); - return; - } - - p->buf = buffer; - p->len = size; - p->pdev = pdev; - buffer_mem_used += DGRAM_MAX_SIZE; - if (!pico_timer_add(1 + when - ((pico_rand() % OLSR_MAXJITTER)), &olsr_process_out, p)) { - olsr_dbg("OLSR: Failed to start process timer\n"); - OOM(); - PICO_FREE(p); - PICO_FREE(buffer); - } -} - - -static void refresh_routes(void) -{ - struct olsr_route_entry *local; - struct olsr_dev_entry *icur = Local_devices; - - /* Refresh local entries */ - /* Step 1: set zero expire time for local addresses and neighbors*/ - local = Local_interfaces; - while(local) { - local = local->next; - } - /* Step 2: refresh timer for entries 
that are still valid. - * Add new entries. - */ - while(icur) { - struct pico_ipv4_link *lnk = NULL; - do { - lnk = pico_ipv4_link_by_dev_next(icur->dev, lnk); - if (!lnk) break; - - local = olsr_get_ethentry(icur->dev); - if (local) { - local->time_left = (OLSR_HELLO_INTERVAL << 2); - } else if (lnk) { - struct olsr_route_entry *e = PICO_ZALLOC(sizeof (struct olsr_route_entry)); - if (!e) { - olsr_dbg("olsr: adding local route entry\n"); - OOM(); - return; - } - - e->destination.addr = lnk->address.addr; /* Always pick the first address */ - e->time_left = (OLSR_HELLO_INTERVAL << 2); - e->iface = icur->dev; - e->metric = 0; - e->lq = 0xFF; - e->nlq = 0xFF; - e->next = Local_interfaces; - Local_interfaces = e; - } - } while (lnk); - - /* disabled if device type != eth */ - /* refresh_neighbors(icur->dev); */ - icur = icur->next; - } -} - -static uint32_t olsr_build_hello_neighbors(uint8_t *buf, uint32_t size, struct olsr_route_entry **bookmark) -{ - uint32_t ret = 0; - struct olsr_route_entry *local, *neighbor; - struct olsr_neighbor *dst = (struct olsr_neighbor *) buf; - uint32_t total_link_size = sizeof(struct olsr_neighbor) + sizeof(struct olsr_link); - local = Local_interfaces; - while (local) { - neighbor = local->children; - if (*bookmark) { - while ((neighbor) && *bookmark != neighbor) - neighbor = neighbor->next; - } - - while (neighbor) { - struct olsr_link *li = (struct olsr_link *) (buf + ret); - - if ((size - ret) < total_link_size) { - /* Incomplete list, new datagram needed. 
*/ - *bookmark = neighbor; - return ret; - } - - li->link_code = neighbor->link_type; - li->reserved = 0; - li->link_msg_size = short_be((uint16_t)total_link_size); - ret += (uint32_t)sizeof(struct olsr_link); - dst = (struct olsr_neighbor *) (buf + ret); - dst->addr = neighbor->destination.addr; - dst->nlq = neighbor->nlq; - dst->lq = neighbor->lq; - dst->reserved = 0; - ret += (uint32_t)sizeof(struct olsr_neighbor); - neighbor = neighbor->next; - } - local = local->next; - } - *bookmark = NULL; /* All the list was visited, no more dgrams needed */ - return ret; -} - -static uint32_t olsr_build_tc_neighbors(uint8_t *buf, uint32_t size, struct olsr_route_entry **bookmark) -{ - uint32_t ret = 0; - struct olsr_route_entry *local, *neighbor; - struct olsr_neighbor *dst = (struct olsr_neighbor *) buf; - local = Local_interfaces; - while (local) { - neighbor = local->children; - if (*bookmark) { - while ((neighbor) && *bookmark != neighbor) - neighbor = neighbor->next; - } - - while (neighbor) { - if (size - ret < sizeof(struct olsr_neighbor)) { - /* Incomplete list, new datagram needed. 
*/ - *bookmark = neighbor; - return ret; - } - - dst->addr = neighbor->destination.addr; - dst->nlq = neighbor->nlq; - dst->lq = neighbor->lq; - dst->reserved = 0; - ret += (uint32_t)sizeof(struct olsr_neighbor); - dst = (struct olsr_neighbor *) (buf + ret); - neighbor = neighbor->next; - } - local = local->next; - } - *bookmark = NULL; /* All the list was visited, no more dgrams needed */ - return ret; -} - -static uint32_t olsr_build_mid(uint8_t *buf, uint32_t size, struct pico_device *excluded) -{ - uint32_t ret = 0; - struct olsr_route_entry *local; - struct pico_ip4 *dst = (struct pico_ip4 *) buf; - local = Local_interfaces; - while (local) { - if (local->iface != excluded) { - dst->addr = local->destination.addr; - ret += (uint32_t)sizeof(uint32_t); - dst = (struct pico_ip4 *) (buf + ret); - if (ret >= size) - return (uint32_t)(ret - sizeof(uint32_t)); - } - - local = local->next; - } - return ret; -} - - -static void olsr_compose_tc_dgram(struct pico_device *pdev, struct pico_ipv4_link *ep) -{ - struct olsrmsg *msg_tc, *msg_mid; - uint32_t size = 0, r; - struct olsr_route_entry *last_neighbor = NULL; - uint8_t *dgram; - struct olsr_hmsg_tc *tc; - do { - dgram = PICO_ZALLOC(DGRAM_MAX_SIZE); - if (!dgram) { - OOM(); - return; - } - - size = (uint32_t)sizeof(struct olsrhdr); - ep = pico_ipv4_link_by_dev(pdev); - if (!ep) { - PICO_FREE(dgram); - return; - } - - - if (!last_neighbor) { - /* MID Message */ - - msg_mid = (struct olsrmsg *)(dgram + size); - size += (uint32_t)sizeof(struct olsrmsg); - msg_mid->type = OLSRMSG_MID; - msg_mid->vtime = seconds2olsr(60); - msg_mid->orig.addr = ep->address.addr; - msg_mid->ttl = 0xFF; - msg_mid->hop = 0; - msg_mid->seq = short_be(msg_counter++); - r = olsr_build_mid(dgram + size, DGRAM_MAX_SIZE - size, pdev); - if (r == 0) { - size -= (uint32_t)sizeof(struct olsrmsg); - } else { - if ((size + r) > DGRAM_MAX_SIZE) - return; - - size += r; - msg_mid->size = short_be((uint16_t)(sizeof(struct olsrmsg) + r)); - } - } - - if 
(size + sizeof(struct olsrmsg) > DGRAM_MAX_SIZE) - return; - - msg_tc = (struct olsrmsg *) (dgram + size); - size += (uint32_t)sizeof(struct olsrmsg); - msg_tc->type = OLSRMSG_TC; - msg_tc->vtime = seconds2olsr(DEFAULT_VTIME); - msg_tc->orig.addr = ep->address.addr; - msg_tc->ttl = 0xFF; - msg_tc->hop = 0; - msg_tc->seq = short_be(msg_counter++); - tc = (struct olsr_hmsg_tc *)(dgram + size); - size += (uint32_t)sizeof(struct olsr_hmsg_tc); - if (size > DGRAM_MAX_SIZE) - return; - - tc->ansn = short_be(my_ansn); - r = olsr_build_tc_neighbors(dgram + size, DGRAM_MAX_SIZE - size, &last_neighbor); - size += r; - msg_tc->size = short_be((uint16_t)(sizeof(struct olsrmsg) + sizeof(struct olsr_hmsg_tc) + r)); - olsr_scheduled_output(OLSR_TC_INTERVAL, dgram, (uint16_t)size, pdev ); - } while(last_neighbor); -} - -static void olsr_compose_hello_dgram(struct pico_device *pdev, struct pico_ipv4_link *ep) -{ - struct olsrmsg *msg_hello; - uint32_t size = 0, r; - struct olsr_route_entry *last_neighbor = NULL; - uint8_t *dgram; - struct olsr_hmsg_hello *hello; - /* HELLO Message */ - do { - dgram = PICO_ZALLOC(DGRAM_MAX_SIZE); - if (!dgram) { - OOM(); - return; - } - - size = (uint32_t)sizeof(struct olsrhdr); - msg_hello = (struct olsrmsg *) (dgram + size); - size += (uint32_t)sizeof(struct olsrmsg); - msg_hello->type = OLSRMSG_HELLO; - msg_hello->vtime = seconds2olsr(DEFAULT_VTIME); - msg_hello->orig.addr = ep->address.addr; - msg_hello->ttl = 1; - msg_hello->hop = 0; - msg_hello->seq = short_be(msg_counter++); - hello = (struct olsr_hmsg_hello *)(dgram + size); - size += (uint32_t)sizeof(struct olsr_hmsg_hello); - hello->reserved = 0; - hello->htime = seconds2olsr(OLSR_HELLO_INTERVAL); - hello->htime = 0x05; /* Todo: find and define values */ - hello->willingness = 0x07; - if (DGRAM_MAX_SIZE > size) { - r = olsr_build_hello_neighbors(dgram + size, DGRAM_MAX_SIZE - size, &last_neighbor); - if (r == 0) { - /* olsr_dbg("Building hello message\n"); */ - PICO_FREE(dgram); - return; 
- } - } - - size += r; - msg_hello->size = short_be((uint16_t)(sizeof(struct olsrmsg) + sizeof(struct olsr_hmsg_hello) + r)); - olsr_scheduled_output(OLSR_HELLO_INTERVAL, dgram, (uint16_t)size, pdev ); - } while(last_neighbor); -} - -static void olsr_make_dgram(struct pico_device *pdev, int full) -{ - struct pico_ipv4_link *ep; - ep = pico_ipv4_link_by_dev(pdev); - if (!ep) { - return; - } - - if (!full) { - olsr_compose_hello_dgram(pdev, ep); - } else { - olsr_compose_tc_dgram(pdev, ep); - } /*if full */ - -} - -/* Old code was relying on ethernet arp requests */ -#define arp_storm(...) do {} while(0) - -static void recv_mid(uint8_t *buffer, uint32_t len, struct olsr_route_entry *origin) -{ - uint32_t parsed = 0; - uint32_t *address; - struct olsr_route_entry *e; - - if (len % sizeof(uint32_t)) /*drop*/ - return; - - while (len > parsed) { - address = (uint32_t *)(buffer + parsed); - e = get_route_by_address(Local_interfaces, *address); - if (!e) { - e = PICO_ZALLOC(sizeof(struct olsr_route_entry)); - if (!e) { - olsr_dbg("olsr allocating route\n"); - OOM(); - return; - } - - e->time_left = (OLSR_HELLO_INTERVAL << 2); - e->destination.addr = *address; - e->gateway = origin; - /* e->iface = origin->iface; */ - e->iface = NULL; - e->metric = (uint16_t)(origin->metric + 1u); - e->lq = origin->lq; - e->nlq = origin->nlq; - olsr_route_add(e); - arp_storm(&e->destination); - } else if (e->metric > (origin->metric + 1)) { - olsr_route_del(e); - e->metric = (uint16_t)(origin->metric + 1u); - e->gateway = origin; - e->time_left = (OLSR_HELLO_INTERVAL << 2); - olsr_route_add(e); - } - - parsed += (uint32_t)sizeof(uint32_t); - } -} - -/* static void recv_hello(uint8_t *buffer, uint32_t len, struct olsr_route_entry *origin) */ -static void recv_hello(uint8_t *buffer, uint32_t len, struct olsr_route_entry *origin, uint16_t hops) -{ - struct olsr_link *li; - struct olsr_route_entry *e; - uint32_t parsed = 0; - struct olsr_neighbor *neigh; - - if (!origin) - return; - - /* Don't 
parse hello messages that were forwarded */ - if (hops > 0 || origin->metric > 1) - return; - - if (pico_ipv4_link_get(&origin->destination)) - return; - - - while (len > parsed) { - li = (struct olsr_link *) buffer; - neigh = (struct olsr_neighbor *)(buffer + parsed + sizeof(struct olsr_link)); - parsed += short_be(li->link_msg_size); - e = get_route_by_address(Local_interfaces, neigh->addr); - if (!e) { - e = PICO_ZALLOC(sizeof(struct olsr_route_entry)); - if (!e) { - olsr_dbg("olsr allocating route\n"); - OOM(); - return; - } - - e->time_left = (OLSR_HELLO_INTERVAL << 2); - e->destination.addr = neigh->addr; - e->gateway = origin; - e->iface = NULL; - e->metric = (uint16_t)(origin->metric + hops + 1); - e->link_type = OLSRLINK_UNKNOWN; - e->lq = MIN(origin->lq, neigh->lq); - e->nlq = MIN(origin->nlq, neigh->nlq); - olsr_route_add(e); - arp_storm(&e->destination); - } else if ((e->gateway != origin) && (origin->metric > 1) && (e->metric > (origin->metric + hops + 1))) { - olsr_route_del(e); - e->metric = (uint16_t)(origin->metric + hops + 1); - e->gateway = origin; - e->time_left = (OLSR_HELLO_INTERVAL << 2); - olsr_route_add(e); - } else { - e->time_left = (OLSR_HELLO_INTERVAL << 2); - } - } -} - -/* static uint32_t reconsider_topology(uint8_t *buf, uint32_t size, struct olsr_route_entry *e) */ -static uint32_t reconsider_topology(uint8_t *buf, uint32_t size, struct olsr_route_entry *e) -{ - struct olsr_hmsg_tc *tc = (struct olsr_hmsg_tc *) buf; - uint16_t new_ansn = short_be(tc->ansn); - uint32_t parsed = sizeof(struct olsr_hmsg_tc); - struct olsr_route_entry *rt; - struct olsr_neighbor *n; - uint32_t retval = 0; - - if (!e->advertised_tc) - retval = 1; - - if (e->advertised_tc && fresher(new_ansn, e->ansn)) - { - PICO_FREE(e->advertised_tc); /* <--- XXX check invalid free? 
*/ - e->advertised_tc = NULL; - retval = 1; - } - - /* Ignore self packets */ - if (pico_ipv4_link_get(&e->destination)) - return 0; - - if (!e->advertised_tc) { - e->advertised_tc = PICO_ZALLOC(size); - if (!e->advertised_tc) { - OOM(); - olsr_dbg("Allocating forward packet\n"); - return 0; - } - - memcpy(e->advertised_tc, buf, size); - e->ansn = new_ansn; - while (parsed < size) { - n = (struct olsr_neighbor *) (buf + parsed); - parsed += (uint32_t)sizeof(struct olsr_neighbor); - rt = get_route_by_address(Local_interfaces, n->addr); - if (rt && (rt->gateway == e)) { - /* Refresh existing node */ - rt->time_left = e->time_left; - } else if (!rt || (rt->metric > (e->metric + 1)) || (rt->nlq < n->nlq)) { - if (!rt) { - rt = PICO_ZALLOC(sizeof (struct olsr_route_entry)); - if (!rt) { - OOM(); - return retval; - } - - rt->destination.addr = n->addr; - rt->link_type = OLSRLINK_UNKNOWN; - } else { - olsr_route_del(rt); - } - - rt->iface = e->iface; - rt->gateway = e; - rt->metric = (uint16_t)(e->metric + 1); - rt->lq = n->lq; /* 0xff */ - rt->nlq = n->nlq; /* 0xff */ - rt->time_left = e->time_left; /* 256 */ - olsr_route_add(rt); - } - } - /* olsr_dbg("Routes changed...\n"); */ - } - - return retval; -} - - -static void olsr_recv(uint8_t *buffer, uint32_t len) -{ - struct olsrmsg *msg; - struct olsrhdr *oh = (struct olsrhdr *) buffer; - struct olsr_route_entry *ancestor; - uint32_t parsed = 0; - uint16_t outsize = 0; - uint8_t *datagram; - - if (len != short_be(oh->len)) { - return; - } - - /* RFC 3626, section 3.4, if a packet is too small, it is silently discarded */ - if (len < 16) { - return; - } - - parsed += (uint32_t)sizeof(struct olsrhdr); - - datagram = PICO_ZALLOC(DGRAM_MAX_SIZE); - if (!datagram) { - OOM(); - return; - } - - outsize = (uint16_t) (outsize + (sizeof(struct olsrhdr))); - /* Section 1: parsing received messages. 
*/ - while (len > parsed) { - struct olsr_route_entry *origin; - msg = (struct olsrmsg *) (buffer + parsed); - origin = get_route_by_address(Local_interfaces, msg->orig.addr); - - if(pico_ipv4_link_find(&msg->orig) != NULL) { - /* olsr_dbg("rebound\n"); */ - parsed += short_be(msg->size); - continue; - } - - /* OLSR's TTL expired. */ - if (msg->ttl < 1u) { - parsed += short_be(msg->size); - continue; - } - - if (!origin) { - if (msg->hop == 0) { - struct olsr_route_entry *e = PICO_ZALLOC(sizeof (struct olsr_route_entry)); - if (!e) { - parsed += short_be(msg->size); - OOM(); - break; - } - - e->destination.addr = msg->orig.addr; - e->link_type = OLSRLINK_SYMMETRIC; - e->time_left = (OLSR_HELLO_INTERVAL << 2); - e->iface = Local_devices->dev; - e->gateway = olsr_get_ethentry(e->iface); - e->metric = 1; - e->lq = 0xFF; - e->nlq = 0xFF; - olsr_route_add(e); - } - - parsed += short_be(msg->size); - continue; - } - - /* We know this is a Master host and a neighbor */ - origin->link_type = OLSRLINK_MPR; - origin->time_left = olsr2seconds(msg->vtime); - switch(msg->type) { - case OLSRMSG_HELLO: - ancestor = olsr_get_ethentry(origin->iface); - if ((origin->metric > 1) && ancestor) { - olsr_route_del(origin); - origin->gateway = ancestor; - origin->metric = 1; - olsr_route_add(origin); - } - - recv_hello(buffer + (uint32_t)parsed + (uint32_t)sizeof(struct olsrmsg) + (uint32_t)sizeof(struct olsr_hmsg_hello), - (uint32_t) ((short_be(msg->size) - (sizeof(struct olsrmsg))) - (uint32_t)sizeof(struct olsr_hmsg_hello)), - origin, msg->hop); - msg->ttl = 0; - break; - case OLSRMSG_MID: - if ((origin->seq != 0) && (!fresher(short_be(msg->seq), origin->seq))) { - msg->ttl = 0; - } else { - recv_mid(buffer + parsed + sizeof(struct olsrmsg), (uint32_t)(short_be(msg->size) - (sizeof(struct olsrmsg))), origin); - /* olsr_dbg("MID forwarded from origin %08x (seq: %u)\n", long_be(msg->orig.addr), short_be(msg->seq)); */ - origin->seq = short_be(msg->seq); - } - - break; - case OLSRMSG_TC: 
- if(!pico_ipv4_link_find(&origin->destination)) { - reconsider_topology(buffer + parsed + sizeof(struct olsrmsg), (uint32_t)(short_be(msg->size) - (sizeof(struct olsrmsg))), origin); - if ((origin->seq != 0) && (!fresher(short_be(msg->seq), origin->seq))) { - msg->ttl = 0; - } else { - /* olsr_dbg("TC forwarded from origin %08x (seq: %u)\n", long_be(msg->orig.addr), short_be(msg->seq)); */ - origin->seq = short_be(msg->seq); - } - } - - break; - default: - PICO_FREE(datagram); - return; - } - - if (msg->ttl > 1) { - msg->hop++; - msg->ttl--; - memcpy(datagram + outsize, msg, short_be(msg->size)); - outsize = (uint16_t)(outsize + short_be(msg->size)); - } - - parsed += short_be(msg->size); - } - /* Section 2: forwarding parsed messages that got past the filter. */ - if ((outsize > sizeof(struct olsrhdr))) { - /* Finalize FWD packet */ - olsr_scheduled_output(OLSR_MAXJITTER, datagram, outsize, NULL); - } else { - /* Nothing to forward. */ - PICO_FREE(datagram); - } -} - -static void wakeup(uint16_t ev, struct pico_socket *s) -{ - unsigned char *recvbuf; - int r = 0; - struct pico_ip4 ANY = { - 0 - }; - uint16_t port = OLSR_PORT; - recvbuf = PICO_ZALLOC(DGRAM_MAX_SIZE); - if (!recvbuf) { - OOM(); - return; - } - - if (ev & PICO_SOCK_EV_RD) { - r = pico_socket_recv(s, recvbuf, DGRAM_MAX_SIZE); - if (r > 0) - olsr_recv(recvbuf, (uint32_t)r); - } - - if (ev == PICO_SOCK_EV_ERR) { - pico_socket_close(udpsock); - udpsock = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &wakeup); - if (udpsock) - pico_socket_bind(udpsock, &ANY, &port); - } - - PICO_FREE(recvbuf); -} - -static void olsr_hello_tick(pico_time when, void *unused) -{ - struct olsr_dev_entry *d; - (void)when; - (void)unused; - olsr_garbage_collector(Local_interfaces); - refresh_routes(); - d = Local_devices; - while(d) { - olsr_make_dgram(d->dev, 0); - d = d->next; - } - if (!pico_timer_add(OLSR_HELLO_INTERVAL, &olsr_hello_tick, NULL)) { - olsr_dbg("OLSR: Failed to start hello_tick timer\n"); - /* TODO no 
more ticks now */ - } -} - -static void olsr_tc_tick(pico_time when, void *unused) -{ - struct olsr_dev_entry *d; - (void)when; - (void)unused; - d = Local_devices; - while(d) { - olsr_make_dgram(d->dev, 1); - d = d->next; - } - if (!pico_timer_add(OLSR_TC_INTERVAL, &olsr_tc_tick, NULL)) { - olsr_dbg("OLSR: Failed to start tc_tick timer\n"); - /* TODO no more ticks now */ - } -} - - -/* Public interface */ - -void pico_olsr_init(void) -{ - uint32_t hello_timer = 0; - struct pico_ip4 ANY = { - 0 - }; - uint16_t port = OLSR_PORT; - olsr_dbg("OLSR initialized.\n"); - if (!udpsock) { - udpsock = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &wakeup); - if (udpsock) - pico_socket_bind(udpsock, &ANY, &port); - } - - hello_timer = pico_timer_add(pico_rand() % 100, &olsr_hello_tick, NULL); - if (!hello_timer) { - olsr_dbg("OLSR: Failed to start hello_tick timer\n"); - return; - } - if (!pico_timer_add(pico_rand() % 900, &olsr_tc_tick, NULL)) { - olsr_dbg("OLSR: Failed to start tc_tick timer\n"); - pico_timer_cancel(hello_timer); - } -} - - -int OOM(void) -{ - volatile int c = 3600; - c++; - c++; - c++; - return -1; -} - -int pico_olsr_add(struct pico_device *dev) -{ - struct pico_ipv4_link *lnk = NULL; - struct olsr_dev_entry *od; - - - if (!dev) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* olsr_dbg("OLSR: Adding device %s\n", dev->name); */ - od = PICO_ZALLOC(sizeof(struct olsr_dev_entry)); - if (!od) { - pico_err = PICO_ERR_ENOMEM; - /* OOM(); */ - return -1; - } - - od->dev = dev; - od->next = Local_devices; - Local_devices = od; - - do { - char ipaddr[20]; - lnk = pico_ipv4_link_by_dev_next(dev, lnk); - if (lnk) { - struct olsr_route_entry *e = PICO_ZALLOC(sizeof(struct olsr_route_entry)); - /* olsr_dbg("OLSR: Found IP address %08x\n", long_be(lnk->address.addr)); */ - pico_ipv4_to_string(ipaddr, (lnk->address.addr)); - /* olsr_dbg("OLSR: Found IP address %s\n", ipaddr); */ - if (!e) { - olsr_dbg("olsr allocating route\n"); - pico_err = PICO_ERR_ENOMEM; 
- return -1; - } - - e->destination.addr = lnk->address.addr; - e->link_type = OLSRLINK_SYMMETRIC; - e->time_left = (OLSR_HELLO_INTERVAL << 2); - e->gateway = NULL; - e->children = NULL; - e->iface = dev; - e->metric = 0; - e->lq = 0xFF; - e->nlq = 0xFF; - e->next = Local_interfaces; - Local_interfaces = e; - - } - } while(lnk); - - return 0; -} - -#endif diff --git a/kernel/picotcp/modules/pico_olsr.h b/kernel/picotcp/modules/pico_olsr.h deleted file mode 100644 index b111188..0000000 --- a/kernel/picotcp/modules/pico_olsr.h +++ /dev/null @@ -1,32 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Daniele Lacamera - *********************************************************************/ -#ifndef PICO_OLSR_H -#define PICO_OLSR_H - - -/* Objects */ -struct olsr_route_entry -{ - struct olsr_route_entry *next; - uint32_t time_left; - struct pico_ip4 destination; - struct olsr_route_entry *gateway; - struct pico_device *iface; - uint16_t metric; - uint8_t link_type; - struct olsr_route_entry *children; - uint16_t ansn; - uint16_t seq; - uint8_t lq, nlq; - uint8_t *advertised_tc; -}; - - -void pico_olsr_init(void); -int pico_olsr_add(struct pico_device *dev); -struct olsr_route_entry *olsr_get_ethentry(struct pico_device *vif); -#endif diff --git a/kernel/picotcp/modules/pico_posix.c b/kernel/picotcp/modules/pico_posix.c deleted file mode 100644 index 659cb4c..0000000 --- a/kernel/picotcp/modules/pico_posix.c +++ /dev/null @@ -1,99 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Authors: Andrei Carp, Maarten Vandersteegen - *********************************************************************/ - -#ifdef PICO_SUPPORT_THREADING - -#include -#include -#include "pico_config.h" - -/* POSIX mutex implementation */ -void *pico_mutex_init(void) -{ - pthread_mutex_t *m; - m = (pthread_mutex_t *)PICO_ZALLOC(sizeof(pthread_mutex_t)); - pthread_mutex_init(m, NULL); - return m; -} - -void pico_mutex_destroy(void *mux) -{ - PICO_FREE(mux); - mux = NULL; -} - -void pico_mutex_lock(void *mux) -{ - if (mux == NULL) return; - - pthread_mutex_t *m = (pthread_mutex_t *)mux; - pthread_mutex_lock(m); -} - -void pico_mutex_unlock(void *mux) -{ - if (mux == NULL) return; - - pthread_mutex_t *m = (pthread_mutex_t *)mux; - pthread_mutex_unlock(m); -} - -/* POSIX semaphore implementation */ -void *pico_sem_init(void) -{ - sem_t *s; - s = (sem_t *)PICO_ZALLOC(sizeof(sem_t)); - sem_init(s, 0, 0); - return s; -} - -void pico_sem_destroy(void *sem) -{ - PICO_FREE(sem); - sem = NULL; -} - -void pico_sem_post(void *sem) -{ - if (sem == NULL) return; - - sem_t *s = (sem_t *)sem; - sem_post(s); -} - -int pico_sem_wait(void *sem, int timeout) -{ - struct timespec t; - if (sem == NULL) return 0; - - sem_t *s = (sem_t *)sem; - - if (timeout < 0) { - sem_wait(s); - } else { - clock_gettime(CLOCK_REALTIME, &t); - t.tv_sec += timeout / 1000; - t.tv_nsec += (timeout % 1000) * 1000000; - if (sem_timedwait(s, &t) == -1) - return -1; - } - - return 0; -} - -/* POSIX thread implementation */ -void *pico_thread_create(void *(*routine)(void *), void *arg) -{ - pthread_t *thread; - thread = (pthread_t *)PICO_ZALLOC(sizeof(pthread_t)); - - if (pthread_create(thread, NULL, routine, arg) == -1) - return NULL; - - return thread; -} -#endif /* PICO_SUPPORT_THREADING */ diff --git a/kernel/picotcp/modules/pico_slaacv4.c b/kernel/picotcp/modules/pico_slaacv4.c deleted file mode 100644 index 7b11cf8..0000000 --- a/kernel/picotcp/modules/pico_slaacv4.c +++ /dev/null @@ -1,307 +0,0 @@ 
-/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Bogdan Lupu - *********************************************************************/ -#include "pico_slaacv4.h" -#include "pico_arp.h" -#include "pico_constants.h" -#include "pico_stack.h" -#include "pico_hotplug_detection.h" - -#ifdef PICO_SUPPORT_SLAACV4 - -#ifdef DEBUG_SLAACV4 -#define slaacv4_dbg dbg -#else -#define slaacv4_dbg(...) do {} while(0) -#endif - -#define SLAACV4_NETWORK ((long_be(0xa9fe0000))) -#define SLAACV4_NETMASK ((long_be(0xFFFF0000))) -#define SLAACV4_MINRANGE (0x00000100) /* In host order */ -#define SLAACV4_MAXRANGE (0x0000FDFF) /* In host order */ - -#define SLAACV4_CREATE_IPV4(seed) ((long_be((seed % SLAACV4_MAXRANGE) + SLAACV4_MINRANGE) & ~SLAACV4_NETMASK) | SLAACV4_NETWORK) - -#define PROBE_WAIT 1 /* delay between two tries during claim */ -#define PROBE_NB 3 /* number of probe packets during claim */ -/* #define PROBE_MIN 1 */ -/* #define PROBE_MAX 2 */ -#define ANNOUNCE_WAIT 2 /* delay before start announcing */ -#define ANNOUNCE_NB 2 /* number of announcement packets */ -#define ANNOUNCE_INTERVAL 2 /* time between announcement packets */ -#define MAX_CONFLICTS 10 /* max conflicts before rate limiting */ -#define MAX_CONFLICTS_FAIL 20 /* max conflicts before declaring failure */ -#define RATE_LIMIT_INTERVAL 60 /* time between successive attempts */ -#define DEFEND_INTERVAL 10 /* minimum interval between defensive ARP */ - -enum slaacv4_state { - SLAACV4_RESET = 0, - SLAACV4_CLAIMING, - SLAACV4_CLAIMED, - SLAACV4_ANNOUNCING, - SLAACV4_ERROR -}; - -struct slaacv4_cookie { - enum slaacv4_state state; - uint8_t probe_try_nb; - uint8_t conflict_nb; - uint8_t announce_nb; - struct pico_ip4 ip; - struct pico_device *device; - uint32_t timer; - void (*cb)(struct pico_ip4 *ip, uint8_t code); -}; - -static void 
pico_slaacv4_hotplug_cb(struct pico_device *dev, int event); - -static struct slaacv4_cookie slaacv4_local; - -static uint32_t pico_slaacv4_getip(struct pico_device *dev, uint8_t rand) -{ - uint32_t seed = 0; - if (dev->eth != NULL) - { - seed = pico_hash((const uint8_t *)dev->eth->mac.addr, PICO_SIZE_ETH); - } - - if (rand) - { - seed += pico_rand(); - } - - return SLAACV4_CREATE_IPV4(seed); -} - -static void pico_slaacv4_init_cookie(struct pico_ip4 *ip, struct pico_device *dev, struct slaacv4_cookie *ck, void (*cb)(struct pico_ip4 *ip, uint8_t code)) -{ - ck->state = SLAACV4_RESET; - ck->probe_try_nb = 0; - ck->conflict_nb = 0; - ck->announce_nb = 0; - ck->cb = cb; - ck->device = dev; - ck->ip.addr = ip->addr; - ck->timer = 0; -} - -static void pico_slaacv4_cancel_timers(struct slaacv4_cookie *tmp) -{ - pico_timer_cancel(tmp->timer); - tmp->timer = 0; -} - -static void pico_slaacv4_send_announce_timer(pico_time now, void *arg) -{ - struct slaacv4_cookie *tmp = (struct slaacv4_cookie *)arg; - struct pico_ip4 netmask = { - 0 - }; - netmask.addr = long_be(0xFFFF0000); - - (void)now; - - if (tmp->announce_nb < ANNOUNCE_NB) - { - pico_arp_request(tmp->device, &tmp->ip, PICO_ARP_ANNOUNCE); - tmp->announce_nb++; - tmp->timer = pico_timer_add(ANNOUNCE_INTERVAL * 1000, pico_slaacv4_send_announce_timer, arg); - if (!tmp->timer) { - slaacv4_dbg("SLAACV4: Failed to start announce timer\n"); - tmp->state = SLAACV4_ERROR; - if (tmp->cb != NULL) - tmp->cb(&tmp->ip, PICO_SLAACV4_ERROR); - } - } - else - { - tmp->state = SLAACV4_CLAIMED; - pico_ipv4_link_add(tmp->device, tmp->ip, netmask); - if (tmp->cb != NULL) - tmp->cb(&tmp->ip, PICO_SLAACV4_SUCCESS); - } -} - -static void pico_slaacv4_send_probe_timer(pico_time now, void *arg) -{ - struct slaacv4_cookie *tmp = (struct slaacv4_cookie *)arg; - (void)now; - - if (tmp->probe_try_nb < PROBE_NB) - { - pico_arp_request(tmp->device, &tmp->ip, PICO_ARP_PROBE); - tmp->probe_try_nb++; - tmp->timer = pico_timer_add(PROBE_WAIT * 1000, 
pico_slaacv4_send_probe_timer, tmp); - if (!tmp->timer) { - slaacv4_dbg("SLAACV4: Failed to start probe timer\n"); - tmp->state = SLAACV4_ERROR; - if (tmp->cb != NULL) - tmp->cb(&tmp->ip, PICO_SLAACV4_ERROR); - } - } - else - { - tmp->state = SLAACV4_ANNOUNCING; - tmp->timer = pico_timer_add(ANNOUNCE_WAIT * 1000, pico_slaacv4_send_announce_timer, arg); - if (!tmp->timer) { - slaacv4_dbg("SLAACV4: Failed to start announce timer\n"); - tmp->state = SLAACV4_ERROR; - if (tmp->cb != NULL) - tmp->cb(&tmp->ip, PICO_SLAACV4_ERROR); - } - } -} - -static void pico_slaacv4_receive_ipconflict(int reason) -{ - struct slaacv4_cookie *tmp = &slaacv4_local; - - tmp->conflict_nb++; - pico_slaacv4_cancel_timers(tmp); - - if(tmp->state == SLAACV4_CLAIMED) - { - if(reason == PICO_ARP_CONFLICT_REASON_CONFLICT) - { - pico_ipv4_link_del(tmp->device, tmp->ip); - } - } - - if (tmp->conflict_nb < MAX_CONFLICTS) - { - tmp->state = SLAACV4_CLAIMING; - tmp->probe_try_nb = 0; - tmp->announce_nb = 0; - tmp->ip.addr = pico_slaacv4_getip(tmp->device, (uint8_t)1); - pico_arp_register_ipconflict(&tmp->ip, &tmp->device->eth->mac, pico_slaacv4_receive_ipconflict); - pico_arp_request(tmp->device, &tmp->ip, PICO_ARP_PROBE); - tmp->probe_try_nb++; - tmp->timer = pico_timer_add(PROBE_WAIT * 1000, pico_slaacv4_send_probe_timer, tmp); - if (!tmp->timer) { - slaacv4_dbg("SLAACV4: Failed to start probe timer\n"); - tmp->state = SLAACV4_ERROR; - if (tmp->cb != NULL) - tmp->cb(&tmp->ip, PICO_SLAACV4_ERROR); - } - } - else if (tmp->conflict_nb < MAX_CONFLICTS_FAIL) - { - tmp->state = SLAACV4_CLAIMING; - tmp->probe_try_nb = 0; - tmp->announce_nb = 0; - tmp->ip.addr = pico_slaacv4_getip(tmp->device, (uint8_t)1); - pico_arp_register_ipconflict(&tmp->ip, &tmp->device->eth->mac, pico_slaacv4_receive_ipconflict); - tmp->timer = pico_timer_add(RATE_LIMIT_INTERVAL * 1000, pico_slaacv4_send_probe_timer, tmp); - if (!tmp->timer) { - slaacv4_dbg("SLAACV4: Failed to start probe timer\n"); - tmp->state = SLAACV4_ERROR; - if 
(tmp->cb != NULL) - tmp->cb(&tmp->ip, PICO_SLAACV4_ERROR); - } - } - else - { - if (tmp->cb != NULL) - { - pico_hotplug_deregister(tmp->device, &pico_slaacv4_hotplug_cb); - tmp->cb(&tmp->ip, PICO_SLAACV4_ERROR); - } - - tmp->state = SLAACV4_ERROR; - } - -} - -static void pico_slaacv4_hotplug_cb(__attribute__((unused)) struct pico_device *dev, int event) -{ - struct slaacv4_cookie *tmp = &slaacv4_local; - - if (event == PICO_HOTPLUG_EVENT_UP ) - { - slaacv4_local.state = SLAACV4_CLAIMING; - tmp->probe_try_nb = 0; - tmp->announce_nb = 0; - - pico_arp_register_ipconflict(&tmp->ip, &tmp->device->eth->mac, pico_slaacv4_receive_ipconflict); - pico_arp_request(tmp->device, &tmp->ip, PICO_ARP_PROBE); - tmp->probe_try_nb++; - tmp->timer = pico_timer_add(PROBE_WAIT * 1000, pico_slaacv4_send_probe_timer, tmp); - if (!tmp->timer) { - slaacv4_dbg("SLAACV4: Failed to start probe timer\n"); - tmp->state = SLAACV4_ERROR; - if (tmp->cb != NULL) - tmp->cb(&tmp->ip, PICO_SLAACV4_ERROR); - } - } - else - { - if (tmp->state == SLAACV4_CLAIMED ) - pico_ipv4_link_del(tmp->device, tmp->ip); - - pico_slaacv4_cancel_timers(tmp); - } -} - -int pico_slaacv4_claimip(struct pico_device *dev, void (*cb)(struct pico_ip4 *ip, uint8_t code)) -{ - struct pico_ip4 ip; - - if (!dev->eth) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - } - - if( dev->link_state != NULL ) - { - /* hotplug detect will work */ - - ip.addr = pico_slaacv4_getip(dev, 0); - pico_slaacv4_init_cookie(&ip, dev, &slaacv4_local, cb); - - if (pico_hotplug_register(dev, &pico_slaacv4_hotplug_cb)) - { - return -1; - } - } - else - { - ip.addr = pico_slaacv4_getip(dev, 0); - - pico_slaacv4_init_cookie(&ip, dev, &slaacv4_local, cb); - pico_arp_register_ipconflict(&ip, &dev->eth->mac, pico_slaacv4_receive_ipconflict); - pico_arp_request(dev, &ip, PICO_ARP_PROBE); - slaacv4_local.state = SLAACV4_CLAIMING; - slaacv4_local.probe_try_nb++; - slaacv4_local.timer = pico_timer_add(PROBE_WAIT * 1000, pico_slaacv4_send_probe_timer, 
&slaacv4_local); - if (!slaacv4_local.timer) { - slaacv4_dbg("SLAACV4: Failed to start probe timer\n"); - slaacv4_local.state = SLAACV4_ERROR; - return -1; - } - } - - return 0; -} - -void pico_slaacv4_unregisterip(void) -{ - struct slaacv4_cookie *tmp = &slaacv4_local; - struct pico_ip4 empty = { - .addr = 0x00000000 - }; - - if (tmp->state == SLAACV4_CLAIMED) - { - pico_ipv4_link_del(tmp->device, tmp->ip); - } - - pico_slaacv4_cancel_timers(tmp); - pico_slaacv4_init_cookie(&empty, NULL, tmp, NULL); - pico_arp_register_ipconflict(&tmp->ip, NULL, NULL); - pico_hotplug_deregister(tmp->device, &pico_slaacv4_hotplug_cb); -} - -#endif diff --git a/kernel/picotcp/modules/pico_slaacv4.h b/kernel/picotcp/modules/pico_slaacv4.h deleted file mode 100644 index 8912dbd..0000000 --- a/kernel/picotcp/modules/pico_slaacv4.h +++ /dev/null @@ -1,18 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Authors: Bogdan Lupu - *********************************************************************/ -#ifndef INCLUDE_PICO_SUPPORT_SLAACV4 -#define INCLUDE_PICO_SUPPORT_SLAACV4 -#include "pico_arp.h" - -#define PICO_SLAACV4_SUCCESS 0 -#define PICO_SLAACV4_ERROR 1 - -int pico_slaacv4_claimip(struct pico_device *dev, void (*cb)(struct pico_ip4 *ip, uint8_t code)); -void pico_slaacv4_unregisterip(void); - -#endif /* _INCLUDE_PICO_SUPPORT_SLAACV4 */ - diff --git a/kernel/picotcp/modules/pico_sntp_client.c b/kernel/picotcp/modules/pico_sntp_client.c deleted file mode 100644 index 3b68bb2..0000000 --- a/kernel/picotcp/modules/pico_sntp_client.c +++ /dev/null @@ -1,552 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Author: Toon Stegen - *********************************************************************/ -#include "pico_sntp_client.h" -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_dns_client.h" -#include "pico_tree.h" -#include "pico_stack.h" - -#ifdef PICO_SUPPORT_SNTP_CLIENT - -#ifdef DEBUG_SNTP -#define sntp_dbg dbg -#else -#define sntp_dbg(...) do {} while(0) -#endif - -#define SNTP_VERSION 4 -#define PICO_SNTP_MAXBUF (1400) - -/* Sntp mode */ -#define SNTP_MODE_CLIENT 3 - -/* SNTP conversion parameters */ -#define SNTP_FRAC_TO_PICOSEC (4294967llu) -#define SNTP_THOUSAND (1000llu) -#define SNTP_UNIX_OFFSET (2208988800llu) /* nr of seconds from 1900 to 1970 */ -#define SNTP_BITMASK (0X00000000FFFFFFFF) /* mask to convert from 64 to 32 */ - -PACKED_STRUCT_DEF pico_sntp_ts -{ - uint32_t sec; /* Seconds */ - uint32_t frac; /* Fraction */ -}; - -PACKED_STRUCT_DEF pico_sntp_header -{ - uint8_t mode : 3; /* Mode */ - uint8_t vn : 3; /* Version number */ - uint8_t li : 2; /* Leap indicator */ - uint8_t stratum; /* Stratum */ - uint8_t poll; /* Poll, only significant in server messages */ - uint8_t prec; /* Precision, only significant in server messages */ - int32_t rt_del; /* Root delay, only significant in server messages */ - int32_t rt_dis; /* Root dispersion, only significant in server messages */ - int32_t ref_id; /* Reference clock ID, only significant in server messages */ - struct pico_sntp_ts ref_ts; /* Reference time stamp */ - struct pico_sntp_ts orig_ts; /* Originate time stamp */ - struct pico_sntp_ts recv_ts; /* Receive time stamp */ - struct pico_sntp_ts trs_ts; /* Transmit time stamp */ - -}; - -struct sntp_server_ns_cookie -{ - int rec; /* Indicates wheter an sntp packet has been received */ - uint16_t proto; /* IPV4 or IPV6 prototype */ - pico_time stamp; /* Timestamp of the moment the sntp packet is sent */ - char *hostname; /* 
Hostname of the (s)ntp server*/ - struct pico_socket *sock; /* Socket which contains the cookie */ - void (*cb_synced)(pico_err_t status); /* Callback function for telling the user - wheter/when the time is synchronised */ - uint32_t timer; /* Timer that will signal timeout */ -}; - -/* global variables */ -static uint16_t sntp_port = 123u; -static struct pico_timeval server_time = { - 0 -}; -static pico_time tick_stamp = 0ull; -static union pico_address sntp_inaddr_any = { - .ip6.addr = { 0 } -}; - -/*************************************************************************/ - -/* Converts a sntp time stamp to a pico_timeval struct */ -static int timestamp_convert(const struct pico_sntp_ts *ts, struct pico_timeval *tv, pico_time delay) -{ - if(long_be(ts->sec) < SNTP_UNIX_OFFSET) { - pico_err = PICO_ERR_EINVAL; - tv->tv_sec = 0; - tv->tv_msec = 0; - sntp_dbg("Error: input too low\n"); - return -1; - } - - sntp_dbg("Delay: %lu\n", delay); - tv->tv_msec = (pico_time) (((uint32_t)(long_be(ts->frac))) / SNTP_FRAC_TO_PICOSEC + delay); - tv->tv_sec = (pico_time) (long_be(ts->sec) - SNTP_UNIX_OFFSET + (uint32_t)tv->tv_msec / SNTP_THOUSAND); - tv->tv_msec = (uint32_t) (tv->tv_msec & SNTP_BITMASK) % SNTP_THOUSAND; - sntp_dbg("Converted time stamp: %lusec, %lumsec\n", tv->tv_sec, tv->tv_msec); - return 0; -} - -/* Cleanup function that is called when the time is synced or an error occured */ -static void pico_sntp_cleanup(struct sntp_server_ns_cookie *ck, pico_err_t status) -{ - sntp_dbg("Cleanup called\n"); - if(!ck) - return; - - pico_timer_cancel(ck->timer); - - ck->cb_synced(status); - if(ck->sock) - ck->sock->priv = NULL; - - sntp_dbg("FREE!\n"); - PICO_FREE(ck->hostname); - PICO_FREE(ck); - -} - -/* Extracts the current time from a server sntp packet*/ -static int pico_sntp_parse(char *buf, struct sntp_server_ns_cookie *ck) -{ - int ret = 0; - struct pico_sntp_header *hp = (struct pico_sntp_header*) buf; - - if(!ck) { - sntp_dbg("pico_sntp_parse: invalid cookie\n"); - 
return -1; - } - - sntp_dbg("Received mode: %u, version: %u, stratum: %u\n", hp->mode, hp->vn, hp->stratum); - - tick_stamp = pico_tick; - /* tick_stamp - ck->stamp is the delay between sending and receiving the ntp packet */ - ret = timestamp_convert(&(hp->trs_ts), &server_time, (tick_stamp - ck->stamp) / 2); - if(ret != 0) { - sntp_dbg("Conversion error!\n"); - pico_sntp_cleanup(ck, PICO_ERR_EINVAL); - return ret; - } - - sntp_dbg("Server time: %lu seconds and %lu milisecs since 1970\n", server_time.tv_sec, server_time.tv_msec); - - /* Call back the user saying the time is synced */ - pico_sntp_cleanup(ck, PICO_ERR_NOERR); - return ret; -} - -/* callback for UDP socket events */ -static void pico_sntp_client_wakeup(uint16_t ev, struct pico_socket *s) -{ - struct sntp_server_ns_cookie *ck = (struct sntp_server_ns_cookie *)s->priv; - char *recvbuf; - int read = 0; - uint32_t peer; - uint16_t port; - - if(!ck) { - sntp_dbg("pico_sntp_client_wakeup: invalid cookie\n"); - return; - } - - /* process read event, data available */ - if (ev == PICO_SOCK_EV_RD) { - ck->rec = 1; - /* receive while data available in socket buffer */ - recvbuf = PICO_ZALLOC(PICO_SNTP_MAXBUF); - if (!recvbuf) - return; - - do { - read = pico_socket_recvfrom(s, recvbuf, PICO_SNTP_MAXBUF, &peer, &port); - } while(read > 0); - pico_sntp_parse(recvbuf, s->priv); - s->priv = NULL; /* make sure UDP callback does not try to read from freed mem again */ - PICO_FREE(recvbuf); - } - /* socket is closed */ - else if(ev == PICO_SOCK_EV_CLOSE) { - sntp_dbg("Socket is closed. Bailing out.\n"); - pico_sntp_cleanup(ck, PICO_ERR_ENOTCONN); - return; - } - /* process error event, socket error occured */ - else if(ev == PICO_SOCK_EV_ERR) { - sntp_dbg("Socket Error received. 
Bailing out.\n"); - pico_sntp_cleanup(ck, PICO_ERR_ENOTCONN); - return; - } - - sntp_dbg("Received data from %08X:%u\n", peer, port); -} - -/* Function that is called after the receive timer expires */ -static void sntp_receive_timeout(pico_time now, void *arg) -{ - struct sntp_server_ns_cookie *ck = (struct sntp_server_ns_cookie *)arg; - (void) now; - - if(!ck) { - sntp_dbg("sntp_timeout: invalid cookie\n"); - return; - } - - if(!ck->rec) { - pico_sntp_cleanup(ck, PICO_ERR_ETIMEDOUT); - } -} - -/* Sends an sntp packet on sock to dst*/ -static void pico_sntp_send(struct pico_socket *sock, union pico_address *dst) -{ - struct pico_sntp_header header = { - 0 - }; - struct sntp_server_ns_cookie *ck = (struct sntp_server_ns_cookie *)sock->priv; - - if(!ck) { - sntp_dbg("pico_sntp_sent: invalid cookie\n"); - return; - } - - ck->timer = pico_timer_add(5000, sntp_receive_timeout, ck); - if (!ck->timer) { - sntp_dbg("SNTP: Failed to start timeout timer\n"); - pico_sntp_cleanup(ck, pico_err); - pico_socket_close(sock); - pico_socket_del(sock); - return; - } - header.vn = SNTP_VERSION; - header.mode = SNTP_MODE_CLIENT; - /* header.trs_ts.frac = long_be(0ul); */ - ck->stamp = pico_tick; - pico_socket_sendto(sock, &header, sizeof(header), dst, short_be(sntp_port)); -} - -static int pico_sntp_sync_start(struct sntp_server_ns_cookie *ck, union pico_address *addr) -{ - uint16_t any_port = 0; - struct pico_socket *sock; - - sock = pico_socket_open(ck->proto, PICO_PROTO_UDP, &pico_sntp_client_wakeup); - if (!sock) - return -1; - - sock->priv = ck; - ck->sock = sock; - if ((pico_socket_bind(sock, &sntp_inaddr_any, &any_port) < 0)) { - pico_socket_close(sock); - return -1; - } - pico_sntp_send(sock, addr); - - return 0; -} - -#ifdef PICO_SUPPORT_DNS_CLIENT -/* used for getting a response from DNS servers */ -static void dnsCallback(char *ip, void *arg) -{ - struct sntp_server_ns_cookie *ck = (struct sntp_server_ns_cookie *)arg; - union pico_address address; - int retval = -1; - - 
if(!ck) { - sntp_dbg("dnsCallback: Invalid argument\n"); - return; - } - - if(ck->proto == PICO_PROTO_IPV6) { -#ifdef PICO_SUPPORT_IPV6 - if (ip) { - /* add the ip address to the client, and start a tcp connection socket */ - sntp_dbg("using IPv6 address: %s\n", ip); - retval = pico_string_to_ipv6(ip, address.ip6.addr); - } else { - sntp_dbg("Invalid query response for AAAA\n"); - retval = -1; - pico_sntp_cleanup(ck, PICO_ERR_ENETDOWN); - } -#endif - } else if(ck->proto == PICO_PROTO_IPV4) { -#ifdef PICO_SUPPORT_IPV4 - if(ip) { - sntp_dbg("using IPv4 address: %s\n", ip); - retval = pico_string_to_ipv4(ip, (uint32_t *)&address.ip4.addr); - } else { - sntp_dbg("Invalid query response for A\n"); - retval = -1; - pico_sntp_cleanup(ck, PICO_ERR_ENETDOWN); - } -#endif - } - - if (retval >= 0) { - retval = pico_sntp_sync_start(ck, &address); - if (retval < 0) - pico_sntp_cleanup(ck, PICO_ERR_ENOTCONN); - } -} -#endif - -#ifdef PICO_SUPPORT_IPV4 -#ifdef PICO_SUPPORT_DNS_CLIENT -static int pico_sntp_sync_start_dns_ipv4(const char *sntp_server, void (*cb_synced)(pico_err_t status)) -{ - int retval = -1; - struct sntp_server_ns_cookie *ck; - /* IPv4 query */ - ck = PICO_ZALLOC(sizeof(struct sntp_server_ns_cookie)); - if (!ck) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - ck->proto = PICO_PROTO_IPV4; - ck->stamp = 0ull; - ck->rec = 0; - ck->sock = NULL; - ck->hostname = PICO_ZALLOC(strlen(sntp_server) + 1); - if (!ck->hostname) { - PICO_FREE(ck); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - strcpy(ck->hostname, sntp_server); - - ck->cb_synced = cb_synced; - - sntp_dbg("Resolving A %s\n", ck->hostname); - retval = pico_dns_client_getaddr(sntp_server, &dnsCallback, ck); - if (retval != 0) { - PICO_FREE(ck->hostname); - PICO_FREE(ck); - return -1; - } - - return 0; -} -#endif -static int pico_sntp_sync_start_ipv4(union pico_address *addr, void (*cb_synced)(pico_err_t status)) -{ - int retval = -1; - struct sntp_server_ns_cookie *ck; - ck = PICO_ZALLOC(sizeof(struct 
sntp_server_ns_cookie)); - if (!ck) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - ck->proto = PICO_PROTO_IPV4; - ck->stamp = 0ull; - ck->rec = 0; - ck->sock = NULL; - /* Set the given IP address as hostname, allocate the maximum IPv4 string length + 1 */ - ck->hostname = PICO_ZALLOC(15 + 1); - if (!ck->hostname) { - PICO_FREE(ck); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - retval = pico_ipv4_to_string(ck->hostname, addr->ip4.addr); - if (retval < 0) { - PICO_FREE(ck->hostname); - PICO_FREE(ck); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - ck->cb_synced = cb_synced; - - retval = pico_sntp_sync_start(ck, addr); - if (retval < 0) { - pico_sntp_cleanup(ck, PICO_ERR_ENOTCONN); - return -1; - } - - return 0; -} -#endif - -#ifdef PICO_SUPPORT_IPV6 -#ifdef PICO_SUPPORT_DNS_CLIENT -static int pico_sntp_sync_start_dns_ipv6(const char *sntp_server, void (*cb_synced)(pico_err_t status)) -{ - struct sntp_server_ns_cookie *ck6; - int retval6 = -1; - /* IPv6 query */ - ck6 = PICO_ZALLOC(sizeof(struct sntp_server_ns_cookie)); - if (!ck6) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - ck6->proto = PICO_PROTO_IPV6; - ck6->hostname = PICO_ZALLOC(strlen(sntp_server) + 1); - if (!ck6->hostname) { - PICO_FREE(ck6); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - strcpy(ck6->hostname, sntp_server); - ck6->proto = PICO_PROTO_IPV6; - ck6->stamp = 0ull; - ck6->rec = 0; - ck6->sock = NULL; - ck6->cb_synced = cb_synced; - sntp_dbg("Resolving AAAA %s\n", ck6->hostname); - retval6 = pico_dns_client_getaddr6(sntp_server, &dnsCallback, ck6); - if (retval6 != 0) { - PICO_FREE(ck6->hostname); - PICO_FREE(ck6); - return -1; - } - - return 0; -} -#endif -static int pico_sntp_sync_start_ipv6(union pico_address *addr, void (*cb_synced)(pico_err_t status)) -{ - struct sntp_server_ns_cookie *ck6; - int retval6 = -1; - ck6 = PICO_ZALLOC(sizeof(struct sntp_server_ns_cookie)); - if (!ck6) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - ck6->proto = PICO_PROTO_IPV6; - 
ck6->stamp = 0ull; - ck6->rec = 0; - ck6->sock = NULL; - ck6->cb_synced = cb_synced; - /* Set the given IP address as hostname, allocate the maximum IPv6 string length + 1 */ - ck6->hostname = PICO_ZALLOC(39 + 1); - if (!ck6->hostname) { - PICO_FREE(ck6); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - retval6 = pico_ipv6_to_string(ck6->hostname, addr->ip6.addr); - if (retval6 < 0) { - PICO_FREE(ck6->hostname); - PICO_FREE(ck6); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - retval6 = pico_sntp_sync_start(ck6, addr); - if (retval6 < 0) { - pico_sntp_cleanup(ck6, PICO_ERR_ENOTCONN); - return -1; - } - - return 0; -} -#endif - -/* user function to sync the time from a given sntp source in string notation, DNS resolution is needed */ -int pico_sntp_sync(const char *sntp_server, void (*cb_synced)(pico_err_t status)) -{ -#ifdef PICO_SUPPORT_DNS_CLIENT - int retval4 = -1, retval6 = -1; - if (sntp_server == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if(cb_synced == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - -#ifdef PICO_SUPPORT_IPV4 - retval4 = pico_sntp_sync_start_dns_ipv4(sntp_server, cb_synced); -#endif -#ifdef PICO_SUPPORT_IPV6 - retval6 = pico_sntp_sync_start_dns_ipv6(sntp_server, cb_synced); -#endif - - if (retval4 != 0 && retval6 != 0) - return -1; - - return 0; -#else - sntp_debug("No DNS support available\n"); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -#endif -} - -/* user function to sync the time from a given sntp source in pico_address notation */ -int pico_sntp_sync_ip(union pico_address *sntp_addr, void (*cb_synced)(pico_err_t status)) -{ - int retval4 = -1, retval6 = -1; - if (sntp_addr == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (cb_synced == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - -#ifdef PICO_SUPPORT_IPV4 - retval4 = pico_sntp_sync_start_ipv4(sntp_addr, cb_synced); -#endif -#ifdef PICO_SUPPORT_IPV6 - retval6 = pico_sntp_sync_start_ipv6(sntp_addr, cb_synced); -#endif - - 
if (retval4 != 0 && retval6 != 0) - return -1; - - return 0; -} - -/* user function to get the current time */ -int pico_sntp_gettimeofday(struct pico_timeval *tv) -{ - pico_time diff, temp; - uint32_t diffH, diffL; - int ret = 0; - if (tick_stamp == 0) { - /* TODO: set pico_err */ - ret = -1; - sntp_dbg("Error: Unsynchronised\n"); - return ret; - } - - diff = pico_tick - tick_stamp; - diffL = ((uint32_t) (diff & SNTP_BITMASK)) / 1000; - diffH = ((uint32_t) (diff >> 32)) / 1000; - - temp = server_time.tv_msec + (uint32_t)(diff & SNTP_BITMASK) % SNTP_THOUSAND; - tv->tv_sec = server_time.tv_sec + ((uint64_t)diffH << 32) + diffL + (uint32_t)temp / SNTP_THOUSAND; - tv->tv_msec = (uint32_t)(temp & SNTP_BITMASK) % SNTP_THOUSAND; - sntp_dbg("Time of day: %lu seconds and %lu milisecs since 1970\n", tv->tv_sec, tv->tv_msec); - return ret; -} - -#endif /* PICO_SUPPORT_SNTP_CLIENT */ diff --git a/kernel/picotcp/modules/pico_sntp_client.h b/kernel/picotcp/modules/pico_sntp_client.h deleted file mode 100644 index 21ee5ed..0000000 --- a/kernel/picotcp/modules/pico_sntp_client.h +++ /dev/null @@ -1,23 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - Author: Toon Stegen - *********************************************************************/ -#ifndef INCLUDE_PICO_SNTP_CLIENT -#define INCLUDE_PICO_SNTP_CLIENT - -#include "pico_config.h" -#include "pico_protocol.h" - -struct pico_timeval -{ - pico_time tv_sec; - pico_time tv_msec; -}; - -int pico_sntp_sync(const char *sntp_server, void (*cb_synced)(pico_err_t status)); -int pico_sntp_sync_ip(union pico_address *sntp_addr, void (*cb_synced)(pico_err_t status)); -int pico_sntp_gettimeofday(struct pico_timeval *tv); - -#endif /* _INCLUDE_PICO_SNTP_CLIENT */ diff --git a/kernel/picotcp/modules/pico_socket_tcp.c b/kernel/picotcp/modules/pico_socket_tcp.c deleted file mode 100644 index 569e4e8..0000000 --- a/kernel/picotcp/modules/pico_socket_tcp.c +++ /dev/null @@ -1,272 +0,0 @@ -#include "pico_config.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_tcp.h" -#include "pico_socket_tcp.h" - - -static int sockopt_validate_args(struct pico_socket *s, void *value) -{ - if (!value) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (s->proto->proto_number != PICO_PROTO_TCP) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - } - - return 0; -} - -int pico_getsockopt_tcp(struct pico_socket *s, int option, void *value) -{ - if (sockopt_validate_args(s, value) < 0) - return -1; - -#ifdef PICO_SUPPORT_TCP - if (option == PICO_TCP_NODELAY) { - /* state of the NODELAY option */ - *(int *)value = PICO_SOCKET_GETOPT(s, PICO_SOCKET_OPT_TCPNODELAY); - return 0; - } - else if (option == PICO_SOCKET_OPT_RCVBUF) { - return pico_tcp_get_bufsize_in(s, (uint32_t *)value); - } - - else if (option == PICO_SOCKET_OPT_SNDBUF) { - return pico_tcp_get_bufsize_out(s, (uint32_t *)value); - } - -#endif - return -1; -} - -static void tcp_set_nagle_option(struct pico_socket *s, void *value) -{ - int *val = (int*)value; - if (*val > 0) { - dbg("setsockopt: Nagle algorithm disabled.\n"); - PICO_SOCKET_SETOPT_EN(s, PICO_SOCKET_OPT_TCPNODELAY); - } 
else { - dbg("setsockopt: Nagle algorithm enabled.\n"); - PICO_SOCKET_SETOPT_DIS(s, PICO_SOCKET_OPT_TCPNODELAY); - } -} - -int pico_setsockopt_tcp(struct pico_socket *s, int option, void *value) -{ - if (sockopt_validate_args(s, value) < 0) - return -1; - -#ifdef PICO_SUPPORT_TCP - if (option == PICO_TCP_NODELAY) { - tcp_set_nagle_option(s, value); - return 0; - } - else if (option == PICO_SOCKET_OPT_RCVBUF) { - uint32_t *val = (uint32_t*)value; - pico_tcp_set_bufsize_in(s, *val); - return 0; - } - else if (option == PICO_SOCKET_OPT_SNDBUF) { - uint32_t *val = (uint32_t*)value; - pico_tcp_set_bufsize_out(s, *val); - return 0; - } - else if (option == PICO_SOCKET_OPT_KEEPCNT) { - uint32_t *val = (uint32_t*)value; - pico_tcp_set_keepalive_probes(s, *val); - return 0; - } - else if (option == PICO_SOCKET_OPT_KEEPIDLE) { - uint32_t *val = (uint32_t*)value; - pico_tcp_set_keepalive_time(s, *val); - return 0; - } - else if (option == PICO_SOCKET_OPT_KEEPINTVL) { - uint32_t *val = (uint32_t*)value; - pico_tcp_set_keepalive_intvl(s, *val); - return 0; - } - else if (option == PICO_SOCKET_OPT_LINGER) { - uint32_t *val = (uint32_t*)value; - pico_tcp_set_linger(s, *val); - return 0; - } - -#endif - pico_err = PICO_ERR_EINVAL; - return -1; -} - -void pico_socket_tcp_cleanup(struct pico_socket *sock) -{ -#ifdef PICO_SUPPORT_TCP - /* for tcp sockets go further and clean the sockets inside queue */ - if(is_sock_tcp(sock)) - pico_tcp_cleanup_queues(sock); - -#endif -} - - -void pico_socket_tcp_delete(struct pico_socket *s) -{ -#ifdef PICO_SUPPORT_TCP - if(s->parent) - s->parent->number_of_pending_conn--; - -#endif -} - -static struct pico_socket *socket_tcp_deliver_ipv4(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket *found = NULL; - #ifdef PICO_SUPPORT_IPV4 - struct pico_ip4 s_local, s_remote, p_src, p_dst; - struct pico_ipv4_hdr *ip4hdr = (struct pico_ipv4_hdr*)(f->net_hdr); - struct pico_trans *tr = (struct pico_trans *) f->transport_hdr; - s_local.addr = 
s->local_addr.ip4.addr; - s_remote.addr = s->remote_addr.ip4.addr; - p_src.addr = ip4hdr->src.addr; - p_dst.addr = ip4hdr->dst.addr; - if ((s->remote_port == tr->sport) && /* remote port check */ - (s_remote.addr == p_src.addr) && /* remote addr check */ - ((s_local.addr == PICO_IPV4_INADDR_ANY) || (s_local.addr == p_dst.addr))) { /* Either local socket is ANY, or matches dst */ - found = s; - return found; - } else if ((s->remote_port == 0) && /* not connected... listening */ - ((s_local.addr == PICO_IPV4_INADDR_ANY) || (s_local.addr == p_dst.addr))) { /* Either local socket is ANY, or matches dst */ - /* listen socket */ - found = s; - } - - #endif - return found; -} - -static struct pico_socket *socket_tcp_deliver_ipv6(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket *found = NULL; - #ifdef PICO_SUPPORT_IPV6 - struct pico_trans *tr = (struct pico_trans *) f->transport_hdr; - struct pico_ip6 s_local = {{0}}, s_remote = {{0}}, p_src = {{0}}, p_dst = {{0}}; - struct pico_ipv6_hdr *ip6hdr = (struct pico_ipv6_hdr *)(f->net_hdr); - s_local = s->local_addr.ip6; - s_remote = s->remote_addr.ip6; - p_src = ip6hdr->src; - p_dst = ip6hdr->dst; - if ((s->remote_port == tr->sport) && - (!memcmp(s_remote.addr, p_src.addr, PICO_SIZE_IP6)) && - ((!memcmp(s_local.addr, PICO_IP6_ANY, PICO_SIZE_IP6)) || (!memcmp(s_local.addr, p_dst.addr, PICO_SIZE_IP6)))) { - found = s; - return found; - } else if ((s->remote_port == 0) && /* not connected... 
listening */ - ((!memcmp(s_local.addr, PICO_IP6_ANY, PICO_SIZE_IP6)) || (!memcmp(s_local.addr, p_dst.addr, PICO_SIZE_IP6)))) { - /* listen socket */ - found = s; - } - - #else - (void) s; - (void) f; - #endif - return found; -} - -static int socket_tcp_do_deliver(struct pico_socket *s, struct pico_frame *f) -{ - if (s != NULL) { - pico_tcp_input(s, f); - if ((s->ev_pending) && s->wakeup) { - s->wakeup(s->ev_pending, s); - if(!s->parent) - s->ev_pending = 0; - } - - return 0; - } - - dbg("TCP SOCKET> Not s.\n"); - return -1; -} - -int pico_socket_tcp_deliver(struct pico_sockport *sp, struct pico_frame *f) -{ - struct pico_socket *found = NULL; - struct pico_socket *target = NULL; - struct pico_tree_node *index = NULL; - struct pico_tree_node *_tmp; - struct pico_socket *s = NULL; - - pico_tree_foreach_safe(index, &sp->socks, _tmp){ - s = index->keyValue; - /* 4-tuple identification of socket (port-IP) */ - if (IS_IPV4(f)) { - found = socket_tcp_deliver_ipv4(s, f); - } - - if (IS_IPV6(f)) { - found = socket_tcp_deliver_ipv6(s, f); - } - - if (found) - { - target = found; - if ( found->remote_port != 0) - /* only break if it's connected */ - break; - } - } /* FOREACH */ - - return socket_tcp_do_deliver(target, f); -} - -struct pico_socket *pico_socket_tcp_open(uint16_t family) -{ - struct pico_socket *s = NULL; - (void) family; -#ifdef PICO_SUPPORT_TCP - s = pico_tcp_open(family); - if (!s) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - s->proto = &pico_proto_tcp; - /*check if Nagle enabled */ - /* - if (!IS_NAGLE_ENABLED(s)) - dbg("ERROR Nagle should be enabled here\n\n"); - */ -#endif - return s; -} - -int pico_socket_tcp_read(struct pico_socket *s, void *buf, uint32_t len) -{ -#ifdef PICO_SUPPORT_TCP - /* check if in shutdown state and if no more data in tcpq_in */ - if ((s->state & PICO_SOCKET_STATE_SHUT_REMOTE) && pico_tcp_queue_in_is_empty(s)) { - pico_err = PICO_ERR_ESHUTDOWN; - return -1; - } else { - return (int)(pico_tcp_read(s, buf, (uint32_t)len)); 
- } - -#else - return 0; -#endif -} - -void transport_flags_update(struct pico_frame *f, struct pico_socket *s) -{ -#ifdef PICO_SUPPORT_TCP - if(is_sock_tcp(s)) - pico_tcp_flags_update(f, s); - -#endif -} diff --git a/kernel/picotcp/modules/pico_socket_tcp.h b/kernel/picotcp/modules/pico_socket_tcp.h deleted file mode 100644 index 6479103..0000000 --- a/kernel/picotcp/modules/pico_socket_tcp.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef PICO_SOCKET_TCP_H -#define PICO_SOCKET_TCP_H -#include "pico_socket.h" - -#ifdef PICO_SUPPORT_TCP - -/* Functions/macros: conditional! */ - -# define IS_NAGLE_ENABLED(s) (!(!(!(s->opt_flags & (1 << PICO_SOCKET_OPT_TCPNODELAY))))) -int pico_setsockopt_tcp(struct pico_socket *s, int option, void *value); -int pico_getsockopt_tcp(struct pico_socket *s, int option, void *value); -int pico_socket_tcp_deliver(struct pico_sockport *sp, struct pico_frame *f); -void pico_socket_tcp_delete(struct pico_socket *s); -void pico_socket_tcp_cleanup(struct pico_socket *sock); -struct pico_socket *pico_socket_tcp_open(uint16_t family); -int pico_socket_tcp_read(struct pico_socket *s, void *buf, uint32_t len); -void transport_flags_update(struct pico_frame *, struct pico_socket *); - -#else -# define pico_getsockopt_tcp(...) (-1) -# define pico_setsockopt_tcp(...) (-1) -# define pico_socket_tcp_deliver(...) (-1) -# define IS_NAGLE_ENABLED(s) (0) -# define pico_socket_tcp_delete(...) do {} while(0) -# define pico_socket_tcp_cleanup(...) do {} while(0) -# define pico_socket_tcp_open(f) (NULL) -# define pico_socket_tcp_read(...) (-1) -# define transport_flags_update(...) 
do {} while(0) - -#endif - - -#endif diff --git a/kernel/picotcp/modules/pico_socket_udp.c b/kernel/picotcp/modules/pico_socket_udp.c deleted file mode 100644 index 9ec1bfd..0000000 --- a/kernel/picotcp/modules/pico_socket_udp.c +++ /dev/null @@ -1,260 +0,0 @@ -#include "pico_config.h" -#include "pico_socket.h" -#include "pico_udp.h" -#include "pico_socket_multicast.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_socket_udp.h" - -#define UDP_FRAME_OVERHEAD (sizeof(struct pico_frame)) - - -struct pico_socket *pico_socket_udp_open(void) -{ - struct pico_socket *s = NULL; -#ifdef PICO_SUPPORT_UDP - s = pico_udp_open(); - if (!s) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - s->proto = &pico_proto_udp; - s->q_in.overhead = UDP_FRAME_OVERHEAD; - s->q_out.overhead = UDP_FRAME_OVERHEAD; -#endif - return s; -} - - -#if defined (PICO_SUPPORT_IPV4) || defined (PICO_SUPPORT_IPV6) -static int pico_enqueue_and_wakeup_if_needed(struct pico_queue *q_in, struct pico_socket* s, struct pico_frame* cpy) -{ - if (pico_enqueue(q_in, cpy) > 0) { - if (s->wakeup){ - s->wakeup(PICO_SOCK_EV_RD, s); - } - } - else { - pico_frame_discard(cpy); - return -1; - } - return 0; -} -#endif - -#ifdef PICO_SUPPORT_IPV4 -#ifdef PICO_SUPPORT_MCAST -static inline int pico_socket_udp_deliver_ipv4_mcast_initial_checks(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_ip4 p_dst; - struct pico_ipv4_hdr *ip4hdr; - - ip4hdr = (struct pico_ipv4_hdr*)(f->net_hdr); - p_dst.addr = ip4hdr->dst.addr; - if (pico_ipv4_is_multicast(p_dst.addr) && (pico_socket_mcast_filter(s, (union pico_address *)&ip4hdr->dst, (union pico_address *)&ip4hdr->src) < 0)) - return -1; - - - if ((pico_ipv4_link_get(&ip4hdr->src)) && (PICO_SOCKET_GETOPT(s, PICO_SOCKET_OPT_MULTICAST_LOOP) == 0u)) { - /* Datagram from ourselves, Loop disabled, discarding. 
*/ - return -1; - } - - return 0; -} - - -static int pico_socket_udp_deliver_ipv4_mcast(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_ip4 s_local; - struct pico_frame *cpy; - struct pico_device *dev = pico_ipv4_link_find(&s->local_addr.ip4); - - s_local.addr = s->local_addr.ip4.addr; - - if (pico_socket_udp_deliver_ipv4_mcast_initial_checks(s, f) < 0) - return 0; - - if ((s_local.addr == PICO_IPV4_INADDR_ANY) || /* If our local ip is ANY, or.. */ - (dev == f->dev)) { /* the source of the bcast packet is a neighbor... */ - cpy = pico_frame_copy(f); - if (!cpy) - return -1; - - pico_enqueue_and_wakeup_if_needed(&s->q_in, s, cpy); - } - - return 0; -} -#endif -static int pico_socket_udp_deliver_ipv4_unicast(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_frame *cpy; - /* Either local socket is ANY, or matches dst */ - cpy = pico_frame_copy(f); - if (!cpy) - return -1; - - pico_enqueue_and_wakeup_if_needed(&s->q_in, s, cpy); - - return 0; -} - - -static int pico_socket_udp_deliver_ipv4(struct pico_socket *s, struct pico_frame *f) -{ - int ret = 0; - struct pico_ip4 s_local, p_dst; - struct pico_ipv4_hdr *ip4hdr; - ip4hdr = (struct pico_ipv4_hdr*)(f->net_hdr); - s_local.addr = s->local_addr.ip4.addr; - p_dst.addr = ip4hdr->dst.addr; - if ((pico_ipv4_is_broadcast(p_dst.addr)) || pico_ipv4_is_multicast(p_dst.addr)) { -#ifdef PICO_SUPPORT_MCAST - ret = pico_socket_udp_deliver_ipv4_mcast(s, f); -#endif - } else if ((s_local.addr == PICO_IPV4_INADDR_ANY) || (s_local.addr == p_dst.addr)) { - ret = pico_socket_udp_deliver_ipv4_unicast(s, f); - } - - pico_frame_discard(f); - return ret; -} -#endif - -#ifdef PICO_SUPPORT_IPV6 -#ifdef PICO_SUPPORT_MCAST -static inline int pico_socket_udp_deliver_ipv6_mcast(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_ipv6_hdr *ip6hdr; - struct pico_frame *cpy; - struct pico_device *dev = pico_ipv6_link_find(&s->local_addr.ip6); - - ip6hdr = (struct pico_ipv6_hdr*)(f->net_hdr); - - if 
((pico_ipv6_link_get(&ip6hdr->src)) && (PICO_SOCKET_GETOPT(s, PICO_SOCKET_OPT_MULTICAST_LOOP) == 0u)) { - /* Datagram from ourselves, Loop disabled, discarding. */ - return 0; - } - - - if (pico_ipv6_is_unspecified(s->local_addr.ip6.addr) || /* If our local ip is ANY, or.. */ - (dev == f->dev)) { /* the source of the bcast packet is a neighbor... */ - cpy = pico_frame_copy(f); - if (!cpy) - { - return -1; - } - - pico_enqueue_and_wakeup_if_needed(&s->q_in, s, cpy); - } - - return 0; -} -#endif -static int pico_socket_udp_deliver_ipv6(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_ip6 s_local, p_dst; - struct pico_ipv6_hdr *ip6hdr; - struct pico_frame *cpy; - ip6hdr = (struct pico_ipv6_hdr*)(f->net_hdr); - s_local = s->local_addr.ip6; - p_dst = ip6hdr->dst; - if ((pico_ipv6_is_multicast(p_dst.addr))) { -#ifdef PICO_SUPPORT_MCAST - int retval = pico_socket_udp_deliver_ipv6_mcast(s, f); - pico_frame_discard(f); - return retval; -#endif - } - else if (pico_ipv6_is_unspecified(s->local_addr.ip6.addr) || (pico_ipv6_compare(&s_local, &p_dst) == 0)) - { /* Either local socket is ANY, or matches dst */ - cpy = pico_frame_copy(f); - if (!cpy) - { - pico_frame_discard(f); - return -1; - } - - pico_enqueue_and_wakeup_if_needed(&s->q_in, s, cpy); - } - - pico_frame_discard(f); - return 0; -} -#endif - - -int pico_socket_udp_deliver(struct pico_sockport *sp, struct pico_frame *f) -{ - struct pico_tree_node *index = NULL; - struct pico_tree_node *_tmp; - struct pico_socket *s = NULL; - pico_err = PICO_ERR_EPROTONOSUPPORT; - #ifdef PICO_SUPPORT_UDP - pico_err = PICO_ERR_NOERR; - pico_tree_foreach_safe(index, &sp->socks, _tmp){ - s = index->keyValue; - if (IS_IPV4(f)) { /* IPV4 */ -#ifdef PICO_SUPPORT_IPV4 - return pico_socket_udp_deliver_ipv4(s, f); -#endif - } else if (IS_IPV6(f)) { -#ifdef PICO_SUPPORT_IPV6 - return pico_socket_udp_deliver_ipv6(s, f); -#endif - } else { - /* something wrong in the packet header*/ - } - } /* FOREACH */ - pico_frame_discard(f); - if 
(s) - return 0; - - pico_err = PICO_ERR_ENXIO; - #endif - return -1; -} - -int pico_setsockopt_udp(struct pico_socket *s, int option, void *value) -{ - switch(option) { - case PICO_SOCKET_OPT_RCVBUF: - s->q_in.max_size = (*(uint32_t*)value); - return 0; - case PICO_SOCKET_OPT_SNDBUF: - s->q_out.max_size = (*(uint32_t*)value); - return 0; - } - - /* switch's default */ -#ifdef PICO_SUPPORT_MCAST - return pico_setsockopt_mcast(s, option, value); -#else - pico_err = PICO_ERR_EINVAL; - return -1; -#endif -} - -int pico_getsockopt_udp(struct pico_socket *s, int option, void *value) -{ - uint32_t *val = (uint32_t *)value; - switch(option) { - case PICO_SOCKET_OPT_RCVBUF: - *val = s->q_in.max_size; - return 0; - case PICO_SOCKET_OPT_SNDBUF: - *val = s->q_out.max_size; - return 0; - } - - /* switch's default */ -#ifdef PICO_SUPPORT_MCAST - return pico_getsockopt_mcast(s, option, value); -#else - pico_err = PICO_ERR_EINVAL; - return -1; -#endif -} - diff --git a/kernel/picotcp/modules/pico_socket_udp.h b/kernel/picotcp/modules/pico_socket_udp.h deleted file mode 100644 index 6b3a4c9..0000000 --- a/kernel/picotcp/modules/pico_socket_udp.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef PICO_SOCKET_UDP_H -#define PICO_SOCKET_UDP_H - -struct pico_socket *pico_socket_udp_open(void); -int pico_socket_udp_deliver(struct pico_sockport *sp, struct pico_frame *f); - - -#ifdef PICO_SUPPORT_UDP -int pico_setsockopt_udp(struct pico_socket *s, int option, void *value); -int pico_getsockopt_udp(struct pico_socket *s, int option, void *value); -# define pico_socket_udp_recv(s, buf, len, addr, port) pico_udp_recv(s, buf, len, addr, port, NULL) -#else -# define pico_socket_udp_recv(...) (0) -# define pico_getsockopt_udp(...) (-1) -# define pico_setsockopt_udp(...) 
(-1) -#endif - - -#endif diff --git a/kernel/picotcp/modules/pico_strings.c b/kernel/picotcp/modules/pico_strings.c deleted file mode 100644 index cb60650..0000000 --- a/kernel/picotcp/modules/pico_strings.c +++ /dev/null @@ -1,101 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2015-2017 Altran ISY BeNeLux. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - - - Author: Michele Di Pede - *********************************************************************/ - -#include -#include -#include "pico_strings.h" - -char *get_string_terminator_position(char *const block, size_t len) -{ - size_t length = pico_strnlen(block, len); - - return (len != length) ? (block + length) : 0; -} - -int pico_strncasecmp(const char *const str1, const char *const str2, size_t n) -{ - int ch1; - int ch2; - size_t i; - - for (i = 0; i < n; ++i) { - ch1 = toupper(*(str1 + i)); - ch2 = toupper(*(str2 + i)); - if (ch1 < ch2) - return -1; - - if (ch1 > ch2) - return 1; - - if ((!ch1) && (!ch2)) - return 0; - } - return 0; -} - -size_t pico_strnlen(const char *str, size_t n) -{ - size_t len = 0; - - if (!str) - return 0; - - for (; len < n && *(str + len); ++len) - ; /* TICS require this empty statement here */ - - return len; -} - -static inline int num2string_validate(int32_t num, char *buf, int len) -{ - if (num < 0) - return -1; - - if (!buf) - return -2; - - if (len < 2) - return -3; - - return 0; -} - -static inline int revert_and_shift(char *buf, int len, int pos) -{ - int i; - - len -= pos; - for (i = 0; i < len; ++i) - buf[i] = buf[i + pos]; - return len; -} - -int num2string(int32_t num, char *buf, int len) -{ - ldiv_t res; - int pos = 0; - - if (num2string_validate(num, buf, len)) - return -1; - - pos = len; - buf[--pos] = '\0'; - - res.quot = (long)num; - - do { - if (!pos) - return -3; - - res = ldiv(res.quot, 10); - buf[--pos] = (char)((res.rem + '0') & 0xFF); - } while (res.quot); - - return 
revert_and_shift(buf, len, pos); -} diff --git a/kernel/picotcp/modules/pico_strings.h b/kernel/picotcp/modules/pico_strings.h deleted file mode 100644 index c4b6ac2..0000000 --- a/kernel/picotcp/modules/pico_strings.h +++ /dev/null @@ -1,21 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2015-2017 Altran ISY BeNeLux. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Author: Michele Di Pede - *********************************************************************/ - -#ifndef PICO_STRINGS_H -#define PICO_STRINGS_H -#include -#include - -char *get_string_terminator_position(char *const block, size_t len); -int pico_strncasecmp(const char *const str1, const char *const str2, size_t n); -size_t pico_strnlen(const char *str, size_t n); - -int num2string(int32_t num, char *buf, int len); - -#endif diff --git a/kernel/picotcp/modules/pico_tcp.c b/kernel/picotcp/modules/pico_tcp.c deleted file mode 100644 index dc1c492..0000000 --- a/kernel/picotcp/modules/pico_tcp.c +++ /dev/null @@ -1,3314 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Daniele Lacamera, Philippe Mariman - *********************************************************************/ - -#include "pico_tcp.h" -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_socket.h" -#include "pico_stack.h" -#include "pico_socket.h" -#include "pico_socket_tcp.h" -#include "pico_queue.h" -#include "pico_tree.h" - -#define TCP_IS_STATE(s, st) ((s->state & PICO_SOCKET_STATE_TCP) == st) -#define TCP_SOCK(s) ((struct pico_socket_tcp *)s) -#define SEQN(f) ((f) ? (long_be(((struct pico_tcp_hdr *)((f)->transport_hdr))->seq)) : 0) -#define ACKN(f) ((f) ? 
(long_be(((struct pico_tcp_hdr *)((f)->transport_hdr))->ack)) : 0) - -#define TCP_TIME (pico_time)(PICO_TIME_MS()) - -#define PICO_TCP_RTO_MIN (70) -#define PICO_TCP_RTO_MAX (120000) -#define PICO_TCP_IW 2 -#define PICO_TCP_SYN_TO 2000u -#define PICO_TCP_ZOMBIE_TO 30000 - -#define PICO_TCP_MAX_RETRANS 10 -#define PICO_TCP_MAX_CONNECT_RETRIES 3 - -#define PICO_TCP_LOOKAHEAD 0x00 -#define PICO_TCP_FIRST_DUPACK 0x01 -#define PICO_TCP_SECOND_DUPACK 0x02 -#define PICO_TCP_RECOVER 0x03 -#define PICO_TCP_BLACKOUT 0x04 -#define PICO_TCP_UNREACHABLE 0x05 -#define PICO_TCP_WINDOW_FULL 0x06 - -#define ONE_GIGABYTE ((uint32_t)(1024UL * 1024UL * 1024UL)) - -/* check if tcp connection is "idle" according to Nagle (RFC 896) */ -#define IS_TCP_IDLE(t) ((t->in_flight == 0) && (t->tcpq_out.size == 0)) -/* check if the hold queue contains data (again Nagle) */ -#define IS_TCP_HOLDQ_EMPTY(t) (t->tcpq_hold.size == 0) - -#define IS_INPUT_QUEUE(q) (q->pool.compare == input_segment_compare) -#define TCP_INPUT_OVERHEAD (sizeof(struct tcp_input_segment) + sizeof(struct pico_tree_node)) - - -#ifdef PICO_SUPPORT_TCP - -#ifdef DEBUG_TCP_GENERAL -#define tcp_dbg dbg -#else -#define tcp_dbg(...) do {} while(0) -#endif - -#ifdef DEBUG_TCP_NAGLE -#define tcp_dbg_nagle dbg -#else -#define tcp_dbg_nagle(...) do {} while(0) -#endif - -#ifdef DEBUG_TCP_OPTIONS -#define tcp_dbg_options dbg -#else -#define tcp_dbg_options(...) 
do {} while(0) -#endif - -#ifdef PICO_SUPPORT_MUTEX -static void *Mutex = NULL; -#endif - - - -/* Input segment, used to keep only needed data, not the full frame */ -struct tcp_input_segment -{ - uint32_t seq; - /* Pointer to payload */ - unsigned char *payload; - uint16_t payload_len; -}; - -/* Function to compare input segments */ -static int input_segment_compare(void *ka, void *kb) -{ - struct tcp_input_segment *a = ka, *b = kb; - return pico_seq_compare(a->seq, b->seq); -} - -static struct tcp_input_segment *segment_from_frame(struct pico_frame *f) -{ - struct tcp_input_segment *seg; - - if (!f->payload_len) - return NULL; - - seg = PICO_ZALLOC(sizeof(struct tcp_input_segment)); - if (!seg) - return NULL; - - seg->payload = PICO_ZALLOC(f->payload_len); - if(!seg->payload) - { - PICO_FREE(seg); - return NULL; - } - - seg->seq = SEQN(f); - seg->payload_len = f->payload_len; - memcpy(seg->payload, f->payload, seg->payload_len); - return seg; -} - -static int segment_compare(void *ka, void *kb) -{ - struct pico_frame *a = ka, *b = kb; - return pico_seq_compare(SEQN(a), SEQN(b)); -} - -struct pico_tcp_queue -{ - struct pico_tree pool; - uint32_t max_size; - uint32_t size; - uint32_t frames; -}; - -static void tcp_discard_all_segments(struct pico_tcp_queue *tq); -static void *peek_segment(struct pico_tcp_queue *tq, uint32_t seq) -{ - if(!IS_INPUT_QUEUE(tq)) - { - struct pico_tcp_hdr H; - struct pico_frame f = { - 0 - }; - f.transport_hdr = (uint8_t *) (&H); - H.seq = long_be(seq); - - return pico_tree_findKey(&tq->pool, &f); - } - else - { - struct tcp_input_segment dummy = { - 0 - }; - dummy.seq = seq; - - return pico_tree_findKey(&tq->pool, &dummy); - } - -} - -static void *first_segment(struct pico_tcp_queue *tq) -{ - return pico_tree_first(&tq->pool); -} - -static void *next_segment(struct pico_tcp_queue *tq, void *cur) -{ - if (!cur) - return NULL; - - if(IS_INPUT_QUEUE(tq)) - { - return peek_segment(tq, ((struct tcp_input_segment *)cur)->seq + ((struct 
tcp_input_segment *)cur)->payload_len); - } - else - { - return peek_segment(tq, SEQN((struct pico_frame *)cur) + ((struct pico_frame *)cur)->payload_len); - } -} - -static uint16_t enqueue_segment_len(struct pico_tcp_queue *tq, void *f) -{ - if (IS_INPUT_QUEUE(tq)) { - return ((struct tcp_input_segment *)f)->payload_len; - } else { - return (uint16_t)(((struct pico_frame *)f)->buffer_len); - } -} - - -static int32_t do_enqueue_segment(struct pico_tcp_queue *tq, void *f, uint16_t payload_len) -{ - int32_t ret = -1; - PICOTCP_MUTEX_LOCK(Mutex); - if ((tq->size + payload_len) > tq->max_size) - { - ret = 0; - goto out; - } - - if (pico_tree_insert(&tq->pool, f) != 0) - { - ret = 0; - goto out; - } - - tq->size += (uint16_t)payload_len; - if (payload_len > 0) - tq->frames++; - - ret = (int32_t)payload_len; - -out: - PICOTCP_MUTEX_UNLOCK(Mutex); - return ret; -} - -static int32_t pico_enqueue_segment(struct pico_tcp_queue *tq, void *f) -{ - uint16_t payload_len; - - if (!f) - return -1; - - payload_len = enqueue_segment_len(tq, f); - - - if (payload_len == 0) { - tcp_dbg("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! TRIED TO ENQUEUE INVALID SEGMENT!\n"); - return -1; - } - - return do_enqueue_segment(tq, f, payload_len); -} - -static void pico_discard_segment(struct pico_tcp_queue *tq, void *f) -{ - void *f1; - uint16_t payload_len = (uint16_t)((IS_INPUT_QUEUE(tq)) ? 
- (((struct tcp_input_segment *)f)->payload_len) : - (((struct pico_frame *)f)->buffer_len)); - PICOTCP_MUTEX_LOCK(Mutex); - f1 = pico_tree_delete(&tq->pool, f); - if (f1) { - tq->size -= (uint16_t)payload_len; - if (payload_len > 0) - tq->frames--; - } - - if(f1 && IS_INPUT_QUEUE(tq)) - { - struct tcp_input_segment *inp = f1; - PICO_FREE(inp->payload); - PICO_FREE(inp); - } - else - pico_frame_discard(f); - - PICOTCP_MUTEX_UNLOCK(Mutex); -} - -/* Structure for TCP socket */ -struct tcp_sack_block { - uint32_t left; - uint32_t right; - struct tcp_sack_block *next; -}; - -struct pico_socket_tcp { - struct pico_socket sock; - - /* Tree/queues */ - struct pico_tcp_queue tcpq_in; /* updated the input queue to hold input segments not the full frame. */ - struct pico_tcp_queue tcpq_out; - struct pico_tcp_queue tcpq_hold; /* buffer to hold delayed frames according to Nagle */ - - /* tcp_output */ - uint32_t snd_nxt; - uint32_t snd_last; - uint32_t snd_old_ack; - uint32_t snd_retry; - uint32_t snd_last_out; - - /* congestion control */ - uint32_t avg_rtt; - uint32_t rttvar; - uint32_t rto; - uint32_t in_flight; - uint32_t retrans_tmr; - pico_time retrans_tmr_due; - uint16_t cwnd_counter; - uint16_t cwnd; - uint16_t ssthresh; - uint16_t recv_wnd; - uint16_t recv_wnd_scale; - - /* tcp_input */ - uint32_t rcv_nxt; - uint32_t rcv_ackd; - uint32_t rcv_processed; - uint16_t wnd; - uint16_t wnd_scale; - uint16_t remote_closed; - - /* options */ - uint32_t ts_nxt; - uint16_t mss; - uint8_t sack_ok; - uint8_t ts_ok; - uint8_t mss_ok; - uint8_t scale_ok; - struct tcp_sack_block *sacks; - uint8_t jumbo; - uint32_t linger_timeout; - - /* Transmission */ - uint8_t x_mode; - uint8_t dupacks; - uint8_t backoff; - uint8_t localZeroWindow; - - /* Keepalive */ - uint32_t keepalive_tmr; - pico_time ack_timestamp; - uint32_t ka_time; - uint32_t ka_intvl; - uint32_t ka_probes; - uint32_t ka_retries_count; - - /* FIN timer */ - uint32_t fin_tmr; -}; - -/* Queues */ -static struct pico_queue 
tcp_in = { - 0 -}; -static struct pico_queue tcp_out = { - 0 -}; - -/* If Nagle enabled, this function can make 1 new segment from smaller segments in hold queue */ -static struct pico_frame *pico_hold_segment_make(struct pico_socket_tcp *t); - -/* checks if tcpq_in is empty */ -int pico_tcp_queue_in_is_empty(struct pico_socket *s) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - - if (t->tcpq_in.frames == 0) - return 1; - else - return 0; -} - -/* Useful for getting rid of the beginning of the buffer (read() op) */ -static int release_until(struct pico_tcp_queue *q, uint32_t seq) -{ - void *head = first_segment(q); - int ret = 0; - int32_t seq_result = 0; - - if (!head) - return ret; - - do { - void *cur = head; - - if (IS_INPUT_QUEUE(q)) - seq_result = pico_seq_compare(((struct tcp_input_segment *)head)->seq + ((struct tcp_input_segment *)head)->payload_len, seq); - else - seq_result = pico_seq_compare(SEQN((struct pico_frame *)head) + ((struct pico_frame *)head)->payload_len, seq); - - if (seq_result <= 0) - { - head = next_segment(q, cur); - /* tcp_dbg("Releasing %08x, len: %d\n", SEQN((struct pico_frame *)head), ((struct pico_frame *)head)->payload_len); */ - pico_discard_segment(q, cur); - ret++; - } else { - break; - } - } while (head); - - return ret; -} - -static int release_all_until(struct pico_tcp_queue *q, uint32_t seq, pico_time *timestamp) -{ - void *f = NULL; - struct pico_tree_node *idx, *temp; - int seq_result; - int ret = 0; - *timestamp = 0; - - pico_tree_foreach_safe(idx, &q->pool, temp) - { - f = idx->keyValue; - - if (IS_INPUT_QUEUE(q)) - seq_result = pico_seq_compare(((struct tcp_input_segment *)f)->seq + ((struct tcp_input_segment *)f)->payload_len, seq); - else - seq_result = pico_seq_compare(SEQN((struct pico_frame *)f) + ((struct pico_frame *)f)->payload_len, seq); - - if (seq_result <= 0) { - tcp_dbg("Releasing %p\n", f); - if ((seq_result == 0) && !IS_INPUT_QUEUE(q)) - *timestamp = ((struct pico_frame *)f)->timestamp; - 
- pico_discard_segment(q, f); - ret++; - } else { - return ret; - } - } - return ret; -} - - -/* API calls */ - -uint16_t pico_tcp_checksum_ipv4(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - struct pico_tcp_hdr *tcp_hdr = (struct pico_tcp_hdr *) f->transport_hdr; - struct pico_socket *s = f->sock; - struct pico_ipv4_pseudo_hdr pseudo; - - if (s) { - /* Case of outgoing frame */ - /* dbg("TCP CRC: on outgoing frame\n"); */ - pseudo.src.addr = s->local_addr.ip4.addr; - pseudo.dst.addr = s->remote_addr.ip4.addr; - } else { - /* Case of incoming frame */ - /* dbg("TCP CRC: on incoming frame\n"); */ - pseudo.src.addr = hdr->src.addr; - pseudo.dst.addr = hdr->dst.addr; - } - - pseudo.zeros = 0; - pseudo.proto = PICO_PROTO_TCP; - pseudo.len = (uint16_t)short_be(f->transport_len); - - return pico_dualbuffer_checksum(&pseudo, sizeof(struct pico_ipv4_pseudo_hdr), tcp_hdr, f->transport_len); -} - -#ifdef PICO_SUPPORT_IPV6 -uint16_t pico_tcp_checksum_ipv6(struct pico_frame *f) -{ - struct pico_ipv6_hdr *ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - struct pico_tcp_hdr *tcp_hdr = (struct pico_tcp_hdr *)f->transport_hdr; - struct pico_ipv6_pseudo_hdr pseudo; - struct pico_socket *s = f->sock; - - /* XXX If the IPv6 packet contains a Routing header, the Destination - * Address used in the pseudo-header is that of the final destination */ - if (s) { - /* Case of outgoing frame */ - pseudo.src = s->local_addr.ip6; - pseudo.dst = s->remote_addr.ip6; - } else { - /* Case of incoming frame */ - pseudo.src = ipv6_hdr->src; - pseudo.dst = ipv6_hdr->dst; - } - - pseudo.zero[0] = 0; - pseudo.zero[1] = 0; - pseudo.zero[2] = 0; - pseudo.len = long_be(f->transport_len); - pseudo.nxthdr = PICO_PROTO_TCP; - - return pico_dualbuffer_checksum(&pseudo, sizeof(struct pico_ipv6_pseudo_hdr), tcp_hdr, f->transport_len); -} -#endif - -#ifdef PICO_SUPPORT_IPV4 -static inline int checksum_is_ipv4(struct pico_frame *f) -{ - return (IS_IPV4(f) || (f->sock 
&& (f->sock->net == &pico_proto_ipv4))); -} -#endif - -#ifdef PICO_SUPPORT_IPV6 -static inline int checksum_is_ipv6(struct pico_frame *f) -{ - return ((IS_IPV6(f)) || (f->sock && (f->sock->net == &pico_proto_ipv6))); -} -#endif - -uint16_t pico_tcp_checksum(struct pico_frame *f) -{ - (void)f; - - #ifdef PICO_SUPPORT_IPV4 - if (checksum_is_ipv4(f)) - return pico_tcp_checksum_ipv4(f); - - #endif - - #ifdef PICO_SUPPORT_IPV6 - if (checksum_is_ipv6(f)) - return pico_tcp_checksum_ipv6(f); - - #endif - return 0xffff; -} - -static void tcp_send_fin(struct pico_socket_tcp *t); -static int pico_tcp_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr; - struct pico_socket_tcp *t = (struct pico_socket_tcp *)f->sock; - IGNORE_PARAMETER(self); - hdr = (struct pico_tcp_hdr *)f->transport_hdr; - f->sock->timestamp = TCP_TIME; - if (f->payload_len > 0) { - tcp_dbg("Process out: sending %p (%d bytes)\n", f, f->payload_len); - } else { - tcp_dbg("Sending empty packet\n"); - } - - if (f->payload_len > 0) { - if (pico_seq_compare(SEQN(f) + f->payload_len, t->snd_nxt) > 0) { - t->snd_nxt = SEQN(f) + f->payload_len; - tcp_dbg("%s: snd_nxt is now %08x\n", __FUNCTION__, t->snd_nxt); - } - } else if (hdr->flags == PICO_TCP_ACK) { /* pure ack */ - /* hdr->seq = long_be(t->snd_nxt); / * XXX disabled this to not to mess with seq nrs of ACKs anymore * / */ - } else { - tcp_dbg("%s: non-pure ACK with len=0, fl:%04x\n", __FUNCTION__, hdr->flags); - } - - pico_network_send(f); - return 0; -} - -int pico_tcp_push(struct pico_protocol *self, struct pico_frame *data); - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_tcp = { - .name = "tcp", - .proto_number = PICO_PROTO_TCP, - .layer = PICO_LAYER_TRANSPORT, - .process_in = pico_transport_process_in, - .process_out = pico_tcp_process_out, - .push = pico_tcp_push, - .q_in = &tcp_in, - .q_out = &tcp_out, -}; - -static uint32_t pico_paws(void) -{ - static uint32_t _paws = 0; - _paws = 
pico_rand(); - return long_be(_paws); -} - -static inline void tcp_add_sack_option(struct pico_socket_tcp *ts, struct pico_frame *f, uint16_t flags, uint32_t *ii) -{ - if (flags & PICO_TCP_ACK) { - struct tcp_sack_block *sb; - uint32_t len_off; - - if (ts->sack_ok && ts->sacks) { - f->start[(*ii)++] = PICO_TCP_OPTION_SACK; - len_off = *ii; - f->start[(*ii)++] = PICO_TCPOPTLEN_SACK; - while(ts->sacks) { - sb = ts->sacks; - ts->sacks = sb->next; - memcpy(f->start + *ii, sb, 2 * sizeof(uint32_t)); - *ii += (2 * (uint32_t)sizeof(uint32_t)); - f->start[len_off] = (uint8_t)(f->start[len_off] + (2 * sizeof(uint32_t))); - PICO_FREE(sb); - } - } - } -} - -static void tcp_add_options(struct pico_socket_tcp *ts, struct pico_frame *f, uint16_t flags, uint16_t optsiz) -{ - uint32_t tsval = long_be((uint32_t)TCP_TIME); - uint32_t tsecr = long_be(ts->ts_nxt); - uint32_t i = 0; - f->start = f->transport_hdr + PICO_SIZE_TCPHDR; - - memset(f->start, PICO_TCP_OPTION_NOOP, optsiz); /* fill blanks with noop */ - - if (flags & PICO_TCP_SYN) { - f->start[i++] = PICO_TCP_OPTION_MSS; - f->start[i++] = PICO_TCPOPTLEN_MSS; - f->start[i++] = (uint8_t)((ts->mss >> 8) & 0xFF); - f->start[i++] = (uint8_t)(ts->mss & 0xFF); - f->start[i++] = PICO_TCP_OPTION_SACK_OK; - f->start[i++] = PICO_TCPOPTLEN_SACK_OK; - } - - f->start[i++] = PICO_TCP_OPTION_WS; - f->start[i++] = PICO_TCPOPTLEN_WS; - f->start[i++] = (uint8_t)(ts->wnd_scale); - - if ((flags & PICO_TCP_SYN) || ts->ts_ok) { - f->start[i++] = PICO_TCP_OPTION_TIMESTAMP; - f->start[i++] = PICO_TCPOPTLEN_TIMESTAMP; - memcpy(f->start + i, &tsval, 4); - i += 4; - memcpy(f->start + i, &tsecr, 4); - i += 4; - } - - tcp_add_sack_option(ts, f, flags, &i); - - if (i < optsiz) - f->start[ optsiz - 1 ] = PICO_TCP_OPTION_END; -} - -static uint16_t tcp_options_size_frame(struct pico_frame *f) -{ - uint16_t size = 0; - - /* Always update window scale. 
*/ - size = (uint16_t)(size + PICO_TCPOPTLEN_WS); - if (f->transport_flags_saved) - size = (uint16_t)(size + PICO_TCPOPTLEN_TIMESTAMP); - - size = (uint16_t)(size + PICO_TCPOPTLEN_END); - size = (uint16_t)(((uint16_t)(size + 3u) >> 2u) << 2u); - return size; -} - -static void tcp_add_options_frame(struct pico_socket_tcp *ts, struct pico_frame *f) -{ - uint32_t tsval = long_be((uint32_t)TCP_TIME); - uint32_t tsecr = long_be(ts->ts_nxt); - uint32_t i = 0; - uint16_t optsiz = tcp_options_size_frame(f); - - f->start = f->transport_hdr + PICO_SIZE_TCPHDR; - - memset(f->start, PICO_TCP_OPTION_NOOP, optsiz); /* fill blanks with noop */ - - - f->start[i++] = PICO_TCP_OPTION_WS; - f->start[i++] = PICO_TCPOPTLEN_WS; - f->start[i++] = (uint8_t)(ts->wnd_scale); - - if (f->transport_flags_saved) { - f->start[i++] = PICO_TCP_OPTION_TIMESTAMP; - f->start[i++] = PICO_TCPOPTLEN_TIMESTAMP; - memcpy(f->start + i, &tsval, 4); - i += 4; - memcpy(f->start + i, &tsecr, 4); - i += 4; - } - - if (i < optsiz) - f->start[ optsiz - 1 ] = PICO_TCP_OPTION_END; -} - -static void tcp_send_ack(struct pico_socket_tcp *t); -#define tcp_send_windowUpdate(t) (tcp_send_ack(t)) - -static inline void tcp_set_space_check_winupdate(struct pico_socket_tcp *t, int32_t space, uint32_t shift) -{ - if (((uint32_t)space != t->wnd) || (shift != t->wnd_scale) || ((space - t->wnd) > (int32_t)((uint32_t)space >> 2u))) { - t->wnd = (uint16_t)space; - t->wnd_scale = (uint16_t)shift; - - if(t->wnd == 0) /* mark the entering to zero window state */ - t->localZeroWindow = 1u; - else if(t->localZeroWindow) - { - t->localZeroWindow = 0u; - tcp_send_windowUpdate(t); - } - } -} - -static void tcp_set_space(struct pico_socket_tcp *t) -{ - int32_t space; - uint32_t shift = 0; - - if (t->tcpq_in.max_size == 0) { - space = ONE_GIGABYTE; - } else { - space = (int32_t)(t->tcpq_in.max_size - t->tcpq_in.size); - } - - if (space < 0) - space = 0; - - while(space > 0xFFFF) { - space = (int32_t)(((uint32_t)space >> 1u)); - shift++; - } 
- tcp_set_space_check_winupdate(t, space, shift); -} - -/* Return 32-bit aligned option size */ -static uint16_t tcp_options_size(struct pico_socket_tcp *t, uint16_t flags) -{ - uint16_t size = 0; - struct tcp_sack_block *sb = t->sacks; - - if (flags & PICO_TCP_SYN) { /* Full options */ - size = PICO_TCPOPTLEN_MSS + PICO_TCP_OPTION_SACK_OK + PICO_TCPOPTLEN_WS + PICO_TCPOPTLEN_TIMESTAMP; - } else { - - /* Always update window scale. */ - size = (uint16_t)(size + PICO_TCPOPTLEN_WS); - - if (t->ts_ok) - size = (uint16_t)(size + PICO_TCPOPTLEN_TIMESTAMP); - - size = (uint16_t)(size + PICO_TCPOPTLEN_END); - } - - if ((flags & PICO_TCP_ACK) && (t->sack_ok && sb)) { - size = (uint16_t)(size + 2); - while(sb) { - size = (uint16_t)(size + (2 * sizeof(uint32_t))); - sb = sb->next; - } - } - - size = (uint16_t)(((size + 3u) >> 2u) << 2u); - return size; -} - -uint16_t pico_tcp_overhead(struct pico_socket *s) -{ - if (!s) - return 0; - - return (uint16_t)(PICO_SIZE_TCPHDR + tcp_options_size((struct pico_socket_tcp *)s, (uint16_t)0)); /* hdr + Options size for data pkt */ - -} - -static inline int tcp_sack_marker(struct pico_frame *f, uint32_t start, uint32_t end, uint16_t *count) -{ - int cmp; - cmp = pico_seq_compare(SEQN(f), start); - if (cmp > 0) - return 0; - - if (cmp == 0) { - cmp = pico_seq_compare(SEQN(f) + f->payload_len, end); - if (cmp > 0) { - tcp_dbg("Invalid SACK: ignoring.\n"); - } - - tcp_dbg("Marking (by SACK) segment %08x BLK:[%08x::%08x]\n", SEQN(f), start, end); - f->flags |= PICO_FRAME_FLAG_SACKED; - (*count)++; - } - - return cmp; -} - -static void tcp_process_sack(struct pico_socket_tcp *t, uint32_t start, uint32_t end) -{ - struct pico_frame *f; - struct pico_tree_node *index, *temp; - uint16_t count = 0; - - pico_tree_foreach_safe(index, &t->tcpq_out.pool, temp){ - f = index->keyValue; - if (tcp_sack_marker(f, start, end, &count) == 0) - goto done; - } - -done: - if (t->x_mode > PICO_TCP_LOOKAHEAD) { - if (t->in_flight > (count)) - t->in_flight -= 
(count); - else - t->in_flight = 0; - } -} - -inline static void tcp_add_header(struct pico_socket_tcp *t, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *)f->transport_hdr; - f->timestamp = TCP_TIME; - tcp_add_options(t, f, 0, (uint16_t)(f->transport_len - f->payload_len - (uint16_t)PICO_SIZE_TCPHDR)); - hdr->rwnd = short_be(t->wnd); - hdr->flags |= PICO_TCP_PSH | PICO_TCP_ACK; - hdr->ack = long_be(t->rcv_nxt); - hdr->crc = 0; - hdr->crc = short_be(pico_tcp_checksum(f)); -} - -static void tcp_rcv_sack(struct pico_socket_tcp *t, uint8_t *opt, int len) -{ - uint32_t start, end; - int i = 0; - if (len % 8) { - tcp_dbg("SACK: Invalid len.\n"); - return; - } - - while (i < len) { - start = long_from(opt + i); - i += 4; - end = long_from(opt + i); - i += 4; - tcp_process_sack(t, long_be(start), long_be(end)); - } -} - -static int tcpopt_len_check(uint32_t *idx, uint8_t len, uint8_t expected) -{ - if (len != expected) { - *idx = *idx + len - 2; - return -1; - } - - return 0; -} - -static inline void tcp_parse_option_ws(struct pico_socket_tcp *t, uint8_t len, uint8_t *opt, uint32_t *idx) -{ - if (tcpopt_len_check(idx, len, PICO_TCPOPTLEN_WS) < 0) - return; - - t->recv_wnd_scale = opt[(*idx)++]; - tcp_dbg_options("TCP Window scale: received %d\n", t->recv_wnd_scale); - -} - -static inline void tcp_parse_option_sack_ok(struct pico_socket_tcp *t, struct pico_frame *f, uint8_t len, uint32_t *idx) -{ - if (tcpopt_len_check(idx, len, PICO_TCPOPTLEN_SACK_OK) < 0) - return; - - if(((struct pico_tcp_hdr *)(f->transport_hdr))->flags & PICO_TCP_SYN ) - t->sack_ok = 1; -} - -static inline void tcp_parse_option_mss(struct pico_socket_tcp *t, uint8_t len, uint8_t *opt, uint32_t *idx) -{ - uint16_t mss; - if (tcpopt_len_check(idx, len, PICO_TCPOPTLEN_MSS) < 0) - return; - - t->mss_ok = 1; - mss = short_from(opt + *idx); - *idx += (uint32_t)sizeof(uint16_t); - if (t->mss > short_be(mss)) - t->mss = short_be(mss); -} - -static inline void 
tcp_parse_option_timestamp(struct pico_socket_tcp *t, struct pico_frame *f, uint8_t len, uint8_t *opt, uint32_t *idx) -{ - uint32_t tsval, tsecr; - if (tcpopt_len_check(idx, len, PICO_TCPOPTLEN_TIMESTAMP) < 0) - return; - - t->ts_ok = 1; - tsval = long_from(opt + *idx); - *idx += (uint32_t)sizeof(uint32_t); - tsecr = long_from(opt + *idx); - f->timestamp = long_be(tsecr); - *idx += (uint32_t)sizeof(uint32_t); - t->ts_nxt = long_be(tsval); -} - -static void tcp_parse_options(struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)f->sock; - uint8_t *opt = f->transport_hdr + PICO_SIZE_TCPHDR; - uint32_t i = 0; - f->timestamp = 0; - while (i < (f->transport_len - PICO_SIZE_TCPHDR)) { - uint8_t type = opt[i++]; - uint8_t len; - if(i < (f->transport_len - PICO_SIZE_TCPHDR) && (type > 1)) - len = opt[i++]; - else - len = 1; - - if (f->payload && ((opt + i) > f->payload)) - break; - - tcp_dbg_options("Received option '%d', len = %d \n", type, len); - switch (type) { - case PICO_TCP_OPTION_NOOP: - case PICO_TCP_OPTION_END: - break; - case PICO_TCP_OPTION_WS: - tcp_parse_option_ws(t, len, opt, &i); - break; - case PICO_TCP_OPTION_SACK_OK: - tcp_parse_option_sack_ok(t, f, len, &i); - break; - case PICO_TCP_OPTION_MSS: - tcp_parse_option_mss(t, len, opt, &i); - break; - case PICO_TCP_OPTION_TIMESTAMP: - tcp_parse_option_timestamp(t, f, len, opt, &i); - break; - - case PICO_TCP_OPTION_SACK: - tcp_rcv_sack(t, opt + i, len - 2); - i = i + len - 2; - break; - default: - tcp_dbg_options("TCP: received unsupported option %u\n", type); - i = i + len - 2; - } - } -} - -static inline void tcp_send_add_tcpflags(struct pico_socket_tcp *ts, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) f->transport_hdr; - if (ts->rcv_nxt != 0) { - if ((ts->rcv_ackd == 0) || (pico_seq_compare(ts->rcv_ackd, ts->rcv_nxt) != 0) || (hdr->flags & PICO_TCP_ACK)) { - hdr->flags |= PICO_TCP_ACK; - hdr->ack = long_be(ts->rcv_nxt); - ts->rcv_ackd = 
ts->rcv_nxt; - } - } - - if (hdr->flags & PICO_TCP_SYN) { - ts->snd_nxt++; - } - - if (f->payload_len > 0) { - hdr->flags |= PICO_TCP_PSH | PICO_TCP_ACK; - hdr->ack = long_be(ts->rcv_nxt); - ts->rcv_ackd = ts->rcv_nxt; - } -} - -static inline int tcp_send_try_enqueue(struct pico_socket_tcp *ts, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) f->transport_hdr; - struct pico_frame *cpy; - (void)hdr; - - /* TCP: ENQUEUE to PROTO ( Transmit ) */ - cpy = pico_frame_copy(f); - if (!cpy) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if ((pico_enqueue(&tcp_out, cpy) > 0)) { - if (f->payload_len > 0) { - ts->in_flight++; - ts->snd_nxt += f->payload_len; /* update next pointer here to prevent sending same segment twice when called twice in same tick */ - } - - tcp_dbg("DBG> [tcp output] state: %02x --> local port:%u remote port: %u seq: %08x ack: %08x flags: %02x = t_len: %u, hdr: %u payload: %d\n", - TCPSTATE(&ts->sock) >> 8, short_be(hdr->trans.sport), short_be(hdr->trans.dport), SEQN(f), ACKN(f), hdr->flags, f->transport_len, (hdr->len & 0xf0) >> 2, f->payload_len ); - } else { - pico_frame_discard(cpy); - } - - return 0; - -} - -static int tcp_send(struct pico_socket_tcp *ts, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) f->transport_hdr; - hdr->trans.sport = ts->sock.local_port; - hdr->trans.dport = ts->sock.remote_port; - if (!hdr->seq) - hdr->seq = long_be(ts->snd_nxt); - - tcp_send_add_tcpflags(ts, f); - - f->start = f->transport_hdr + PICO_SIZE_TCPHDR; - hdr->rwnd = short_be(ts->wnd); - hdr->crc = 0; - hdr->crc = short_be(pico_tcp_checksum(f)); - - return tcp_send_try_enqueue(ts, f); - -} - -/* #define PICO_TCP_SUPPORT_SOCKET_STATS */ - -#ifdef PICO_TCP_SUPPORT_SOCKET_STATS -static void sock_stats(uint32_t when, void *arg) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)arg; - tcp_dbg("STATISTIC> [%lu] socket state: %02x --> local port:%d remote port: %d queue size: %d snd_una: %08x 
snd_nxt: %08x cwnd: %d\n", - when, t->sock.state, short_be(t->sock.local_port), short_be(t->sock.remote_port), t->tcpq_out.size, SEQN((struct pico_frame *)first_segment(&t->tcpq_out)), t->snd_nxt, t->cwnd); - if (!pico_timer_add(2000, sock_stats, t)) { - tcp_dbg("TCP: Failed to start socket statistics timer\n"); - } -} -#endif - -static void tcp_send_probe(struct pico_socket_tcp *t); - -static void pico_tcp_keepalive(pico_time now, void *arg) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)arg; - if (((t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_ESTABLISHED) && (t->ka_time > 0)) { - if (t->ka_time < (now - t->ack_timestamp)) { - if (t->ka_retries_count == 0) { - /* First probe */ - tcp_send_probe(t); - t->ka_retries_count++; - } - - if (t->ka_retries_count > t->ka_probes) { - if (t->sock.wakeup) - { - pico_err = PICO_ERR_ECONNRESET; - t->sock.wakeup(PICO_SOCK_EV_ERR, &t->sock); - } - } - - if (((t->ka_retries_count * (pico_time)t->ka_intvl) + t->ka_time) < (now - t->ack_timestamp)) { - /* Next probe */ - tcp_send_probe(t); - t->ka_retries_count++; - } - } else { - t->ka_retries_count = 0; - } - } - - t->keepalive_tmr = pico_timer_add(1000, pico_tcp_keepalive, t); - if (!t->keepalive_tmr) { - tcp_dbg("TCP: Failed to start keepalive timer\n"); - if (t->sock.wakeup) - t->sock.wakeup(PICO_SOCK_EV_ERR, &t->sock); - } -} - -static inline void rto_set(struct pico_socket_tcp *t, uint32_t rto) -{ - if (rto < PICO_TCP_RTO_MIN) - rto = PICO_TCP_RTO_MIN; - - if (rto > PICO_TCP_RTO_MAX) - rto = PICO_TCP_RTO_MAX; - - t->rto = rto; -} - - -struct pico_socket *pico_tcp_open(uint16_t family) -{ - struct pico_socket_tcp *t = PICO_ZALLOC(sizeof(struct pico_socket_tcp)); - if (!t) - return NULL; - - t->sock.timestamp = TCP_TIME; - pico_socket_set_family(&t->sock, family); - t->mss = (uint16_t)(pico_socket_get_mss(&t->sock) - PICO_SIZE_TCPHDR); - t->tcpq_in.pool.root = t->tcpq_hold.pool.root = t->tcpq_out.pool.root = &LEAF; - t->tcpq_hold.pool.compare = 
t->tcpq_out.pool.compare = segment_compare; - t->tcpq_in.pool.compare = input_segment_compare; - t->tcpq_in.max_size = PICO_DEFAULT_SOCKETQ; - t->tcpq_out.max_size = PICO_DEFAULT_SOCKETQ; - t->tcpq_hold.max_size = 2u * t->mss; - rto_set(t, PICO_TCP_RTO_MIN); - - /* Uncomment next line and disable Nagle by default */ - t->sock.opt_flags |= (1 << PICO_SOCKET_OPT_TCPNODELAY); - - /* Uncomment next line and Nagle is enabled by default */ - /* t->sock.opt_flags &= (uint16_t) ~(1 << PICO_SOCKET_OPT_TCPNODELAY); */ - - /* Set default linger for the socket */ - t->linger_timeout = PICO_SOCKET_LINGER_TIMEOUT; - - -#ifdef PICO_TCP_SUPPORT_SOCKET_STATS - if (!pico_timer_add(2000, sock_stats, t)) { - tcp_dbg("TCP: Failed to start socket statistics timer\n"); - PICO_FREE(t); - return NULL; - } -#endif - - t->keepalive_tmr = pico_timer_add(1000, pico_tcp_keepalive, t); - if (!t->keepalive_tmr) { - tcp_dbg("TCP: Failed to start keepalive timer\n"); - PICO_FREE(t); - return NULL; - } - tcp_set_space(t); - - return &t->sock; -} - -static uint32_t tcp_read_finish(struct pico_socket *s, uint32_t tot_rd_len) -{ - struct pico_socket_tcp *t = TCP_SOCK(s); - tcp_set_space(t); - if (t->tcpq_in.size == 0) { - s->ev_pending &= (uint16_t)(~PICO_SOCK_EV_RD); - } - - if (t->remote_closed) { - s->ev_pending |= (uint16_t)(PICO_SOCK_EV_CLOSE); - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_CLOSE_WAIT; - /* set SHUT_REMOTE */ - s->state |= PICO_SOCKET_STATE_SHUT_REMOTE; - if (s->wakeup) { - s->wakeup(PICO_SOCK_EV_CLOSE, s); - } - } - - return tot_rd_len; -} - -static inline uint32_t tcp_read_in_frame_len(struct tcp_input_segment *f, int32_t in_frame_off, uint32_t tot_rd_len, uint32_t read_op_len) -{ - uint32_t in_frame_len = 0; - if (in_frame_off > 0) - { - if ((uint32_t)in_frame_off > f->payload_len) { - tcp_dbg("FATAL TCP ERR: in_frame_off > f->payload_len\n"); - } - - in_frame_len = f->payload_len - (uint32_t)in_frame_off; - } else { /* in_frame_off == 0 */ - in_frame_len = 
f->payload_len; - } - - if ((in_frame_len + tot_rd_len) > (uint32_t)read_op_len) { - in_frame_len = read_op_len - tot_rd_len; - } - - return in_frame_len; - -} - -static inline void tcp_read_check_segment_done(struct pico_socket_tcp *t, struct tcp_input_segment *f, uint32_t in_frame_len) -{ - if ((in_frame_len == 0u) || (in_frame_len == (uint32_t)f->payload_len)) { - pico_discard_segment(&t->tcpq_in, f); - } -} - -uint32_t pico_tcp_read(struct pico_socket *s, void *buf, uint32_t len) -{ - struct pico_socket_tcp *t = TCP_SOCK(s); - struct tcp_input_segment *f; - int32_t in_frame_off; - uint32_t in_frame_len; - uint32_t tot_rd_len = 0; - - while (tot_rd_len < len) { - /* To be sure we don't have garbage at the beginning */ - release_until(&t->tcpq_in, t->rcv_processed); - f = first_segment(&t->tcpq_in); - if (!f) - return tcp_read_finish(s, tot_rd_len); - - in_frame_off = pico_seq_compare(t->rcv_processed, f->seq); - /* Check for hole at the beginning of data, awaiting retransmissions. */ - if (in_frame_off < 0) { - tcp_dbg("TCP> read hole beginning of data, %08x - %08x. rcv_nxt is %08x\n", t->rcv_processed, f->seq, t->rcv_nxt); - return tcp_read_finish(s, tot_rd_len); - } - - in_frame_len = tcp_read_in_frame_len(f, in_frame_off, tot_rd_len, len); - - - memcpy((uint8_t *)buf + tot_rd_len, f->payload + in_frame_off, in_frame_len); - tot_rd_len += in_frame_len; - t->rcv_processed += in_frame_len; - - tcp_read_check_segment_done(t, f, in_frame_len); - - } - return tcp_read_finish(s, tot_rd_len); -} - -int pico_tcp_initconn(struct pico_socket *s); -static void initconn_retry(pico_time when, void *arg) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)arg; - IGNORE_PARAMETER(when); - if (TCPSTATE(&t->sock) != PICO_SOCKET_STATE_TCP_ESTABLISHED) - { - if (t->backoff > PICO_TCP_MAX_CONNECT_RETRIES) { - tcp_dbg("TCP> Connection timeout. 
\n"); - if (t->sock.wakeup) - { - pico_err = PICO_ERR_ECONNREFUSED; - t->sock.wakeup(PICO_SOCK_EV_ERR, &t->sock); - } - - pico_socket_del(&t->sock); - return; - } - - tcp_dbg("TCP> SYN retry %d...\n", t->backoff); - t->backoff++; - pico_tcp_initconn(&t->sock); - } else { - tcp_dbg("TCP> Connection is already established: no retry needed. good.\n"); - } -} - -int pico_tcp_initconn(struct pico_socket *s) -{ - struct pico_socket_tcp *ts = TCP_SOCK(s); - struct pico_frame *syn; - struct pico_tcp_hdr *hdr; - uint16_t mtu, opt_len = tcp_options_size(ts, PICO_TCP_SYN); - - syn = s->net->alloc(s->net, NULL, (uint16_t)(PICO_SIZE_TCPHDR + opt_len)); - if (!syn) - return -1; - - hdr = (struct pico_tcp_hdr *) syn->transport_hdr; - - if (!ts->snd_nxt) - ts->snd_nxt = long_be(pico_paws()); - - ts->snd_last = ts->snd_nxt; - ts->cwnd = PICO_TCP_IW; - mtu = (uint16_t)pico_socket_get_mss(s); - ts->mss = (uint16_t)(mtu - PICO_SIZE_TCPHDR); - ts->ssthresh = (uint16_t)((uint16_t)(PICO_DEFAULT_SOCKETQ / ts->mss) - (((uint16_t)(PICO_DEFAULT_SOCKETQ / ts->mss)) >> 3u)); - syn->sock = s; - hdr->seq = long_be(ts->snd_nxt); - hdr->len = (uint8_t)((PICO_SIZE_TCPHDR + opt_len) << 2 | ts->jumbo); - hdr->flags = PICO_TCP_SYN; - tcp_set_space(ts); - hdr->rwnd = short_be(ts->wnd); - tcp_add_options(ts, syn, PICO_TCP_SYN, opt_len); - hdr->trans.sport = ts->sock.local_port; - hdr->trans.dport = ts->sock.remote_port; - - hdr->crc = 0; - hdr->crc = short_be(pico_tcp_checksum(syn)); - - /* TCP: ENQUEUE to PROTO ( SYN ) */ - tcp_dbg("Sending SYN... 
(ports: %d - %d) size: %d\n", short_be(ts->sock.local_port), short_be(ts->sock.remote_port), syn->buffer_len); - ts->retrans_tmr = pico_timer_add(PICO_TCP_SYN_TO << ts->backoff, initconn_retry, ts); - if (!ts->retrans_tmr) { - tcp_dbg("TCP: Failed to start initconn_retry timer\n"); - PICO_FREE(syn); - return -1; - } - pico_enqueue(&tcp_out, syn); - return 0; -} - -static int tcp_send_synack(struct pico_socket *s) -{ - struct pico_socket_tcp *ts = TCP_SOCK(s); - struct pico_frame *synack; - struct pico_tcp_hdr *hdr; - uint16_t opt_len = tcp_options_size(ts, PICO_TCP_SYN | PICO_TCP_ACK); - - synack = s->net->alloc(s->net, NULL, (uint16_t)(PICO_SIZE_TCPHDR + opt_len)); - if (!synack) - return -1; - - hdr = (struct pico_tcp_hdr *) synack->transport_hdr; - - synack->sock = s; - hdr->len = (uint8_t)((PICO_SIZE_TCPHDR + opt_len) << 2 | ts->jumbo); - hdr->flags = PICO_TCP_SYN | PICO_TCP_ACK; - hdr->rwnd = short_be(ts->wnd); - hdr->seq = long_be(ts->snd_nxt); - ts->rcv_processed = long_be(hdr->seq); - ts->snd_last = ts->snd_nxt; - tcp_set_space(ts); - tcp_add_options(ts, synack, hdr->flags, opt_len); - synack->payload_len = 0; - synack->timestamp = TCP_TIME; - tcp_send(ts, synack); - pico_frame_discard(synack); - return 0; -} - -static void tcp_send_empty(struct pico_socket_tcp *t, uint16_t flags, int is_keepalive) -{ - struct pico_frame *f; - struct pico_tcp_hdr *hdr; - uint16_t opt_len = tcp_options_size(t, flags); - f = t->sock.net->alloc(t->sock.net, NULL, (uint16_t)(PICO_SIZE_TCPHDR + opt_len)); - if (!f) { - return; - } - - f->sock = &t->sock; - hdr = (struct pico_tcp_hdr *) f->transport_hdr; - hdr->len = (uint8_t)((PICO_SIZE_TCPHDR + opt_len) << 2 | t->jumbo); - hdr->flags = (uint8_t)flags; - hdr->rwnd = short_be(t->wnd); - tcp_set_space(t); - tcp_add_options(t, f, flags, opt_len); - hdr->trans.sport = t->sock.local_port; - hdr->trans.dport = t->sock.remote_port; - hdr->seq = long_be(t->snd_nxt); - if ((flags & PICO_TCP_ACK) != 0) { - hdr->ack = long_be(t->rcv_nxt); 
- } - - if (is_keepalive) - hdr->seq = long_be(t->snd_nxt - 1); - - t->rcv_ackd = t->rcv_nxt; - - f->start = f->transport_hdr + PICO_SIZE_TCPHDR; - hdr->rwnd = short_be(t->wnd); - hdr->crc = 0; - hdr->crc = short_be(pico_tcp_checksum(f)); - - /* TCP: ENQUEUE to PROTO */ - pico_enqueue(&tcp_out, f); -} - -static void tcp_send_ack(struct pico_socket_tcp *t) -{ - tcp_send_empty(t, PICO_TCP_ACK, 0); -} - -static void tcp_send_probe(struct pico_socket_tcp *t) -{ - /* tcp_dbg("Sending probe\n"); */ - tcp_send_empty(t, PICO_TCP_PSHACK, 1); -} - -static int tcp_do_send_rst(struct pico_socket *s, uint32_t seq) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - uint16_t opt_len = tcp_options_size(t, PICO_TCP_RST); - struct pico_frame *f; - struct pico_tcp_hdr *hdr; - f = t->sock.net->alloc(t->sock.net, NULL, (uint16_t)(PICO_SIZE_TCPHDR + opt_len)); - if (!f) { - return -1; - } - - f->sock = &t->sock; - tcp_dbg("TCP SEND_RST >>>>>>>>>>>>>>> START\n"); - - hdr = (struct pico_tcp_hdr *) f->transport_hdr; - hdr->len = (uint8_t)((PICO_SIZE_TCPHDR + opt_len) << 2 | t->jumbo); - hdr->flags = PICO_TCP_RST; - hdr->rwnd = short_be(t->wnd); - tcp_set_space(t); - tcp_add_options(t, f, PICO_TCP_RST, opt_len); - hdr->trans.sport = t->sock.local_port; - hdr->trans.dport = t->sock.remote_port; - hdr->seq = seq; - hdr->ack = long_be(t->rcv_nxt); - t->rcv_ackd = t->rcv_nxt; - f->start = f->transport_hdr + PICO_SIZE_TCPHDR; - hdr->rwnd = short_be(t->wnd); - hdr->crc = 0; - hdr->crc = short_be(pico_tcp_checksum(f)); - - /* TCP: ENQUEUE to PROTO */ - pico_enqueue(&tcp_out, f); - tcp_dbg("TCP SEND_RST >>>>>>>>>>>>>>> DONE\n"); - return 0; -} - -static int tcp_send_rst(struct pico_socket *s, struct pico_frame *fr) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - struct pico_tcp_hdr *hdr_rcv; - int ret; - - if (fr && ((s->state & PICO_SOCKET_STATE_TCP) > PICO_SOCKET_STATE_TCP_SYN_RECV)) { - /* in synchronized state: send RST with seq = ack from previous segment */ - 
hdr_rcv = (struct pico_tcp_hdr *) fr->transport_hdr; - ret = tcp_do_send_rst(s, hdr_rcv->ack); - } else { - /* non-synchronized state */ - /* go to CLOSED here to prevent timer callback to go on after timeout */ - (t->sock).state &= 0x00FFU; - (t->sock).state |= PICO_SOCKET_STATE_TCP_CLOSED; - ret = tcp_do_send_rst(s, long_be(t->snd_nxt)); - - /* Set generic socket state to CLOSED, too */ - (t->sock).state &= 0xFF00U; - (t->sock).state |= PICO_SOCKET_STATE_CLOSED; - - /* call EV_FIN wakeup before deleting */ - if ((t->sock).wakeup) - (t->sock).wakeup(PICO_SOCK_EV_FIN, &(t->sock)); - - /* delete socket */ - pico_socket_del(&t->sock); - } - - return ret; -} - -static inline void tcp_fill_rst_payload(struct pico_frame *fr, struct pico_frame *f) -{ - /* fill in IP data from original frame */ - if (IS_IPV4(fr)) { - memcpy(f->net_hdr, fr->net_hdr, sizeof(struct pico_ipv4_hdr)); - ((struct pico_ipv4_hdr *)(f->net_hdr))->dst.addr = ((struct pico_ipv4_hdr *)(fr->net_hdr))->src.addr; - ((struct pico_ipv4_hdr *)(f->net_hdr))->src.addr = ((struct pico_ipv4_hdr *)(fr->net_hdr))->dst.addr; - tcp_dbg("Making IPv4 reset frame...\n"); - - } else { - memcpy(f->net_hdr, fr->net_hdr, sizeof(struct pico_ipv6_hdr)); - ((struct pico_ipv6_hdr *)(f->net_hdr))->dst = ((struct pico_ipv6_hdr *)(fr->net_hdr))->src; - ((struct pico_ipv6_hdr *)(f->net_hdr))->src = ((struct pico_ipv6_hdr *)(fr->net_hdr))->dst; - } - - /* fill in TCP data from original frame */ - ((struct pico_tcp_hdr *)(f->transport_hdr))->trans.dport = ((struct pico_tcp_hdr *)(fr->transport_hdr))->trans.sport; - ((struct pico_tcp_hdr *)(f->transport_hdr))->trans.sport = ((struct pico_tcp_hdr *)(fr->transport_hdr))->trans.dport; - -} - - -static inline void tcp_fill_rst_header(struct pico_frame *fr, struct pico_tcp_hdr *hdr1, struct pico_frame *f, struct pico_tcp_hdr *hdr) -{ - if(!(hdr1->flags & PICO_TCP_ACK)) - hdr->flags |= PICO_TCP_ACK; - - hdr->rwnd = 0; - if (((struct pico_tcp_hdr *)(fr->transport_hdr))->flags & 
PICO_TCP_ACK) { - hdr->seq = ((struct pico_tcp_hdr *)(fr->transport_hdr))->ack; - } else { - hdr->seq = 0U; - } - - hdr->ack = 0; - if(!(hdr1->flags & PICO_TCP_ACK)) - hdr->ack = long_be(long_be(((struct pico_tcp_hdr *)(fr->transport_hdr))->seq) + fr->payload_len); - - hdr->crc = short_be(pico_tcp_checksum(f)); -} - -int pico_tcp_reply_rst(struct pico_frame *fr) -{ - struct pico_tcp_hdr *hdr, *hdr1; - struct pico_frame *f; - uint16_t size = PICO_SIZE_TCPHDR; - - - hdr1 = (struct pico_tcp_hdr *) (fr->transport_hdr); - if ((hdr1->flags & PICO_TCP_RST) != 0) - return -1; - - tcp_dbg("TCP> sending RST ... \n"); - - f = fr->sock->net->alloc(fr->sock->net, NULL, size); - if (!f) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - tcp_fill_rst_payload(fr, f); - - hdr = (struct pico_tcp_hdr *) f->transport_hdr; - hdr->len = (uint8_t)(size << 2); - hdr->flags = PICO_TCP_RST; - - tcp_fill_rst_header(fr, hdr1, f, hdr); - - if (0) { -#ifdef PICO_SUPPORT_IPV4 - } else if (IS_IPV4(f)) { - tcp_dbg("Pushing IPv4 reset frame...\n"); - pico_ipv4_frame_push(f, &(((struct pico_ipv4_hdr *)(f->net_hdr))->dst), PICO_PROTO_TCP); -#endif -#ifdef PICO_SUPPORT_IPV6 - } else { - pico_ipv6_frame_push(f, NULL, &(((struct pico_ipv6_hdr *)(f->net_hdr))->dst), PICO_PROTO_TCP, 0); -#endif - } - - - return 0; -} - -static int tcp_nosync_rst(struct pico_socket *s, struct pico_frame *fr) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - struct pico_frame *f; - struct pico_tcp_hdr *hdr, *hdr_rcv; - uint16_t opt_len = tcp_options_size(t, PICO_TCP_RST | PICO_TCP_ACK); - hdr_rcv = (struct pico_tcp_hdr *) fr->transport_hdr; - - tcp_dbg("TCP SEND RST (NON-SYNC) >>>>>>>>>>>>>>>>>> state %x\n", (s->state & PICO_SOCKET_STATE_TCP)); - if (((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_LISTEN)) { - if ((fr->flags & PICO_TCP_RST) != 0) - return 0; - - return pico_tcp_reply_rst(fr); - } - - /***************************************************************************/ - /* sending RST 
*/ - f = t->sock.net->alloc(t->sock.net, NULL, (uint16_t)(PICO_SIZE_TCPHDR + opt_len)); - - if (!f) { - return -1; - } - - - f->sock = &t->sock; - hdr = (struct pico_tcp_hdr *) f->transport_hdr; - hdr->len = (uint8_t)((PICO_SIZE_TCPHDR + opt_len) << 2 | t->jumbo); - hdr->flags = PICO_TCP_RST | PICO_TCP_ACK; - hdr->rwnd = short_be(t->wnd); - tcp_set_space(t); - tcp_add_options(t, f, PICO_TCP_RST | PICO_TCP_ACK, opt_len); - hdr->trans.sport = t->sock.local_port; - hdr->trans.dport = t->sock.remote_port; - - /* non-synchronized state */ - if (hdr_rcv->flags & PICO_TCP_ACK) { - hdr->seq = hdr_rcv->ack; - } else { - hdr->seq = 0U; - } - - hdr->ack = long_be(SEQN(fr) + fr->payload_len); - - t->rcv_ackd = t->rcv_nxt; - f->start = f->transport_hdr + PICO_SIZE_TCPHDR; - hdr->rwnd = short_be(t->wnd); - hdr->crc = 0; - hdr->crc = short_be(pico_tcp_checksum(f)); - - /* TCP: ENQUEUE to PROTO */ - pico_enqueue(&tcp_out, f); - - /***************************************************************************/ - - tcp_dbg("TCP SEND_RST (NON_SYNC) >>>>>>>>>>>>>>> DONE, ...\n"); - - return 0; -} - -static void tcp_deltcb(pico_time when, void *arg); - -static void tcp_linger(struct pico_socket_tcp *t) -{ - pico_timer_cancel(t->fin_tmr); - t->fin_tmr = pico_timer_add(t->linger_timeout, tcp_deltcb, t); - if (!t->fin_tmr) { - tcp_dbg("TCP: failed to start delete callback timer, deleting socket now\n"); - tcp_deltcb((pico_time)0, t); - } -} - -static void tcp_send_fin(struct pico_socket_tcp *t) -{ - struct pico_frame *f; - struct pico_tcp_hdr *hdr; - uint16_t opt_len = tcp_options_size(t, PICO_TCP_FIN); - f = t->sock.net->alloc(t->sock.net, NULL, (uint16_t)(PICO_SIZE_TCPHDR + opt_len)); - if (!f) { - return; - } - - f->sock = &t->sock; - hdr = (struct pico_tcp_hdr *) f->transport_hdr; - hdr->len = (uint8_t)((PICO_SIZE_TCPHDR + opt_len) << 2 | t->jumbo); - hdr->flags = PICO_TCP_FIN | PICO_TCP_ACK; - hdr->ack = long_be(t->rcv_nxt); - t->rcv_ackd = t->rcv_nxt; - hdr->rwnd = short_be(t->wnd); - 
tcp_set_space(t); - tcp_add_options(t, f, PICO_TCP_FIN, opt_len); - hdr->trans.sport = t->sock.local_port; - hdr->trans.dport = t->sock.remote_port; - hdr->seq = long_be(t->snd_nxt); - - f->start = f->transport_hdr + PICO_SIZE_TCPHDR; - hdr->rwnd = short_be(t->wnd); - hdr->crc = 0; - hdr->crc = short_be(pico_tcp_checksum(f)); - /* tcp_dbg("SENDING FIN...\n"); */ - if (t->linger_timeout > 0) { - pico_enqueue(&tcp_out, f); - t->snd_nxt++; - } else { - pico_frame_discard(f); - } - - tcp_linger(t); -} - -static void tcp_sack_prepare(struct pico_socket_tcp *t) -{ - struct tcp_input_segment *pkt; - uint32_t left = 0, right = 0; - struct tcp_sack_block *sb; - int n = 0; - if (t->sacks) /* previous sacks are pending */ - return; - - pkt = first_segment(&t->tcpq_in); - while(n < 3) { - if (!pkt) { - if(left) { - sb = PICO_ZALLOC(sizeof(struct tcp_sack_block)); - if (!sb) - break; - - sb->left = long_be(left); - sb->right = long_be(right); - n++; - sb->next = t->sacks; - t->sacks = sb; - left = 0; - right = 0; - } - - break; - } - - if (pkt->seq < t->rcv_nxt) { - pkt = next_segment(&t->tcpq_in, pkt); - continue; - } - - if (!left) { - left = pkt->seq; - right = pkt->seq + pkt->payload_len; - pkt = next_segment(&t->tcpq_in, pkt); - continue; - } - - if(pkt->seq == right) { - right += pkt->payload_len; - pkt = next_segment(&t->tcpq_in, pkt); - continue; - } else { - sb = PICO_ZALLOC(sizeof(struct tcp_sack_block)); - if (!sb) - break; - - sb->left = long_be(left); - sb->right = long_be(right); - n++; - sb->next = t->sacks; - t->sacks = sb; - left = 0; - right = 0; - pkt = next_segment(&t->tcpq_in, pkt); - } - } -} - -static inline int tcp_data_in_expected(struct pico_socket_tcp *t, struct pico_frame *f) -{ - struct tcp_input_segment *nxt; - if (pico_seq_compare(SEQN(f), t->rcv_nxt) == 0) { /* Exactly what we expected */ - /* Create new segment and enqueue it */ - struct tcp_input_segment *input = segment_from_frame(f); - if (!input) { - pico_err = PICO_ERR_ENOMEM; - return -1; 
- } - - if(pico_enqueue_segment(&t->tcpq_in, input) <= 0) - { - /* failed to enqueue, destroy segment */ - PICO_FREE(input->payload); - PICO_FREE(input); - return -1; - } else { - t->rcv_nxt = SEQN(f) + f->payload_len; - nxt = peek_segment(&t->tcpq_in, t->rcv_nxt); - while(nxt) { - tcp_dbg("scrolling rcv_nxt...%08x\n", t->rcv_nxt); - t->rcv_nxt += nxt->payload_len; - nxt = peek_segment(&t->tcpq_in, t->rcv_nxt); - } - t->sock.ev_pending |= PICO_SOCK_EV_RD; - } - } else { - tcp_dbg("TCP> lo segment. Uninteresting retransmission. (exp: %x got: %x)\n", t->rcv_nxt, SEQN(f)); - } - - return 0; -} - -static inline int tcp_data_in_high_segment(struct pico_socket_tcp *t, struct pico_frame *f) -{ - tcp_dbg("TCP> hi segment. Possible packet loss. I'll dupack this. (exp: %x got: %x)\n", t->rcv_nxt, SEQN(f)); - if (t->sack_ok) { - struct tcp_input_segment *input = segment_from_frame(f); - if (!input) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - if(pico_enqueue_segment(&t->tcpq_in, input) <= 0) { - /* failed to enqueue, destroy segment */ - PICO_FREE(input->payload); - PICO_FREE(input); - return -1; - } - - tcp_sack_prepare(t); - } - - return 0; -} - -static inline void tcp_data_in_send_ack(struct pico_socket_tcp *t, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) f->transport_hdr; - /* In either case, ack til recv_nxt, unless received data raises a RST flag. 
*/ - if (((t->sock.state & PICO_SOCKET_STATE_TCP) != PICO_SOCKET_STATE_TCP_CLOSE_WAIT) && - ((t->sock.state & PICO_SOCKET_STATE_TCP) != PICO_SOCKET_STATE_TCP_SYN_SENT) && - ((t->sock.state & PICO_SOCKET_STATE_TCP) != PICO_SOCKET_STATE_TCP_SYN_RECV) && - ((hdr->flags & PICO_TCP_RST) == 0)) - tcp_send_ack(t); -} - -static int tcp_data_in(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) f->transport_hdr; - uint16_t payload_len = (uint16_t)(f->transport_len - ((hdr->len & 0xf0u) >> 2u)); - int ret = 0; - (void)hdr; - - if (((hdr->len & 0xf0u) >> 2u) <= f->transport_len) { - tcp_parse_options(f); - f->payload = f->transport_hdr + ((hdr->len & 0xf0u) >> 2u); - f->payload_len = payload_len; - tcp_dbg("TCP> Received segment. (exp: %x got: %x)\n", t->rcv_nxt, SEQN(f)); - - if (pico_seq_compare(SEQN(f), t->rcv_nxt) <= 0) { - ret = tcp_data_in_expected(t, f); - } else { - ret = tcp_data_in_high_segment(t, f); - } - - tcp_data_in_send_ack(t, f); - return ret; - } else { - tcp_dbg("TCP: invalid data in pkt len, exp: %d, got %d\n", (hdr->len & 0xf0) >> 2, f->transport_len); - return -1; - } -} - -static int tcp_ack_advance_una(struct pico_socket_tcp *t, struct pico_frame *f, pico_time *timestamp) -{ - int ret = release_all_until(&t->tcpq_out, ACKN(f), timestamp); - if (ret > 0) { - t->sock.ev_pending |= PICO_SOCK_EV_WR; - } - - return ret; -} - -static uint16_t time_diff(pico_time a, pico_time b) -{ - if (a >= b) - return (uint16_t)(a - b); - else - return (uint16_t)(b - a); -} - -static void tcp_rtt(struct pico_socket_tcp *t, uint32_t rtt) -{ - - uint32_t avg = t->avg_rtt; - uint32_t rvar = t->rttvar; - if (!avg) { - /* This follows RFC2988 - * (2.2) When the first RTT measurement R is made, the host MUST set - * - * SRTT <- R - * RTTVAR <- R/2 - * RTO <- SRTT + max (G, K*RTTVAR) - */ - t->avg_rtt = rtt; - t->rttvar = rtt >> 1; - rto_set(t, t->avg_rtt + (t->rttvar << 
2)); - } else { - int32_t var = (int32_t)t->avg_rtt - (int32_t)rtt; - if (var < 0) - var = 0 - var; - - /* RFC2988, section (2.3). Alpha and beta are the ones suggested. */ - - /* First, evaluate a new value for the rttvar */ - t->rttvar <<= 2; - t->rttvar -= rvar; - t->rttvar += (uint32_t)var; - t->rttvar >>= 2; - - /* Then, calculate the new avg_rtt */ - t->avg_rtt <<= 3; - t->avg_rtt -= avg; - t->avg_rtt += rtt; - t->avg_rtt >>= 3; - - /* Finally, assign a new value for the RTO, as specified in the RFC, with K=4 */ - rto_set(t, t->avg_rtt + (t->rttvar << 2)); - } - - tcp_dbg(" -----=============== RTT CUR: %u AVG: %u RTTVAR: %u RTO: %u ======================----\n", rtt, t->avg_rtt, t->rttvar, t->rto); -} - -static void tcp_congestion_control(struct pico_socket_tcp *t) -{ - if (t->x_mode > PICO_TCP_LOOKAHEAD) - return; - - tcp_dbg("Doing congestion control\n"); - if (t->cwnd < t->ssthresh) { - t->cwnd++; - } else { - t->cwnd_counter++; - if (t->cwnd_counter >= t->cwnd) { - t->cwnd++; - t->cwnd_counter = 0; - } - } - - tcp_dbg("TCP_CWND, %lu, %u, %u, %u\n", TCP_TIME, t->cwnd, t->ssthresh, t->in_flight); -} - -static void add_retransmission_timer(struct pico_socket_tcp *t, pico_time next_ts); - - -/* Retransmission time out (RTO). 
*/ - -static void tcp_first_timeout(struct pico_socket_tcp *t) -{ - t->x_mode = PICO_TCP_BLACKOUT; - t->cwnd = PICO_TCP_IW; - t->in_flight = 0; -} - -static int tcp_rto_xmit(struct pico_socket_tcp *t, struct pico_frame *f) -{ - struct pico_frame *cpy; - /* TCP: ENQUEUE to PROTO ( retransmit )*/ - cpy = pico_frame_copy(f); - if (!cpy) { - add_retransmission_timer(t, (t->rto << t->backoff) + TCP_TIME); - return -1; - } - - if (pico_enqueue(&tcp_out, cpy) > 0) { - t->snd_last_out = SEQN(cpy); - add_retransmission_timer(t, (t->rto << (++t->backoff)) + TCP_TIME); - tcp_dbg("TCP_CWND, %lu, %u, %u, %u\n", TCP_TIME, t->cwnd, t->ssthresh, t->in_flight); - tcp_dbg("Sending RTO!\n"); - return 1; - } else { - tcp_dbg("RTO fail, retry!\n"); - add_retransmission_timer(t, (t->rto << t->backoff) + TCP_TIME); - pico_frame_discard(cpy); - return 0; - } -} - -static void tcp_next_zerowindow_probe(struct pico_socket_tcp *t) -{ - tcp_dbg("Sending probe!\n"); - tcp_send_probe(t); - add_retransmission_timer(t, (t->rto << ++t->backoff) + TCP_TIME); -} - -static int tcp_is_allowed_to_send(struct pico_socket_tcp *t) -{ - return t->sock.net && - ( - ((t->sock.state & 0xFF00) == PICO_SOCKET_STATE_TCP_ESTABLISHED) || - ((t->sock.state & 0xFF00) == PICO_SOCKET_STATE_TCP_CLOSE_WAIT) - ) && - ((t->backoff < PICO_TCP_MAX_RETRANS)); -} - -static inline int tcp_retrans_timeout_check_queue(struct pico_socket_tcp *t) -{ - struct pico_frame *f = NULL; - f = first_segment(&t->tcpq_out); - while (f) { - tcp_dbg("Checking frame in queue \n"); - if (t->x_mode == PICO_TCP_WINDOW_FULL) { - tcp_dbg("TCP BLACKOUT> TIMED OUT (output) frame %08x, len= %d rto=%d Win full: %d frame flags: %04x\n", SEQN(f), f->payload_len, t->rto, t->x_mode == PICO_TCP_WINDOW_FULL, f->flags); - tcp_next_zerowindow_probe(t); - return -1; - } - - if (t->x_mode != PICO_TCP_BLACKOUT) - tcp_first_timeout(t); - - tcp_add_header(t, f); - if (tcp_rto_xmit(t, f) > 0) /* A segment has been rexmit'd */ - return -1; - - f = 
next_segment(&t->tcpq_out, f); - } - if (t->tcpq_out.size < t->tcpq_out.max_size) - t->sock.ev_pending |= PICO_SOCK_EV_WR; - - return 0; - - - -} - -static void tcp_retrans_timeout(pico_time val, void *sock) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) sock; - - t->retrans_tmr = 0; - - if (t->retrans_tmr_due == 0ull) { - return; - } - - if (t->retrans_tmr_due > val) { - /* Timer was postponed... */ - add_retransmission_timer(t, t->retrans_tmr_due); - return; - } - - tcp_dbg("TIMEOUT! backoff = %d, rto: %d\n", t->backoff, t->rto); - t->retrans_tmr_due = 0ull; - - if (tcp_is_allowed_to_send(t)) { - if (tcp_retrans_timeout_check_queue(t) < 0) - return; - } - else if(t->backoff >= PICO_TCP_MAX_RETRANS && - ((t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_ESTABLISHED || - (t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_FIN_WAIT1 || - (t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_FIN_WAIT2 || - (t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_TIME_WAIT || - (t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_CLOSE_WAIT || - (t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_LAST_ACK || - (t->sock.state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_CLOSING)) - { - tcp_dbg("Connection timeout!\n"); - /* the retransmission timer, failed to get an ack for a frame, gives up on the connection */ - tcp_discard_all_segments(&t->tcpq_out); - if(t->sock.wakeup) - t->sock.wakeup(PICO_SOCK_EV_FIN, &t->sock); - - /* delete socket */ - pico_socket_del(&t->sock); - return; - } else { - tcp_dbg("Retransmission not allowed, rescheduling\n"); - } -} - -static void add_retransmission_timer(struct pico_socket_tcp *t, pico_time next_ts) -{ - struct pico_tree_node *index; - pico_time now = TCP_TIME; - pico_time val = 0; - - - if (next_ts == 0) { - struct pico_frame *f; - - pico_tree_foreach(index, &t->tcpq_out.pool){ - f = index->keyValue; - if ((next_ts == 0) || ((f->timestamp < 
next_ts) && (f->timestamp > 0))) { - next_ts = f->timestamp; - val = next_ts + (t->rto << t->backoff); - } - } - } else { - val = next_ts; - } - - if ((val > 0) || (val > now)) { - t->retrans_tmr_due = val; - } else { - t->retrans_tmr_due = now + 1; - } - - if (!t->retrans_tmr) { - t->retrans_tmr = pico_timer_add(t->retrans_tmr_due - now, tcp_retrans_timeout, t); - if(!t->retrans_tmr) { - tcp_dbg("TCP: Failed to start retransmission timer\n"); - //TODO do something about this? - } else { - tcp_dbg("Next timeout in %u msec\n", (uint32_t) (t->retrans_tmr_due - now)); - } - } -} - -static int tcp_retrans(struct pico_socket_tcp *t, struct pico_frame *f) -{ - struct pico_frame *cpy; - if (f) { - tcp_dbg("TCP> RETRANS (by dupack) frame %08x, len= %d\n", SEQN(f), f->payload_len); - tcp_add_header(t, f); - /* TCP: ENQUEUE to PROTO ( retransmit )*/ - cpy = pico_frame_copy(f); - if (!cpy) { - return -1; - } - - if (pico_enqueue(&tcp_out, cpy) > 0) { - t->in_flight++; - t->snd_last_out = SEQN(cpy); - } else { - pico_frame_discard(cpy); - } - - add_retransmission_timer(t, TCP_TIME + t->rto); - return(f->payload_len); - } - - return 0; -} - -#ifdef TCP_ACK_DBG -static void tcp_ack_dbg(struct pico_socket *s, struct pico_frame *f) -{ - uint32_t una, nxt, ack, cur; - struct pico_frame *una_f = NULL, *cur_f; - struct pico_tree_node *idx; - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - char info[64]; - char tmp[64]; - ack = ACKN(f); - nxt = t->snd_nxt; - tcp_dbg("===================================\n"); - tcp_dbg("Queue out (%d/%d). 
ACKED=%08x\n", t->tcpq_out.size, t->tcpq_out.max_size, ack); - - pico_tree_foreach(idx, &t->tcpq_out.pool) { - info[0] = 0; - cur_f = idx->keyValue; - cur = SEQN(cur_f); - if (!una_f) { - una_f = cur_f; - una = SEQN(una_f); - } - - if (cur == nxt) { - strncpy(tmp, info, strlen(info)); - snprintf(info, 64, "%s SND_NXT", tmp); - } - - if (cur == ack) { - strncpy(tmp, info, strlen(info)); - snprintf(info, 64, "%s ACK", tmp); - } - - if (cur == una) { - strncpy(tmp, info, strlen(info)); - snprintf(info, 64, "%s SND_UNA", tmp); - } - - if (cur == t->snd_last) { - strncpy(tmp, info, strlen(info)); - snprintf(info, 64, "%s SND_LAST", tmp); - } - - tcp_dbg("%08x %d%s\n", cur, cur_f->payload_len, info); - - } - tcp_dbg("SND_NXT is %08x, snd_LAST is %08x\n", nxt, t->snd_last); - tcp_dbg("===================================\n"); - tcp_dbg("\n\n"); -} -#endif - -static int tcp_ack(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_frame *f_new; /* use with Nagle to push to out queue */ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - struct pico_tcp_hdr *hdr; - uint32_t rtt = 0; - uint16_t acked = 0; - pico_time acked_timestamp = 0; - struct pico_frame *una = NULL; - - if (!f || !s) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - hdr = (struct pico_tcp_hdr *) f->transport_hdr; - - if ((hdr->flags & PICO_TCP_ACK) == 0) - return -1; - -#ifdef TCP_ACK_DBG - tcp_ack_dbg(s, f); -#endif - - tcp_parse_options(f); - t->recv_wnd = short_be(hdr->rwnd); - - acked = (uint16_t)tcp_ack_advance_una(t, f, &acked_timestamp); - una = first_segment(&t->tcpq_out); - t->ack_timestamp = TCP_TIME; - - if ((t->x_mode == PICO_TCP_BLACKOUT) || - ((t->x_mode == PICO_TCP_WINDOW_FULL) && ((t->recv_wnd << t->recv_wnd_scale) > t->mss))) { - int prev_mode = t->x_mode; - tcp_dbg("Re-entering look-ahead...\n\n\n"); - t->x_mode = PICO_TCP_LOOKAHEAD; - t->backoff = 0; - - if((prev_mode == PICO_TCP_BLACKOUT) && (acked > 0) && una) - { - t->snd_nxt = SEQN(una); - /* restart the retrans 
timer */ - if (t->retrans_tmr) { - t->retrans_tmr_due = 0ull; - } - } - } - - /* One should be acked. */ - if ((acked == 0) && (f->payload_len == 0) && (t->in_flight > 0)) - t->in_flight--; - - if (!una || acked > 0) { - t->x_mode = PICO_TCP_LOOKAHEAD; - tcp_dbg("Mode: Look-ahead. In flight: %d/%d buf: %d\n", t->in_flight, t->cwnd, t->tcpq_out.frames); - t->backoff = 0; - - /* Do rtt/rttvar/rto calculations */ - /* First, try with timestamps, using the value from options */ - if(f->timestamp != 0) { - rtt = time_diff(TCP_TIME, f->timestamp); - if (rtt) - tcp_rtt(t, rtt); - } else if(acked_timestamp) { - /* If no timestamps are there, use conservative estimation on the una */ - rtt = time_diff(TCP_TIME, acked_timestamp); - if (rtt) - tcp_rtt(t, rtt); - } - - tcp_dbg("TCP ACK> FRESH ACK %08x (acked %d) Queue size: %u/%u frames: %u cwnd: %u in_flight: %u snd_una: %u\n", ACKN(f), acked, t->tcpq_out.size, t->tcpq_out.max_size, t->tcpq_out.frames, t->cwnd, t->in_flight, SEQN(una)); - if (acked > t->in_flight) { - tcp_dbg("WARNING: in flight < 0\n"); - t->in_flight = 0; - } else - t->in_flight -= (acked); - - } else if ((t->snd_old_ack == ACKN(f)) && /* We've just seen this ack, and... */ - ((0 == (hdr->flags & (PICO_TCP_PSH | PICO_TCP_SYN))) && - (f->payload_len == 0)) && /* This is a pure ack, and... */ - (ACKN(f) != t->snd_nxt)) /* There is something in flight awaiting to be acked... */ - { - /* Process incoming duplicate ack. 
*/ - if (t->x_mode < PICO_TCP_RECOVER) { - t->x_mode++; - tcp_dbg("Mode: DUPACK %d, due to PURE ACK %0x, len = %d\n", t->x_mode, SEQN(f), f->payload_len); - /* tcp_dbg("ACK: %x - QUEUE: %x\n", ACKN(f), SEQN(first_segment(&t->tcpq_out))); */ - if (t->x_mode == PICO_TCP_RECOVER) { /* Switching mode */ - if (t->in_flight > PICO_TCP_IW) - t->cwnd = (uint16_t)t->in_flight; - else - t->cwnd = PICO_TCP_IW; - - t->snd_retry = SEQN((struct pico_frame *)first_segment(&t->tcpq_out)); - if (t->ssthresh > t->cwnd) - t->ssthresh >>= 2; - else - t->ssthresh = (t->cwnd >> 1); - - if (t->ssthresh < 2) - t->ssthresh = 2; - } - } else if (t->x_mode == PICO_TCP_RECOVER) { - /* tcp_dbg("TCP RECOVER> DUPACK! snd_una: %08x, snd_nxt: %08x, acked now: %08x\n", SEQN(first_segment(&t->tcpq_out)), t->snd_nxt, ACKN(f)); */ - if (t->in_flight <= t->cwnd) { - struct pico_frame *nxt = peek_segment(&t->tcpq_out, t->snd_retry); - if (!nxt) - nxt = first_segment(&t->tcpq_out); - - while (nxt && (nxt->flags & PICO_FRAME_FLAG_SACKED) && (nxt != first_segment(&t->tcpq_out))) { - tcp_dbg("Skipping %08x because it is sacked.\n", SEQN(nxt)); - nxt = next_segment(&t->tcpq_out, nxt); - } - if (nxt && (pico_seq_compare(SEQN(nxt), t->snd_nxt)) > 0) - nxt = NULL; - - if (nxt && (pico_seq_compare(SEQN(nxt), SEQN((struct pico_frame *)first_segment(&t->tcpq_out))) > (int)(t->recv_wnd << t->recv_wnd_scale))) - nxt = NULL; - - if(!nxt) - nxt = first_segment(&t->tcpq_out); - - if (nxt) { - tcp_retrans(t, peek_segment(&t->tcpq_out, t->snd_retry)); - t->snd_retry = SEQN(nxt); - } - } - - if (++t->cwnd_counter > 1) { - t->cwnd--; - if (t->cwnd < 2) - t->cwnd = 2; - - t->cwnd_counter = 0; - } - } else { - tcp_dbg("DUPACK in mode %d \n", t->x_mode); - - } - } /* End case duplicate ack detection */ - - /* Linux very special zero-window probe detection (see bug #107) */ - if ((0 == (hdr->flags & (PICO_TCP_PSH | PICO_TCP_SYN))) && /* This is a pure ack, and... 
*/ - (ACKN(f) == t->snd_nxt) && /* it's acking our snd_nxt, and... */ - (pico_seq_compare(SEQN(f), t->rcv_nxt) < 0)) /* Has an old seq number */ - { - tcp_send_ack(t); - } - - - /* Do congestion control */ - tcp_congestion_control(t); - if ((acked > 0) && t->sock.wakeup) { - if (t->tcpq_out.size < t->tcpq_out.max_size) - t->sock.wakeup(PICO_SOCK_EV_WR, &(t->sock)); - - /* t->sock.ev_pending |= PICO_SOCK_EV_WR; */ - } - - /* if Nagle enabled, check if no unack'ed data and fill out queue (till window) */ - if (IS_NAGLE_ENABLED((&(t->sock)))) { - while (!IS_TCP_HOLDQ_EMPTY(t) && ((t->tcpq_out.max_size - t->tcpq_out.size) >= t->mss)) { - tcp_dbg_nagle("TCP_ACK - NAGLE add new segment\n"); - f_new = pico_hold_segment_make(t); - if (f_new == NULL) - break; /* XXX corrupt !!! (or no memory) */ - - if (pico_enqueue_segment(&t->tcpq_out, f_new) <= 0) - /* handle error */ - tcp_dbg_nagle("TCP_ACK - NAGLE FAILED to enqueue in out\n"); - } - } - - /* If some space was created, put a few segments out. 
*/ - tcp_dbg("TCP_CWND, %lu, %u, %u, %u\n", TCP_TIME, t->cwnd, t->ssthresh, t->in_flight); - if (t->x_mode == PICO_TCP_LOOKAHEAD) { - if ((t->cwnd >= t->in_flight) && (t->snd_nxt > t->snd_last_out)) { - pico_tcp_output(&t->sock, (int)t->cwnd - (int)t->in_flight); - } - } - - add_retransmission_timer(t, 0); - t->snd_old_ack = ACKN(f); - return 0; -} - -static int tcp_finwaitack(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - tcp_dbg("RECEIVED ACK IN FIN_WAIT1\n"); - - /* acking part */ - tcp_ack(s, f); - - - tcp_dbg("FIN_WAIT1: ack is %08x - snd_nxt is %08x\n", ACKN(f), t->snd_nxt); - if (ACKN(f) == (t->snd_nxt - 1u)) { - /* update TCP state */ - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_FIN_WAIT2; - tcp_dbg("TCP> IN STATE FIN_WAIT2\n"); - } - - return 0; -} - -static void tcp_deltcb(pico_time when, void *arg) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)arg; - IGNORE_PARAMETER(when); - - /* send RST if not yet in TIME_WAIT */ - if ((((t->sock).state & PICO_SOCKET_STATE_TCP) != PICO_SOCKET_STATE_TCP_TIME_WAIT) - && (((t->sock).state & PICO_SOCKET_STATE_TCP) != PICO_SOCKET_STATE_TCP_CLOSING)) { - tcp_dbg("Called deltcb in state = %04x (sending reset!)\n", (t->sock).state); - tcp_do_send_rst(&t->sock, long_be(t->snd_nxt)); - } else { - tcp_dbg("Called deltcb in state = %04x\n", (t->sock).state); - } - - /* update state */ - (t->sock).state &= 0x00FFU; - (t->sock).state |= PICO_SOCKET_STATE_TCP_CLOSED; - (t->sock).state &= 0xFF00U; - (t->sock).state |= PICO_SOCKET_STATE_CLOSED; - /* call EV_FIN wakeup before deleting */ - if (t->sock.wakeup) { - (t->sock).wakeup(PICO_SOCK_EV_FIN, &(t->sock)); - } - - /* delete socket */ - pico_socket_del(&t->sock); -} - -static int tcp_finwaitfin(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) (f->transport_hdr); - tcp_dbg("TCP> received 
fin in FIN_WAIT2\n"); - /* received FIN, increase ACK nr */ - t->rcv_nxt = long_be(hdr->seq) + 1; - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_TIME_WAIT; - /* set SHUT_REMOTE */ - s->state |= PICO_SOCKET_STATE_SHUT_REMOTE; - if (s->wakeup) - s->wakeup(PICO_SOCK_EV_CLOSE, s); - - if (f->payload_len > 0) /* needed?? */ - tcp_data_in(s, f); - - /* send ACK */ - tcp_send_ack(t); - /* linger */ - tcp_linger(t); - return 0; -} - -static int tcp_closing_ack(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - tcp_dbg("TCP> received ack in CLOSING\n"); - /* acking part */ - tcp_ack(s, f); - - /* update TCP state DLA TODO: Only if FIN is acked! */ - tcp_dbg("CLOSING: ack is %08x - snd_nxt is %08x\n", ACKN(f), t->snd_nxt); - if (ACKN(f) == t->snd_nxt) { - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_TIME_WAIT; - /* set timer */ - tcp_linger(t); - } - - return 0; -} - -static int tcp_lastackwait(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - tcp_dbg("LAST_ACK: ack is %08x - snd_nxt is %08x\n", ACKN(f), t->snd_nxt); - if (ACKN(f) == t->snd_nxt) { - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_CLOSED; - s->state &= 0xFF00U; - s->state |= PICO_SOCKET_STATE_CLOSED; - /* call socket wakeup with EV_FIN */ - if (s->wakeup) - s->wakeup(PICO_SOCK_EV_FIN, s); - - /* delete socket */ - pico_socket_del(s); - } - - return 0; -} - -static int tcp_syn(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *new = NULL; - struct pico_tcp_hdr *hdr = NULL; - uint16_t mtu; - if(s->number_of_pending_conn >= s->max_backlog) - return -1; - - new = (struct pico_socket_tcp *)pico_socket_clone(s); - hdr = (struct pico_tcp_hdr *)f->transport_hdr; - if (!new) - return -1; - -#ifdef PICO_TCP_SUPPORT_SOCKET_STATS - if (!pico_timer_add(2000, sock_stats, s)) { - tcp_dbg("TCP: Failed to start socket statistics timer\n"); - return -1; - } 
-#endif - - new->sock.remote_port = ((struct pico_trans *)f->transport_hdr)->sport; -#ifdef PICO_SUPPORT_IPV4 - if (IS_IPV4(f)) { - new->sock.remote_addr.ip4.addr = ((struct pico_ipv4_hdr *)(f->net_hdr))->src.addr; - new->sock.local_addr.ip4.addr = ((struct pico_ipv4_hdr *)(f->net_hdr))->dst.addr; - } - -#endif -#ifdef PICO_SUPPORT_IPV6 - if (IS_IPV6(f)) { - new->sock.remote_addr.ip6 = ((struct pico_ipv6_hdr *)(f->net_hdr))->src; - new->sock.local_addr.ip6 = ((struct pico_ipv6_hdr *)(f->net_hdr))->dst; - } - -#endif - f->sock = &new->sock; - mtu = (uint16_t)pico_socket_get_mss(&new->sock); - new->mss = (uint16_t)(mtu - PICO_SIZE_TCPHDR); - tcp_parse_options(f); - new->tcpq_in.max_size = PICO_DEFAULT_SOCKETQ; - new->tcpq_out.max_size = PICO_DEFAULT_SOCKETQ; - new->tcpq_hold.max_size = 2u * mtu; - new->rcv_nxt = long_be(hdr->seq) + 1; - new->snd_nxt = long_be(pico_paws()); - new->snd_last = new->snd_nxt; - new->cwnd = PICO_TCP_IW; - new->ssthresh = (uint16_t)((uint16_t)(PICO_DEFAULT_SOCKETQ / new->mss) - (((uint16_t)(PICO_DEFAULT_SOCKETQ / new->mss)) >> 3u)); - new->recv_wnd = short_be(hdr->rwnd); - new->jumbo = hdr->len & 0x07; - new->linger_timeout = PICO_SOCKET_LINGER_TIMEOUT; - s->number_of_pending_conn++; - new->sock.parent = s; - new->sock.wakeup = s->wakeup; - rto_set(new, PICO_TCP_RTO_MIN); - /* Initialize timestamp values */ - new->sock.state = PICO_SOCKET_STATE_BOUND | PICO_SOCKET_STATE_CONNECTED | PICO_SOCKET_STATE_TCP_SYN_RECV; - pico_socket_add(&new->sock); - tcp_send_synack(&new->sock); - tcp_dbg("SYNACK sent, socket added. snd_nxt is %08x\n", new->snd_nxt); - return 0; -} - -static int tcp_synrecv_syn(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = NULL; - struct pico_socket_tcp *t = TCP_SOCK(s); - hdr = (struct pico_tcp_hdr *)f->transport_hdr; - if (t->rcv_nxt == long_be(hdr->seq) + 1u) { - /* take back our own SEQ number to its original value, - * so the synack retransmitted is identical to the original. 
- */ - t->snd_nxt--; - tcp_send_synack(s); - } else { - tcp_send_rst(s, f); - return -1; - } - - return 0; -} - -static void tcp_set_init_point(struct pico_socket *s) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - t->rcv_processed = t->rcv_nxt; -} - - -uint16_t pico_tcp_get_socket_mss(struct pico_socket *s) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - if (t->mss > 0) - return (uint16_t)(t->mss + PICO_SIZE_TCPHDR); - else - return (uint16_t)pico_socket_get_mss(s); -} - -static int tcp_synack(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *)f->transport_hdr; - - if (ACKN(f) == (1u + t->snd_nxt)) { - /* Get rid of initconn retry */ - pico_timer_cancel(t->retrans_tmr); - t->retrans_tmr = 0; - - t->rcv_nxt = long_be(hdr->seq); - t->rcv_processed = t->rcv_nxt + 1; - tcp_ack(s, f); - - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_ESTABLISHED; - tcp_dbg("TCP> Established. State: %x\n", s->state); - - if (s->wakeup) - s->wakeup(PICO_SOCK_EV_CONN, s); - - s->ev_pending |= PICO_SOCK_EV_WR; - - t->rcv_nxt++; - t->snd_nxt++; - tcp_send_ack(t); /* return ACK */ - - return 0; - - } else if ((hdr->flags & PICO_TCP_RST) == 0) { - tcp_dbg("TCP> Not established, RST sent.\n"); - tcp_nosync_rst(s, f); - return 0; - } else { - /* The segment has the reset flag on: Ignore! */ - return 0; - } -} - -static int tcp_first_ack(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *)f->transport_hdr; - tcp_dbg("ACK in SYN_RECV: expecting %08x got %08x\n", t->snd_nxt, ACKN(f)); - if (t->snd_nxt == ACKN(f)) { - tcp_set_init_point(s); - tcp_ack(s, f); - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_ESTABLISHED; - tcp_dbg("TCP: Established. 
State now: %04x\n", s->state); - if( !s->parent && s->wakeup) { /* If the socket has no parent, -> sending socket that has a sim_open */ - tcp_dbg("FIRST ACK - No parent found -> sending socket\n"); - s->wakeup(PICO_SOCK_EV_CONN, s); - } - - if (s->parent && s->parent->wakeup) { - tcp_dbg("FIRST ACK - Parent found -> listening socket\n"); - s->wakeup = s->parent->wakeup; - s->parent->wakeup(PICO_SOCK_EV_CONN, s->parent); - } - - s->ev_pending |= PICO_SOCK_EV_WR; - tcp_dbg("%s: snd_nxt is now %08x\n", __FUNCTION__, t->snd_nxt); - return 0; - } else if ((hdr->flags & PICO_TCP_RST) == 0) { - tcp_nosync_rst(s, f); - return 0; - } else { - /* The segment has the reset flag on: Ignore! */ - return 0; - } -} - -static void tcp_attempt_closewait(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) (f->transport_hdr); - if (pico_seq_compare(SEQN(f), t->rcv_nxt) == 0) { - /* received FIN, increase ACK nr */ - t->rcv_nxt = long_be(hdr->seq) + 1; - if (pico_seq_compare(SEQN(f), t->rcv_processed) == 0) { - if ((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_ESTABLISHED) { - tcp_dbg("Changing state to CLOSE_WAIT\n"); - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_CLOSE_WAIT; - } - - /* set SHUT_REMOTE */ - s->state |= PICO_SOCKET_STATE_SHUT_REMOTE; - tcp_dbg("TCP> Close-wait\n"); - if (s->wakeup) { - s->wakeup(PICO_SOCK_EV_CLOSE, s); - } - } else { - t->remote_closed = 1; - } - } - - -} - -static int tcp_closewait(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) (f->transport_hdr); - - if (f->payload_len > 0) - tcp_data_in(s, f); - - if (hdr->flags & PICO_TCP_ACK) - tcp_ack(s, f); - - tcp_dbg("called close_wait (%p), in state %08x, f->flags: 0x%02x, hdr->flags: 0x%02x\n", tcp_closewait, s->state, f->flags, hdr->flags); - 
tcp_attempt_closewait(s, f); - - /* Ensure that the notification given to the socket - * did not put us in LAST_ACK state before sending the ACK: i.e. if - * pico_socket_close() has been called in the socket callback, we don't need to send - * an ACK here. - * - */ - if (((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_CLOSE_WAIT) || - ((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_ESTABLISHED)) - { - tcp_dbg("In closewait: Sending ack! (state is %08x)\n", s->state); - tcp_send_ack(t); - } - - return 0; -} - -static int tcp_rcvfin(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - IGNORE_PARAMETER(f); - tcp_dbg("TCP> Received FIN in FIN_WAIT1\n"); - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_CLOSING; - t->rcv_processed = t->rcv_nxt + 1; - t->rcv_nxt++; - /* send ACK */ - tcp_send_ack(t); - return 0; -} - -static int tcp_finack(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - IGNORE_PARAMETER(f); - - tcp_dbg("TCP> ENTERED finack\n"); - t->rcv_nxt++; - /* send ACK */ - tcp_send_ack(t); - - /* call socket wakeup with EV_FIN */ - if (s->wakeup) - s->wakeup(PICO_SOCK_EV_FIN, s); - - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_TIME_WAIT; - /* set SHUT_REMOTE */ - s->state |= PICO_SOCKET_STATE_SHUT_REMOTE; - - tcp_linger(t); - - return 0; -} - -static void tcp_force_closed(struct pico_socket *s) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - /* update state */ - (t->sock).state &= 0x00FFU; - (t->sock).state |= PICO_SOCKET_STATE_TCP_CLOSED; - (t->sock).state &= 0xFF00U; - (t->sock).state |= PICO_SOCKET_STATE_CLOSED; - /* call EV_ERR wakeup before deleting */ - if (((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_ESTABLISHED)) { - if ((t->sock).wakeup) - (t->sock).wakeup(PICO_SOCK_EV_FIN, &(t->sock)); - } else { - pico_err = PICO_ERR_ECONNRESET; - if ((t->sock).wakeup) - 
(t->sock).wakeup(PICO_SOCK_EV_FIN, &(t->sock)); - - /* delete socket */ - pico_socket_del(&t->sock); - } -} - -static void tcp_wakeup_pending(struct pico_socket *s, uint16_t ev) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - if ((t->sock).wakeup) - (t->sock).wakeup(ev, &(t->sock)); -} - -static int tcp_rst(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) (f->transport_hdr); - - tcp_dbg("TCP >>>>>>>>>>>>>> received RST <<<<<<<<<<<<<<<<<<<<\n"); - if ((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_SYN_SENT) { - /* the RST is acceptable if the ACK field acknowledges the SYN */ - if ((t->snd_nxt + 1u) == ACKN(f)) { /* valid, got to closed state */ - tcp_force_closed(s); - } else { /* not valid, ignore */ - tcp_dbg("TCP RST> IGNORE\n"); - return 0; - } - } else { /* all other states */ - /* all reset (RST) segments are validated by checking their SEQ-fields, - a reset is valid if its sequence number is in the window */ - uint32_t this_seq = long_be(hdr->seq); - if ((this_seq >= t->rcv_ackd) && (this_seq <= ((uint32_t)(short_be(hdr->rwnd) << (t->wnd_scale)) + t->rcv_ackd))) { - tcp_force_closed(s); - } else { /* not valid, ignore */ - tcp_dbg("TCP RST> IGNORE\n"); - return 0; - } - } - - return 0; -} -static int tcp_halfopencon(struct pico_socket *s, struct pico_frame *fr) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - IGNORE_PARAMETER(fr); - tcp_send_ack(t); - return 0; -} - -static int tcp_closeconn(struct pico_socket *s, struct pico_frame *fr) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *) s; - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) (fr->transport_hdr); - - if (pico_seq_compare(SEQN(fr), t->rcv_nxt) == 0) { - /* received FIN, increase ACK nr */ - t->rcv_nxt = long_be(hdr->seq) + 1; - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_CLOSE_WAIT; - /* set SHUT_LOCAL */ - s->state 
|= PICO_SOCKET_STATE_SHUT_LOCAL; - pico_socket_close(s); - return 1; - } - - return 0; -} - -struct tcp_action_entry { - uint16_t tcpstate; - int (*syn)(struct pico_socket *s, struct pico_frame *f); - int (*synack)(struct pico_socket *s, struct pico_frame *f); - int (*ack)(struct pico_socket *s, struct pico_frame *f); - int (*data)(struct pico_socket *s, struct pico_frame *f); - int (*fin)(struct pico_socket *s, struct pico_frame *f); - int (*finack)(struct pico_socket *s, struct pico_frame *f); - int (*rst)(struct pico_socket *s, struct pico_frame *f); -}; - -static const struct tcp_action_entry tcp_fsm[] = { - /* State syn synack ack data fin finack rst*/ - { PICO_SOCKET_STATE_TCP_UNDEF, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, - { PICO_SOCKET_STATE_TCP_CLOSED, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, - { PICO_SOCKET_STATE_TCP_LISTEN, &tcp_syn, NULL, NULL, NULL, NULL, NULL, NULL }, - { PICO_SOCKET_STATE_TCP_SYN_SENT, NULL, &tcp_synack, NULL, NULL, NULL, NULL, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_SYN_RECV, &tcp_synrecv_syn, NULL, &tcp_first_ack, &tcp_data_in, NULL, &tcp_closeconn, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_ESTABLISHED, &tcp_halfopencon, &tcp_ack, &tcp_ack, &tcp_data_in, &tcp_closewait, &tcp_closewait, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_CLOSE_WAIT, NULL, &tcp_ack, &tcp_ack, &tcp_send_rst, &tcp_closewait, &tcp_closewait, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_LAST_ACK, NULL, &tcp_ack, &tcp_lastackwait, &tcp_send_rst, &tcp_send_rst, &tcp_send_rst, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_FIN_WAIT1, NULL, &tcp_ack, &tcp_finwaitack, &tcp_data_in, &tcp_rcvfin, &tcp_finack, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_FIN_WAIT2, NULL, &tcp_ack, &tcp_ack, &tcp_data_in, &tcp_finwaitfin, &tcp_finack, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_CLOSING, NULL, &tcp_ack, &tcp_closing_ack, &tcp_send_rst, &tcp_send_rst, &tcp_send_rst, &tcp_rst }, - { PICO_SOCKET_STATE_TCP_TIME_WAIT, NULL, NULL, NULL, &tcp_send_rst, NULL, NULL, NULL} -}; - -#define MAX_VALID_FLAGS 10 /* 
Maximum number of valid flag combinations */ -static uint8_t invalid_flags(struct pico_socket *s, uint8_t flags) -{ - uint8_t i; - static const uint8_t valid_flags[PICO_SOCKET_STATE_TCP_ARRAYSIZ][MAX_VALID_FLAGS] = { - { /* PICO_SOCKET_STATE_TCP_UNDEF */ 0, }, - { /* PICO_SOCKET_STATE_TCP_CLOSED */ 0, }, - { /* PICO_SOCKET_STATE_TCP_LISTEN */ PICO_TCP_SYN }, - { /* PICO_SOCKET_STATE_TCP_SYN_SENT */ PICO_TCP_SYNACK, PICO_TCP_RST, PICO_TCP_RSTACK}, - { /* PICO_SOCKET_STATE_TCP_SYN_RECV */ PICO_TCP_SYN, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST}, - { /* PICO_SOCKET_STATE_TCP_ESTABLISHED*/ PICO_TCP_SYN, PICO_TCP_SYNACK, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FIN, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST, PICO_TCP_RSTACK}, - { /* PICO_SOCKET_STATE_TCP_CLOSE_WAIT */ PICO_TCP_SYNACK, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FIN, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST}, - { /* PICO_SOCKET_STATE_TCP_LAST_ACK */ PICO_TCP_SYNACK, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FIN, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST}, - { /* PICO_SOCKET_STATE_TCP_FIN_WAIT1 */ PICO_TCP_SYNACK, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FIN, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST}, - { /* PICO_SOCKET_STATE_TCP_FIN_WAIT2 */ PICO_TCP_SYNACK, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FIN, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST}, - { /* PICO_SOCKET_STATE_TCP_CLOSING */ PICO_TCP_SYNACK, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FIN, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST}, - { /* PICO_SOCKET_STATE_TCP_TIME_WAIT */ PICO_TCP_SYNACK, PICO_TCP_ACK, PICO_TCP_PSH, PICO_TCP_PSHACK, PICO_TCP_FIN, PICO_TCP_FINACK, PICO_TCP_FINPSHACK, PICO_TCP_RST}, - }; - if(!flags) - return 1; - - for(i = 0; i < MAX_VALID_FLAGS; i++) { - if(valid_flags[s->state >> 8u][i] == flags) - return 0; - } - return 1; -} - -static void 
tcp_action_call(int (*call)(struct pico_socket *s, struct pico_frame *f), struct pico_socket *s, struct pico_frame *f ) -{ - if (call) - call(s, f); -} - -static int tcp_action_by_flags(const struct tcp_action_entry *action, struct pico_socket *s, struct pico_frame *f, uint8_t flags) -{ - int ret = 0; - - if ((flags == PICO_TCP_ACK) || (flags == (PICO_TCP_ACK | PICO_TCP_PSH))) { - tcp_action_call(action->ack, s, f); - } - - if ((f->payload_len > 0 || (flags & PICO_TCP_PSH)) && - !(s->state & PICO_SOCKET_STATE_CLOSED) && !TCP_IS_STATE(s, PICO_SOCKET_STATE_TCP_LISTEN)) - { - ret = f->payload_len; - tcp_action_call(action->data, s, f); - } - - if (flags == PICO_TCP_FIN) { - tcp_action_call(action->fin, s, f); - } - - if ((flags == (PICO_TCP_FIN | PICO_TCP_ACK)) || (flags == (PICO_TCP_FIN | PICO_TCP_ACK | PICO_TCP_PSH))) { - tcp_action_call(action->finack, s, f); - } - - if (flags & PICO_TCP_RST) { - tcp_action_call(action->rst, s, f); - } - - return ret; -} - -int pico_tcp_input(struct pico_socket *s, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *) (f->transport_hdr); - int ret = 0; - uint8_t flags = hdr->flags; - const struct tcp_action_entry *action = &tcp_fsm[s->state >> 8]; - - f->payload = (f->transport_hdr + ((hdr->len & 0xf0u) >> 2u)); - f->payload_len = (uint16_t)(f->transport_len - ((hdr->len & 0xf0u) >> 2u)); - - tcp_dbg("[sam] TCP> [tcp input] t_len: %u\n", f->transport_len); - tcp_dbg("[sam] TCP> flags = 0x%02x\n", hdr->flags); - tcp_dbg("[sam] TCP> s->state >> 8 = %u\n", s->state >> 8); - tcp_dbg("[sam] TCP> [tcp input] socket: %p state: %d <-- local port:%u remote port: %u seq: 0x%08x ack: 0x%08x flags: 0x%02x t_len: %u, hdr: %u payload: %d\n", s, s->state >> 8, short_be(hdr->trans.dport), short_be(hdr->trans.sport), SEQN(f), ACKN(f), hdr->flags, f->transport_len, (hdr->len & 0xf0) >> 2, f->payload_len ); - - /* This copy of the frame has the current socket as owner */ - f->sock = s; - s->timestamp = TCP_TIME; - /* Those are 
not supported at this time. */ - /* flags &= (uint8_t) ~(PICO_TCP_CWR | PICO_TCP_URG | PICO_TCP_ECN); */ - if(invalid_flags(s, flags)) { - pico_tcp_reply_rst(f); - } - else if (flags == PICO_TCP_SYN) { - tcp_action_call(action->syn, s, f); - } else if (flags == (PICO_TCP_SYN | PICO_TCP_ACK)) { - tcp_action_call(action->synack, s, f); - } else { - ret = tcp_action_by_flags(action, s, f, flags); - } - - if (s->ev_pending) - tcp_wakeup_pending(s, s->ev_pending); - -/* discard: */ - pico_frame_discard(f); - return ret; -} - - -inline static int checkLocalClosing(struct pico_socket *s); -inline static int checkRemoteClosing(struct pico_socket *s); - -static struct pico_frame *tcp_split_segment(struct pico_socket_tcp *t, struct pico_frame *f, uint16_t size) -{ - struct pico_frame *f1, *f2; - uint16_t size1, size2, size_f; - uint16_t overhead; - struct pico_tcp_hdr *hdr1, *hdr2, *hdr = (struct pico_tcp_hdr *)f->transport_hdr; - overhead = pico_tcp_overhead(&t->sock); - size_f = f->payload_len; - - - if (size >= size_f) - return f; /* no need to split! 
*/ - - size1 = size; - size2 = (uint16_t)(size_f - size); - - f1 = pico_socket_frame_alloc(&t->sock, get_sock_dev(&t->sock), (uint16_t) (size1 + overhead)); - f2 = pico_socket_frame_alloc(&t->sock, get_sock_dev(&t->sock), (uint16_t) (size2 + overhead)); - - if (!f1 || !f2) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - /* Advance payload pointer to the beginning of segment data */ - f1->payload += overhead; - f1->payload_len = (uint16_t)(f1->payload_len - overhead); - f2->payload += overhead; - f2->payload_len = (uint16_t)(f2->payload_len - overhead); - - hdr1 = (struct pico_tcp_hdr *)f1->transport_hdr; - hdr2 = (struct pico_tcp_hdr *)f2->transport_hdr; - - /* Copy payload */ - memcpy(f1->payload, f->payload, size1); - memcpy(f2->payload, f->payload + size1, size2); - - /* Copy tcp hdr */ - memcpy(hdr1, hdr, sizeof(struct pico_tcp_hdr)); - memcpy(hdr2, hdr, sizeof(struct pico_tcp_hdr)); - - /* Adjust f2's sequence number */ - hdr2->seq = long_be(SEQN(f) + size1); - - /* Add TCP options */ - pico_tcp_flags_update(f1, &t->sock); - pico_tcp_flags_update(f2, &t->sock); - tcp_add_options_frame(t, f1); - tcp_add_options_frame(t, f2); - - /* Get rid of the full frame */ - pico_discard_segment(&t->tcpq_out, f); - - /* Enqueue f2 for later send... 
*/ - if (pico_enqueue_segment(&t->tcpq_out, f2) < 0) { - tcp_dbg("Discarding invalid segment\n"); - pico_frame_discard(f2); - } - - /* Return the partial frame */ - return f1; -} - - -int pico_tcp_output(struct pico_socket *s, int loop_score) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - struct pico_frame *f, *una; - int sent = 0; - int data_sent = 0; - int32_t seq_diff = 0; - - una = first_segment(&t->tcpq_out); - f = peek_segment(&t->tcpq_out, t->snd_nxt); - - while((f) && (t->cwnd >= t->in_flight)) { - f->timestamp = TCP_TIME; - add_retransmission_timer(t, t->rto + TCP_TIME); - tcp_add_options_frame(t, f); - seq_diff = pico_seq_compare(SEQN(f), SEQN(una)); - if (seq_diff < 0) { - tcp_dbg(">>> FATAL: seq diff is negative!\n"); - break; - } - - /* Check if advertised window is full */ - if ((uint32_t)seq_diff >= (uint32_t)(t->recv_wnd << t->recv_wnd_scale)) { - if (t->x_mode != PICO_TCP_WINDOW_FULL) { - tcp_dbg("TCP> RIGHT SIZING (rwnd: %d, frame len: %d\n", t->recv_wnd << t->recv_wnd_scale, f->payload_len); - tcp_dbg("In window full...\n"); - t->snd_nxt = SEQN(una); - t->snd_retry = SEQN(una); - t->x_mode = PICO_TCP_WINDOW_FULL; - } - - break; - } - - /* Check if the advertised window is too small to receive the current frame */ - if ((uint32_t)(seq_diff + f->payload_len) > (uint32_t)(t->recv_wnd << t->recv_wnd_scale)) { - f = tcp_split_segment(t, f, (uint16_t)(t->recv_wnd << t->recv_wnd_scale)); - if (!f) - break; - - /* Limit sending window to packets in flight (right sizing) */ - t->cwnd = (uint16_t)t->in_flight; - if (t->cwnd < 1) - t->cwnd = 1; - } - - tcp_dbg("TCP> DEQUEUED (for output) frame %08x, acks %08x len= %d, remaining frames %d\n", SEQN(f), ACKN(f), f->payload_len, t->tcpq_out.frames); - tcp_send(t, f); - sent++; - loop_score--; - t->snd_last_out = SEQN(f); - if (loop_score < 1) - break; - - if (f->payload_len > 0) { - data_sent++; - f = next_segment(&t->tcpq_out, f); - } else { - f = NULL; - } - } - if ((sent > 0 && data_sent > 
0)) { - rto_set(t, t->rto); - } else { - /* Nothing to transmit. */ - } - - if ((t->tcpq_out.frames == 0) && (s->state & PICO_SOCKET_STATE_SHUT_LOCAL)) { /* if no more packets in queue, XXX replaced !f by tcpq check */ - if(!checkLocalClosing(&t->sock)) /* check if local closing started and send fin */ - { - checkRemoteClosing(&t->sock); /* check if remote closing started and send fin */ - } - } - - return loop_score; -} - -/* function to make new segment from hold queue with specific size (mss) */ -static struct pico_frame *pico_hold_segment_make(struct pico_socket_tcp *t) -{ - struct pico_frame *f_temp, *f_new; - struct pico_socket *s = (struct pico_socket *) &t->sock; - struct pico_tcp_hdr *hdr; - uint16_t total_len = 0, total_payload_len = 0; - uint16_t off = 0, test = 0; - - off = pico_tcp_overhead(s); - - /* init with first frame in hold queue */ - f_temp = first_segment(&t->tcpq_hold); - total_len = f_temp->payload_len; - f_temp = next_segment(&t->tcpq_hold, f_temp); - - /* check till total_len <= MSS */ - while ((f_temp != NULL) && ((total_len + f_temp->payload_len) <= t->mss)) { - total_len = (uint16_t)(total_len + f_temp->payload_len); - f_temp = next_segment(&t->tcpq_hold, f_temp); - if (f_temp == NULL) - break; - } - /* alloc new frame with payload size = off + total_len */ - f_new = pico_socket_frame_alloc(s, get_sock_dev(s), (uint16_t)(off + total_len)); - if (!f_new) { - pico_err = PICO_ERR_ENOMEM; - return f_new; - } - - pico_tcp_flags_update(f_new, &t->sock); - hdr = (struct pico_tcp_hdr *) f_new->transport_hdr; - /* init new frame */ - f_new->payload += off; - f_new->payload_len = (uint16_t)(f_new->payload_len - off); - f_new->sock = s; - - f_temp = first_segment(&t->tcpq_hold); - hdr->seq = ((struct pico_tcp_hdr *)(f_temp->transport_hdr))->seq; /* get sequence number of first frame */ - hdr->trans.sport = t->sock.local_port; - hdr->trans.dport = t->sock.remote_port; - - /* check till total_payload_len <= MSS */ - while ((f_temp != NULL) && 
((total_payload_len + f_temp->payload_len) <= t->mss)) { - /* cpy data and discard frame */ - test++; - memcpy(f_new->payload + total_payload_len, f_temp->payload, f_temp->payload_len); - total_payload_len = (uint16_t)(total_payload_len + f_temp->payload_len); - pico_discard_segment(&t->tcpq_hold, f_temp); - f_temp = first_segment(&t->tcpq_hold); - } - hdr->len = (uint8_t)((f_new->payload - f_new->transport_hdr) << 2u | (int8_t)t->jumbo); - - tcp_dbg_nagle("NAGLE make - joined %d segments, len %d bytes\n", test, total_payload_len); - tcp_add_options_frame(t, f_new); - - return f_new; -} - - - -static int pico_tcp_push_nagle_enqueue(struct pico_socket_tcp *t, struct pico_frame *f) -{ - if (pico_enqueue_segment(&t->tcpq_out, f) > 0) { - tcp_dbg_nagle("TCP_PUSH - NAGLE - Pushing segment %08x, len %08x to socket %p\n", t->snd_last + 1, f->payload_len, t); - t->snd_last += f->payload_len; - return f->payload_len; - } else { - tcp_dbg("Enqueue failed.\n"); - return 0; - } -} - -static int pico_tcp_push_nagle_hold(struct pico_socket_tcp *t, struct pico_frame *f) -{ - struct pico_frame *f_new; - uint32_t total_len = 0; - total_len = f->payload_len + t->tcpq_hold.size; - if ((total_len >= t->mss) && ((t->tcpq_out.max_size - t->tcpq_out.size) >= t->mss)) { - /* IF enough data in hold (>mss) AND space in out queue (>mss) */ - /* add current frame in hold and make new segment */ - if (pico_enqueue_segment(&t->tcpq_hold, f) > 0 ) { - tcp_dbg_nagle("TCP_PUSH - NAGLE - Pushed into hold, make new (enqueued frames out %d)\n", t->tcpq_out.frames); - t->snd_last += f->payload_len; /* XXX WATCH OUT */ - f_new = pico_hold_segment_make(t); - } else { - tcp_dbg_nagle("TCP_PUSH - NAGLE - enqueue hold failed 1\n"); - return 0; - } - - /* and put new frame in out queue */ - if ((f_new != NULL) && (pico_enqueue_segment(&t->tcpq_out, f_new) > 0)) { - return f_new->payload_len; - } else { - tcp_dbg_nagle("TCP_PUSH - NAGLE - enqueue out failed, f_new = %p\n", f_new); - return -1; /* XXX 
something seriously wrong */ - } - } else { - /* ELSE put frame in hold queue */ - if (pico_enqueue_segment(&t->tcpq_hold, f) > 0) { - tcp_dbg_nagle("TCP_PUSH - NAGLE - Pushed into hold (enqueued frames out %d)\n", t->tcpq_out.frames); - t->snd_last += f->payload_len; /* XXX WATCH OUT */ - return f->payload_len; - } else { - pico_err = PICO_ERR_EAGAIN; - tcp_dbg_nagle("TCP_PUSH - NAGLE - enqueue hold failed 2\n"); - } - } - - return 0; -} - - -static int pico_tcp_push_nagle_on(struct pico_socket_tcp *t, struct pico_frame *f) -{ - /* Nagle's algorithm enabled, check if ready to send, or put frame in hold queue */ - if (IS_TCP_IDLE(t) && IS_TCP_HOLDQ_EMPTY(t)) - return pico_tcp_push_nagle_enqueue(t, f); - - return pico_tcp_push_nagle_hold(t, f); -} - - - -/* original behavior kept when Nagle disabled; - Nagle algorithm added here, keeping hold frame queue instead of eg linked list of data */ -int pico_tcp_push(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_tcp_hdr *hdr = (struct pico_tcp_hdr *)f->transport_hdr; - struct pico_socket_tcp *t = (struct pico_socket_tcp *) f->sock; - IGNORE_PARAMETER(self); - pico_err = PICO_ERR_NOERR; - hdr->trans.sport = t->sock.local_port; - hdr->trans.dport = t->sock.remote_port; - hdr->seq = long_be(t->snd_last + 1); - hdr->len = (uint8_t)((f->payload - f->transport_hdr) << 2u | (int8_t)t->jumbo); - - if ((uint32_t)f->payload_len > (uint32_t)(t->tcpq_out.max_size - t->tcpq_out.size)) - t->sock.ev_pending &= (uint16_t)(~PICO_SOCK_EV_WR); - - /***************************************************************************/ - - if (!IS_NAGLE_ENABLED((&(t->sock)))) { - /* TCP_NODELAY enabled, original behavior */ - if (pico_enqueue_segment(&t->tcpq_out, f) > 0) { - tcp_dbg_nagle("TCP_PUSH - NO NAGLE - Pushing segment %08x, len %08x to socket %p\n", t->snd_last + 1, f->payload_len, t); - t->snd_last += f->payload_len; - return f->payload_len; - } else { - tcp_dbg("Enqueue failed.\n"); - return 0; - } - } else { - return 
pico_tcp_push_nagle_on(t, f); - } - -} - -inline static void tcp_discard_all_segments(struct pico_tcp_queue *tq) -{ - struct pico_tree_node *index = NULL, *index_safe = NULL; - PICOTCP_MUTEX_LOCK(Mutex); - pico_tree_foreach_safe(index, &tq->pool, index_safe) - { - void *f = index->keyValue; - if(!f) - break; - - pico_tree_delete(&tq->pool, f); - if(IS_INPUT_QUEUE(tq)) - { - struct tcp_input_segment *inp = (struct tcp_input_segment *)f; - PICO_FREE(inp->payload); - PICO_FREE(inp); - } - else - pico_frame_discard(f); - } - tq->frames = 0; - tq->size = 0; - PICOTCP_MUTEX_UNLOCK(Mutex); -} - -void pico_tcp_cleanup_queues(struct pico_socket *sck) -{ - struct pico_socket_tcp *tcp = (struct pico_socket_tcp *)sck; - pico_timer_cancel(tcp->retrans_tmr); - pico_timer_cancel(tcp->keepalive_tmr); - pico_timer_cancel(tcp->fin_tmr); - - tcp->retrans_tmr = 0; - tcp->keepalive_tmr = 0; - tcp->fin_tmr = 0; - - tcp_discard_all_segments(&tcp->tcpq_in); - tcp_discard_all_segments(&tcp->tcpq_out); - tcp_discard_all_segments(&tcp->tcpq_hold); -} - -static int checkLocalClosing(struct pico_socket *s) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - if ((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_ESTABLISHED) { - tcp_dbg("TCP> buffer empty, shutdown established ...\n"); - /* send fin if queue empty and in state shut local (write) */ - tcp_send_fin(t); - /* change tcp state to FIN_WAIT1 */ - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_FIN_WAIT1; - return 1; - } - - return 0; -} - -static int checkRemoteClosing(struct pico_socket *s) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - if ((s->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_CLOSE_WAIT) { - /* send fin if queue empty and in state shut local (write) */ - tcp_send_fin(t); - /* change tcp state to LAST_ACK */ - s->state &= 0x00FFU; - s->state |= PICO_SOCKET_STATE_TCP_LAST_ACK; - tcp_dbg("TCP> STATE: LAST_ACK.\n"); - return 1; - } - - return 0; -} - -void 
pico_tcp_notify_closing(struct pico_socket *sck) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)sck; - if(t->tcpq_out.frames == 0) - { - if(!checkLocalClosing(sck)) - checkRemoteClosing(sck); - } -} - - -int pico_tcp_check_listen_close(struct pico_socket *s) -{ - if (TCP_IS_STATE(s, PICO_SOCKET_STATE_TCP_LISTEN)) { - pico_socket_del(s); - return 0; - } - - return -1; -} - -void pico_tcp_flags_update(struct pico_frame *f, struct pico_socket *s) -{ - f->transport_flags_saved = ((struct pico_socket_tcp *)s)->ts_ok; -} - -int pico_tcp_set_bufsize_in(struct pico_socket *s, uint32_t value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - t->tcpq_in.max_size = value; - return 0; -} - -int pico_tcp_set_bufsize_out(struct pico_socket *s, uint32_t value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - t->tcpq_out.max_size = value; - return 0; -} - -int pico_tcp_get_bufsize_in(struct pico_socket *s, uint32_t *value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - *value = t->tcpq_in.max_size; - return 0; -} - -int pico_tcp_get_bufsize_out(struct pico_socket *s, uint32_t *value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - *value = t->tcpq_out.max_size; - return 0; -} - -int pico_tcp_set_keepalive_probes(struct pico_socket *s, uint32_t value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - t->ka_probes = value; - return 0; -} - -int pico_tcp_set_keepalive_intvl(struct pico_socket *s, uint32_t value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - t->ka_intvl = value; - return 0; -} - -int pico_tcp_set_keepalive_time(struct pico_socket *s, uint32_t value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - t->ka_time = value; - return 0; -} - -int pico_tcp_set_linger(struct pico_socket *s, uint32_t value) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)s; - t->linger_timeout = value; - return 0; -} - -#endif /* PICO_SUPPORT_TCP */ diff --git 
a/kernel/picotcp/modules/pico_tcp.h b/kernel/picotcp/modules/pico_tcp.h deleted file mode 100644 index 784a6bd..0000000 --- a/kernel/picotcp/modules/pico_tcp.h +++ /dev/null @@ -1,106 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - *********************************************************************/ -#ifndef INCLUDE_PICO_TCP -#define INCLUDE_PICO_TCP -#include "pico_addressing.h" -#include "pico_protocol.h" -#include "pico_socket.h" - -extern struct pico_protocol pico_proto_tcp; - -PACKED_STRUCT_DEF pico_tcp_hdr { - struct pico_trans trans; - uint32_t seq; - uint32_t ack; - uint8_t len; - uint8_t flags; - uint16_t rwnd; - uint16_t crc; - uint16_t urgent; -}; - -PACKED_STRUCT_DEF tcp_pseudo_hdr_ipv4 -{ - struct pico_ip4 src; - struct pico_ip4 dst; - uint16_t tcp_len; - uint8_t res; - uint8_t proto; -}; - -#define PICO_TCPHDR_SIZE 20 -#define PICO_SIZE_TCPOPT_SYN 20 -#define PICO_SIZE_TCPHDR (uint32_t)(sizeof(struct pico_tcp_hdr)) - -/* TCP options */ -#define PICO_TCP_OPTION_END 0x00 -#define PICO_TCPOPTLEN_END 1u -#define PICO_TCP_OPTION_NOOP 0x01 -#define PICO_TCPOPTLEN_NOOP 1 -#define PICO_TCP_OPTION_MSS 0x02 -#define PICO_TCPOPTLEN_MSS 4 -#define PICO_TCP_OPTION_WS 0x03 -#define PICO_TCPOPTLEN_WS 3u -#define PICO_TCP_OPTION_SACK_OK 0x04 -#define PICO_TCPOPTLEN_SACK_OK 2 -#define PICO_TCP_OPTION_SACK 0x05 -#define PICO_TCPOPTLEN_SACK 2 /* Plus the block */ -#define PICO_TCP_OPTION_TIMESTAMP 0x08 -#define PICO_TCPOPTLEN_TIMESTAMP 10u - -/* TCP flags */ -#define PICO_TCP_FIN 0x01u -#define PICO_TCP_SYN 0x02u -#define PICO_TCP_RST 0x04u -#define PICO_TCP_PSH 0x08u -#define PICO_TCP_ACK 0x10u -#define PICO_TCP_URG 0x20u -#define PICO_TCP_ECN 0x40u -#define PICO_TCP_CWR 0x80u - -#define PICO_TCP_SYNACK (PICO_TCP_SYN | PICO_TCP_ACK) -#define PICO_TCP_PSHACK (PICO_TCP_PSH | 
PICO_TCP_ACK) -#define PICO_TCP_FINACK (PICO_TCP_FIN | PICO_TCP_ACK) -#define PICO_TCP_FINPSHACK (PICO_TCP_FIN | PICO_TCP_PSH | PICO_TCP_ACK) -#define PICO_TCP_RSTACK (PICO_TCP_RST | PICO_TCP_ACK) - - -PACKED_STRUCT_DEF pico_tcp_option -{ - uint8_t kind; - uint8_t len; -}; - -struct pico_socket *pico_tcp_open(uint16_t family); -uint32_t pico_tcp_read(struct pico_socket *s, void *buf, uint32_t len); -int pico_tcp_initconn(struct pico_socket *s); -int pico_tcp_input(struct pico_socket *s, struct pico_frame *f); -uint16_t pico_tcp_checksum(struct pico_frame *f); -uint16_t pico_tcp_checksum_ipv4(struct pico_frame *f); -#ifdef PICO_SUPPORT_IPV6 -uint16_t pico_tcp_checksum_ipv6(struct pico_frame *f); -#endif -uint16_t pico_tcp_overhead(struct pico_socket *s); -int pico_tcp_output(struct pico_socket *s, int loop_score); -int pico_tcp_queue_in_is_empty(struct pico_socket *s); -int pico_tcp_reply_rst(struct pico_frame *f); -void pico_tcp_cleanup_queues(struct pico_socket *sck); -void pico_tcp_notify_closing(struct pico_socket *sck); -void pico_tcp_flags_update(struct pico_frame *f, struct pico_socket *s); -int pico_tcp_set_bufsize_in(struct pico_socket *s, uint32_t value); -int pico_tcp_set_bufsize_out(struct pico_socket *s, uint32_t value); -int pico_tcp_get_bufsize_in(struct pico_socket *s, uint32_t *value); -int pico_tcp_get_bufsize_out(struct pico_socket *s, uint32_t *value); -int pico_tcp_set_keepalive_probes(struct pico_socket *s, uint32_t value); -int pico_tcp_set_keepalive_intvl(struct pico_socket *s, uint32_t value); -int pico_tcp_set_keepalive_time(struct pico_socket *s, uint32_t value); -int pico_tcp_set_linger(struct pico_socket *s, uint32_t value); -uint16_t pico_tcp_get_socket_mss(struct pico_socket *s); -int pico_tcp_check_listen_close(struct pico_socket *s); - -#endif diff --git a/kernel/picotcp/modules/pico_tftp.c b/kernel/picotcp/modules/pico_tftp.c deleted file mode 100644 index e20a06c..0000000 --- a/kernel/picotcp/modules/pico_tftp.c +++ /dev/null @@ 
-1,1323 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Author: Daniele Lacamera - *********************************************************************/ - -#include -#include -#include -#include -#include - -#ifdef DEBUG_TFTP -#define tftp_dbg dbg -#else -#define tftp_dbg(...) do {} while(0) -#endif - -/* a zero value means adaptative timeout! (2, 4, 8) */ -#define PICO_TFTP_TIMEOUT 2000U - -#define TFTP_MAX_RETRY 3 - -#define TFTP_STATE_READ_REQUESTED 0 -#define TFTP_STATE_RX 1 -#define TFTP_STATE_LAST_ACK_SENT 2 -#define TFTP_STATE_WRITE_REQUESTED 3 -#define TFTP_STATE_TX 4 -#define TFTP_STATE_WAIT_OPT_CONFIRM 5 -#define TFTP_STATE_WAIT_LAST_ACK 6 -#define TFTP_STATE_CLOSING 7 - -#define AUTOMA_STATES (TFTP_STATE_CLOSING + 1) - -/* MAX_OPTIONS_SIZE: "timeout" 255 "tsize" filesize => 8 + 4 + 6 + 11 */ -#define MAX_OPTIONS_SIZE 29 - -/* RRQ and WRQ packets (opcodes 1 and 2 respectively) */ -PACKED_STRUCT_DEF pico_tftp_hdr -{ - uint16_t opcode; -}; - -/* DATA or ACK (opcodes 3 and 4 respectively)*/ -PACKED_STRUCT_DEF pico_tftp_data_hdr -{ - uint16_t opcode; - uint16_t block; -}; - -/* ERROR (opcode 5) */ -PACKED_STRUCT_DEF pico_tftp_err_hdr -{ - uint16_t opcode; - uint16_t error_code; -}; - -#define PICO_TFTP_TOTAL_BLOCK_SIZE (PICO_TFTP_PAYLOAD_SIZE + (int32_t)sizeof(struct pico_tftp_data_hdr)) -#define tftp_payload(p) (((uint8_t *)(p)) + sizeof(struct pico_tftp_data_hdr)) - -/* STATUS FLAGS */ -#define SESSION_STATUS_CLOSED 1 -#define SESSION_STATUS_APP_PENDING 2 -#define SESSION_STATUS_IN_CALLBACK 4 -#define SESSION_STATUS_APP_ACK 64 - -struct pico_tftp_session { - int state; - int status; - int options; - int retry; - uint16_t packet_counter; - /* Current connection */ - struct pico_socket *socket; - union pico_address remote_address; - uint16_t remote_port; - uint16_t localport; 
- pico_time wallclock_timeout; - pico_time bigger_wallclock; - struct pico_tftp_session *next; - uint32_t timer; - unsigned int active_timers; - void *argument; - int (*callback)(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg); - int32_t file_size; - int32_t len; - uint8_t option_timeout; - uint8_t tftp_block[PICO_TFTP_TOTAL_BLOCK_SIZE]; - int32_t block_len; -}; - -struct server_t { - void (*listen_callback)(union pico_address *addr, uint16_t port, uint16_t opcode, char *filename, int32_t len); - struct pico_socket *listen_socket; - uint8_t tftp_block[PICO_TFTP_TOTAL_BLOCK_SIZE]; -}; - -struct automa_events { - void (*ack)(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port); - void (*data)(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port); - void (*error)(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port); - void (*oack)(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port); - void (*timeout)(struct pico_tftp_session *session, pico_time t); -}; - -static struct server_t server; - -static struct pico_tftp_session *tftp_sessions = NULL; - -static inline void session_status_set(struct pico_tftp_session *session, int status) -{ - session->status |= status; -} - -static inline void session_status_clear(struct pico_tftp_session *session, int status) -{ - session->status &= ~status; -} - -static char *extract_arg_pointer(char *arg, char *end_arg, char **value) -{ - char *pos; - - pos = get_string_terminator_position(arg, (size_t)(end_arg - arg)); - if (!pos) - return NULL; - - if (end_arg == ++pos) - return NULL; - - arg = get_string_terminator_position(pos, (size_t)(end_arg - pos)); - - if (!arg) - return NULL; - - *value = pos; - return arg + 1; -} - -static int extract_value(char *str, uint32_t *value, uint32_t max) -{ - char *endptr; - unsigned long num; - - num = strtoul(str, &endptr, 10); 
- - if (endptr == str || *endptr || num > max) - return -1; - - *value = (uint32_t)num; - return 0; -} - -static int parse_optional_arguments(char *option_string, int32_t len, int *options, uint8_t *timeout, int32_t *filesize) -{ - char *pos; - char *end_args = option_string + len; - char *current_option; - int ret; - uint32_t value; - - *options = 0; - - while (option_string < end_args) { - current_option = option_string; - option_string = extract_arg_pointer(option_string, end_args, &pos); - if (!option_string) - return 0; - - if (!pico_strncasecmp("timeout", current_option, (size_t)(pos - current_option))) { - ret = extract_value(pos, &value, PICO_TFTP_MAX_TIMEOUT); - if (ret) - return -1; - - *timeout = (uint8_t)value; - *options |= PICO_TFTP_OPTION_TIME; - } else { - if (!pico_strncasecmp("tsize", current_option, (size_t)(pos - current_option))) { - ret = extract_value(pos, (uint32_t *)filesize, PICO_TFTP_MAX_FILESIZE); - if (ret) - return -1; - - if (*filesize < 0) - return -1; - - *options |= PICO_TFTP_OPTION_FILE; - } - } - } - return 0; -} - -static inline struct pico_tftp_session *pico_tftp_session_create(struct pico_socket *sock, union pico_address *remote_addr) -{ - struct pico_tftp_session *session; - - session = (struct pico_tftp_session *) PICO_ZALLOC(sizeof (struct pico_tftp_session)); - - if (!session) - pico_err = PICO_ERR_ENOMEM; - else { - session->state = 0; - session->status = 0; - session->options = 0; - session->packet_counter = 0u; - session->socket = sock; - session->wallclock_timeout = 0; - session->bigger_wallclock = 0; - session->active_timers = 0; - session->next = NULL; - session->localport = 0; - session->callback = NULL; - session->argument = NULL; - memcpy(&session->remote_address, remote_addr, sizeof(union pico_address)); - session->remote_port = 0; - session->len = 0; - } - - return session; -} - -static struct pico_tftp_session *find_session_by_socket(struct pico_socket *tftp_socket) -{ - struct pico_tftp_session *pos = 
tftp_sessions; - - for (; pos; pos = pos->next) - if (pos->socket == tftp_socket) - return pos; - - return NULL; -} - -/* **************** for future use... - static struct pico_tftp_session * find_session_by_localport(uint16_t localport) - { - struct pico_tftp_session *idx = tftp_sessions; - - for (; idx; idx = idx->next) - if (idx->localport == localport) - return idx; - - return NULL; - } *********************/ - -static void add_session(struct pico_tftp_session *idx) -{ - struct pico_tftp_session *prev = NULL; - struct pico_tftp_session *pos; - - for (pos = tftp_sessions; pos; prev = pos, pos = pos->next) - if (pos->localport > idx->localport) - break; - - if (prev) { - idx->next = prev->next; - prev->next = idx; - } else { - idx->next = tftp_sessions; - tftp_sessions = idx; - } -} - -/* Returns 0 if OK and -1 in case of errors */ -static int del_session(struct pico_tftp_session *idx) -{ - struct pico_tftp_session *prev = NULL; - struct pico_tftp_session *pos; - - for (pos = tftp_sessions; pos; pos = pos->next) { - if (pos == idx) { - if (pos == tftp_sessions) - tftp_sessions = tftp_sessions->next; - else - prev->next = pos->next; - - PICO_FREE(idx); - return 0; - } - - prev = pos; - } - return -1; -} - -static inline int do_callback(struct pico_tftp_session *session, uint16_t err, uint8_t *data, int32_t len) -{ - int ret; - - session_status_set(session, SESSION_STATUS_IN_CALLBACK); - ret = session->callback(session, err, data, len, session->argument); - session_status_clear(session, SESSION_STATUS_IN_CALLBACK); - - return ret; -} - -static void timer_callback(pico_time now, void *arg); -static void tftp_finish(struct pico_tftp_session *session); - -static void tftp_schedule_timeout(struct pico_tftp_session *session, pico_time interval) -{ - pico_time new_timeout = PICO_TIME_MS() + interval; - - if (session->active_timers) { - if (session->bigger_wallclock > new_timeout) { - session->timer = pico_timer_add(interval + 1, timer_callback, session); - if 
(!session->timer) { - tftp_dbg("TFTP: Failed to start callback timer, deleting session\n"); - tftp_finish(session); - return; - } - session->active_timers++; - } - } else { - session->timer = pico_timer_add(interval + 1, timer_callback, session); - if (!session->timer) { - tftp_dbg("TFTP: Failed to start callback timer, deleting session\n"); - tftp_finish(session); - return; - } - session->active_timers++; - session->bigger_wallclock = new_timeout; - } - - session->wallclock_timeout = new_timeout; -} - -static void tftp_finish(struct pico_tftp_session *session) -{ - if (session->state != TFTP_STATE_CLOSING) { - pico_socket_close(session->socket); - session->state = TFTP_STATE_CLOSING; - if (session->active_timers) { - pico_timer_cancel(session->timer); - --session->active_timers; - } - - session->wallclock_timeout = 0; - tftp_schedule_timeout(session, 5); - } -} - -static void tftp_send(struct pico_tftp_session *session, int len) -{ - if (len) - session->len = len; - else - len = session->len; - - pico_socket_sendto(session->socket, session->tftp_block, session->len, &session->remote_address, session->remote_port); -} - -static void tftp_send_ack(struct pico_tftp_session *session) -{ - struct pico_tftp_data_hdr *dh; - - dh = PICO_ZALLOC(sizeof(struct pico_tftp_data_hdr)); - if (!dh) - return; - - dh->opcode = short_be(PICO_TFTP_ACK); - dh->block = short_be(session->packet_counter); - - if (session->socket) { - pico_socket_sendto(session->socket, dh, (int) sizeof(struct pico_tftp_err_hdr), - &session->remote_address, session->remote_port); - tftp_schedule_timeout(session, PICO_TFTP_TIMEOUT); - } - - PICO_FREE(dh); -} - -static size_t prepare_options_string(struct pico_tftp_session *session, char *str_options, int32_t filesize) -{ - size_t len = 0; - int res; - - if (session->options & PICO_TFTP_OPTION_TIME) { - strcpy(str_options, "timeout"); - len += 8; - res = num2string(session->option_timeout, &str_options[len], 4); - if (res < 0) - return 0; - - len += 
(size_t)res; - } - - if (session->options & PICO_TFTP_OPTION_FILE) { - strcpy(&str_options[len], "tsize"); - len += 6; - res = num2string(filesize, &str_options[len], 11); - if (res < 0) - return 0; - - len += (size_t)res; - } - - return len; -} - -static void tftp_send_oack(struct pico_tftp_session *session) -{ - struct pico_tftp_hdr *hdr; - size_t options_size; - size_t options_pos = sizeof(struct pico_tftp_hdr); - uint8_t *buf; - char str_options[MAX_OPTIONS_SIZE] = { - 0 - }; - - options_size = prepare_options_string(session, str_options, session->file_size); - - buf = PICO_ZALLOC(options_pos + options_size); - if (!buf) { - strcpy((char *)session->tftp_block, "Out of memory"); - do_callback(session, PICO_TFTP_EV_ERR_LOCAL, session->tftp_block, 0); - tftp_finish(session); - return; - } - - hdr = (struct pico_tftp_hdr *)buf; - hdr->opcode = short_be(PICO_TFTP_OACK); - memcpy(buf + options_pos, str_options, options_size); - (void)pico_socket_sendto(session->socket, buf, (int)(options_pos + options_size), &session->remote_address, session->remote_port); - PICO_FREE(buf); -} - -static void tftp_send_req(struct pico_tftp_session *session, union pico_address *a, uint16_t port, const char *filename, uint16_t opcode) -{ -#define OCTET_STRSIZ 7U - static const char octet[OCTET_STRSIZ] = { - 0, 'o', 'c', 't', 'e', 't', 0 - }; - struct pico_tftp_hdr *hdr; - size_t len; - size_t options_size; - size_t options_pos; - uint8_t *buf; - char str_options[MAX_OPTIONS_SIZE] = { - 0 - }; - - if (!filename) { - return; - } - - len = strlen(filename); - - options_size = prepare_options_string(session, str_options, (opcode == PICO_TFTP_WRQ) ? 
(session->file_size) : (0)); - - options_pos = sizeof(struct pico_tftp_hdr) + OCTET_STRSIZ + len; - buf = PICO_ZALLOC(options_pos + options_size); - if (!buf) { - strcpy((char *)session->tftp_block, "Out of memory"); - do_callback(session, PICO_TFTP_EV_ERR_LOCAL, session->tftp_block, 0); - tftp_finish(session); - return; - } - - hdr = (struct pico_tftp_hdr *)buf; - hdr->opcode = short_be(opcode); - memcpy(buf + sizeof(struct pico_tftp_hdr), filename, len); - memcpy(buf + sizeof(struct pico_tftp_hdr) + len, octet, OCTET_STRSIZ); - memcpy(buf + options_pos, str_options, options_size); - (void)pico_socket_sendto(session->socket, buf, (int)(options_pos + options_size), a, port); - PICO_FREE(buf); -} - -static void tftp_send_rx_req(struct pico_tftp_session *session, union pico_address *a, uint16_t port, const char *filename) -{ - tftp_send_req(session, a, port, filename, PICO_TFTP_RRQ); - session->state = TFTP_STATE_READ_REQUESTED; - tftp_schedule_timeout(session, PICO_TFTP_TIMEOUT); -} - -static void tftp_send_tx_req(struct pico_tftp_session *session, union pico_address *a, uint16_t port, const char *filename) -{ - tftp_send_req(session, a, port, filename, PICO_TFTP_WRQ); - session->state = TFTP_STATE_WRITE_REQUESTED; - tftp_schedule_timeout(session, PICO_TFTP_TIMEOUT); -} - -static int send_error(uint8_t *buf, struct pico_socket *sock, union pico_address *a, uint16_t port, uint16_t errcode, const char *errmsg) -{ - struct pico_tftp_err_hdr *eh; - int32_t len; - int32_t maxlen = PICO_TFTP_TOTAL_BLOCK_SIZE - sizeof(struct pico_tftp_err_hdr); - - if (!errmsg) - len = 0; - else - len = (int32_t)strlen(errmsg); - - eh = (struct pico_tftp_err_hdr *) buf; - eh->opcode = short_be(PICO_TFTP_ERROR); - eh->error_code = short_be(errcode); - if (len + 1 > maxlen) - len = maxlen; - - if (len) - memcpy(tftp_payload(eh), errmsg, (size_t)len); - - tftp_payload(eh)[len++] = (char)0; - - return pico_socket_sendto(sock, eh, (int)(len + (int32_t)sizeof(struct pico_tftp_err_hdr)), a, 
port); -} - -static void tftp_send_error(struct pico_tftp_session *session, union pico_address *a, uint16_t port, uint16_t errcode, const char *errmsg) -{ - struct pico_tftp_err_hdr *eh; - int32_t len; - int32_t maxlen = PICO_TFTP_TOTAL_BLOCK_SIZE - sizeof(struct pico_tftp_err_hdr); - - if (!errmsg) - len = 0; - else - len = (int32_t)strlen(errmsg); - - if (!a) { - a = &session->remote_address; - port = session->remote_port; - } - - eh = (struct pico_tftp_err_hdr *) (session ? (session->tftp_block) : (server.tftp_block)); - eh->opcode = short_be(PICO_TFTP_ERROR); - eh->error_code = short_be(errcode); - if (len + 1 > maxlen) - len = maxlen; - - if (len) - memcpy(tftp_payload(eh), errmsg, (size_t)len); - - tftp_payload(eh)[len++] = (char)0; - if (session) { - (void)pico_socket_sendto(session->socket, eh, (int) (len + (int32_t)sizeof(struct pico_tftp_err_hdr)), a, port); - tftp_finish(session); - } else - (void)pico_socket_sendto(server.listen_socket, eh, (int) (len + (int32_t)sizeof(struct pico_tftp_err_hdr)), a, port); -} - -static void tftp_send_data(struct pico_tftp_session *session, const uint8_t *data, int32_t len) -{ - struct pico_tftp_data_hdr *dh; - - dh = (struct pico_tftp_data_hdr *) session->tftp_block; - dh->opcode = short_be(PICO_TFTP_DATA); - dh->block = short_be(session->packet_counter++); - - if (len < PICO_TFTP_PAYLOAD_SIZE) - session->state = TFTP_STATE_WAIT_LAST_ACK; - else - session->state = TFTP_STATE_TX; - - memcpy(session->tftp_block + sizeof(struct pico_tftp_data_hdr), data, (size_t)len); - pico_socket_sendto(session->socket, session->tftp_block, (int)(len + (int32_t)sizeof(struct pico_tftp_data_hdr)), - &session->remote_address, session->remote_port); - tftp_schedule_timeout(session, PICO_TFTP_TIMEOUT); -} - -static inline void tftp_eval_finish(struct pico_tftp_session *session, int32_t len) -{ - if (len < PICO_TFTP_PAYLOAD_SIZE) { - pico_socket_close(session->socket); - session->state = TFTP_STATE_CLOSING; - } -} - -static inline int 
tftp_data_prepare(struct pico_tftp_session *session, union pico_address *a, uint16_t port) -{ - if (!session->socket) - return -1; - - if (pico_address_compare(a, &session->remote_address, session->socket->net->proto_number) != 0) { - tftp_send_error(session, a, port, TFTP_ERR_EXCEEDED, "TFTP busy, try again later."); - return -1; - } - - return 0; -} - -static void tftp_req(uint8_t *block, int32_t len, union pico_address *a, uint16_t port) -{ - struct pico_tftp_hdr *hdr = (struct pico_tftp_hdr *)block; - char *filename; - char *pos; - char *mode; - int ret; - - switch (short_be(hdr->opcode)) { - case PICO_TFTP_RRQ: - case PICO_TFTP_WRQ: - filename = (char *)(block + sizeof(struct pico_tftp_hdr)); - len -= (int32_t)sizeof(struct pico_tftp_hdr); - - pos = extract_arg_pointer(filename, filename + len, &mode); - if (!pos) { - send_error(block, server.listen_socket, a, port, TFTP_ERR_EILL, "Invalid argument in request"); - return; - } - - ret = strcmp("octet", mode); - if (ret) { - send_error(block, server.listen_socket, a, port, TFTP_ERR_EILL, "Unsupported mode"); - return; - } - - /*ret = parse_optional_arguments((char *)(block + sizeof(struct pico_tftp_hdr)), len - sizeof(struct pico_tftp_hdr), &new_options, &new_timeout, &new_filesize); - if (ret) { - tftp_send_error(NULL, a, port, TFTP_ERR_EILL, "Bad request"); - return; - } */ - - if (server.listen_callback) { - server.listen_callback(a, port, short_be(hdr->opcode), filename, len); - } - - break; - default: - send_error(block, server.listen_socket, a, port, TFTP_ERR_EILL, "Illegal opcode"); - } -} - -static int event_ack_base(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - struct pico_tftp_data_hdr *dh; - uint16_t block_n; - const char *wrong_address = "Wrong address"; - const char *wrong_block = "Wrong packet number"; - - (void)len; - if (pico_address_compare(a, &session->remote_address, session->socket->net->proto_number) != 0) { - strcpy((char *)session->tftp_block, 
wrong_address); - do_callback(session, PICO_TFTP_EV_ERR_PEER, session->tftp_block, len); - tftp_send_error(session, a, port, TFTP_ERR_EXCEEDED, wrong_address); - return -1; - } - - dh = (struct pico_tftp_data_hdr *)session->tftp_block; - block_n = short_be(dh->block); - if (block_n != (session->packet_counter - 1U)) { - strcpy((char *)session->tftp_block, wrong_block); - do_callback(session, PICO_TFTP_EV_ERR_PEER, session->tftp_block, len); - tftp_send_error(session, a, port, TFTP_ERR_EILL, wrong_block); - return -1; - } - - return 0; -} - -static inline int event_ack0_check(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - struct pico_tftp_data_hdr *dh; - uint16_t block_n; - - (void)len; - if (pico_address_compare(a, &session->remote_address, session->socket->net->proto_number) != 0) { - tftp_send_error(session, a, port, TFTP_ERR_EXCEEDED, "TFTP busy, try again later."); - return -1; - } - - dh = (struct pico_tftp_data_hdr *)session->tftp_block; - block_n = short_be(dh->block); - if (block_n != 0) { - tftp_send_error(session, a, port, TFTP_ERR_EILL, "TFTP connection broken!"); - return -1; - } - - return 0; -} - -static void event_ack0_wr(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - if (!event_ack0_check(session, len, a, port)) { - session->remote_port = port; - do_callback(session, PICO_TFTP_EV_OK, session->tftp_block, 0); - } -} - -static void event_ack0_woc(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - if (!event_ack0_check(session, len, a, port)) - do_callback(session, PICO_TFTP_EV_OPT, session->tftp_block, 0); -} - -static void event_ack(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - if (!event_ack_base(session, len, a, port)) - do_callback(session, PICO_TFTP_EV_OK, session->tftp_block, 0); -} - -static void event_ack_last(struct pico_tftp_session *session, int32_t len, union 
pico_address *a, uint16_t port) -{ - if (!event_ack_base(session, len, a, port)) - tftp_finish(session); -} - -static void event_data(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - struct pico_tftp_data_hdr *dh; - int32_t payload_len = len - (int32_t)sizeof(struct pico_tftp_data_hdr); - - if (tftp_data_prepare(session, a, port)) - return; - - dh = (struct pico_tftp_data_hdr *)session->tftp_block; - if (short_be(dh->block) > (session->packet_counter + 1U)) { - strcpy((char *)session->tftp_block, "Wrong/unexpected sequence number"); - do_callback(session, PICO_TFTP_EV_ERR_LOCAL, session->tftp_block, 0); - tftp_send_error(session, a, port, TFTP_ERR_EILL, "TFTP connection broken!"); - return; - } - - if (short_be(dh->block) == (session->packet_counter + 1U)) { - session->packet_counter++; - if (do_callback(session, PICO_TFTP_EV_OK, tftp_payload(session->tftp_block), payload_len) >= 0) { - if (!(session->status & SESSION_STATUS_APP_ACK)) - tftp_send_ack(session); - } - - if (!(session->status & SESSION_STATUS_APP_ACK)) - tftp_eval_finish(session, len); - } -} - -static void event_data_rdr(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - if (tftp_data_prepare(session, a, port)) - return; - - session->remote_port = port; - session->state = TFTP_STATE_RX; - event_data(session, len, a, port); -} - -static void event_data_rpl(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - struct pico_tftp_data_hdr *dh; - - (void)len; - if (tftp_data_prepare(session, a, port)) - return; - - dh = (struct pico_tftp_data_hdr *)session->tftp_block; - - if (short_be(dh->block) == session->packet_counter) - tftp_send_ack(session); -} - -static void event_err(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - (void)a; - (void)port; - do_callback(session, PICO_TFTP_EV_ERR_PEER, session->tftp_block, len); - tftp_finish(session); -} - 
-static inline void event_oack(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - char *option_string = (char *)session->tftp_block + sizeof(struct pico_tftp_hdr); - int ret; - int proposed_options = session->options; - - (void)a; - - session->remote_port = port; - - ret = parse_optional_arguments(option_string, len - (int32_t)sizeof(struct pico_tftp_hdr), &session->options, &session->option_timeout, &session->file_size); - if (ret || (session->options & ~proposed_options)) { - do_callback(session, PICO_TFTP_EV_ERR_PEER, session->tftp_block, len); - tftp_send_error(session, a, port, TFTP_ERR_EOPT, "Invalid option"); - return; - } - - do_callback(session, PICO_TFTP_EV_OPT, session->tftp_block, len); -} - -static void event_oack_rr(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - event_oack(session, len, a, port); - tftp_send_ack(session); - session->state = TFTP_STATE_RX; -} - -static void event_oack_wr(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - event_oack(session, len, a, port); - session->state = TFTP_STATE_TX; -} - -static void event_timeout(struct pico_tftp_session *session, pico_time t) -{ - pico_time new_timeout; - int factor; - - (void)t; - if (++session->retry == TFTP_MAX_RETRY) { - strcpy((char *)session->tftp_block, "Network timeout"); - do_callback(session, PICO_TFTP_EV_ERR_PEER, session->tftp_block, 0); - tftp_finish(session); - return; - } - - tftp_send(session, 0); - if (session->options & PICO_TFTP_OPTION_TIME) - new_timeout = session->option_timeout * 1000U; - else { - new_timeout = PICO_TFTP_TIMEOUT; - for (factor = session->retry; factor; --factor) - new_timeout *= 2; - } - - tftp_schedule_timeout(session, new_timeout); -} - -static void event_timeout_closing(struct pico_tftp_session *session, pico_time t) -{ - (void)t; - if (session->active_timers == 0) - del_session(session); -} - -static void 
event_timeout_final(struct pico_tftp_session *session, pico_time t) -{ - (void)t; - - tftp_finish(session); -} - -static void unexpected(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - (void)len; - tftp_send_error(session, a, port, TFTP_ERR_EILL, "Unexpected message"); -} - -static void null(struct pico_tftp_session *session, int32_t len, union pico_address *a, uint16_t port) -{ - (void)session; - (void)len; - (void)a; - (void)port; -} - -static struct automa_events fsm[AUTOMA_STATES] = { - /* STATE * ACK DATA ERROR OACK TIMEOUT */ - /* ***************************************************************************************************************** */ - { /* TFTP_STATE_READ_REQUESTED */ unexpected, event_data_rdr, event_err, event_oack_rr, event_timeout}, - { /* TFTP_STATE_RX */ unexpected, event_data, event_err, unexpected, event_timeout}, - { /* TFTP_STATE_LAST_ACK_SENT */ unexpected, event_data_rpl, null, unexpected, event_timeout_final}, - { /* TFTP_STATE_WRITE_REQUESTED */ event_ack0_wr, unexpected, event_err, event_oack_wr, event_timeout}, - { /* TFTP_STATE_TX */ event_ack, unexpected, event_err, unexpected, event_timeout}, - { /* TFTP_STATE_WAIT_OPT_CONFIRM */ event_ack0_woc, unexpected, event_err, unexpected, event_timeout}, - { /* TFTP_STATE_WAIT_LAST_ACK */ event_ack_last, unexpected, event_err, unexpected, event_timeout}, - { /* TFTP_STATE_CLOSING */ null, null, null, null, event_timeout_closing} -}; - -static void tftp_message_received(struct pico_tftp_session *session, uint8_t *block, int32_t len, union pico_address *a, uint16_t port) -{ - struct pico_tftp_hdr *th = (struct pico_tftp_hdr *) block; - - if (!session->callback) - return; - - session->wallclock_timeout = 0; - - switch (short_be(th->opcode)) { - case PICO_TFTP_RRQ: - case PICO_TFTP_WRQ: - unexpected(session, len, a, port); - break; - case PICO_TFTP_DATA: - fsm[session->state].data(session, len, a, port); - break; - case PICO_TFTP_ACK: - 
fsm[session->state].ack(session, len, a, port); - break; - case PICO_TFTP_ERROR: - fsm[session->state].error(session, len, a, port); - break; - case PICO_TFTP_OACK: - fsm[session->state].oack(session, len, a, port); - break; - default: - tftp_send_error(session, NULL, 0, TFTP_ERR_EILL, "Illegal opcode"); - } -} - -static void tftp_cb(uint16_t ev, struct pico_socket *s) -{ - int r; - struct pico_tftp_session *session; - union pico_address ep; - uint16_t port = 0; - - session = find_session_by_socket(s); - if (session) { - if (ev == PICO_SOCK_EV_ERR) { - strcpy((char *)session->tftp_block, "Socket Error"); - do_callback(session, PICO_TFTP_EV_ERR_LOCAL, session->tftp_block, (int32_t)strlen((char *)session->tftp_block)); - tftp_finish(session); - return; - } - - r = pico_socket_recvfrom(s, session->tftp_block, PICO_TFTP_TOTAL_BLOCK_SIZE, &ep, &port); - if (r < (int)sizeof(struct pico_tftp_hdr)) - return; - - tftp_message_received(session, session->tftp_block, r, &ep, port); - } else { - if (!server.listen_socket || s != server.listen_socket) { - return; - } - - r = pico_socket_recvfrom(s, server.tftp_block, PICO_TFTP_TOTAL_BLOCK_SIZE, &ep, &port); - if (r < (int)sizeof(struct pico_tftp_hdr)) - return; - - tftp_req(server.tftp_block, r, &ep, port); - } -} - -static int application_rx_cb(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg) -{ - int *flag = (int *)arg; - - (void)block; - - switch (event) { - case PICO_TFTP_EV_ERR_PEER: - case PICO_TFTP_EV_ERR_LOCAL: - *flag = 0 - event; - break; - case PICO_TFTP_EV_OK: - session->len = len; - *flag = 1; - break; - case PICO_TFTP_EV_OPT: - break; - } - return 0; -} - -static int application_tx_cb(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg) -{ - (void)session; - (void)block; - (void)len; - - *(int*)arg = ((event == PICO_TFTP_EV_OK) || (event == PICO_TFTP_EV_OPT)) ? 
(1) : (0 - event); - return 0; -} - -static void timer_callback(pico_time now, void *arg) -{ - struct pico_tftp_session *session = (struct pico_tftp_session *)arg; - - --session->active_timers; - if (session->wallclock_timeout == 0) { - /* Timer is cancelled. */ - return; - } - - if (now >= session->wallclock_timeout) { - session->wallclock_timeout = 0ULL; - fsm[session->state].timeout(session, now); - } else { - tftp_schedule_timeout(session, session->wallclock_timeout - now); - } -} - -static struct pico_socket *tftp_socket_open(uint16_t family, uint16_t localport) -{ - struct pico_socket *sock; - union pico_address local_address; - - sock = pico_socket_open(family, PICO_PROTO_UDP, tftp_cb); - if (!sock) - return NULL; - - localport = short_be(localport); - - memset(&local_address, 0, sizeof(union pico_address)); - if (pico_socket_bind(sock, &local_address, &localport) < 0) { - pico_socket_close(sock); - return NULL; - } - - return sock; -} - -static inline int tftp_start_check(struct pico_tftp_session *session, uint16_t port, const char *filename, - int (*user_cb)(struct pico_tftp_session *session, uint16_t err, uint8_t *block, int32_t len, void *arg)) -{ - if (!session) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if ((!server.listen_socket) && (port != short_be(PICO_TFTP_PORT))) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (!filename) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (!user_cb) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; -} - -/* *** EXPORTED FUNCTIONS *** */ - -struct pico_tftp_session *pico_tftp_session_setup(union pico_address *a, uint16_t family) -{ - struct pico_socket *sock; - - sock = tftp_socket_open(family, 0); - if (!sock) - return NULL; - - return pico_tftp_session_create(sock, a); -} - -int pico_tftp_get_option(struct pico_tftp_session *session, uint8_t type, int32_t *value) -{ - if (!session) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - switch (type) { - case 
PICO_TFTP_OPTION_FILE: - if (session->options & PICO_TFTP_OPTION_FILE) - *value = session->file_size; - else { - pico_err = PICO_ERR_ENOENT; - return -1; - } - - break; - case PICO_TFTP_OPTION_TIME: - if (session->options & PICO_TFTP_OPTION_TIME) - *value = session->option_timeout; - else { - pico_err = PICO_ERR_ENOENT; - return -1; - } - - break; - default: - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; -} - -int pico_tftp_set_option(struct pico_tftp_session *session, uint8_t type, int32_t value) -{ - if (!session) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - switch (type) { - case PICO_TFTP_OPTION_FILE: - if (value < 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - session->file_size = value; - session->options |= PICO_TFTP_OPTION_FILE; - break; - case PICO_TFTP_OPTION_TIME: - if (value > PICO_TFTP_MAX_TIMEOUT) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - session->option_timeout = (uint8_t)(value & 0xFF); - if (value) { - session->options |= PICO_TFTP_OPTION_TIME; - } else { - session->options &= ~PICO_TFTP_OPTION_TIME; - } - - break; - default: - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; -} - -/* Active RX request from PicoTCP */ -int pico_tftp_start_rx(struct pico_tftp_session *session, uint16_t port, const char *filename, - int (*user_cb)(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg), void *arg) -{ - if (tftp_start_check(session, port, filename, user_cb)) - return -1; - - session->callback = user_cb; - session->packet_counter = 0u; - session->argument = arg; - - add_session(session); - - if (port != short_be(PICO_TFTP_PORT)) { - session->remote_port = port; - session->state = TFTP_STATE_RX; - if (session->options & (PICO_TFTP_OPTION_FILE | PICO_TFTP_OPTION_TIME)) - tftp_send_oack(session); - else - tftp_send_ack(session); - } else { - tftp_send_rx_req(session, &session->remote_address, port, filename); - } - - return 0; -} - -int pico_tftp_start_tx(struct 
pico_tftp_session *session, uint16_t port, const char *filename, - int (*user_cb)(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg), void *arg) -{ - if (tftp_start_check(session, port, filename, user_cb)) - return -1; - - session->callback = user_cb; - session->packet_counter = 1u; - session->argument = arg; - - add_session(session); - - if (port != short_be(PICO_TFTP_PORT)) { - session->remote_port = port; - if (session->options) { - tftp_send_oack(session); - session->state = TFTP_STATE_WAIT_OPT_CONFIRM; - } else { - do_callback(session, PICO_TFTP_EV_OK, NULL, 0); - } - } else - tftp_send_tx_req(session, &session->remote_address, port, filename); - - return 0; -} - -int pico_tftp_reject_request(union pico_address*addr, uint16_t port, uint16_t error_code, const char*error_message) -{ - return send_error(server.tftp_block, server.listen_socket, addr, port, error_code, error_message); -} - -int32_t pico_tftp_send(struct pico_tftp_session *session, const uint8_t *data, int32_t len) -{ - int32_t size; - - - if (len < 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - size = len; - - if (size > PICO_TFTP_PAYLOAD_SIZE) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - tftp_send_data(session, data, size); - - return len; -} - -int pico_tftp_listen(uint16_t family, void (*cb)(union pico_address *addr, uint16_t port, uint16_t opcode, char *filename, int32_t len)) -{ - struct pico_socket *sock; - - if (server.listen_socket) { - pico_err = PICO_ERR_EEXIST; - return -1; - } - - sock = tftp_socket_open(family, PICO_TFTP_PORT); - if (!sock) - return -1; - - server.listen_socket = sock; - server.listen_callback = cb; - - return 0; -} - -int pico_tftp_parse_request_args(char *args, int32_t len, int *options, uint8_t *timeout, int32_t *filesize) -{ - char *pos; - char *end_args = args + len; - - args = extract_arg_pointer(args, end_args, &pos); - - return parse_optional_arguments(args, (int32_t)(end_args - args), options, timeout, 
filesize); -} - -int pico_tftp_abort(struct pico_tftp_session *session, uint16_t error, const char *reason) -{ - if (!session) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (!find_session_by_socket(session->socket)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - tftp_send_error(session, NULL, 0, error, reason); - - return 0; -} - -int pico_tftp_close_server(void) -{ - if (!server.listen_socket) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - pico_socket_close(server.listen_socket); - server.listen_socket = NULL; - return 0; -} - -int pico_tftp_get_file_size(struct pico_tftp_session *session, int32_t *file_size) -{ - return pico_tftp_get_option(session, PICO_TFTP_OPTION_FILE, file_size); -} - -struct pico_tftp_session *pico_tftp_app_setup(union pico_address *a, uint16_t port, uint16_t family, int *synchro) -{ - struct pico_tftp_session *session; - - if (!synchro) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - session = pico_tftp_session_setup(a, family); - if (!session) - return NULL; - - session->remote_port = port; - session->status |= SESSION_STATUS_APP_ACK; - session->argument = synchro; - - *synchro = 0; - - return session; -} - -int pico_tftp_app_start_rx(struct pico_tftp_session *session, const char *filename) -{ - return pico_tftp_start_rx(session, session->remote_port, filename, application_rx_cb, session->argument); -} - -int pico_tftp_app_start_tx(struct pico_tftp_session *session, const char *filename) -{ - return pico_tftp_start_tx(session, session->remote_port, filename, application_tx_cb, session->argument); -} - -int32_t pico_tftp_get(struct pico_tftp_session *session, uint8_t *data, int32_t len) -{ - int synchro; - - if (!session || len < session->len ) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - synchro = *(int*)session->argument; - *(int*)session->argument = 0; - if ((session->state != TFTP_STATE_RX) && (session->state != TFTP_STATE_READ_REQUESTED)) - return -1; - - if (synchro < 0) - return synchro; - - 
memcpy(data, tftp_payload(session->tftp_block), (size_t)session->len); - len = session->len; - - tftp_send_ack(session); - tftp_eval_finish(session, len); - return len; -} - -int32_t pico_tftp_put(struct pico_tftp_session *session, uint8_t *data, int32_t len) -{ - int synchro; - - if ((!session) || (!data) || (len < 0)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - synchro = *(int*)session->argument; - *(int*)session->argument = 0; - if (synchro < 0) - return synchro; - - if (len > PICO_TFTP_PAYLOAD_SIZE) - len = PICO_TFTP_PAYLOAD_SIZE; - - pico_tftp_send(session, data, len); - return len; -} diff --git a/kernel/picotcp/modules/pico_tftp.h b/kernel/picotcp/modules/pico_tftp.h deleted file mode 100644 index 2785189..0000000 --- a/kernel/picotcp/modules/pico_tftp.h +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- *********************************************************************/ -#ifndef PICO_TFTP_H -#define PICO_TFTP_H - -#include -#include - -#define PICO_TFTP_PORT (69) -#define PICO_TFTP_PAYLOAD_SIZE (512) - -#define PICO_TFTP_NONE 0 -#define PICO_TFTP_RRQ 1 -#define PICO_TFTP_WRQ 2 -#define PICO_TFTP_DATA 3 -#define PICO_TFTP_ACK 4 -#define PICO_TFTP_ERROR 5 -#define PICO_TFTP_OACK 6 - -/* Callback user events */ -#define PICO_TFTP_EV_OK 0 -#define PICO_TFTP_EV_OPT 1 -#define PICO_TFTP_EV_ERR_PEER 2 -#define PICO_TFTP_EV_ERR_LOCAL 3 - -/* TFTP ERROR CODES */ -#define TFTP_ERR_UNDEF 0 -#define TFTP_ERR_ENOENT 1 -#define TFTP_ERR_EACC 2 -#define TFTP_ERR_EXCEEDED 3 -#define TFTP_ERR_EILL 4 -#define TFTP_ERR_ETID 5 -#define TFTP_ERR_EEXIST 6 -#define TFTP_ERR_EUSR 7 -#define TFTP_ERR_EOPT 8 - -/* Session options */ -#define PICO_TFTP_OPTION_FILE 1 - -/* timeout: 0 -> adaptative, 1-255 -> fixed */ -#define PICO_TFTP_OPTION_TIME 2 - - -#define PICO_TFTP_MAX_TIMEOUT 255 -#define PICO_TFTP_MAX_FILESIZE (65535 * 512 - 1) - -struct pico_tftp_session; - -struct pico_tftp_session *pico_tftp_session_setup(union pico_address *a, uint16_t family); -int pico_tftp_set_option(struct pico_tftp_session *session, uint8_t type, int32_t value); -int pico_tftp_get_option(struct pico_tftp_session *session, uint8_t type, int32_t *value); - -int pico_tftp_start_rx(struct pico_tftp_session *session, uint16_t port, const char *filename, - int (*user_cb)(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg), void *arg); -int pico_tftp_start_tx(struct pico_tftp_session *session, uint16_t port, const char *filename, - int (*user_cb)(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg), void *arg); - -int pico_tftp_reject_request(union pico_address *addr, uint16_t port, uint16_t error_code, const char *error_message); -int32_t pico_tftp_send(struct pico_tftp_session *session, const uint8_t *data, int32_t len); - -int 
pico_tftp_listen(uint16_t family, void (*cb)(union pico_address *addr, uint16_t port, uint16_t opcode, char *filename, int32_t len)); - -int pico_tftp_parse_request_args(char *args, int32_t len, int *options, uint8_t *timeout, int32_t *filesize); - -int pico_tftp_abort(struct pico_tftp_session *session, uint16_t error, const char *reason); -int pico_tftp_close_server(void); - -int pico_tftp_get_file_size(struct pico_tftp_session *session, int32_t *file_size); - -/* SPECIFIC APPLICATION DRIVEN FUNCTIONS */ -struct pico_tftp_session *pico_tftp_app_setup(union pico_address *a, uint16_t port, uint16_t family, int *synchro); - -int pico_tftp_app_start_rx(struct pico_tftp_session *session, const char *filename); -int pico_tftp_app_start_tx(struct pico_tftp_session *session, const char *filename); - -int32_t pico_tftp_get(struct pico_tftp_session *session, uint8_t *data, int32_t len); -int32_t pico_tftp_put(struct pico_tftp_session *session, uint8_t *data, int32_t len); - -#endif diff --git a/kernel/picotcp/modules/pico_udp.c b/kernel/picotcp/modules/pico_udp.c deleted file mode 100644 index 6d3ba72..0000000 --- a/kernel/picotcp/modules/pico_udp.c +++ /dev/null @@ -1,222 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Daniele Lacamera - *********************************************************************/ - - -#include "pico_udp.h" -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_socket.h" -#include "pico_stack.h" - -#ifdef DEBUG_UDP -#define udp_dbg dbg -#else -#define udp_dbg(...) 
do {} while(0) -#endif - -#define UDP_FRAME_OVERHEAD (sizeof(struct pico_frame)) - -/* Queues */ -static struct pico_queue udp_in = { - 0 -}; -static struct pico_queue udp_out = { - 0 -}; - - -/* Functions */ - -uint16_t pico_udp_checksum_ipv4(struct pico_frame *f) -{ - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) f->net_hdr; - struct pico_udp_hdr *udp_hdr = (struct pico_udp_hdr *) f->transport_hdr; - struct pico_socket *s = f->sock; - struct pico_ipv4_pseudo_hdr pseudo; - - if (s) { - /* Case of outgoing frame */ - udp_dbg("UDP CRC: on outgoing frame\n"); - pseudo.src.addr = s->local_addr.ip4.addr; - pseudo.dst.addr = s->remote_addr.ip4.addr; - } else { - /* Case of incomming frame */ - udp_dbg("UDP CRC: on incomming frame\n"); - pseudo.src.addr = hdr->src.addr; - pseudo.dst.addr = hdr->dst.addr; - } - - pseudo.zeros = 0; - pseudo.proto = PICO_PROTO_UDP; - pseudo.len = short_be(f->transport_len); - - return pico_dualbuffer_checksum(&pseudo, sizeof(struct pico_ipv4_pseudo_hdr), udp_hdr, f->transport_len); -} - -#ifdef PICO_SUPPORT_IPV6 -uint16_t pico_udp_checksum_ipv6(struct pico_frame *f) -{ - struct pico_ipv6_hdr *ipv6_hdr = (struct pico_ipv6_hdr *)f->net_hdr; - struct pico_udp_hdr *udp_hdr = (struct pico_udp_hdr *)f->transport_hdr; - struct pico_ipv6_pseudo_hdr pseudo = { - .src = {{0}}, .dst = {{0}}, .len = 0, .zero = {0}, .nxthdr = 0 - }; - struct pico_socket *s = f->sock; - struct pico_remote_endpoint *remote_endpoint = (struct pico_remote_endpoint *)f->info; - - /* XXX If the IPv6 packet contains a Routing header, the Destination - * Address used in the pseudo-header is that of the final destination */ - if (s) { - /* Case of outgoing frame */ - pseudo.src = s->local_addr.ip6; - if (remote_endpoint) - pseudo.dst = remote_endpoint->remote_addr.ip6; - else - pseudo.dst = s->remote_addr.ip6; - } else { - /* Case of incomming frame */ - pseudo.src = ipv6_hdr->src; - pseudo.dst = ipv6_hdr->dst; - } - - pseudo.len = long_be(f->transport_len); - 
pseudo.nxthdr = PICO_PROTO_UDP; - - return pico_dualbuffer_checksum(&pseudo, sizeof(struct pico_ipv6_pseudo_hdr), udp_hdr, f->transport_len); -} -#endif - - - -static int pico_udp_process_out(struct pico_protocol *self, struct pico_frame *f) -{ - IGNORE_PARAMETER(self); - return (int)pico_network_send(f); -} - -static int pico_udp_push(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_udp_hdr *hdr = (struct pico_udp_hdr *) f->transport_hdr; - struct pico_remote_endpoint *remote_endpoint = (struct pico_remote_endpoint *) f->info; - - /* this (fragmented) frame should contain a transport header */ - if (f->transport_hdr != f->payload) { - hdr->trans.sport = f->sock->local_port; - if (remote_endpoint) { - hdr->trans.dport = remote_endpoint->remote_port; - } else { - hdr->trans.dport = f->sock->remote_port; - } - - hdr->len = short_be(f->transport_len); - - /* do not perform CRC validation. If you want to, a system needs to be - implemented to calculate the CRC over the total payload of a - fragmented payload - */ - hdr->crc = 0; - } - - if (pico_enqueue(self->q_out, f) > 0) { - return f->payload_len; - } else { - return 0; - } -} - -/* Interface: protocol definition */ -struct pico_protocol pico_proto_udp = { - .name = "udp", - .proto_number = PICO_PROTO_UDP, - .layer = PICO_LAYER_TRANSPORT, - .process_in = pico_transport_process_in, - .process_out = pico_udp_process_out, - .push = pico_udp_push, - .q_in = &udp_in, - .q_out = &udp_out, -}; - - - -struct pico_socket *pico_udp_open(void) -{ - struct pico_socket_udp *u = PICO_ZALLOC(sizeof(struct pico_socket_udp)); - if (!u) - return NULL; - - u->mode = PICO_UDP_MODE_UNICAST; - -#ifdef PICO_SUPPORT_MCAST - u->mc_ttl = PICO_IP_DEFAULT_MULTICAST_TTL; - /* enable multicast loopback by default */ - u->sock.opt_flags |= (1 << PICO_SOCKET_OPT_MULTICAST_LOOP); -#endif - - return &u->sock; -} - -static void pico_udp_get_msginfo(struct pico_frame *f, struct pico_msginfo *msginfo) -{ - if (!msginfo || !f->net_hdr) 
- return; - - msginfo->dev = f->dev; - - if (IS_IPV4(f)) { /* IPV4 */ -#ifdef PICO_SUPPORT_IPV4 - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *)(f->net_hdr); - msginfo->ttl = hdr->ttl; - msginfo->tos = hdr->tos; -#endif - } else { -#ifdef PICO_SUPPORT_IPV6 - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)(f->net_hdr); - msginfo->ttl = hdr->hop; - msginfo->tos = (hdr->vtf >> 20) & 0xFF; /* IPv6 traffic class */ -#endif - } -} - -uint16_t pico_udp_recv(struct pico_socket *s, void *buf, uint16_t len, void *src, uint16_t *port, struct pico_msginfo *msginfo) -{ - struct pico_frame *f = pico_queue_peek(&s->q_in); - if (f) { - if(!f->payload_len) { - f->payload = f->transport_hdr + sizeof(struct pico_udp_hdr); - f->payload_len = (uint16_t)(f->transport_len - sizeof(struct pico_udp_hdr)); - } - - udp_dbg("expected: %d, got: %d\n", len, f->payload_len); - if (src) - pico_store_network_origin(src, f); - - if (port) { - struct pico_trans *hdr = (struct pico_trans *)f->transport_hdr; - *port = hdr->sport; - } - - if (msginfo) { - pico_udp_get_msginfo(f, msginfo); - } - - if (f->payload_len > len) { - memcpy(buf, f->payload, len); - f->payload += len; - f->payload_len = (uint16_t)(f->payload_len - len); - return len; - } else { - uint16_t ret = f->payload_len; - memcpy(buf, f->payload, f->payload_len); - f = pico_dequeue(&s->q_in); - pico_frame_discard(f); - return ret; - } - } else return 0; -} - diff --git a/kernel/picotcp/modules/pico_udp.h b/kernel/picotcp/modules/pico_udp.h deleted file mode 100644 index 7c5814e..0000000 --- a/kernel/picotcp/modules/pico_udp.h +++ /dev/null @@ -1,45 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - *********************************************************************/ -#ifndef INCLUDE_PICO_UDP -#define INCLUDE_PICO_UDP -#include "pico_addressing.h" -#include "pico_protocol.h" -#include "pico_socket.h" -#define PICO_UDP_MODE_UNICAST 0x01 -#define PICO_UDP_MODE_MULTICAST 0x02 -#define PICO_UDP_MODE_BROADCAST 0xFF - -struct pico_socket_udp -{ - struct pico_socket sock; - int mode; - uint8_t mc_ttl; /* Multicasting TTL */ -}; - - -extern struct pico_protocol pico_proto_udp; - -PACKED_STRUCT_DEF pico_udp_hdr { - struct pico_trans trans; - uint16_t len; - uint16_t crc; -}; -#define PICO_UDPHDR_SIZE 8 - -struct pico_socket *pico_udp_open(void); -uint16_t pico_udp_recv(struct pico_socket *s, void *buf, uint16_t len, void *src, uint16_t *port, struct pico_msginfo *msginfo); -uint16_t pico_udp_checksum_ipv4(struct pico_frame *f); - -#ifdef PICO_SUPPORT_IPV6 -uint16_t pico_udp_checksum_ipv6(struct pico_frame *f); -#endif - - -int pico_udp_setsockopt(struct pico_socket *s, int option, void *value); - -#endif diff --git a/kernel/picotcp/rules/6lowpan.mk b/kernel/picotcp/rules/6lowpan.mk deleted file mode 100644 index 58bbf69..0000000 --- a/kernel/picotcp/rules/6lowpan.mk +++ /dev/null @@ -1,62 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_6LOWPAN -DPICO_SUPPORT_IPV6 - -################################################################################ -# DEFAULTS -################################################################################ - -# Enable the 6LoWPAN IPHC compression scheme by default -6LOWPAN_IPHC?=1 - -# Disable MAC framing for mac-enabled radios, disabled by default -6LOWPAN_NOMAC?=0 - -# Enable IEEE802.15.4 device support by default -IEEE802154?=1 - -# Enable radiotest packet dump -RADIOTEST_PCAP?=0 - -################################################################################ -# 6LOWPAN OPTIONS -################################################################################ - -ifeq ($(6LOWPAN_IPHC), 1) - EXTRA_CFLAGS+=-DPICO_6LOWPAN_IPHC_ENABLED -endif - 
-ifeq ($(6LOWPAN_NOMAC), 1) - EXTRA_CFLAGS+=-DPICO_6LOWPAN_NOMAC -endif - -################################################################################ -# 6LOWPAN LINK LAYER OPTIONS -################################################################################ - -# IEEE802.15.4 with or without mac layer -ifeq ($(IEEE802154), 1) - 6LOWPAN_OPTIONS+=-DPICO_SUPPORT_802154 - POSIX_OBJ+=modules/pico_dev_radiotest.o \ - modules/pico_dev_radio_mgr.o -endif - -OPTIONS+=$(6LOWPAN_OPTIONS) - -# Append module objects -MOD_OBJ+=$(LIBBASE)modules/pico_6lowpan_ll.o -MOD_OBJ+=$(LIBBASE)modules/pico_6lowpan.o -MOD_OBJ+=$(LIBBASE)modules/pico_802154.o - -# Count the amount of supported 6LoWPAN Link Layer protocols based on the amount of words in -# $6LOWPAN_OPTIONS. This allows us to define a static array that can be initialized with the 6LoWPAN -# link layer protocol definitions for the supported link layer protocols. This happens upon -# initialization of the 6LoWPAN_LL-layer. -EXTRA_CFLAGS+=-DPICO_6LOWPAN_LLS=$(words $(6LOWPAN_OPTIONS)) - -################################################################################ -# RADIOTEST -################################################################################ - -ifeq ($(RADIOTEST_PCAP), 1) - EXTRA_CFLAGS+=-DRADIO_PCAP - TEST_LDFLAGS+=-lpcap -endif diff --git a/kernel/picotcp/rules/crc.mk b/kernel/picotcp/rules/crc.mk deleted file mode 100644 index 8ec9344..0000000 --- a/kernel/picotcp/rules/crc.mk +++ /dev/null @@ -1 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_CRC diff --git a/kernel/picotcp/rules/cyassl.mk b/kernel/picotcp/rules/cyassl.mk deleted file mode 100644 index 5903b8e..0000000 --- a/kernel/picotcp/rules/cyassl.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_CYASSL -LDFLAGS+=-lcyassl - diff --git a/kernel/picotcp/rules/debug.mk b/kernel/picotcp/rules/debug.mk deleted file mode 100644 index 481c45d..0000000 --- a/kernel/picotcp/rules/debug.mk +++ /dev/null @@ -1,207 +0,0 @@ -DEBUG_ALL?=0 - -DEBUG_ARP?=0 
-DEBUG_AODV?=0 -DEBUG_PPP?=0 -DEBUG_TAP_ALL?=0 -DEBUG_TAP_GENERAL?=0 -DEBUG_TAP_INFO?=0 -DEBUG_TAP_WIN?=0 -DEBUG_TAP_REG?=0 -DEBUG_DHCP_CLIENT?=0 -DEBUG_DHCP_SERVER?=0 -DEBUG_DNS?=0 -DEBUG_DNS_SD?=0 -DEBUG_FRAG?=0 -DEBUG_IGMP?=0 -DEBUG_IPF?=0 -DEBUG_MCAST?=0 -DEBUG_IPV6?=0 -DEBUG_IPV6_ROUTE?=0 -DEBUG_IPV6_ND?=0 -DEBUG_MDNS?=0 -DEBUG_MLD?=0 -DEBUG_MM?=0 -DEBUG_NAT?=0 -DEBUG_OLSR?=0 -DEBUG_SLAACV4?=0 -DEBUG_SNTP?=0 -DEBUG_TCP_ALL?=0 -DEBUG_TCP_NAGLE?=0 -DEBUG_TCP_OPTIONS?=0 -DEBUG_TCP_GENERAL?=0 -DEBUG_TFTP?=0 -DEBUG_UDP?=0 -DEBUG_6LOWPAN?=0 -DEBUG_RADIOTEST?=0 - -ifneq ($(DEBUG_ALL),0) - DEBUG_ARP=1 - DEBUG_AODV=1 - DEBUG_PPP=1 - DEBUG_TAP_ALL=1 - DEBUG_DHCP_CLIENT=1 - DEBUG_DHCP_SERVER=1 - DEBUG_DNS=1 - DEBUG_DNS_SD=1 - DEBUG_FRAG=1 - DEBUG_IGMP=1 - DEBUG_IPF=1 - DEBUG_MCAST=1 - DEBUG_IPV6=1 - DEBUG_IPV6_ROUTE=1 - DEBUG_IPV6_ND=1 - DEBUG_MDNS=1 - DEBUG_MLD=1 - DEBUG_MM=1 - DEBUG_NAT=1 - DEBUG_OLSR=1 - DEBUG_SLAACV4=1 - DEBUG_SNTP=1 - DEBUG_TCP_ALL=1 - DEBUG_TFTP=1 - DEBUG_UDP=1 - DEBUG_6LOWPAN=1 - DEBUG_RADIOTEST=1 -endif - -ifneq ($(DEBUG_TCP_ALL),0) - DEBUG_TCP_NAGLE=1 - DEBUG_TCP_OPTIONS=1 - DEBUG_TCP_GENERAL=1 -endif - -ifneq ($(DEBUG_TAP_ALL),0) - DEBUG_TAP_GENERAL=1 - DEBUG_TAP_INFO=1 - DEBUG_TAP_WIN=1 - DEBUG_TAP_REG=1 -endif - -ifneq ($(DEBUG_ARP),0) - CFLAGS+=-DDEBUG_ARP -endif - -ifneq ($(DEBUG_AODV),0) - CFLAGS+=-DDEBUG_AODV -endif - -ifneq ($(DEBUG_PPP),0) - CFLAGS+=-DDEBUG_PPP -endif - -ifneq ($(DEBUG_TAP_GENERAL),0) - CFLAGS+=-DDEBUG_TAP_GENERAL -endif - -ifneq ($(DEBUG_TAP_INFO),0) - CFLAGS+=-DDEBUG_TAP_INFO -endif - -ifneq ($(DEBUG_TAP_WIN),0) - CFLAGS+=-DDEBUG_TAP_WIN -endif - -ifneq ($(DEBUG_TAP_REG),0) - CFLAGS+=-DDEBUG_TAP_REG -endif - -ifneq ($(DEBUG_DHCP_CLIENT),0) - CFLAGS+=-DDEBUG_DHCP_CLIENT -endif - -ifneq ($(DEBUG_DHCP_SERVER),0) - CFLAGS+=-DDEBUG_DHCP_SERVER -endif - -ifneq ($(DEBUG_DNS),0) - CFLAGS+=-DDEBUG_DNS -endif - -ifneq ($(DEBUG_DNS_SD),0) - CFLAGS+=-DDEBUG_DNS_SD -endif - -ifneq ($(DEBUG_FRAG),0) - CFLAGS+=-DDEBUG_FRAG -endif - 
-ifneq ($(DEBUG_IGMP),0) - CFLAGS+=-DDEBUG_IGMP -endif - -ifneq ($(DEBUG_IPF),0) - CFLAGS+=-DDEBUG_IPF -endif - -ifneq ($(DEBUG_MCAST),0) - CFLAGS+=-DDEBUG_MCAST -endif - -ifneq ($(DEBUG_IPV6),0) - CFLAGS+=-DDEBUG_IPV6 -endif - -ifneq ($(DEBUG_IPV6_ROUTE),0) - CFLAGS+=-DDEBUG_IPV6_ROUTE -endif - -ifneq ($(DEBUG_IPV6_ND),0) - CFLAGS+=-DDEBUG_IPV6_ND -endif - -ifneq ($(DEBUG_MDNS),0) - CFLAGS+=-DDEBUG_MDNS -endif - -ifneq ($(DEBUG_MLD),0) - CFLAGS+=-DDEBUG_MLD -endif - -ifneq ($(DEBUG_MM),0) - CFLAGS+=-DDEBUG_MM -endif - -ifneq ($(DEBUG_NAT),0) - CFLAGS+=-DDEBUG_NAT -endif - -ifneq ($(DEBUG_OLSR),0) - CFLAGS+=-DDEBUG_OLSR -endif - -ifneq ($(DEBUG_SLAACV4),0) - CFLAGS+=-DDEBUG_SLAACV4 -endif - -ifneq ($(DEBUG_SNTP),0) - CFLAGS+=-DDEBUG_SNTP -endif - -ifneq ($(DEBUG_TCP_NAGLE),0) - CFLAGS+=-DDEBUG_TCP_NAGLE -endif - -ifneq ($(DEBUG_TCP_OPTIONS),0) - CFLAGS+=-DDEBUG_TCP_OPTIONS -endif - -ifneq ($(DEBUG_TCP_GENERAL),0) - CFLAGS+=-DDEBUG_TCP_GENERAL -endif - -ifneq ($(DEBUG_TFTP),0) - CFLAGS+=-DDEBUG_TFTP -endif - -ifneq ($(DEBUG_UDP),0) - CFLAGS+=-DDEBUG_UDP -endif - -ifneq ($(DEBUG_6LOWPAN),0) - CFLAGS+=-DDEBUG_6LOWPAN -endif - -ifneq ($(DEBUG_RADIOTEST), 0) - CFLAGS+=-DDEBUG_RADIOTEST -endif diff --git a/kernel/picotcp/rules/devloop.mk b/kernel/picotcp/rules/devloop.mk deleted file mode 100644 index 066a281..0000000 --- a/kernel/picotcp/rules/devloop.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_DEVLOOP -MOD_OBJ+=$(LIBBASE)modules/pico_dev_loop.o diff --git a/kernel/picotcp/rules/dhcp_client.mk b/kernel/picotcp/rules/dhcp_client.mk deleted file mode 100644 index 94a1507..0000000 --- a/kernel/picotcp/rules/dhcp_client.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_DHCPC -MOD_OBJ+=$(LIBBASE)modules/pico_dhcp_client.o $(LIBBASE)modules/pico_dhcp_common.o diff --git a/kernel/picotcp/rules/dhcp_server.mk b/kernel/picotcp/rules/dhcp_server.mk deleted file mode 100644 index c6d2b59..0000000 --- a/kernel/picotcp/rules/dhcp_server.mk +++ /dev/null @@ -1,2 
+0,0 @@ -OPTIONS+=-DPICO_SUPPORT_DHCPD -MOD_OBJ+=$(LIBBASE)modules/pico_dhcp_server.o $(LIBBASE)modules/pico_dhcp_common.o diff --git a/kernel/picotcp/rules/dns_client.mk b/kernel/picotcp/rules/dns_client.mk deleted file mode 100644 index f5e14c7..0000000 --- a/kernel/picotcp/rules/dns_client.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_DNS_CLIENT -MOD_OBJ+=$(LIBBASE)modules/pico_dns_client.o $(LIBBASE)modules/pico_dns_common.o diff --git a/kernel/picotcp/rules/dns_sd.mk b/kernel/picotcp/rules/dns_sd.mk deleted file mode 100644 index 62d05cc..0000000 --- a/kernel/picotcp/rules/dns_sd.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_DNS_SD -MOD_OBJ+=$(LIBBASE)modules/pico_dns_sd.o $(LIBBASE)modules/pico_mdns.o $(LIBBASE)modules/pico_dns_common.o \ No newline at end of file diff --git a/kernel/picotcp/rules/eth.mk b/kernel/picotcp/rules/eth.mk deleted file mode 100644 index 50b598a..0000000 --- a/kernel/picotcp/rules/eth.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_ETH -MOD_OBJ+=$(LIBBASE)modules/pico_arp.o -MOD_OBJ+=$(LIBBASE)modules/pico_ethernet.o diff --git a/kernel/picotcp/rules/icmp4.mk b/kernel/picotcp/rules/icmp4.mk deleted file mode 100644 index 826e0e0..0000000 --- a/kernel/picotcp/rules/icmp4.mk +++ /dev/null @@ -1,5 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_ICMP4 -MOD_OBJ+=$(LIBBASE)modules/pico_icmp4.o -ifneq ($(PING),0) - OPTIONS+=-DPICO_SUPPORT_PING -endif diff --git a/kernel/picotcp/rules/igmp.mk b/kernel/picotcp/rules/igmp.mk deleted file mode 100644 index 804079b..0000000 --- a/kernel/picotcp/rules/igmp.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_IGMP -MOD_OBJ+=$(LIBBASE)modules/pico_igmp.o - diff --git a/kernel/picotcp/rules/ipc.mk b/kernel/picotcp/rules/ipc.mk deleted file mode 100644 index 61c5ed2..0000000 --- a/kernel/picotcp/rules/ipc.mk +++ /dev/null @@ -1 +0,0 @@ -MOD_OBJ+=$(LIBBASE)modules/pico_dev_ipc.o diff --git a/kernel/picotcp/rules/ipfilter.mk b/kernel/picotcp/rules/ipfilter.mk deleted file mode 
100644 index 3044eb6..0000000 --- a/kernel/picotcp/rules/ipfilter.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_IPFILTER -MOD_OBJ+=$(LIBBASE)modules/pico_ipfilter.o diff --git a/kernel/picotcp/rules/ipv4.mk b/kernel/picotcp/rules/ipv4.mk deleted file mode 100644 index 34d2108..0000000 --- a/kernel/picotcp/rules/ipv4.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_IPV4 -MOD_OBJ+=$(LIBBASE)modules/pico_ipv4.o diff --git a/kernel/picotcp/rules/ipv4frag.mk b/kernel/picotcp/rules/ipv4frag.mk deleted file mode 100644 index a79ccb3..0000000 --- a/kernel/picotcp/rules/ipv4frag.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_IPV4FRAG -MOD_OBJ+=$(LIBBASE)modules/pico_fragments.o diff --git a/kernel/picotcp/rules/ipv6.mk b/kernel/picotcp/rules/ipv6.mk deleted file mode 100644 index f313cbf..0000000 --- a/kernel/picotcp/rules/ipv6.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_IPV6 -DPICO_SUPPORT_ICMP6 -MOD_OBJ+=$(LIBBASE)modules/pico_ipv6.o $(LIBBASE)modules/pico_ipv6_nd.o $(LIBBASE)modules/pico_icmp6.o -include rules/ipv6frag.mk diff --git a/kernel/picotcp/rules/ipv6frag.mk b/kernel/picotcp/rules/ipv6frag.mk deleted file mode 100644 index da6435b..0000000 --- a/kernel/picotcp/rules/ipv6frag.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_IPV6FRAG -MOD_OBJ+=$(LIBBASE)modules/pico_fragments.o diff --git a/kernel/picotcp/rules/mcast.mk b/kernel/picotcp/rules/mcast.mk deleted file mode 100644 index 5628a7b..0000000 --- a/kernel/picotcp/rules/mcast.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_MCAST -MOD_OBJ+=$(LIBBASE)modules/pico_mcast.o diff --git a/kernel/picotcp/rules/mdns.mk b/kernel/picotcp/rules/mdns.mk deleted file mode 100644 index b28aeeb..0000000 --- a/kernel/picotcp/rules/mdns.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_MDNS -MOD_OBJ+=$(LIBBASE)modules/pico_mdns.o $(LIBBASE)modules/pico_dns_common.o diff --git a/kernel/picotcp/rules/memory_manager.mk 
b/kernel/picotcp/rules/memory_manager.mk deleted file mode 100644 index 7b0d923..0000000 --- a/kernel/picotcp/rules/memory_manager.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_MM -MOD_OBJ+=$(LIBBASE)modules/pico_mm.o diff --git a/kernel/picotcp/rules/mld.mk b/kernel/picotcp/rules/mld.mk deleted file mode 100644 index 3302e7e..0000000 --- a/kernel/picotcp/rules/mld.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_MLD -MOD_OBJ+=$(LIBBASE)modules/pico_mld.o - diff --git a/kernel/picotcp/rules/nat.mk b/kernel/picotcp/rules/nat.mk deleted file mode 100644 index 2bd67b5..0000000 --- a/kernel/picotcp/rules/nat.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_NAT -MOD_OBJ+=$(LIBBASE)modules/pico_nat.o diff --git a/kernel/picotcp/rules/olsr.mk b/kernel/picotcp/rules/olsr.mk deleted file mode 100644 index 3cf4443..0000000 --- a/kernel/picotcp/rules/olsr.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_OLSR -MOD_OBJ+=$(LIBBASE)modules/pico_olsr.o diff --git a/kernel/picotcp/rules/pcap.mk b/kernel/picotcp/rules/pcap.mk deleted file mode 100644 index 9c67277..0000000 --- a/kernel/picotcp/rules/pcap.mk +++ /dev/null @@ -1 +0,0 @@ -MOD_OBJ+=$(LIBBASE)modules/pico_dev_pcap.o diff --git a/kernel/picotcp/rules/polarssl.mk b/kernel/picotcp/rules/polarssl.mk deleted file mode 100644 index e5df9d9..0000000 --- a/kernel/picotcp/rules/polarssl.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS += -DPICO_SUPPORT_POLARSSL -LDFLAGS += -lpolarssl diff --git a/kernel/picotcp/rules/ppp.mk b/kernel/picotcp/rules/ppp.mk deleted file mode 100644 index f47c083..0000000 --- a/kernel/picotcp/rules/ppp.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_PPP -MOD_OBJ+=$(LIBBASE)modules/pico_dev_ppp.o - diff --git a/kernel/picotcp/rules/slaacv4.mk b/kernel/picotcp/rules/slaacv4.mk deleted file mode 100644 index 2073a2a..0000000 --- a/kernel/picotcp/rules/slaacv4.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_SLAACV4 
-MOD_OBJ+=$(LIBBASE)modules/pico_slaacv4.o $(LIBBASE)modules/pico_hotplug_detection.o diff --git a/kernel/picotcp/rules/sntp_client.mk b/kernel/picotcp/rules/sntp_client.mk deleted file mode 100644 index 423583b..0000000 --- a/kernel/picotcp/rules/sntp_client.mk +++ /dev/null @@ -1,2 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_SNTP_CLIENT -MOD_OBJ+=$(LIBBASE)modules/pico_sntp_client.o diff --git a/kernel/picotcp/rules/tap.mk b/kernel/picotcp/rules/tap.mk deleted file mode 100644 index 2f3e149..0000000 --- a/kernel/picotcp/rules/tap.mk +++ /dev/null @@ -1 +0,0 @@ -MOD_OBJ+=$(LIBBASE)modules/pico_dev_tap.o diff --git a/kernel/picotcp/rules/tcp.mk b/kernel/picotcp/rules/tcp.mk deleted file mode 100644 index ae53034..0000000 --- a/kernel/picotcp/rules/tcp.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_TCP -MOD_OBJ+=$(LIBBASE)modules/pico_tcp.o -MOD_OBJ+=$(LIBBASE)modules/pico_socket_tcp.o diff --git a/kernel/picotcp/rules/tun.mk b/kernel/picotcp/rules/tun.mk deleted file mode 100644 index a5b93fe..0000000 --- a/kernel/picotcp/rules/tun.mk +++ /dev/null @@ -1 +0,0 @@ -MOD_OBJ+=$(LIBBASE)modules/pico_dev_tun.o diff --git a/kernel/picotcp/rules/udp.mk b/kernel/picotcp/rules/udp.mk deleted file mode 100644 index ec9157c..0000000 --- a/kernel/picotcp/rules/udp.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_UDP -MOD_OBJ+=$(LIBBASE)modules/pico_udp.o -MOD_OBJ+=$(LIBBASE)modules/pico_socket_udp.o diff --git a/kernel/picotcp/rules/wolfssl.mk b/kernel/picotcp/rules/wolfssl.mk deleted file mode 100644 index f66d114..0000000 --- a/kernel/picotcp/rules/wolfssl.mk +++ /dev/null @@ -1,3 +0,0 @@ -OPTIONS+=-DPICO_SUPPORT_CYASSL -DPICO_SUPPORT_WOLFSSL -LDFLAGS+=-lwolfssl - diff --git a/kernel/picotcp/stack/pico_device.c b/kernel/picotcp/stack/pico_device.c deleted file mode 100644 index 6be33bf..0000000 --- a/kernel/picotcp/stack/pico_device.c +++ /dev/null @@ -1,498 +0,0 @@ -/********************************************************************* - PicoTCP. 
Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Daniele Lacamera - *********************************************************************/ - -#include "pico_config.h" -#include "pico_device.h" -#include "pico_stack.h" -#include "pico_protocol.h" -#include "pico_tree.h" -#include "pico_ipv6.h" -#include "pico_ipv4.h" -#include "pico_icmp6.h" -#include "pico_eth.h" -#include "pico_802154.h" -#include "pico_6lowpan.h" -#include "pico_6lowpan_ll.h" -#include "pico_addressing.h" -#define PICO_DEVICE_DEFAULT_MTU (1500) - -struct pico_devices_rr_info { - struct pico_tree_node *node_in, *node_out; -}; - -static struct pico_devices_rr_info Devices_rr_info = { - NULL, NULL -}; - -static int pico_dev_cmp(void *ka, void *kb) -{ - struct pico_device *a = ka, *b = kb; - if (a->hash < b->hash) - return -1; - - - if (a->hash > b->hash) - return 1; - - return 0; -} - -PICO_TREE_DECLARE(Device_tree, pico_dev_cmp); - -#ifdef PICO_SUPPORT_6LOWPAN -static struct pico_ipv6_link * pico_6lowpan_link_add(struct pico_device *dev, const struct pico_ip6 *prefix) -{ - struct pico_ip6 netmask64 = {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; - struct pico_6lowpan_info *info = (struct pico_6lowpan_info *)dev->eth; - struct pico_ipv6_link *link = NULL; /* Make sure to return NULL */ - struct pico_ip6 newaddr; - - memcpy(newaddr.addr, prefix->addr, PICO_SIZE_IP6); - memcpy(newaddr.addr + 8, info->addr_ext.addr, SIZE_6LOWPAN_EXT); - newaddr.addr[8] = newaddr.addr[8] ^ 0x02; /* Toggle U/L bit */ - - /* RFC6775: No Duplicate Address Detection (DAD) is performed if - * EUI-64-based IPv6 addresses are used (as these addresses are assumed - * to be globally unique). 
*/ - if ((link = pico_ipv6_link_add_no_dad(dev, newaddr, netmask64))) { - if (pico_ipv6_is_linklocal(newaddr.addr)) - pico_6lp_nd_start_soliciting(link, NULL); - else - pico_6lp_nd_register(link); - } - - return link; -} - -static int pico_6lowpan_store_info(struct pico_device *dev, const uint8_t *mac) -{ - if ((dev->eth = PICO_ZALLOC(sizeof(struct pico_6lowpan_info)))) { - memcpy(dev->eth, mac, sizeof(struct pico_6lowpan_info)); - return 0; - } else { - pico_err = PICO_ERR_ENOMEM; - return -1; - } -} -#endif - -#ifdef PICO_SUPPORT_IPV6 -static void device_init_ipv6_final(struct pico_device *dev, struct pico_ip6 *linklocal) -{ - dev->hostvars.basetime = PICO_ND_REACHABLE_TIME; - /* RFC 4861 $6.3.2 value between 0.5 and 1.5 times basetime */ - dev->hostvars.reachabletime = ((5 + (pico_rand() % 10)) * PICO_ND_REACHABLE_TIME) / 10; - dev->hostvars.retranstime = PICO_ND_RETRANS_TIMER; - pico_icmp6_router_solicitation(dev, linklocal, NULL); - dev->hostvars.hoplimit = PICO_IPV6_DEFAULT_HOP; -} - -struct pico_ipv6_link *pico_ipv6_link_add_local(struct pico_device *dev, const struct pico_ip6 *prefix) -{ - struct pico_ip6 netmask64 = {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; - struct pico_ipv6_link *link = NULL; /* Make sure to return NULL */ - struct pico_ip6 newaddr; - - if (0) {} -#ifdef PICO_SUPPORT_6LOWPAN - else if (PICO_DEV_IS_6LOWPAN(dev)) { - link = pico_6lowpan_link_add(dev, prefix); - } -#endif - else { - memcpy(newaddr.addr, prefix->addr, PICO_SIZE_IP6); - /* modified EUI-64 + invert universal/local bit */ - newaddr.addr[8] = (dev->eth->mac.addr[0] ^ 0x02); - newaddr.addr[9] = dev->eth->mac.addr[1]; - newaddr.addr[10] = dev->eth->mac.addr[2]; - newaddr.addr[11] = 0xff; - newaddr.addr[12] = 0xfe; - newaddr.addr[13] = dev->eth->mac.addr[3]; - newaddr.addr[14] = dev->eth->mac.addr[4]; - newaddr.addr[15] = dev->eth->mac.addr[5]; - if ((link = pico_ipv6_link_add(dev, newaddr, netmask64))) { - 
device_init_ipv6_final(dev, &newaddr); - } - } - - return link; -} -#endif -static int device_init_mac(struct pico_device *dev, const uint8_t *mac) -{ -#ifdef PICO_SUPPORT_IPV6 - struct pico_ip6 linklocal = {{0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xff, 0xfe, 0xaa, 0xaa, 0xaa}}; -#endif - - if (0) {} -#ifdef PICO_SUPPORT_6LOWPAN - else if (PICO_DEV_IS_6LOWPAN(dev)) { - if (pico_6lowpan_store_info(dev, mac)) - return -1; - } -#endif - else { - if ((dev->eth = PICO_ZALLOC(sizeof(struct pico_ethdev)))) { - memcpy(dev->eth->mac.addr, mac, PICO_SIZE_ETH); - } else { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - } - -#ifdef PICO_SUPPORT_IPV6 - if (pico_ipv6_link_add_local(dev, &linklocal) == NULL) { - PICO_FREE(dev->q_in); - PICO_FREE(dev->q_out); - PICO_FREE(dev->eth); - return -1; - } -#endif - - return 0; -} - -int pico_device_ipv6_random_ll(struct pico_device *dev) -{ -#ifdef PICO_SUPPORT_IPV6 - struct pico_ip6 linklocal = {{0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xff, 0xfe, 0xaa, 0xaa, 0xaa}}; - struct pico_ip6 netmask6 = {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; - uint32_t len = (uint32_t)strlen(dev->name); - if (strcmp(dev->name, "loop")) { - do { - /* privacy extension + unset universal/local and individual/group bit */ - len = pico_rand(); - linklocal.addr[8] = (uint8_t)((len & 0xffu) & (uint8_t)(~0x03)); - linklocal.addr[9] = (uint8_t)(len >> 8); - linklocal.addr[10] = (uint8_t)(len >> 16); - linklocal.addr[11] = (uint8_t)(len >> 24); - len = pico_rand(); - linklocal.addr[12] = (uint8_t)len; - linklocal.addr[13] = (uint8_t)(len >> 8); - linklocal.addr[14] = (uint8_t)(len >> 16); - linklocal.addr[15] = (uint8_t)(len >> 24); - pico_rand_feed(dev->hash); - } while (pico_ipv6_link_get(&linklocal)); - - if (pico_ipv6_link_add(dev, linklocal, netmask6) == NULL) { - return -1; - } - } - -#endif - return 0; -} - -static int device_init_nomac(struct 
pico_device *dev) -{ - if (pico_device_ipv6_random_ll(dev) < 0) { - PICO_FREE(dev->q_in); - PICO_FREE(dev->q_out); - return -1; - } - - dev->eth = NULL; - return 0; -} - -#define DEBUG_IPV6(ip) { \ - char ipstr[40] = { 0 }; \ - pico_ipv6_to_string(ipstr, (ip).addr); \ - dbg("IPv6 (%s)\n", ipstr); \ - } - -int pico_device_init(struct pico_device *dev, const char *name, const uint8_t *mac) -{ - uint32_t len = (uint32_t)strlen(name); - int ret = 0; - - if(len > MAX_DEVICE_NAME) - len = MAX_DEVICE_NAME; - - memcpy(dev->name, name, len); - dev->hash = pico_hash(dev->name, len); - - Devices_rr_info.node_in = NULL; - Devices_rr_info.node_out = NULL; - dev->q_in = PICO_ZALLOC(sizeof(struct pico_queue)); - if (!dev->q_in) - return -1; - - dev->q_out = PICO_ZALLOC(sizeof(struct pico_queue)); - if (!dev->q_out) { - PICO_FREE(dev->q_in); - return -1; - } - - if (pico_tree_insert(&Device_tree, dev)) { - PICO_FREE(dev->q_in); - PICO_FREE(dev->q_out); - return -1; - } - if (!dev->mtu) - dev->mtu = PICO_DEVICE_DEFAULT_MTU; - -#ifdef PICO_SUPPORT_6LOWPAN - if (PICO_DEV_IS_6LOWPAN(dev) && LL_MODE_ETHERNET == dev->mode) - return -1; -#endif - - if (mac) { - ret = device_init_mac(dev, mac); - } else { - if (!dev->mode) { - ret = device_init_nomac(dev); - } -#ifdef PICO_SUPPORT_6LOWPAN - else { - /* RFC6775: Link Local to be formed based on EUI-64 as per RFC6775 */ - dbg("Link local address to be formed based on EUI-64\n"); - return -1; - } -#endif - } - return ret; -} - -static void pico_queue_destroy(struct pico_queue *q) -{ - if (q) { - pico_queue_empty(q); - PICO_FREE(q); - } -} - -void pico_device_destroy(struct pico_device *dev) -{ - - pico_queue_destroy(dev->q_in); - pico_queue_destroy(dev->q_out); - - if (!dev->mode && dev->eth) - PICO_FREE(dev->eth); - -#ifdef PICO_SUPPORT_IPV4 - pico_ipv4_cleanup_links(dev); -#endif -#ifdef PICO_SUPPORT_IPV6 - pico_ipv6_cleanup_links(dev); -#endif - pico_tree_delete(&Device_tree, dev); - - if (dev->destroy) - dev->destroy(dev); - - 
Devices_rr_info.node_in = NULL; - Devices_rr_info.node_out = NULL; - PICO_FREE(dev); -} - -static int check_dev_serve_interrupt(struct pico_device *dev, int loop_score) -{ - if ((dev->__serving_interrupt) && (dev->dsr)) { - /* call dsr routine */ - loop_score = dev->dsr(dev, loop_score); - } - - return loop_score; -} - -static int check_dev_serve_polling(struct pico_device *dev, int loop_score) -{ - if (dev->poll) { - loop_score = dev->poll(dev, loop_score); - } - - return loop_score; -} - -static int devloop_in(struct pico_device *dev, int loop_score) -{ - struct pico_frame *f; - while(loop_score > 0) { - if (dev->q_in->frames == 0) - break; - - /* Receive */ - f = pico_dequeue(dev->q_in); - if (f) { - pico_datalink_receive(f); - loop_score--; - } - } - return loop_score; -} - -static int devloop_sendto_dev(struct pico_device *dev, struct pico_frame *f) -{ -#ifdef PICO_SUPPORT_6LOWPAN - if (PICO_DEV_IS_6LOWPAN(dev)) { - return (pico_6lowpan_ll_sendto_dev(dev, f) <= 0); - } -#endif - return (dev->send(dev, f->start, (int)f->len) <= 0); -} - -static int devloop_out(struct pico_device *dev, int loop_score) -{ - struct pico_frame *f; - while(loop_score > 0) { - if (dev->q_out->frames == 0) - break; - - /* Device dequeue + send */ - f = pico_queue_peek(dev->q_out); - if (!f) - break; - - if (devloop_sendto_dev(dev, f) == 0) { /* success. */ - f = pico_dequeue(dev->q_out); - pico_frame_discard(f); /* SINGLE POINT OF DISCARD for OUTGOING FRAMES */ - loop_score--; - } else - break; /* Don't discard */ - - } - - return loop_score; -} - -static int devloop(struct pico_device *dev, int loop_score, int direction) -{ - /* If device supports interrupts, read the value of the condition and trigger the dsr */ - loop_score = check_dev_serve_interrupt(dev, loop_score); - - /* If device supports polling, give control. Loop score is managed internally, - * remaining loop points are returned. 
*/ - loop_score = check_dev_serve_polling(dev, loop_score); - - if (direction == PICO_LOOP_DIR_OUT) - loop_score = devloop_out(dev, loop_score); - else - loop_score = devloop_in(dev, loop_score); - - return loop_score; -} - - -static struct pico_tree_node *pico_dev_roundrobin_start(int direction) -{ - if (Devices_rr_info.node_in == NULL) - Devices_rr_info.node_in = pico_tree_firstNode(Device_tree.root); - - if (Devices_rr_info.node_out == NULL) - Devices_rr_info.node_out = pico_tree_firstNode(Device_tree.root); - - if (direction == PICO_LOOP_DIR_IN) - return Devices_rr_info.node_in; - else - return Devices_rr_info.node_out; -} - -static void pico_dev_roundrobin_end(int direction, struct pico_tree_node *last) -{ - if (direction == PICO_LOOP_DIR_IN) - Devices_rr_info.node_in = last; - else - Devices_rr_info.node_out = last; -} - -#define DEV_LOOP_MIN 16 - -int pico_devices_loop(int loop_score, int direction) -{ - struct pico_device *start, *next; - struct pico_tree_node *next_node = pico_dev_roundrobin_start(direction); - - if (!next_node) - return loop_score; - - next = next_node->keyValue; - start = next; - - /* round-robin all devices, break if traversed all devices */ - while ((loop_score > DEV_LOOP_MIN) && (next != NULL)) { - loop_score = devloop(next, loop_score, direction); - next_node = pico_tree_next(next_node); - next = next_node->keyValue; - if (next == NULL) - { - next_node = pico_tree_firstNode(Device_tree.root); - next = next_node->keyValue; - } - - if (next == start) - break; - } - pico_dev_roundrobin_end(direction, next_node); - return loop_score; -} - -struct pico_device *pico_get_device(const char*name) -{ - struct pico_device *dev; - struct pico_tree_node *index; - pico_tree_foreach(index, &Device_tree){ - dev = index->keyValue; - if(strcmp(name, dev->name) == 0) - return dev; - } - return NULL; -} - -int32_t pico_device_broadcast(struct pico_frame *f) -{ - struct pico_tree_node *index; - int32_t ret = -1; - int sent = 0; - - 
pico_tree_foreach(index, &Device_tree) - { - struct pico_device *dev = index->keyValue; - if(dev != f->dev) - { - struct pico_frame *copy = pico_frame_copy(f); - - if(!copy) - break; - - copy->dev = dev; - ret = copy->dev->send(copy->dev, copy->start, (int)copy->len); - /* FIXME: If a device driver returns zero (which means the device - * driver is currently busy) there is no means to retry the - * broadcast operation later. */ - pico_frame_discard(copy); - } - else - { - ret = f->dev->send(f->dev, f->start, (int)f->len); - /* FIXME: If a device driver returns zero (which means the device - * driver is currently busy) there is no means to retry the - * broadcast operation later. */ - } - - /* FIXME: If at least one device driver was able to sent the frame on - * the wire, the broadcast operation will be considered successful. */ - if (ret > 0) { - sent = 1; - } - } - - ret = sent ? f->len : -1; - pico_frame_discard(f); - return ret; -} - -int pico_device_link_state(struct pico_device *dev) -{ - if (!dev->link_state) - return 1; /* Not supported, assuming link is always up */ - - return dev->link_state(dev); -} diff --git a/kernel/picotcp/stack/pico_frame.c b/kernel/picotcp/stack/pico_frame.c deleted file mode 100644 index 7aad7f3..0000000 --- a/kernel/picotcp/stack/pico_frame.c +++ /dev/null @@ -1,329 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - Authors: Daniele Lacamera - *********************************************************************/ - -#include "pico_config.h" -#include "pico_frame.h" -#include "pico_protocol.h" -#include "pico_stack.h" -#include "pico_socket.h" - -#ifdef PICO_SUPPORT_DEBUG_MEMORY -static int n_frames_allocated; -#endif - -/** frame alloc/dealloc/copy **/ -void pico_frame_discard(struct pico_frame *f) -{ - if (!f) - return; - - (*f->usage_count)--; - if (*f->usage_count == 0) { - if (f->flags & PICO_FRAME_FLAG_EXT_USAGE_COUNTER) - PICO_FREE(f->usage_count); - -#ifdef PICO_SUPPORT_DEBUG_MEMORY - dbg("Discarded buffer @%p, caller: %p\n", f->buffer, __builtin_return_address(3)); - dbg("DEBUG MEMORY: %d frames in use.\n", --n_frames_allocated); -#endif - if (!(f->flags & PICO_FRAME_FLAG_EXT_BUFFER)) - PICO_FREE(f->buffer); - else if (f->notify_free) - f->notify_free(f->buffer); - - if (f->info) - PICO_FREE(f->info); - } - -#ifdef PICO_SUPPORT_DEBUG_MEMORY - else { - dbg("Removed frame @%p(copy), usage count now: %d\n", f, *f->usage_count); - } -#endif - PICO_FREE(f); -} - -struct pico_frame *pico_frame_copy(struct pico_frame *f) -{ - struct pico_frame *new = PICO_ZALLOC(sizeof(struct pico_frame)); - if (!new) - return NULL; - - memcpy(new, f, sizeof(struct pico_frame)); - *(new->usage_count) += 1; -#ifdef PICO_SUPPORT_DEBUG_MEMORY - dbg("Copied frame @%p, into %p, usage count now: %d\n", f, new, *new->usage_count); -#endif - new->next = NULL; - return new; -} - -static struct pico_frame *pico_frame_do_alloc(uint32_t size, int zerocopy, int ext_buffer) -{ - struct pico_frame *p = PICO_ZALLOC(sizeof(struct pico_frame)); - uint32_t frame_buffer_size = size; - if (!p) - return NULL; - - if (ext_buffer && !zerocopy) { - /* external buffer implies zerocopy flag! 
*/ - PICO_FREE(p); - return NULL; - } - - if (!zerocopy) { - unsigned int align = size % sizeof(uint32_t); - /* Ensure that usage_count starts on an aligned address */ - if (align) { - frame_buffer_size += (uint32_t)sizeof(uint32_t) - align; - } - - p->buffer = PICO_ZALLOC((size_t)frame_buffer_size + sizeof(uint32_t)); - if (!p->buffer) { - PICO_FREE(p); - return NULL; - } - - p->usage_count = (uint32_t *)(((uint8_t*)p->buffer) + frame_buffer_size); - } else { - p->buffer = NULL; - p->flags |= PICO_FRAME_FLAG_EXT_USAGE_COUNTER; - p->usage_count = PICO_ZALLOC(sizeof(uint32_t)); - if (!p->usage_count) { - PICO_FREE(p); - return NULL; - } - } - - p->buffer_len = size; - - /* By default, frame content is the full buffer. */ - p->start = p->buffer; - p->len = p->buffer_len; - *p->usage_count = 1; - p->net_hdr = p->buffer; - p->datalink_hdr = p->buffer; - p->transport_hdr = p->buffer; - p->app_hdr = p->buffer; - p->payload = p->buffer; - - if (ext_buffer) - p->flags |= PICO_FRAME_FLAG_EXT_BUFFER; - -#ifdef PICO_SUPPORT_DEBUG_MEMORY - dbg("Allocated buffer @%p, len= %d caller: %p\n", p->buffer, p->buffer_len, __builtin_return_address(2)); - dbg("DEBUG MEMORY: %d frames in use.\n", ++n_frames_allocated); -#endif - return p; -} - -struct pico_frame *pico_frame_alloc(uint32_t size) -{ - return pico_frame_do_alloc(size, 0, 0); -} - -static uint8_t * -pico_frame_new_buffer(struct pico_frame *f, uint32_t size, uint32_t *oldsize) -{ - uint8_t *oldbuf; - uint32_t usage_count, *p_old_usage; - uint32_t frame_buffer_size; - unsigned int align; - - if (!f || (size < f->buffer_len)) { - return NULL; - } - - align = size % sizeof(uint32_t); - frame_buffer_size = size; - if (align) { - frame_buffer_size += (uint32_t)sizeof(uint32_t) - align; - } - - oldbuf = f->buffer; - *oldsize = f->buffer_len; - usage_count = *(f->usage_count); - p_old_usage = f->usage_count; - f->buffer = PICO_ZALLOC((size_t)frame_buffer_size + sizeof(uint32_t)); - if (!f->buffer) { - f->buffer = oldbuf; - return 
NULL; - } - - f->usage_count = (uint32_t *)(((uint8_t*)f->buffer) + frame_buffer_size); - *f->usage_count = usage_count; - f->buffer_len = size; - - if (f->flags & PICO_FRAME_FLAG_EXT_USAGE_COUNTER) - PICO_FREE(p_old_usage); - /* Now, the frame is not zerocopy anymore, and the usage counter has been moved within it */ - return oldbuf; -} - -static int -pico_frame_update_pointers(struct pico_frame *f, ptrdiff_t addr_diff, uint8_t *oldbuf) -{ - f->net_hdr += addr_diff; - f->datalink_hdr += addr_diff; - f->transport_hdr += addr_diff; - f->app_hdr += addr_diff; - f->start += addr_diff; - f->payload += addr_diff; - - if (!(f->flags & PICO_FRAME_FLAG_EXT_BUFFER)) - PICO_FREE(oldbuf); - else if (f->notify_free) - f->notify_free(oldbuf); - - f->flags = 0; - return 0; -} - -int pico_frame_grow_head(struct pico_frame *f, uint32_t size) -{ - ptrdiff_t addr_diff = 0; - uint32_t oldsize = 0; - uint8_t *oldbuf = pico_frame_new_buffer(f, size, &oldsize); - if (!oldbuf) - return -1; - - /* Put old buffer at the end of new buffer */ - memcpy(f->buffer + f->buffer_len - oldsize, oldbuf, (size_t)oldsize); - addr_diff = (ptrdiff_t)(f->buffer + f->buffer_len - oldsize - oldbuf); - - return pico_frame_update_pointers(f, addr_diff, oldbuf); -} - -int pico_frame_grow(struct pico_frame *f, uint32_t size) -{ - ptrdiff_t addr_diff = 0; - uint32_t oldsize = 0; - uint8_t *oldbuf = pico_frame_new_buffer(f, size, &oldsize); - if (!oldbuf) - return -1; - - /* Just put old buffer at the beginning of new buffer */ - memcpy(f->buffer, oldbuf, (size_t)oldsize); - addr_diff = (ptrdiff_t)(f->buffer - oldbuf); - - return pico_frame_update_pointers(f, addr_diff, oldbuf); -} - -struct pico_frame *pico_frame_alloc_skeleton(uint32_t size, int ext_buffer) -{ - return pico_frame_do_alloc(size, 1, ext_buffer); -} - -int pico_frame_skeleton_set_buffer(struct pico_frame *f, void *buf) -{ - if (!buf) - return -1; - - f->buffer = (uint8_t *) buf; - f->start = f->buffer; - return 0; -} - -struct pico_frame 
*pico_frame_deepcopy(struct pico_frame *f) -{ - struct pico_frame *new = pico_frame_alloc(f->buffer_len); - ptrdiff_t addr_diff; - unsigned char *buf; - uint32_t *uc; - if (!new) - return NULL; - - /* Save the two key pointers... */ - buf = new->buffer; - uc = new->usage_count; - - /* Overwrite all fields with originals */ - memcpy(new, f, sizeof(struct pico_frame)); - - /* ...restore the two key pointers */ - new->buffer = buf; - new->usage_count = uc; - - /* Update in-buffer pointers with offset */ - addr_diff = (ptrdiff_t)(new->buffer - f->buffer); - new->datalink_hdr += addr_diff; - new->net_hdr += addr_diff; - new->transport_hdr += addr_diff; - new->app_hdr += addr_diff; - new->start += addr_diff; - new->payload += addr_diff; - - if (f->info) { - new->info = PICO_ZALLOC(sizeof(struct pico_remote_endpoint)); - if (!new->info) { - pico_frame_discard(new); - return NULL; - } - memcpy(new->info, f->info, sizeof(struct pico_remote_endpoint)); - } - -#ifdef PICO_SUPPORT_DEBUG_MEMORY - dbg("Deep-Copied frame @%p, into %p, usage count now: %d\n", f, new, *new->usage_count); -#endif - new->next = NULL; - return new; -} - - -static inline uint32_t pico_checksum_adder(uint32_t sum, void *data, uint32_t len) -{ - uint16_t *buf = (uint16_t *)data; - uint16_t *stop; - - if (len & 0x01) { - --len; -#ifdef PICO_BIGENDIAN - sum += (((uint8_t *)data)[len]) << 8; -#else - sum += ((uint8_t *)data)[len]; -#endif - } - - stop = (uint16_t *)(((uint8_t *)data) + len); - - while (buf < stop) { - sum += *buf++; - } - return sum; -} - -static inline uint16_t pico_checksum_finalize(uint32_t sum) -{ - while (sum >> 16) { /* a second carry is possible! 
*/ - sum = (sum & 0x0000FFFF) + (sum >> 16); - } - return short_be((uint16_t) ~sum); -} - -/** - * Calculate checksum of a given string - */ -uint16_t pico_checksum(void *inbuf, uint32_t len) -{ - uint32_t sum; - - sum = pico_checksum_adder(0, inbuf, len); - return pico_checksum_finalize(sum); -} - -/* WARNING: len1 MUST be an EVEN number */ -uint16_t pico_dualbuffer_checksum(void *inbuf1, uint32_t len1, void *inbuf2, uint32_t len2) -{ - uint32_t sum; - - sum = pico_checksum_adder(0, inbuf1, len1); - sum = pico_checksum_adder(sum, inbuf2, len2); - return pico_checksum_finalize(sum); -} - diff --git a/kernel/picotcp/stack/pico_md5.c b/kernel/picotcp/stack/pico_md5.c deleted file mode 100644 index 2c235be..0000000 --- a/kernel/picotcp/stack/pico_md5.c +++ /dev/null @@ -1,43 +0,0 @@ -/********************************************************************* - * PicoTCP. Copyright (c) 2015-2017 Altran Intelligent Systems. Some rights reserved. - * See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- * - * Authors: Daniele Lacamera - * *********************************************************************/ - - -#include - -#if defined (PICO_SUPPORT_CYASSL) -#include - -void pico_md5sum(uint8_t *dst, const uint8_t *src, size_t len) -{ - Md5 md5; - InitMd5(&md5); - Md5Update(&md5, src, len); - Md5Final(&md5, dst); -} - -#elif defined (PICO_SUPPORT_POLARSSL) -#include - -void pico_md5sum(uint8_t *dst, const uint8_t *src, size_t len) -{ - md5(src, len, dst); -} - -#else -static void (*do_pico_md5sum)(uint8_t *dst, const uint8_t *src, size_t len); -void pico_md5sum(uint8_t *dst, const uint8_t *src, size_t len) -{ - if (do_pico_md5sum) { - do_pico_md5sum(dst, src, len); - } -} - -void pico_register_md5sum(void (*md5)(uint8_t *, const uint8_t *, size_t)) -{ - do_pico_md5sum = md5; -} -#endif diff --git a/kernel/picotcp/stack/pico_protocol.c b/kernel/picotcp/stack/pico_protocol.c deleted file mode 100644 index 085a00d..0000000 --- a/kernel/picotcp/stack/pico_protocol.c +++ /dev/null @@ -1,226 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . 
- - Authors: Daniele Lacamera - *********************************************************************/ - - -#include "pico_protocol.h" -#include "pico_tree.h" - -struct pico_proto_rr -{ - struct pico_tree *t; - struct pico_tree_node *node_in, *node_out; -}; - - -static int pico_proto_cmp(void *ka, void *kb) -{ - struct pico_protocol *a = ka, *b = kb; - if (a->hash < b->hash) - return -1; - - if (a->hash > b->hash) - return 1; - - return 0; -} - -static PICO_TREE_DECLARE(Datalink_proto_tree, pico_proto_cmp); -static PICO_TREE_DECLARE(Network_proto_tree, pico_proto_cmp); -static PICO_TREE_DECLARE(Transport_proto_tree, pico_proto_cmp); -static PICO_TREE_DECLARE(Socket_proto_tree, pico_proto_cmp); - -/* Static variables to keep track of the round robin loop */ -static struct pico_proto_rr proto_rr_datalink = { - &Datalink_proto_tree, NULL, NULL -}; -static struct pico_proto_rr proto_rr_network = { - &Network_proto_tree, NULL, NULL -}; -static struct pico_proto_rr proto_rr_transport = { - &Transport_proto_tree, NULL, NULL -}; -static struct pico_proto_rr proto_rr_socket = { - &Socket_proto_tree, NULL, NULL -}; - -static int proto_loop_in(struct pico_protocol *proto, int loop_score) -{ - struct pico_frame *f; - while(loop_score > 0) { - if (proto->q_in->frames == 0) - break; - - f = pico_dequeue(proto->q_in); - if ((f) && (proto->process_in(proto, f) > 0)) { - loop_score--; - } - } - return loop_score; -} - -static int proto_loop_out(struct pico_protocol *proto, int loop_score) -{ - struct pico_frame *f; - while(loop_score > 0) { - if (proto->q_out->frames == 0) - break; - - f = pico_dequeue(proto->q_out); - if ((f) && (proto->process_out(proto, f) > 0)) { - loop_score--; - } - } - return loop_score; -} - -static int proto_loop(struct pico_protocol *proto, int loop_score, int direction) -{ - - if (direction == PICO_LOOP_DIR_IN) - loop_score = proto_loop_in(proto, loop_score); - else if (direction == PICO_LOOP_DIR_OUT) - loop_score = proto_loop_out(proto, loop_score); - - 
return loop_score; -} - -static struct pico_tree_node *roundrobin_init(struct pico_proto_rr *rr, int direction) -{ - struct pico_tree_node *next_node = NULL; - /* Initialization (takes place only once) */ - if (rr->node_in == NULL) - rr->node_in = pico_tree_firstNode(rr->t->root); - - if (rr->node_out == NULL) - rr->node_out = pico_tree_firstNode(rr->t->root); - - if (direction == PICO_LOOP_DIR_IN) - next_node = rr->node_in; - else - next_node = rr->node_out; - - return next_node; -} - -static void roundrobin_end(struct pico_proto_rr *rr, int direction, struct pico_tree_node *last) -{ - if (direction == PICO_LOOP_DIR_IN) - rr->node_in = last; - else - rr->node_out = last; -} - -static int pico_protocol_generic_loop(struct pico_proto_rr *rr, int loop_score, int direction) -{ - struct pico_protocol *start, *next; - struct pico_tree_node *next_node = roundrobin_init(rr, direction); - - if (!next_node) - return loop_score; - - next = next_node->keyValue; - - /* init start node */ - start = next; - - /* round-robin all layer protocols, break if traversed all protocols */ - while (loop_score > 1 && next != NULL) { - loop_score = proto_loop(next, loop_score, direction); - next_node = pico_tree_next(next_node); - next = next_node->keyValue; - if (next == NULL) - { - next_node = pico_tree_firstNode(rr->t->root); - next = next_node->keyValue; - } - - if (next == start) - break; - } - roundrobin_end(rr, direction, next_node); - return loop_score; -} - -int pico_protocol_datalink_loop(int loop_score, int direction) -{ - return pico_protocol_generic_loop(&proto_rr_datalink, loop_score, direction); -} - -int pico_protocol_network_loop(int loop_score, int direction) -{ - return pico_protocol_generic_loop(&proto_rr_network, loop_score, direction); -} - -int pico_protocol_transport_loop(int loop_score, int direction) -{ - return pico_protocol_generic_loop(&proto_rr_transport, loop_score, direction); -} - -int pico_protocol_socket_loop(int loop_score, int direction) -{ - return 
pico_protocol_generic_loop(&proto_rr_socket, loop_score, direction); -} - -int pico_protocols_loop(int loop_score) -{ -/* - loop_score = pico_protocol_datalink_loop(loop_score); - loop_score = pico_protocol_network_loop(loop_score); - loop_score = pico_protocol_transport_loop(loop_score); - loop_score = pico_protocol_socket_loop(loop_score); - */ - return loop_score; -} - -static void proto_layer_rr_reset(struct pico_proto_rr *rr) -{ - rr->node_in = NULL; - rr->node_out = NULL; -} - -void pico_protocol_init(struct pico_protocol *p) -{ - struct pico_tree *tree = NULL; - struct pico_proto_rr *proto = NULL; - - if (!p) - return; - - p->hash = pico_hash(p->name, (uint32_t)strlen(p->name)); - switch (p->layer) { - case PICO_LAYER_DATALINK: - tree = &Datalink_proto_tree; - proto = &proto_rr_datalink; - break; - case PICO_LAYER_NETWORK: - tree = &Network_proto_tree; - proto = &proto_rr_network; - break; - case PICO_LAYER_TRANSPORT: - tree = &Transport_proto_tree; - proto = &proto_rr_transport; - break; - case PICO_LAYER_SOCKET: - tree = &Socket_proto_tree; - proto = &proto_rr_socket; - break; - default: - dbg("Unknown protocol: %s (layer: %d)\n", p->name, p->layer); - return; - } - - if (pico_tree_insert(tree, p)) { - dbg("Failed to insert protocol %s\n", p->name); - return; - } - - proto_layer_rr_reset(proto); - dbg("Protocol %s registered (layer: %d).\n", p->name, p->layer); -} - diff --git a/kernel/picotcp/stack/pico_socket.c b/kernel/picotcp/stack/pico_socket.c deleted file mode 100644 index 8f11a3e..0000000 --- a/kernel/picotcp/stack/pico_socket.c +++ /dev/null @@ -1,2287 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. 
- - - Authors: Daniele Lacamera - *********************************************************************/ - - -#include "pico_config.h" -#include "pico_queue.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_stack.h" -#include "pico_icmp4.h" -#include "pico_nat.h" -#include "pico_tree.h" -#include "pico_device.h" -#include "pico_socket_multicast.h" -#include "pico_socket_tcp.h" -#include "pico_socket_udp.h" - -#if defined (PICO_SUPPORT_IPV4) || defined (PICO_SUPPORT_IPV6) -#if defined (PICO_SUPPORT_TCP) || defined (PICO_SUPPORT_UDP) - - -#define PROTO(s) ((s)->proto->proto_number) -#define PICO_MIN_MSS (1280) -#define TCP_STATE(s) (s->state & PICO_SOCKET_STATE_TCP) - -#ifdef PICO_SUPPORT_MUTEX -static void *Mutex = NULL; -#endif - -/* Mockables */ -#if defined UNIT_TEST -# define MOCKABLE __attribute__((weak)) -#else -# define MOCKABLE -#endif - -#define PROTO(s) ((s)->proto->proto_number) - -#define PICO_SOCKET_MTU 1480 /* Ethernet MTU(1500) - IP header size(20) */ - -#ifdef PICO_SUPPORT_IPV4FRAG - -#ifdef DEBUG_FRAG -#define frag_dbg dbg -#else -#define frag_dbg(...) 
do {} while(0) -#endif - -#endif - -static struct pico_sockport *sp_udp = NULL, *sp_tcp = NULL; - -struct pico_frame *pico_socket_frame_alloc(struct pico_socket *s, struct pico_device *dev, uint16_t len); - -static int socket_cmp_family(struct pico_socket *a, struct pico_socket *b) -{ - uint32_t a_is_ip6 = is_sock_ipv6(a); - uint32_t b_is_ip6 = is_sock_ipv6(b); - (void)a; - (void)b; - if (a_is_ip6 < b_is_ip6) - return -1; - - if (a_is_ip6 > b_is_ip6) - return 1; - - return 0; -} - - -static int socket_cmp_ipv6(struct pico_socket *a, struct pico_socket *b) -{ - int ret = 0; - (void)a; - (void)b; -#ifdef PICO_SUPPORT_IPV6 - if (!is_sock_ipv6(a) || !is_sock_ipv6(b)) - return 0; - - if ((memcmp(a->local_addr.ip6.addr, PICO_IP6_ANY, PICO_SIZE_IP6) == 0) || (memcmp(b->local_addr.ip6.addr, PICO_IP6_ANY, PICO_SIZE_IP6) == 0)) - ret = 0; - else - ret = memcmp(a->local_addr.ip6.addr, b->local_addr.ip6.addr, PICO_SIZE_IP6); - -#endif - return ret; -} - -static int socket_cmp_ipv4(struct pico_socket *a, struct pico_socket *b) -{ - int ret = 0; - (void)a; - (void)b; - if (!is_sock_ipv4(a) || !is_sock_ipv4(b)) - return 0; - -#ifdef PICO_SUPPORT_IPV4 - if ((a->local_addr.ip4.addr == PICO_IP4_ANY) || (b->local_addr.ip4.addr == PICO_IP4_ANY)) - ret = 0; - else - ret = (int)(a->local_addr.ip4.addr - b->local_addr.ip4.addr); - -#endif - return ret; -} - -static int socket_cmp_remotehost(struct pico_socket *a, struct pico_socket *b) -{ - int ret = 0; - if (is_sock_ipv6(a)) - ret = memcmp(a->remote_addr.ip6.addr, b->remote_addr.ip6.addr, PICO_SIZE_IP6); - else - ret = (int)(a->remote_addr.ip4.addr - b->remote_addr.ip4.addr); - - return ret; -} - -static int socket_cmp_addresses(struct pico_socket *a, struct pico_socket *b) -{ - int ret = 0; - /* At this point, sort by local host */ - ret = socket_cmp_ipv6(a, b); - - if (ret == 0) - ret = socket_cmp_ipv4(a, b); - - /* Sort by remote host */ - if (ret == 0) - ret = socket_cmp_remotehost(a, b); - - return ret; -} - -static int 
socket_cmp(void *ka, void *kb) -{ - struct pico_socket *a = ka, *b = kb; - int ret = 0; - - /* First, order by network family */ - ret = socket_cmp_family(a, b); - - /* Then, compare by source/destination addresses */ - if (ret == 0) - ret = socket_cmp_addresses(a, b); - - /* And finally by remote port. The two sockets are coincident if the quad is the same. */ - if (ret == 0) - ret = b->remote_port - a->remote_port; - - return ret; -} - - -#define INIT_SOCKPORT { {&LEAF, socket_cmp}, 0, 0 } - -static int sockport_cmp(void *ka, void *kb) -{ - struct pico_sockport *a = ka, *b = kb; - if (a->number < b->number) - return -1; - - if (a->number > b->number) - return 1; - - return 0; -} - -static PICO_TREE_DECLARE(UDPTable, sockport_cmp); -static PICO_TREE_DECLARE(TCPTable, sockport_cmp); - -struct pico_sockport *pico_get_sockport(uint16_t proto, uint16_t port) -{ - struct pico_sockport test = INIT_SOCKPORT; - test.number = port; - - if (proto == PICO_PROTO_UDP) - return pico_tree_findKey(&UDPTable, &test); - - else if (proto == PICO_PROTO_TCP) - return pico_tree_findKey(&TCPTable, &test); - - else return NULL; -} - -#ifdef PICO_SUPPORT_IPV4 - -static int pico_port_in_use_by_nat(uint16_t proto, uint16_t port) -{ - int ret = 0; - (void) proto; - (void) port; -#ifdef PICO_SUPPORT_NAT - if (pico_ipv4_nat_find(port, NULL, 0, (uint8_t)proto)) { - dbg("In use by nat....\n"); - ret = 1; - } - -#endif - return ret; -} - -static int pico_port_in_use_with_this_ipv4_address(struct pico_sockport *sp, struct pico_ip4 ip) -{ - if (sp) { - struct pico_ip4 *s_local; - struct pico_tree_node *idx; - struct pico_socket *s; - pico_tree_foreach(idx, &sp->socks) { - s = idx->keyValue; - if (s->net == &pico_proto_ipv4) { - s_local = (struct pico_ip4*) &s->local_addr; - if ((s_local->addr == PICO_IPV4_INADDR_ANY) || (s_local->addr == ip.addr)) { - return 1; - } - } - } - } - - return 0; -} - - -static int pico_port_in_use_ipv4(struct pico_sockport *sp, void *addr) -{ - struct pico_ip4 ip; - /* 
IPv4 */ - if (addr) - ip.addr = ((struct pico_ip4 *)addr)->addr; - else - ip.addr = PICO_IPV4_INADDR_ANY; - - if (ip.addr == PICO_IPV4_INADDR_ANY) { - if (!sp) - return 0; - else { - dbg("In use, and asked for ANY\n"); - return 1; - } - } - - return pico_port_in_use_with_this_ipv4_address(sp, ip); -} -#endif - -#ifdef PICO_SUPPORT_IPV6 -static int pico_port_in_use_with_this_ipv6_address(struct pico_sockport *sp, struct pico_ip6 ip) -{ - if (sp) { - struct pico_ip6 *s_local; - struct pico_tree_node *idx; - struct pico_socket *s; - pico_tree_foreach(idx, &sp->socks) { - s = idx->keyValue; - if (s->net == &pico_proto_ipv6) { - s_local = (struct pico_ip6*) &s->local_addr; - if ((pico_ipv6_is_unspecified(s_local->addr)) || (!memcmp(s_local->addr, ip.addr, PICO_SIZE_IP6))) { - return 1; - } - } - } - } - - return 0; -} - -static int pico_port_in_use_ipv6(struct pico_sockport *sp, void *addr) -{ - struct pico_ip6 ip; - /* IPv6 */ - if (addr) - memcpy(ip.addr, ((struct pico_ip6 *)addr)->addr, sizeof(struct pico_ip6)); - else - memcpy(ip.addr, PICO_IP6_ANY, sizeof(struct pico_ip6)); - - if (memcmp(ip.addr, PICO_IP6_ANY, sizeof(struct pico_ip6)) == 0) { - if (!sp) - return 0; - else { - dbg("In use, and asked for ANY\n"); - return 1; - } - } - - return pico_port_in_use_with_this_ipv6_address(sp, ip); -} -#endif - - - -static int pico_generic_port_in_use(uint16_t proto, uint16_t port, struct pico_sockport *sp, void *addr, void *net) -{ -#ifdef PICO_SUPPORT_IPV4 - if (net == &pico_proto_ipv4) - { - if (pico_port_in_use_by_nat(proto, port)) { - return 1; - } - - if (pico_port_in_use_ipv4(sp, addr)) { - return 1; - } - } - -#endif - -#ifdef PICO_SUPPORT_IPV6 - if (net == &pico_proto_ipv6) - { - if (pico_port_in_use_ipv6(sp, addr)) { - return 1; - } - } - -#endif - - return 0; -} - -int pico_is_port_free(uint16_t proto, uint16_t port, void *addr, void *net) -{ - struct pico_sockport *sp; - sp = pico_get_sockport(proto, port); - - if (pico_generic_port_in_use(proto, port, sp, 
addr, net)) - return 0; - - return 1; -} - -static int pico_check_socket(struct pico_socket *s) -{ - struct pico_sockport *test; - struct pico_socket *found; - struct pico_tree_node *index; - - test = pico_get_sockport(PROTO(s), s->local_port); - - if (!test) { - return -1; - } - - pico_tree_foreach(index, &test->socks){ - found = index->keyValue; - if (s == found) { - return 0; - } - } - - return -1; -} - -struct pico_socket *pico_sockets_find(uint16_t local, uint16_t remote) -{ - struct pico_socket *sock = NULL; - struct pico_tree_node *index = NULL; - struct pico_sockport *sp = NULL; - - sp = pico_get_sockport(PICO_PROTO_TCP, local); - if(sp) - { - pico_tree_foreach(index, &sp->socks) - { - if(((struct pico_socket *)index->keyValue)->remote_port == remote) - { - sock = (struct pico_socket *)index->keyValue; - break; - } - } - } - - return sock; -} - - -int8_t pico_socket_add(struct pico_socket *s) -{ - struct pico_sockport *sp; - if (PROTO(s) != PICO_PROTO_UDP && PROTO(s) != PICO_PROTO_TCP) - { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - sp = pico_get_sockport(PROTO(s), s->local_port); - PICOTCP_MUTEX_LOCK(Mutex); - if (!sp) { - /* dbg("Creating sockport..%04x\n", s->local_port); / * In comment due to spam during test * / */ - sp = PICO_ZALLOC(sizeof(struct pico_sockport)); - - if (!sp) { - pico_err = PICO_ERR_ENOMEM; - PICOTCP_MUTEX_UNLOCK(Mutex); - return -1; - } - - sp->proto = PROTO(s); - sp->number = s->local_port; - sp->socks.root = &LEAF; - sp->socks.compare = socket_cmp; - - if (PROTO(s) == PICO_PROTO_UDP) - { - if (pico_tree_insert(&UDPTable, sp)) { - PICO_FREE(sp); - PICOTCP_MUTEX_UNLOCK(Mutex); - return -1; - } - - } - else if (PROTO(s) == PICO_PROTO_TCP) - { - if (pico_tree_insert(&TCPTable, sp)) { - PICO_FREE(sp); - PICOTCP_MUTEX_UNLOCK(Mutex); - return -1; - } - } - } - - if (pico_tree_insert(&sp->socks, s)) { - PICOTCP_MUTEX_UNLOCK(Mutex); - return -1; - } - s->state |= PICO_SOCKET_STATE_BOUND; - PICOTCP_MUTEX_UNLOCK(Mutex); -#ifdef 
DEBUG_SOCKET_TREE - { - struct pico_tree_node *index; - pico_tree_foreach(index, &sp->socks){ - s = index->keyValue; - dbg(">>>> List Socket lc=%hu rm=%hu\n", short_be(s->local_port), short_be(s->remote_port)); - } - - } -#endif - return 0; -} - - -static void socket_clean_queues(struct pico_socket *sock) -{ - struct pico_frame *f_in = pico_dequeue(&sock->q_in); - struct pico_frame *f_out = pico_dequeue(&sock->q_out); - while(f_in || f_out) - { - if(f_in) - { - pico_frame_discard(f_in); - f_in = pico_dequeue(&sock->q_in); - } - - if(f_out) - { - pico_frame_discard(f_out); - f_out = pico_dequeue(&sock->q_out); - } - } - pico_queue_deinit(&sock->q_in); - pico_queue_deinit(&sock->q_out); - pico_socket_tcp_cleanup(sock); -} - -static void socket_garbage_collect(pico_time now, void *arg) -{ - struct pico_socket *s = (struct pico_socket *) arg; - IGNORE_PARAMETER(now); - - socket_clean_queues(s); - PICO_FREE(s); -} - - -static void pico_socket_check_empty_sockport(struct pico_socket *s, struct pico_sockport *sp) -{ - if(pico_tree_empty(&sp->socks)) { - if (PROTO(s) == PICO_PROTO_UDP) - { - pico_tree_delete(&UDPTable, sp); - } - else if (PROTO(s) == PICO_PROTO_TCP) - { - pico_tree_delete(&TCPTable, sp); - } - - if(sp_tcp == sp) - sp_tcp = NULL; - - if(sp_udp == sp) - sp_udp = NULL; - - PICO_FREE(sp); - } -} - -int8_t pico_socket_del(struct pico_socket *s) -{ - struct pico_sockport *sp = pico_get_sockport(PROTO(s), s->local_port); - if (!sp) { - pico_err = PICO_ERR_ENXIO; - return -1; - } - - PICOTCP_MUTEX_LOCK(Mutex); - pico_tree_delete(&sp->socks, s); - pico_socket_check_empty_sockport(s, sp); -#ifdef PICO_SUPPORT_MCAST - pico_multicast_delete(s); -#endif - pico_socket_tcp_delete(s); - s->state = PICO_SOCKET_STATE_CLOSED; - if (!pico_timer_add((pico_time)10, socket_garbage_collect, s)) { - dbg("SOCKET: Failed to start garbage collect timer, doing garbage collection now\n"); - PICOTCP_MUTEX_UNLOCK(Mutex); - socket_garbage_collect((pico_time)0, s); - return -1; - } - 
PICOTCP_MUTEX_UNLOCK(Mutex); - return 0; -} - -static void pico_socket_update_tcp_state(struct pico_socket *s, uint16_t tcp_state) -{ - if (tcp_state) { - s->state &= 0x00FF; - s->state |= tcp_state; - } -} - -static int8_t pico_socket_alter_state(struct pico_socket *s, uint16_t more_states, uint16_t less_states, uint16_t tcp_state) -{ - struct pico_sockport *sp; - if (more_states & PICO_SOCKET_STATE_BOUND) - return pico_socket_add(s); - - if (less_states & PICO_SOCKET_STATE_BOUND) - return pico_socket_del(s); - - sp = pico_get_sockport(PROTO(s), s->local_port); - if (!sp) { - pico_err = PICO_ERR_ENXIO; - return -1; - } - - s->state |= more_states; - s->state = (uint16_t)(s->state & (~less_states)); - pico_socket_update_tcp_state(s, tcp_state); - return 0; -} - - -static int pico_socket_transport_deliver(struct pico_protocol *p, struct pico_sockport *sp, struct pico_frame *f) -{ -#ifdef PICO_SUPPORT_TCP - if (p->proto_number == PICO_PROTO_TCP) - return pico_socket_tcp_deliver(sp, f); - -#endif - -#ifdef PICO_SUPPORT_UDP - if (p->proto_number == PICO_PROTO_UDP) - return pico_socket_udp_deliver(sp, f); - -#endif - - return -1; -} - - -static int pico_socket_deliver(struct pico_protocol *p, struct pico_frame *f, uint16_t localport) -{ - struct pico_sockport *sp = NULL; - struct pico_trans *tr = (struct pico_trans *) f->transport_hdr; - - if (!tr) - return -1; - - sp = pico_get_sockport(p->proto_number, localport); - if (!sp) { - dbg("No such port %d\n", short_be(localport)); - return -1; - } - - return pico_socket_transport_deliver(p, sp, f); -} - -int pico_socket_set_family(struct pico_socket *s, uint16_t family) -{ - (void) family; - - #ifdef PICO_SUPPORT_IPV4 - if (family == PICO_PROTO_IPV4) - s->net = &pico_proto_ipv4; - - #endif - - #ifdef PICO_SUPPORT_IPV6 - if (family == PICO_PROTO_IPV6) - s->net = &pico_proto_ipv6; - - #endif - - if (s->net == NULL) - return -1; - - return 0; -} - -static struct pico_socket *pico_socket_transport_open(uint16_t proto, uint16_t 
family) -{ - struct pico_socket *s = NULL; - (void)family; -#ifdef PICO_SUPPORT_UDP - if (proto == PICO_PROTO_UDP) - s = pico_socket_udp_open(); - -#endif - -#ifdef PICO_SUPPORT_TCP - if (proto == PICO_PROTO_TCP) - s = pico_socket_tcp_open(family); - -#endif - - return s; - -} - -struct pico_socket *MOCKABLE pico_socket_open(uint16_t net, uint16_t proto, void (*wakeup)(uint16_t ev, struct pico_socket *)) -{ - - struct pico_socket *s = NULL; - - s = pico_socket_transport_open(proto, net); - - if (!s) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - return NULL; - } - - if (pico_socket_set_family(s, net) != 0) { - PICO_FREE(s); - pico_err = PICO_ERR_ENETUNREACH; - return NULL; - } - - s->q_in.max_size = PICO_DEFAULT_SOCKETQ; - s->q_out.max_size = PICO_DEFAULT_SOCKETQ; - - s->wakeup = wakeup; - return s; -} - - -static void pico_socket_clone_assign_address(struct pico_socket *s, struct pico_socket *facsimile) -{ - -#ifdef PICO_SUPPORT_IPV4 - if (facsimile->net == &pico_proto_ipv4) { - s->net = &pico_proto_ipv4; - memcpy(&s->local_addr, &facsimile->local_addr, sizeof(struct pico_ip4)); - memcpy(&s->remote_addr, &facsimile->remote_addr, sizeof(struct pico_ip4)); - } - -#endif - -#ifdef PICO_SUPPORT_IPV6 - if (facsimile->net == &pico_proto_ipv6) { - s->net = &pico_proto_ipv6; - memcpy(&s->local_addr, &facsimile->local_addr, sizeof(struct pico_ip6)); - memcpy(&s->remote_addr, &facsimile->remote_addr, sizeof(struct pico_ip6)); - } - -#endif - -} - -struct pico_socket *pico_socket_clone(struct pico_socket *facsimile) -{ - struct pico_socket *s = NULL; - - s = pico_socket_transport_open(facsimile->proto->proto_number, facsimile->net->proto_number); - if (!s) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - return NULL; - } - - s->local_port = facsimile->local_port; - s->remote_port = facsimile->remote_port; - s->state = facsimile->state; - pico_socket_clone_assign_address(s, facsimile); - if (!s->net) { - PICO_FREE(s); - pico_err = PICO_ERR_ENETUNREACH; - return NULL; - } - - 
s->q_in.max_size = PICO_DEFAULT_SOCKETQ; - s->q_out.max_size = PICO_DEFAULT_SOCKETQ; - s->wakeup = NULL; - return s; -} - -static int pico_socket_transport_read(struct pico_socket *s, void *buf, int len) -{ - if (PROTO(s) == PICO_PROTO_UDP) - { - /* make sure cast to uint16_t doesn't give unexpected results */ - if(len > 0xFFFF) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return pico_socket_udp_recv(s, buf, (uint16_t)len, NULL, NULL); - } - else if (PROTO(s) == PICO_PROTO_TCP) - return pico_socket_tcp_read(s, buf, (uint32_t)len); - else return 0; -} - -int pico_socket_read(struct pico_socket *s, void *buf, int len) -{ - if (!s || buf == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* check if exists in tree */ - /* See task #178 */ - if (pico_check_socket(s) != 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - if ((s->state & PICO_SOCKET_STATE_BOUND) == 0) { - pico_err = PICO_ERR_EIO; - return -1; - } - - return pico_socket_transport_read(s, buf, len); -} - -static int pico_socket_write_check_state(struct pico_socket *s) -{ - if ((s->state & PICO_SOCKET_STATE_BOUND) == 0) { - pico_err = PICO_ERR_EIO; - return -1; - } - - if ((s->state & PICO_SOCKET_STATE_CONNECTED) == 0) { - pico_err = PICO_ERR_ENOTCONN; - return -1; - } - - if (s->state & PICO_SOCKET_STATE_SHUT_LOCAL) { /* check if in shutdown state */ - pico_err = PICO_ERR_ESHUTDOWN; - return -1; - } - - return 0; -} - -static int pico_socket_write_attempt(struct pico_socket *s, const void *buf, int len) -{ - if (pico_socket_write_check_state(s) < 0) { - return -1; - } else { - return pico_socket_sendto(s, buf, len, &s->remote_addr, s->remote_port); - } -} - -int pico_socket_write(struct pico_socket *s, const void *buf, int len) -{ - if (!s || buf == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* check if exists in tree */ - /* See task #178 */ - if (pico_check_socket(s) != 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - return 
pico_socket_write_attempt(s, buf, len); -} - -static uint16_t pico_socket_high_port(uint16_t proto) -{ - uint16_t port; - if (0 || -#ifdef PICO_SUPPORT_TCP - (proto == PICO_PROTO_TCP) || -#endif -#ifdef PICO_SUPPORT_UDP - (proto == PICO_PROTO_UDP) || -#endif - 0) { - do { - uint32_t rand = pico_rand(); - port = (uint16_t) (rand & 0xFFFFU); - port = (uint16_t)((port % (65535 - 1024)) + 1024U); - if (pico_is_port_free(proto, port, NULL, NULL)) { - return short_be(port); - } - } while(1); - } - else return 0U; -} - -static void *pico_socket_sendto_get_ip4_src(struct pico_socket *s, struct pico_ip4 *dst) -{ - struct pico_ip4 *src4 = NULL; - -#ifdef PICO_SUPPORT_IPV4 - /* Check if socket is connected: destination address MUST match the - * current connected endpoint - */ - if ((s->state & PICO_SOCKET_STATE_CONNECTED)) { - src4 = &s->local_addr.ip4; - if (s->remote_addr.ip4.addr != ((struct pico_ip4 *)dst)->addr ) { - pico_err = PICO_ERR_EADDRNOTAVAIL; - return NULL; - } - } else { - - src4 = pico_ipv4_source_find(dst); - if (!src4) { - pico_err = PICO_ERR_EHOSTUNREACH; - return NULL; - } - - } - - if (src4->addr != PICO_IPV4_INADDR_ANY) - s->local_addr.ip4.addr = src4->addr; - -#else - pico_err = PICO_ERR_EPROTONOSUPPORT; -#endif - return src4; -} - -static void *pico_socket_sendto_get_ip6_src(struct pico_socket *s, struct pico_ip6 *dst) -{ - struct pico_ip6 *src6 = NULL; - (void)s; - (void)dst; - -#ifdef PICO_SUPPORT_IPV6 - - /* Check if socket is connected: destination address MUST match the - * current connected endpoint - */ - if ((s->state & PICO_SOCKET_STATE_CONNECTED)) { - src6 = &s->local_addr.ip6; - if (memcmp(&s->remote_addr, dst, PICO_SIZE_IP6)) { - pico_err = PICO_ERR_EADDRNOTAVAIL; - return NULL; - } - } else { - src6 = pico_ipv6_source_find(dst); - if (!src6) { - pico_err = PICO_ERR_EHOSTUNREACH; - return NULL; - } - - if (!pico_ipv6_is_unspecified(src6->addr)) - s->local_addr.ip6 = *src6; - } - -#else - pico_err = PICO_ERR_EPROTONOSUPPORT; -#endif - 
return src6; -} - - -static int pico_socket_sendto_dest_check(struct pico_socket *s, void *dst, uint16_t port) -{ - - /* For the sendto call to be valid, - * dst and remote_port should be always populated. - */ - if (!dst || !port) { - pico_err = PICO_ERR_EADDRNOTAVAIL; - return -1; - } - - /* When coming from pico_socket_send (or _write), - * the destination is automatically assigned to the currently connected endpoint. - * This check will ensure that there is no mismatch when sendto() is called directly - * on a connected socket - */ - if ((s->state & PICO_SOCKET_STATE_CONNECTED) != 0) { - if (port != s->remote_port) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - return 0; -} - -static int pico_socket_sendto_initial_checks(struct pico_socket *s, const void *buf, const int len, void *dst, uint16_t remote_port) -{ - if (len < 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (buf == NULL || s == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return pico_socket_sendto_dest_check(s, dst, remote_port); -} - -static void *pico_socket_sendto_get_src(struct pico_socket *s, void *dst) -{ - void *src = NULL; - if (is_sock_ipv4(s)) - src = pico_socket_sendto_get_ip4_src(s, (struct pico_ip4 *)dst); - - if (is_sock_ipv6(s)) - src = pico_socket_sendto_get_ip6_src(s, (struct pico_ip6 *)dst); - - return src; -} - -static struct pico_remote_endpoint *pico_socket_sendto_destination_ipv4(struct pico_socket *s, struct pico_ip4 *dst, uint16_t port) -{ - struct pico_remote_endpoint *ep = NULL; - (void)s; - ep = PICO_ZALLOC(sizeof(struct pico_remote_endpoint)); - if (!ep) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - ep->remote_addr.ip4.addr = ((struct pico_ip4 *)dst)->addr; - ep->remote_port = port; - return ep; -} - -static void pico_endpoint_free(struct pico_remote_endpoint *ep) -{ - if (ep) - PICO_FREE(ep); -} - -static struct pico_remote_endpoint *pico_socket_sendto_destination_ipv6(struct pico_socket *s, struct pico_ip6 *dst, uint16_t port) 
-{ - struct pico_remote_endpoint *ep = NULL; - (void)s; - (void)dst; - (void)port; -#ifdef PICO_SUPPORT_IPV6 - ep = PICO_ZALLOC(sizeof(struct pico_remote_endpoint)); - if (!ep) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - memcpy(&ep->remote_addr.ip6, dst, sizeof(struct pico_ip6)); - ep->remote_port = port; -#endif - return ep; -} - - -static struct pico_remote_endpoint *pico_socket_sendto_destination(struct pico_socket *s, void *dst, uint16_t port) -{ - struct pico_remote_endpoint *ep = NULL; - (void)pico_socket_sendto_destination_ipv6; - /* socket remote info could change in a consecutive call, make persistent */ -# ifdef PICO_SUPPORT_UDP - if (PROTO(s) == PICO_PROTO_UDP) { -# ifdef PICO_SUPPORT_IPV6 - if (is_sock_ipv6(s)) - ep = pico_socket_sendto_destination_ipv6(s, (struct pico_ip6 *)dst, port); - -# endif -# ifdef PICO_SUPPORT_IPV4 - if (is_sock_ipv4(s)) - ep = pico_socket_sendto_destination_ipv4(s, (struct pico_ip4 *)dst, port); - -# endif - } - -# endif - return ep; -} - -static int32_t pico_socket_sendto_set_localport(struct pico_socket *s) -{ - - if ((s->state & PICO_SOCKET_STATE_BOUND) == 0) { - s->local_port = pico_socket_high_port(s->proto->proto_number); - if (s->local_port == 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - pico_socket_alter_state(s, PICO_SOCKET_STATE_BOUND, 0, 0); - } - - return s->local_port; -} - -static int pico_socket_sendto_transport_offset(struct pico_socket *s) -{ - int header_offset = -1; - #ifdef PICO_SUPPORT_TCP - if (PROTO(s) == PICO_PROTO_TCP) - header_offset = pico_tcp_overhead(s); - - #endif - - #ifdef PICO_SUPPORT_UDP - if (PROTO(s) == PICO_PROTO_UDP) - header_offset = sizeof(struct pico_udp_hdr); - - #endif - return header_offset; -} - - -static struct pico_remote_endpoint *pico_socket_set_info(struct pico_remote_endpoint *ep) -{ - struct pico_remote_endpoint *info; - info = PICO_ZALLOC(sizeof(struct pico_remote_endpoint)); - if (!info) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - 
memcpy(info, ep, sizeof(struct pico_remote_endpoint)); - return info; -} - -static void pico_xmit_frame_set_nofrag(struct pico_frame *f) -{ -#ifdef PICO_SUPPORT_IPV4FRAG - f->frag = PICO_IPV4_DONTFRAG; -#else - (void)f; -#endif -} - -static int pico_socket_final_xmit(struct pico_socket *s, struct pico_frame *f) -{ - if (s->proto->push(s->proto, f) > 0) { - return f->payload_len; - } else { - pico_frame_discard(f); - return 0; - } -} - -static int pico_socket_xmit_one(struct pico_socket *s, const void *buf, const int len, void *src, - struct pico_remote_endpoint *ep, struct pico_msginfo *msginfo) -{ - struct pico_frame *f; - struct pico_device *dev = NULL; - uint16_t hdr_offset = (uint16_t)pico_socket_sendto_transport_offset(s); - int ret = 0; - (void)src; - - if (msginfo) { - dev = msginfo->dev; - } -#ifdef PICO_SUPPORT_IPV6 - else if (IS_SOCK_IPV6(s) && ep && pico_ipv6_is_multicast(&ep->remote_addr.ip6.addr[0])) { - dev = pico_ipv6_link_find(src); - } -#endif - else if (IS_SOCK_IPV6(s) && ep) { - dev = pico_ipv6_source_dev_find(&ep->remote_addr.ip6); - } else if (IS_SOCK_IPV4(s) && ep) { - dev = pico_ipv4_source_dev_find(&ep->remote_addr.ip4); - } else { - dev = get_sock_dev(s); - } - - if (!dev) { - return -1; - } - - f = pico_socket_frame_alloc(s, dev, (uint16_t)(len + hdr_offset)); - if (!f) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - f->payload += hdr_offset; - f->payload_len = (uint16_t)(len); - f->sock = s; - transport_flags_update(f, s); - pico_xmit_frame_set_nofrag(f); - if (ep && !f->info) { - f->info = pico_socket_set_info(ep); - if (!f->info) { - pico_frame_discard(f); - return -1; - } - } - - if (msginfo) { - f->send_ttl = (uint8_t)msginfo->ttl; - f->send_tos = (uint8_t)msginfo->tos; - } - - memcpy(f->payload, (const uint8_t *)buf, f->payload_len); - /* dbg("Pushing segment, hdr len: %d, payload_len: %d\n", header_offset, f->payload_len); */ - ret = pico_socket_final_xmit(s, f); - return ret; -} - -static int 
pico_socket_xmit_avail_space(struct pico_socket *s); - -#ifdef PICO_SUPPORT_IPV4FRAG -static void pico_socket_xmit_first_fragment_setup(struct pico_frame *f, int space, int hdr_offset) -{ - frag_dbg("FRAG: first fragmented frame %p | len = %u offset = 0\n", f, f->payload_len); - /* transport header length field contains total length + header length */ - f->transport_len = (uint16_t)(space); - f->frag = PICO_IPV4_MOREFRAG; - f->payload += hdr_offset; -} - -static void pico_socket_xmit_next_fragment_setup(struct pico_frame *f, int hdr_offset, int total_payload_written, int len) -{ - /* no transport header in fragmented IP */ - f->payload = f->transport_hdr; - /* set offset in octets */ - f->frag = (uint16_t)((total_payload_written + (uint16_t)hdr_offset) >> 3u); /* first fragment had a header offset */ - if (total_payload_written + f->payload_len < len) { - frag_dbg("FRAG: intermediate fragmented frame %p | len = %u offset = %u\n", f, f->payload_len, short_be(f->frag)); - f->frag |= PICO_IPV4_MOREFRAG; - } else { - frag_dbg("FRAG: last fragmented frame %p | len = %u offset = %u\n", f, f->payload_len, short_be(f->frag)); - f->frag &= PICO_IPV4_FRAG_MASK; - } -} -#endif - -/* Implies ep discarding! 
*/ -static int pico_socket_xmit_fragments(struct pico_socket *s, const void *buf, const int len, - void *src, struct pico_remote_endpoint *ep, struct pico_msginfo *msginfo) -{ - int space = pico_socket_xmit_avail_space(s); - int hdr_offset = pico_socket_sendto_transport_offset(s); - int total_payload_written = 0; - int retval = 0; - struct pico_frame *f = NULL; - - if (space < 0) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - pico_endpoint_free(ep); - return -1; - } - - if (space > len) { - retval = pico_socket_xmit_one(s, buf, len, src, ep, msginfo); - pico_endpoint_free(ep); - return retval; - } - -#ifdef PICO_SUPPORT_IPV6 - /* Can't fragment IPv6 */ - if (is_sock_ipv6(s)) { - retval = pico_socket_xmit_one(s, buf, space, src, ep, msginfo); - pico_endpoint_free(ep); - return retval; - } - -#endif - -#ifdef PICO_SUPPORT_IPV4FRAG - while(total_payload_written < len) { - /* Always allocate the max space available: space + offset */ - if (len < space) - space = len; - - if (space > len - total_payload_written) /* update space for last fragment */ - space = len - total_payload_written; - - f = pico_socket_frame_alloc(s, get_sock_dev(s), (uint16_t)(space + hdr_offset)); - if (!f) { - pico_err = PICO_ERR_ENOMEM; - pico_endpoint_free(ep); - return -1; - } - - f->sock = s; - if (ep) { - f->info = pico_socket_set_info(ep); - if (!f->info) { - pico_frame_discard(f); - pico_endpoint_free(ep); - return -1; - } - } - - f->payload_len = (uint16_t) space; - if (total_payload_written == 0) { - /* First fragment: no payload written yet! 
*/ - pico_socket_xmit_first_fragment_setup(f, space, hdr_offset); - space += hdr_offset; /* only first fragments contains transport header */ - hdr_offset = 0; - } else { - /* Next fragment */ - pico_socket_xmit_next_fragment_setup(f, pico_socket_sendto_transport_offset(s), total_payload_written, len); - } - - memcpy(f->payload, (const uint8_t *)buf + total_payload_written, f->payload_len); - transport_flags_update(f, s); - if (s->proto->push(s->proto, f) > 0) { - total_payload_written += f->payload_len; - } else { - pico_frame_discard(f); - break; - } - } /* while() */ - pico_endpoint_free(ep); - return total_payload_written; - -#else - /* Careful with that axe, Eugene! - * - * cropping down datagrams to the MTU value. - */ - (void) f; - (void) hdr_offset; - (void) total_payload_written; - retval = pico_socket_xmit_one(s, buf, space, src, ep, msginfo); - pico_endpoint_free(ep); - return retval; - -#endif -} - -struct pico_device *get_sock_dev(struct pico_socket *s) -{ - if (0) {} - -#ifdef PICO_SUPPORT_IPV6 - else if (is_sock_ipv6(s)) - s->dev = pico_ipv6_source_dev_find(&s->remote_addr.ip6); -#endif -#ifdef PICO_SUPPORT_IPV4 - else if (is_sock_ipv4(s)) - s->dev = pico_ipv4_source_dev_find(&s->remote_addr.ip4); -#endif - - return s->dev; -} - - -static uint32_t pico_socket_adapt_mss_to_proto(struct pico_socket *s, uint32_t mss) -{ -#ifdef PICO_SUPPORT_IPV6 - if (is_sock_ipv6(s)) - mss -= PICO_SIZE_IP6HDR; - else -#endif - mss -= PICO_SIZE_IP4HDR; - return mss; -} - -uint32_t pico_socket_get_mss(struct pico_socket *s) -{ - uint32_t mss = PICO_MIN_MSS; - if (!s) - return mss; - - if (!s->dev) - get_sock_dev(s); - - if (!s->dev) { - mss = PICO_MIN_MSS; - } else { - mss = s->dev->mtu; - } - - return pico_socket_adapt_mss_to_proto(s, mss); -} - - -static int pico_socket_xmit_avail_space(struct pico_socket *s) -{ - int transport_len; - int header_offset; - -#ifdef PICO_SUPPORT_TCP - if (PROTO(s) == PICO_PROTO_TCP) { - transport_len = 
(uint16_t)pico_tcp_get_socket_mss(s); - } else -#endif - transport_len = (uint16_t)pico_socket_get_mss(s); - header_offset = pico_socket_sendto_transport_offset(s); - if (header_offset < 0) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - } - - transport_len -= pico_socket_sendto_transport_offset(s); - return transport_len; -} - - -static int pico_socket_xmit(struct pico_socket *s, const void *buf, const int len, void *src, - struct pico_remote_endpoint *ep, struct pico_msginfo *msginfo) -{ - int space = pico_socket_xmit_avail_space(s); - int total_payload_written = 0; - - if (space < 0) { - pico_err = PICO_ERR_EPROTONOSUPPORT; - pico_endpoint_free(ep); - return -1; - } - - if ((PROTO(s) == PICO_PROTO_UDP) && (len > space)) { - total_payload_written = pico_socket_xmit_fragments(s, buf, len, src, ep, msginfo); - /* Implies ep discarding */ - return total_payload_written; - } - - while (total_payload_written < len) { - int w, chunk_len = len - total_payload_written; - if (chunk_len > space) - chunk_len = space; - - w = pico_socket_xmit_one(s, (const void *)((const uint8_t *)buf + total_payload_written), chunk_len, src, ep, msginfo); - if (w <= 0) { - break; - } - - total_payload_written += w; - if (PROTO(s) == PICO_PROTO_UDP) { - /* Break after the first datagram sent with at most MTU bytes. 
*/ - break; - } - } - pico_endpoint_free(ep); - return total_payload_written; -} - -static void pico_socket_sendto_set_dport(struct pico_socket *s, uint16_t port) -{ - if ((s->state & PICO_SOCKET_STATE_CONNECTED) == 0) { - s->remote_port = port; - } -} - - -int MOCKABLE pico_socket_sendto_extended(struct pico_socket *s, const void *buf, const int len, - void *dst, uint16_t remote_port, struct pico_msginfo *msginfo) -{ - struct pico_remote_endpoint *remote_endpoint = NULL; - void *src = NULL; - - if(len == 0) - return 0; - - if (pico_socket_sendto_initial_checks(s, buf, len, dst, remote_port) < 0) - return -1; - - - src = pico_socket_sendto_get_src(s, dst); - if (!src) { -#ifdef PICO_SUPPORT_IPV6 - if((s->net->proto_number == PICO_PROTO_IPV6) - && msginfo && msginfo->dev - && pico_ipv6_is_multicast(((struct pico_ip6 *)dst)->addr)) - { - src = &(pico_ipv6_linklocal_get(msginfo->dev)->address); - } - else -#endif - return -1; - } - - remote_endpoint = pico_socket_sendto_destination(s, dst, remote_port); - if (pico_socket_sendto_set_localport(s) < 0) { - pico_endpoint_free(remote_endpoint); - return -1; - } - - pico_socket_sendto_set_dport(s, remote_port); - return pico_socket_xmit(s, buf, len, src, remote_endpoint, msginfo); /* Implies discarding the endpoint */ -} - -int MOCKABLE pico_socket_sendto(struct pico_socket *s, const void *buf, const int len, void *dst, uint16_t remote_port) -{ - return pico_socket_sendto_extended(s, buf, len, dst, remote_port, NULL); -} - -int pico_socket_send(struct pico_socket *s, const void *buf, int len) -{ - if (!s || buf == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* check if exists in tree */ - /* See task #178 */ - if (pico_check_socket(s) != 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - if ((s->state & PICO_SOCKET_STATE_CONNECTED) == 0) { - pico_err = PICO_ERR_ENOTCONN; - return -1; - } - - return pico_socket_sendto(s, buf, len, &s->remote_addr, s->remote_port); -} - -int 
pico_socket_recvfrom_extended(struct pico_socket *s, void *buf, int len, void *orig, - uint16_t *remote_port, struct pico_msginfo *msginfo) -{ - if (!s || buf == NULL) { /* / || orig == NULL || remote_port == NULL) { */ - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* check if exists in tree */ - if (pico_check_socket(s) != 0) { - pico_err = PICO_ERR_EINVAL; - /* See task #178 */ - return -1; - } - } - - if ((s->state & PICO_SOCKET_STATE_BOUND) == 0) { - pico_err = PICO_ERR_EADDRNOTAVAIL; - return -1; - } - -#ifdef PICO_SUPPORT_UDP - if (PROTO(s) == PICO_PROTO_UDP) { - /* make sure cast to uint16_t doesn't give unexpected results */ - if(len > 0xFFFF) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return pico_udp_recv(s, buf, (uint16_t)len, orig, remote_port, msginfo); - } - -#endif -#ifdef PICO_SUPPORT_TCP - if (PROTO(s) == PICO_PROTO_TCP) { - /* check if in shutdown state and if tcpq_in empty */ - if ((s->state & PICO_SOCKET_STATE_SHUT_REMOTE) && pico_tcp_queue_in_is_empty(s)) { - pico_err = PICO_ERR_ESHUTDOWN; - return -1; - } else { - /* dbg("socket tcp recv\n"); */ - return (int)pico_tcp_read(s, buf, (uint32_t)len); - } - } - -#endif - /* dbg("socket return 0\n"); */ - return 0; -} - -int MOCKABLE pico_socket_recvfrom(struct pico_socket *s, void *buf, int len, void *orig, - uint16_t *remote_port) -{ - return pico_socket_recvfrom_extended(s, buf, len, orig, remote_port, NULL); - -} - -int pico_socket_recv(struct pico_socket *s, void *buf, int len) -{ - return pico_socket_recvfrom(s, buf, len, NULL, NULL); -} - - -int pico_socket_getname(struct pico_socket *s, void *local_addr, uint16_t *port, uint16_t *proto) -{ - - if (!s || !local_addr || !port || !proto) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (is_sock_ipv4(s)) { - #ifdef PICO_SUPPORT_IPV4 - struct pico_ip4 *ip = (struct pico_ip4 *)local_addr; - ip->addr = s->local_addr.ip4.addr; - *proto = PICO_PROTO_IPV4; - #endif - } else if (is_sock_ipv6(s)) { - #ifdef PICO_SUPPORT_IPV6 - 
struct pico_ip6 *ip = (struct pico_ip6 *)local_addr; - memcpy(ip->addr, s->local_addr.ip6.addr, PICO_SIZE_IP6); - *proto = PICO_PROTO_IPV6; - #endif - } else { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - *port = s->local_port; - return 0; -} - -int pico_socket_getpeername(struct pico_socket *s, void *remote_addr, uint16_t *port, uint16_t *proto) -{ - if (!s || !remote_addr || !port || !proto) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if ((s->state & PICO_SOCKET_STATE_CONNECTED) == 0) { - pico_err = PICO_ERR_ENOTCONN; - return -1; - } - - if (is_sock_ipv4(s)) { - #ifdef PICO_SUPPORT_IPV4 - struct pico_ip4 *ip = (struct pico_ip4 *)remote_addr; - ip->addr = s->remote_addr.ip4.addr; - *proto = PICO_PROTO_IPV4; - #endif - } else if (is_sock_ipv6(s)) { - #ifdef PICO_SUPPORT_IPV6 - struct pico_ip6 *ip = (struct pico_ip6 *)remote_addr; - memcpy(ip->addr, s->remote_addr.ip6.addr, PICO_SIZE_IP6); - *proto = PICO_PROTO_IPV6; - #endif - } else { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - *port = s->remote_port; - return 0; - -} - -int MOCKABLE pico_socket_bind(struct pico_socket *s, void *local_addr, uint16_t *port) -{ - if (!s || !local_addr || !port) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (is_sock_ipv4(s)) { - #ifdef PICO_SUPPORT_IPV4 - struct pico_ip4 *ip = (struct pico_ip4 *)local_addr; - if (ip->addr != PICO_IPV4_INADDR_ANY) { - if (!pico_ipv4_link_find(local_addr)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - #endif - } else if (is_sock_ipv6(s)) { - #ifdef PICO_SUPPORT_IPV6 - struct pico_ip6 *ip = (struct pico_ip6 *)local_addr; - if (!pico_ipv6_is_unspecified(ip->addr)) { - if (!pico_ipv6_link_find(local_addr)) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - #endif - } else { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* When given port = 0, get a random high port to bind to. 
*/ - if (*port == 0) { - *port = pico_socket_high_port(PROTO(s)); - if (*port == 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - if (pico_is_port_free(PROTO(s), *port, local_addr, s->net) == 0) { - pico_err = PICO_ERR_EADDRINUSE; - return -1; - } - - s->local_port = *port; - - if (is_sock_ipv4(s)) { - #ifdef PICO_SUPPORT_IPV4 - struct pico_ip4 *ip = (struct pico_ip4 *)local_addr; - s->local_addr.ip4 = *ip; - #endif - } else if (is_sock_ipv6(s)) { - #ifdef PICO_SUPPORT_IPV6 - struct pico_ip6 *ip = (struct pico_ip6 *)local_addr; - s->local_addr.ip6 = *ip; - #endif - } else { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return pico_socket_alter_state(s, PICO_SOCKET_STATE_BOUND, 0, 0); -} - - -int pico_socket_connect(struct pico_socket *s, const void *remote_addr, uint16_t remote_port) -{ - int ret = -1; - pico_err = PICO_ERR_EPROTONOSUPPORT; - if (!s || remote_addr == NULL || remote_port == 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - s->remote_port = remote_port; - - if (s->local_port == 0) { - s->local_port = pico_socket_high_port(PROTO(s)); - if (!s->local_port) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - if (is_sock_ipv4(s)) { - #ifdef PICO_SUPPORT_IPV4 - struct pico_ip4 *local = NULL; - const struct pico_ip4 *ip = (const struct pico_ip4 *)remote_addr; - s->remote_addr.ip4 = *ip; - local = pico_ipv4_source_find(ip); - if (local) { - get_sock_dev(s); - s->local_addr.ip4 = *local; - } else { - pico_err = PICO_ERR_EHOSTUNREACH; - return -1; - } - - #endif - } else if (is_sock_ipv6(s)) { - #ifdef PICO_SUPPORT_IPV6 - struct pico_ip6 *local = NULL; - const struct pico_ip6 *ip = (const struct pico_ip6 *)remote_addr; - s->remote_addr.ip6 = *ip; - local = pico_ipv6_source_find(ip); - if (local) { - get_sock_dev(s); - s->local_addr.ip6 = *local; - } else { - pico_err = PICO_ERR_EHOSTUNREACH; - return -1; - } - - #endif - } else { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - pico_socket_alter_state(s, PICO_SOCKET_STATE_BOUND, 
0, 0); - -#ifdef PICO_SUPPORT_UDP - if (PROTO(s) == PICO_PROTO_UDP) { - pico_socket_alter_state(s, PICO_SOCKET_STATE_CONNECTED, 0, 0); - pico_err = PICO_ERR_NOERR; - ret = 0; - } - -#endif - -#ifdef PICO_SUPPORT_TCP - if (PROTO(s) == PICO_PROTO_TCP) { - if (pico_tcp_initconn(s) == 0) { - pico_socket_alter_state(s, PICO_SOCKET_STATE_CONNECTED | PICO_SOCKET_STATE_TCP_SYN_SENT, PICO_SOCKET_STATE_CLOSED, 0); - pico_err = PICO_ERR_NOERR; - ret = 0; - } else { - pico_err = PICO_ERR_EHOSTUNREACH; - } - } - -#endif - - return ret; -} - - -#ifdef PICO_SUPPORT_TCP - -int pico_socket_listen(struct pico_socket *s, int backlog) -{ - if (!s || backlog < 1) { - pico_err = PICO_ERR_EINVAL; - return -1; - } else { - /* check if exists in tree */ - /* See task #178 */ - if (pico_check_socket(s) != 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - } - - if (PROTO(s) == PICO_PROTO_UDP) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if ((s->state & PICO_SOCKET_STATE_BOUND) == 0) { - pico_err = PICO_ERR_EISCONN; - return -1; - } - - if (PROTO(s) == PICO_PROTO_TCP) - pico_socket_alter_state(s, PICO_SOCKET_STATE_TCP_SYN_SENT, 0, PICO_SOCKET_STATE_TCP_LISTEN); - - s->max_backlog = (uint16_t)backlog; - - return 0; -} - -struct pico_socket *pico_socket_accept(struct pico_socket *s, void *orig, uint16_t *port) -{ - if (!s || !orig || !port) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - pico_err = PICO_ERR_EINVAL; - - if ((s->state & PICO_SOCKET_STATE_BOUND) == 0) { - return NULL; - } - - if (PROTO(s) == PICO_PROTO_UDP) { - return NULL; - } - - if (TCPSTATE(s) == PICO_SOCKET_STATE_TCP_LISTEN) { - struct pico_sockport *sp = pico_get_sockport(PICO_PROTO_TCP, s->local_port); - struct pico_socket *found; - uint32_t socklen = sizeof(struct pico_ip4); - /* If at this point no incoming connection socket is found, - * the accept call is valid, but no connection is established yet. 
- */ - pico_err = PICO_ERR_EAGAIN; - if (sp) { - struct pico_tree_node *index; - /* RB_FOREACH(found, socket_tree, &sp->socks) { */ - pico_tree_foreach(index, &sp->socks){ - found = index->keyValue; - if ((s == found->parent) && ((found->state & PICO_SOCKET_STATE_TCP) == PICO_SOCKET_STATE_TCP_ESTABLISHED)) { - found->parent = NULL; - pico_err = PICO_ERR_NOERR; - #ifdef PICO_SUPPORT_IPV6 - if (is_sock_ipv6(s)) - socklen = sizeof(struct pico_ip6); - - #endif - memcpy(orig, &found->remote_addr, socklen); - *port = found->remote_port; - s->number_of_pending_conn--; - return found; - } - } - } - } - - return NULL; -} - -#else - -int pico_socket_listen(struct pico_socket *s, int backlog) -{ - IGNORE_PARAMETER(s); - IGNORE_PARAMETER(backlog); - pico_err = PICO_ERR_EINVAL; - return -1; -} - -struct pico_socket *pico_socket_accept(struct pico_socket *s, void *orig, uint16_t *local_port) -{ - IGNORE_PARAMETER(s); - IGNORE_PARAMETER(orig); - IGNORE_PARAMETER(local_port); - pico_err = PICO_ERR_EINVAL; - return NULL; -} - -#endif - - -int MOCKABLE pico_socket_setoption(struct pico_socket *s, int option, void *value) -{ - - if (s == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - - if (PROTO(s) == PICO_PROTO_TCP) - return pico_setsockopt_tcp(s, option, value); - - if (PROTO(s) == PICO_PROTO_UDP) - return pico_setsockopt_udp(s, option, value); - - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - - -int pico_socket_getoption(struct pico_socket *s, int option, void *value) -{ - if (s == NULL) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - - if (PROTO(s) == PICO_PROTO_TCP) - return pico_getsockopt_tcp(s, option, value); - - if (PROTO(s) == PICO_PROTO_UDP) - return pico_getsockopt_udp(s, option, value); - - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - - -int pico_socket_shutdown(struct pico_socket *s, int mode) -{ - if (!s) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* Check if the socket has already been closed */ - if (s->state & 
PICO_SOCKET_STATE_CLOSED) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - /* unbound sockets can be deleted immediately */ - if (!(s->state & PICO_SOCKET_STATE_BOUND)) - { - socket_garbage_collect((pico_time)10, s); - return 0; - } - -#ifdef PICO_SUPPORT_UDP - if (PROTO(s) == PICO_PROTO_UDP) { - if ((mode & PICO_SHUT_RDWR) == PICO_SHUT_RDWR) - pico_socket_alter_state(s, PICO_SOCKET_STATE_CLOSED, PICO_SOCKET_STATE_CLOSING | PICO_SOCKET_STATE_BOUND | PICO_SOCKET_STATE_CONNECTED, 0); - else if (mode & PICO_SHUT_RD) - pico_socket_alter_state(s, 0, PICO_SOCKET_STATE_BOUND, 0); - } - -#endif -#ifdef PICO_SUPPORT_TCP - if (PROTO(s) == PICO_PROTO_TCP) { - if ((mode & PICO_SHUT_RDWR) == PICO_SHUT_RDWR) - { - pico_socket_alter_state(s, PICO_SOCKET_STATE_SHUT_LOCAL | PICO_SOCKET_STATE_SHUT_REMOTE, 0, 0); - pico_tcp_notify_closing(s); - } - else if (mode & PICO_SHUT_WR) { - pico_socket_alter_state(s, PICO_SOCKET_STATE_SHUT_LOCAL, 0, 0); - pico_tcp_notify_closing(s); - } else if (mode & PICO_SHUT_RD) - pico_socket_alter_state(s, PICO_SOCKET_STATE_SHUT_REMOTE, 0, 0); - - } - -#endif - return 0; -} - -int MOCKABLE pico_socket_close(struct pico_socket *s) -{ - if (!s) - return -1; - -#ifdef PICO_SUPPORT_TCP - if (PROTO(s) == PICO_PROTO_TCP) { - if (pico_tcp_check_listen_close(s) == 0) - return 0; - } - -#endif - return pico_socket_shutdown(s, PICO_SHUT_RDWR); -} - -#ifdef PICO_SUPPORT_CRC -static inline int pico_transport_crc_check(struct pico_frame *f) -{ - struct pico_ipv4_hdr *net_hdr = (struct pico_ipv4_hdr *) f->net_hdr; - struct pico_udp_hdr *udp_hdr = NULL; - uint16_t checksum_invalid = 1; - - switch (net_hdr->proto) - { -#ifdef PICO_SUPPORT_TCP - case PICO_PROTO_TCP: - checksum_invalid = short_be(pico_tcp_checksum(f)); - /* dbg("TCP CRC validation == %u\n", checksum_invalid); */ - if (checksum_invalid) { - dbg("TCP CRC: validation failed!\n"); - pico_frame_discard(f); - return 0; - } - - break; -#endif /* PICO_SUPPORT_TCP */ - -#ifdef PICO_SUPPORT_UDP - case 
PICO_PROTO_UDP: - udp_hdr = (struct pico_udp_hdr *) f->transport_hdr; - if (short_be(udp_hdr->crc)) { -#ifdef PICO_SUPPORT_IPV4 - if (IS_IPV4(f)) - checksum_invalid = short_be(pico_udp_checksum_ipv4(f)); - -#endif -#ifdef PICO_SUPPORT_IPV6 - if (IS_IPV6(f)) - checksum_invalid = short_be(pico_udp_checksum_ipv6(f)); - -#endif - /* dbg("UDP CRC validation == %u\n", checksum_invalid); */ - if (checksum_invalid) { - /* dbg("UDP CRC: validation failed!\n"); */ - pico_frame_discard(f); - return 0; - } - } - - break; -#endif /* PICO_SUPPORT_UDP */ - - default: - /* Do nothing */ - break; - } - return 1; -} -#else -static inline int pico_transport_crc_check(struct pico_frame *f) -{ - IGNORE_PARAMETER(f); - return 1; -} -#endif /* PICO_SUPPORT_CRC */ - -int pico_transport_process_in(struct pico_protocol *self, struct pico_frame *f) -{ - struct pico_trans *hdr = (struct pico_trans *) f->transport_hdr; - int ret = 0; - - if (!hdr) { - pico_err = PICO_ERR_EFAULT; - return -1; - } - - ret = pico_transport_crc_check(f); - if (ret < 1) - return ret; - else - ret = 0; - - if ((hdr) && (pico_socket_deliver(self, f, hdr->dport) == 0)) - return ret; - - if (!IS_BCAST(f)) { - dbg("Socket not found... 
\n"); - pico_notify_socket_unreachable(f); - ret = -1; - pico_err = PICO_ERR_ENOENT; - } - - pico_frame_discard(f); - return ret; -} - -#define SL_LOOP_MIN 1 - -#ifdef PICO_SUPPORT_TCP -static int check_socket_sanity(struct pico_socket *s) -{ - - /* checking for pending connections */ - if(TCP_STATE(s) == PICO_SOCKET_STATE_TCP_SYN_RECV) { - if((PICO_TIME_MS() - s->timestamp) >= PICO_SOCKET_BOUND_TIMEOUT) - return -1; - } - - return 0; -} -#endif - - -static int pico_sockets_loop_udp(int loop_score) -{ - -#ifdef PICO_SUPPORT_UDP - static struct pico_tree_node *index_udp; - struct pico_sockport *start; - struct pico_socket *s; - struct pico_frame *f; - - if (sp_udp == NULL) - { - index_udp = pico_tree_firstNode(UDPTable.root); - sp_udp = index_udp->keyValue; - } - - /* init start node */ - start = sp_udp; - - /* round-robin all transport protocols, break if traversed all protocols */ - while (loop_score > SL_LOOP_MIN && sp_udp != NULL) { - struct pico_tree_node *index; - - pico_tree_foreach(index, &sp_udp->socks){ - s = index->keyValue; - f = pico_dequeue(&s->q_out); - while (f && (loop_score > 0)) { - pico_proto_udp.push(&pico_proto_udp, f); - loop_score -= 1; - if (loop_score > 0) /* only dequeue if there is still loop_score, otherwise f might get lost */ - f = pico_dequeue(&s->q_out); - } - } - - index_udp = pico_tree_next(index_udp); - sp_udp = index_udp->keyValue; - - if (sp_udp == NULL) - { - index_udp = pico_tree_firstNode(UDPTable.root); - sp_udp = index_udp->keyValue; - } - - if (sp_udp == start) - break; - } -#endif - return loop_score; -} - -static int pico_sockets_loop_tcp(int loop_score) -{ -#ifdef PICO_SUPPORT_TCP - struct pico_sockport *start; - struct pico_socket *s; - static struct pico_tree_node *index_tcp; - if (sp_tcp == NULL) - { - index_tcp = pico_tree_firstNode(TCPTable.root); - sp_tcp = index_tcp->keyValue; - } - - /* init start node */ - start = sp_tcp; - - while (loop_score > SL_LOOP_MIN && sp_tcp != NULL) { - struct pico_tree_node *index = 
NULL, *safe_index = NULL; - pico_tree_foreach_safe(index, &sp_tcp->socks, safe_index){ - s = index->keyValue; - loop_score = pico_tcp_output(s, loop_score); - if ((s->ev_pending) && s->wakeup) { - s->wakeup(s->ev_pending, s); - if(!s->parent) - s->ev_pending = 0; - } - - if (loop_score <= 0) { - loop_score = 0; - break; - } - - if(check_socket_sanity(s) < 0) - { - pico_socket_del(s); - index_tcp = NULL; /* forcing the restart of loop */ - sp_tcp = NULL; - break; - } - } - - /* check if RB_FOREACH ended, if not, break to keep the cur sp_tcp */ - if (!index_tcp || (index && index->keyValue)) - break; - - index_tcp = pico_tree_next(index_tcp); - sp_tcp = index_tcp->keyValue; - - if (sp_tcp == NULL) - { - index_tcp = pico_tree_firstNode(TCPTable.root); - sp_tcp = index_tcp->keyValue; - } - - if (sp_tcp == start) - break; - } -#endif - return loop_score; - - -} - -int pico_sockets_loop(int loop_score) -{ - loop_score = pico_sockets_loop_udp(loop_score); - loop_score = pico_sockets_loop_tcp(loop_score); - return loop_score; -} - -int pico_count_sockets(uint8_t proto) -{ - struct pico_sockport *sp; - struct pico_tree_node *idx_sp, *idx_s; - int count = 0; - - if ((proto == 0) || (proto == PICO_PROTO_TCP)) { - pico_tree_foreach(idx_sp, &TCPTable) { - sp = idx_sp->keyValue; - if (sp) { - pico_tree_foreach(idx_s, &sp->socks) - count++; - } - } - } - - if ((proto == 0) || (proto == PICO_PROTO_UDP)) { - pico_tree_foreach(idx_sp, &UDPTable) { - sp = idx_sp->keyValue; - if (sp) { - pico_tree_foreach(idx_s, &sp->socks) - count++; - } - } - } - - return count; -} - - -struct pico_frame *pico_socket_frame_alloc(struct pico_socket *s, struct pico_device *dev, uint16_t len) -{ - struct pico_frame *f = NULL; - -#ifdef PICO_SUPPORT_IPV6 - if (is_sock_ipv6(s)) - f = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, len); - -#endif - -#ifdef PICO_SUPPORT_IPV4 - if (is_sock_ipv4(s)) - f = pico_proto_ipv4.alloc(&pico_proto_ipv4, dev, len); - -#endif - if (!f) { - pico_err = PICO_ERR_ENOMEM; - 
return f; - } - - f->payload = f->transport_hdr; - f->payload_len = len; - f->sock = s; - return f; -} - -static void pico_transport_error_set_picoerr(int code) -{ - /* dbg("SOCKET ERROR FROM ICMP NOTIFICATION. (icmp code= %d)\n\n", code); */ - switch(code) { - case PICO_ICMP_UNREACH_NET: - pico_err = PICO_ERR_ENETUNREACH; - break; - - case PICO_ICMP_UNREACH_HOST: - pico_err = PICO_ERR_EHOSTUNREACH; - break; - - case PICO_ICMP_UNREACH_PROTOCOL: - pico_err = PICO_ERR_ENOPROTOOPT; - break; - - case PICO_ICMP_UNREACH_PORT: - pico_err = PICO_ERR_ECONNREFUSED; - break; - - case PICO_ICMP_UNREACH_NET_UNKNOWN: - pico_err = PICO_ERR_ENETUNREACH; - break; - - case PICO_ICMP_UNREACH_HOST_UNKNOWN: - pico_err = PICO_ERR_EHOSTDOWN; - break; - - case PICO_ICMP_UNREACH_ISOLATED: - pico_err = PICO_ERR_ENONET; - break; - - case PICO_ICMP_UNREACH_NET_PROHIB: - case PICO_ICMP_UNREACH_HOST_PROHIB: - pico_err = PICO_ERR_EHOSTUNREACH; - break; - - default: - pico_err = PICO_ERR_EOPNOTSUPP; - } -} - -int pico_transport_error(struct pico_frame *f, uint8_t proto, int code) -{ - int ret = -1; - struct pico_trans *trans = (struct pico_trans*) f->transport_hdr; - struct pico_sockport *port = NULL; - struct pico_socket *s = NULL; - switch (proto) { - - -#ifdef PICO_SUPPORT_UDP - case PICO_PROTO_UDP: - port = pico_get_sockport(proto, trans->sport); - break; -#endif - -#ifdef PICO_SUPPORT_TCP - case PICO_PROTO_TCP: - port = pico_get_sockport(proto, trans->sport); - break; -#endif - - default: - /* Protocol not available */ - ret = -1; - } - if (port) { - struct pico_tree_node *index; - ret = 0; - - pico_tree_foreach(index, &port->socks) { - s = index->keyValue; - if (trans->dport == s->remote_port) { - if (s->wakeup) { - pico_transport_error_set_picoerr(code); - s->state |= PICO_SOCKET_STATE_SHUT_REMOTE; - s->wakeup(PICO_SOCK_EV_ERR, s); - } - - break; - } - } - } - - pico_frame_discard(f); - return ret; -} -#endif -#endif diff --git a/kernel/picotcp/stack/pico_socket_multicast.c 
b/kernel/picotcp/stack/pico_socket_multicast.c deleted file mode 100644 index 8e69e45..0000000 --- a/kernel/picotcp/stack/pico_socket_multicast.c +++ /dev/null @@ -1,1478 +0,0 @@ -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_socket.h" -#include "pico_socket_multicast.h" -#include "pico_tree.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_udp.h" - -#ifdef PICO_SUPPORT_MCAST - -#ifdef DEBUG_MCAST -#define so_mcast_dbg dbg -#else -#define so_mcast_dbg(...) do { } while(0) -#endif - -/* socket - * | - * MCASTListen - * | | | - * ------------ | ------------ - * | | | - * MCASTSources MCASTSources MCASTSources - * | | | | | | | | | | | | - * S S S S S S S S S S S S - * - * MCASTListen: RBTree(mcast_link, mcast_group) - * MCASTSources: RBTree(source) - */ -struct pico_mcast_listen -{ - int8_t filter_mode; - union pico_address mcast_link; - union pico_address mcast_group; - struct pico_tree MCASTSources; - struct pico_tree MCASTSources_ipv6; - uint16_t proto; -}; -/* Parameters */ -struct pico_mcast -{ - struct pico_socket *s; - struct pico_ip_mreq *mreq; - struct pico_ip_mreq_source *mreq_s; - union pico_address *address; - union pico_link *mcast_link; - struct pico_mcast_listen *listen; -}; -static int mcast_listen_link_cmp(struct pico_mcast_listen *a, struct pico_mcast_listen *b) -{ - - if (a->proto < b->proto) - return -1; - - if (a->proto > b->proto) - return 1; - - return pico_address_compare(&a->mcast_link, &b->mcast_link, a->proto); -} - -static int mcast_listen_grp_cmp(struct pico_mcast_listen *a, struct pico_mcast_listen *b) -{ - if (a->mcast_group.ip4.addr < b->mcast_group.ip4.addr) - return -1; - - if (a->mcast_group.ip4.addr > b->mcast_group.ip4.addr) - return 1; - - return mcast_listen_link_cmp(a, b); -} -#ifdef PICO_SUPPORT_IPV6 -static int mcast_listen_grp_cmp_ipv6(struct pico_mcast_listen *a, struct pico_mcast_listen *b) -{ - int tmp = memcmp(&a->mcast_group.ip6, &b->mcast_group.ip6, sizeof(struct pico_ip6)); - 
if(!tmp) - return mcast_listen_link_cmp(a, b); - - return tmp; -} -#endif - -static int mcast_listen_cmp(void *ka, void *kb) -{ - struct pico_mcast_listen *a = ka, *b = kb; - if (a->proto < b->proto) - return -1; - - if (a->proto > b->proto) - return 1; - - return mcast_listen_grp_cmp(a, b); -} -#ifdef PICO_SUPPORT_IPV6 -static int mcast_listen_cmp_ipv6(void *ka, void *kb) -{ - struct pico_mcast_listen *a = ka, *b = kb; - if (a->proto < b->proto) - return -1; - - if (a->proto > b->proto) - return 1; - - return mcast_listen_grp_cmp_ipv6(a, b); -} -#endif -static int mcast_sources_cmp(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - if (a->ip4.addr < b->ip4.addr) - return -1; - - if (a->ip4.addr > b->ip4.addr) - return 1; - - return 0; -} -#ifdef PICO_SUPPORT_IPV6 -static int mcast_sources_cmp_ipv6(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - return memcmp(&a->ip6, &b->ip6, sizeof(struct pico_ip6)); -} -#endif -static int mcast_socket_cmp(void *ka, void *kb) -{ - struct pico_socket *a = ka, *b = kb; - if (a < b) - return -1; - - if (a > b) - return 1; - - return 0; -} - -/* gather all multicast sockets to hasten filter aggregation */ -static PICO_TREE_DECLARE(MCASTSockets, mcast_socket_cmp); - -static int mcast_filter_cmp(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - if (a->ip4.addr < b->ip4.addr) - return -1; - - if (a->ip4.addr > b->ip4.addr) - return 1; - - return 0; -} -/* gather sources to be filtered */ -static PICO_TREE_DECLARE(MCASTFilter, mcast_filter_cmp); - -static int mcast_filter_cmp_ipv6(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - return memcmp(&a->ip6, &b->ip6, sizeof(struct pico_ip6)); -} -/* gather sources to be filtered */ -static PICO_TREE_DECLARE(MCASTFilter_ipv6, mcast_filter_cmp_ipv6); - -inline static struct pico_tree *mcast_get_src_tree(struct pico_socket *s, struct pico_mcast *mcast) -{ - if( IS_SOCK_IPV4(s)) { - mcast->listen->MCASTSources.compare = mcast_sources_cmp; - 
return &mcast->listen->MCASTSources; - } - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) { - mcast->listen->MCASTSources_ipv6.compare = mcast_sources_cmp_ipv6; - return &mcast->listen->MCASTSources_ipv6; - } -#endif - return NULL; -} -inline static struct pico_tree *mcast_get_listen_tree(struct pico_socket *s) -{ - if( IS_SOCK_IPV4(s)) - return s->MCASTListen; - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) - return s->MCASTListen_ipv6; -#endif - return NULL; -} -inline static void mcast_set_listen_tree_p_null(struct pico_socket *s) -{ - if( IS_SOCK_IPV4(s)) - s->MCASTListen = NULL; - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) - s->MCASTListen_ipv6 = NULL; -#endif -} -static struct pico_mcast_listen *listen_find(struct pico_socket *s, union pico_address *lnk, union pico_address *grp) -{ - struct pico_mcast_listen ltest = { - 0 - }; - ltest.mcast_link = *lnk; - ltest.mcast_group = *grp; - - if(IS_SOCK_IPV4(s)) - return pico_tree_findKey(s->MCASTListen, <est); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) { - ltest.proto = PICO_PROTO_IPV6; - return pico_tree_findKey(s->MCASTListen_ipv6, <est); - } -#endif - return NULL; -} -static union pico_address *pico_mcast_get_link_address(struct pico_socket *s, union pico_link *mcast_link) -{ - if( IS_SOCK_IPV4(s)) - return (union pico_address *) &mcast_link->ipv4.address; - -#ifdef PICO_SUPPORT_IPV6 - if( IS_SOCK_IPV6(s)) - return (union pico_address *) &mcast_link->ipv6.address; - -#endif - return NULL; -} -static int8_t pico_mcast_filter_excl_excl(struct pico_mcast_listen *listen) -{ - /* filter = intersection of EXCLUDEs */ - /* any record with filter mode EXCLUDE, causes the interface mode to be EXCLUDE */ - /* remove from the interface EXCLUDE filter any source not in the socket EXCLUDE filter */ - struct pico_tree_node *index = NULL, *_tmp = NULL; - union pico_address *source = NULL; - if(!pico_tree_empty(&MCASTFilter)) { - pico_tree_foreach_safe(index, &MCASTFilter, _tmp) - { - source 
= pico_tree_findKey(&listen->MCASTSources, index->keyValue); - if (!source) - pico_tree_delete(&MCASTFilter, index->keyValue); - } - } - -#ifdef PICO_SUPPORT_IPV6 - if(!pico_tree_empty(&MCASTFilter_ipv6)) { - pico_tree_foreach_safe(index, &MCASTFilter_ipv6, _tmp) - { - source = pico_tree_findKey(&listen->MCASTSources_ipv6, index->keyValue); - if (!source) - pico_tree_delete(&MCASTFilter_ipv6, index->keyValue); - } - } - -#endif - return PICO_IP_MULTICAST_EXCLUDE; -} - -static int8_t pico_mcast_filter_excl_incl(struct pico_mcast_listen *listen) -{ - /* filter = EXCLUDE - INCLUDE */ - /* any record with filter mode EXCLUDE, causes the interface mode to be EXCLUDE */ - /* remove from the interface EXCLUDE filter any source in the socket INCLUDE filter */ - struct pico_tree_node *index = NULL, *_tmp = NULL; - union pico_address *source = NULL; - if(!pico_tree_empty(&listen->MCASTSources)) { - pico_tree_foreach_safe(index, &listen->MCASTSources, _tmp) - { - source = pico_tree_findKey(&MCASTFilter, index->keyValue); - if (source) - pico_tree_delete(&MCASTFilter, source); - } - } - -#ifdef PICO_SUPPORT_IPV6 - if(!pico_tree_empty(&listen->MCASTSources_ipv6)) { - pico_tree_foreach_safe(index, &listen->MCASTSources_ipv6, _tmp) - { - source = pico_tree_findKey(&MCASTFilter_ipv6, index->keyValue); - if (source) - pico_tree_delete(&MCASTFilter_ipv6, source); - } - } - -#endif - return PICO_IP_MULTICAST_EXCLUDE; -} - -static int8_t pico_mcast_filter_incl_excl(struct pico_mcast_listen *listen) -{ - /* filter = EXCLUDE - INCLUDE */ - /* delete from the interface INCLUDE filter any source NOT in the socket EXCLUDE filter */ - struct pico_tree_node *index = NULL, *_tmp = NULL; - union pico_address *source = NULL; - if(!pico_tree_empty(&listen->MCASTSources)) { - pico_tree_foreach_safe(index, &MCASTFilter, _tmp) - { - source = pico_tree_findKey(&listen->MCASTSources, index->keyValue); - if (!source) - pico_tree_delete(&MCASTFilter, index->keyValue); - } - } - -#ifdef 
PICO_SUPPORT_IPV6 - if(!pico_tree_empty(&listen->MCASTSources_ipv6)) { - pico_tree_foreach_safe(index, &MCASTFilter_ipv6, _tmp) - { - source = pico_tree_findKey(&listen->MCASTSources_ipv6, index->keyValue); - if (!source) - pico_tree_delete(&MCASTFilter_ipv6, index->keyValue); - } - } - -#endif - /* any record with filter mode EXCLUDE, causes the interface mode to be EXCLUDE */ - - /* add to the interface EXCLUDE filter any socket source NOT in the former interface INCLUDE filter */ - if(!pico_tree_empty(&listen->MCASTSources)) { - pico_tree_foreach_safe(index, &listen->MCASTSources, _tmp) - { - source = pico_tree_insert(&MCASTFilter, index->keyValue); - if (source) { - if ((void *)source == (void *)&LEAF) - return -1; - else - pico_tree_delete(&MCASTFilter, source); - } - } - } - -#ifdef PICO_SUPPORT_IPV6 - if(!pico_tree_empty(&listen->MCASTSources_ipv6)) { - pico_tree_foreach_safe(index, &listen->MCASTSources_ipv6, _tmp) - { - source = pico_tree_insert(&MCASTFilter_ipv6, index->keyValue); - if (source) { - if ((void *)source == (void *)&LEAF) - return -1; - else - pico_tree_delete(&MCASTFilter_ipv6, source); - } - } - } - -#endif - return PICO_IP_MULTICAST_EXCLUDE; -} - -static int8_t pico_mcast_filter_incl_incl(struct pico_mcast_listen *listen) -{ - /* filter = summation of INCLUDEs */ - /* mode stays INCLUDE, add all sources to filter */ - struct pico_tree_node *index = NULL, *_tmp = NULL; - union pico_address *source = NULL; - - if( !pico_tree_empty(&listen->MCASTSources)) { - pico_tree_foreach_safe(index, &listen->MCASTSources, _tmp) - { - source = index->keyValue; - if (pico_tree_insert(&MCASTFilter, source) == &LEAF) - return -1; - } - } - -#ifdef PICO_SUPPORT_IPV6 - if( !pico_tree_empty(&listen->MCASTSources_ipv6)) { - pico_tree_foreach_safe(index, &listen->MCASTSources_ipv6, _tmp) - { - source = index->keyValue; - if (pico_tree_insert(&MCASTFilter_ipv6, source) == &LEAF) - return -1; - } - } - -#endif - return PICO_IP_MULTICAST_INCLUDE; -} - -struct 
pico_mcast_filter_aggregation -{ - int8_t (*call)(struct pico_mcast_listen *); -}; - -static const struct pico_mcast_filter_aggregation mcast_filter_aggr_call[2][2] = -{ - { - /* EXCL + EXCL */ {.call = pico_mcast_filter_excl_excl}, - /* EXCL + INCL */ {.call = pico_mcast_filter_excl_incl} - }, - - { - /* INCL + EXCL */ {.call = pico_mcast_filter_incl_excl}, - /* INCL + INCL */ {.call = pico_mcast_filter_incl_incl} - } -}; - -static int mcast_aggr_validate(int8_t fm, struct pico_mcast_listen *l) -{ - if (!l) - return -1; - - if (fm > 1 || fm < 0) - return -1; - - if (l->filter_mode > 1) - return -1; - - return 0; -} - - -/* MCASTFilter will be empty if no socket is listening on mcast_group on mcast_link anymore */ -static int pico_socket_aggregate_mcastfilters(union pico_address *mcast_link, union pico_address *mcast_group) -{ - int8_t filter_mode = PICO_IP_MULTICAST_INCLUDE; - struct pico_mcast_listen *listen = NULL; - struct pico_socket *mcast_sock = NULL; - struct pico_tree_node *index = NULL, *_tmp = NULL; - - /* cleanup old filter */ - if(!pico_tree_empty(&MCASTFilter)) { - pico_tree_foreach_safe(index, &MCASTFilter, _tmp) - { - pico_tree_delete(&MCASTFilter, index->keyValue); - } - } - -#ifdef PICO_SUPPORT_IPV6 - if(!pico_tree_empty(&MCASTFilter_ipv6)) { - pico_tree_foreach_safe(index, &MCASTFilter_ipv6, _tmp) - { - pico_tree_delete(&MCASTFilter_ipv6, index->keyValue); - } - } - -#endif - /* construct new filter */ - pico_tree_foreach_safe(index, &MCASTSockets, _tmp) - { - mcast_sock = index->keyValue; - listen = listen_find(mcast_sock, mcast_link, mcast_group); - if (listen) { - if (mcast_aggr_validate(filter_mode, listen) < 0) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (mcast_filter_aggr_call[filter_mode][listen->filter_mode].call) { - filter_mode = mcast_filter_aggr_call[filter_mode][listen->filter_mode].call(listen); - if (filter_mode > 1 || filter_mode < 0) - return -1; - } - } - } - return filter_mode; -} - -static int 
pico_socket_mcast_filter_include(struct pico_mcast_listen *listen, union pico_address *src) -{ - struct pico_tree_node *index = NULL; -#ifdef PICO_DEBUG_MCAST - char tmp_string[PICO_IPV6_STRING]; -#endif - if(!pico_tree_empty(&listen->MCASTSources)) { - pico_tree_foreach(index, &listen->MCASTSources) - { - if (src->ip4.addr == ((union pico_address *)index->keyValue)->ip4.addr) { - so_mcast_dbg("MCAST: IP %08X in included socket source list\n", src->ip4.addr); - return 0; - } - } - } - -#ifdef PICO_SUPPORT_IPV6 - if(!pico_tree_empty(&listen->MCASTSources_ipv6)) { - pico_tree_foreach(index, &listen->MCASTSources_ipv6) - { - if (memcmp(&src->ip6, &((union pico_address *)index->keyValue)->ip6, sizeof(struct pico_ip6))) { -#ifdef PICO_DEBUG_MCAST - pico_ipv6_to_string(tmp_string, src->ip6.addr); - so_mcast_dbg("MCAST: IP %s in included socket source list\n", tmp_string); -#endif - return 0; - } - } - } - -#endif - /* XXX IPV6 ADDRESS */ - so_mcast_dbg("MCAST: IP %08X NOT in included socket source list\n", src->ip4.addr); - return -1; - -} - -static int pico_socket_mcast_filter_exclude(struct pico_mcast_listen *listen, union pico_address *src) -{ - struct pico_tree_node *index = NULL; -#ifdef PICO_DEBUG_MCAST - char tmp_string[PICO_IPV6_STRING]; -#endif - if(!pico_tree_empty(&listen->MCASTSources)) { - pico_tree_foreach(index, &listen->MCASTSources) - { - if (src->ip4.addr == ((union pico_address *)index->keyValue)->ip4.addr) { - so_mcast_dbg("MCAST: IP %08X in excluded socket source list\n", src->ip4.addr); - return -1; - } - } - } - -#ifdef PICO_SUPPORT_IPV6 - if(!pico_tree_empty(&listen->MCASTSources_ipv6)) { - pico_tree_foreach(index, &listen->MCASTSources_ipv6) - { - if (memcmp(&src->ip6, &((union pico_address *)index->keyValue)->ip6, sizeof(struct pico_ip6))) { -#ifdef PICO_DEBUG_MCAST - pico_ipv6_to_string(tmp_string, src->ip6.addr); - so_mcast_dbg("MCAST: IP %s in excluded socket source list\n", tmp_string); -#endif - return 0; - } - } - } - -#endif - /* XXX IPV6 
ADDRESS */ - so_mcast_dbg("MCAST: IP %08X NOT in excluded socket source list\n", src->ip4.addr); - return 0; -} - -static int pico_socket_mcast_source_filtering(struct pico_mcast_listen *listen, union pico_address *src) -{ - /* perform source filtering */ - if (listen->filter_mode == PICO_IP_MULTICAST_INCLUDE) - return pico_socket_mcast_filter_include(listen, src); - - if (listen->filter_mode == PICO_IP_MULTICAST_EXCLUDE) - return pico_socket_mcast_filter_exclude(listen, src); - - return -1; -} - -static void *pico_socket_mcast_filter_link_get(struct pico_socket *s) -{ - /* check if no multicast enabled on socket */ - if (!s->MCASTListen) - return NULL; - - if( IS_SOCK_IPV4(s)) { - if (!s->local_addr.ip4.addr) - return pico_ipv4_get_default_mcastlink(); - - return pico_ipv4_link_get(&s->local_addr.ip4); - } - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) { - if (pico_ipv6_is_null_address(&s->local_addr.ip6)) - return pico_ipv6_get_default_mcastlink(); - - return pico_ipv6_link_get(&s->local_addr.ip6); - } -#endif - return NULL; -} - -int pico_socket_mcast_filter(struct pico_socket *s, union pico_address *mcast_group, union pico_address *src) -{ - void *mcast_link = NULL; - struct pico_mcast_listen *listen = NULL; - mcast_link = pico_socket_mcast_filter_link_get(s); - if (!mcast_link) - return -1; - - if(IS_SOCK_IPV4(s)) - listen = listen_find(s, (union pico_address *) &((struct pico_ipv4_link*)(mcast_link))->address, mcast_group); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) - listen = listen_find(s, (union pico_address *)&((struct pico_ipv6_link*)(mcast_link))->address, mcast_group); -#endif - if (!listen) - return -1; - - return pico_socket_mcast_source_filtering(listen, src); -} - - -static struct pico_ipv4_link *get_mcast_link(union pico_address *a) -{ - if (!a->ip4.addr) - return pico_ipv4_get_default_mcastlink(); - - return pico_ipv4_link_get(&a->ip4); -} -#ifdef PICO_SUPPORT_IPV6 -static struct pico_ipv6_link *get_mcast_link_ipv6(union 
pico_address *a) -{ - - if (pico_ipv6_is_null_address(&a->ip6)) { - return pico_ipv6_get_default_mcastlink(); - } - - return pico_ipv6_link_get(&a->ip6); -} -#endif - -static int pico_socket_setoption_pre_validation(struct pico_ip_mreq *mreq) -{ - if (!mreq) - return -1; - - if (!mreq->mcast_group_addr.ip4.addr) - return -1; - - return 0; -} -#ifdef PICO_SUPPORT_IPV6 -static int pico_socket_setoption_pre_validation_ipv6(struct pico_ip_mreq *mreq) -{ - if (!mreq) - return -1; - - if (pico_ipv6_is_null_address((struct pico_ip6*)&mreq->mcast_group_addr)) - return -1; - - return 0; -} -#endif - -static struct pico_ipv4_link *pico_socket_setoption_validate_mreq(struct pico_ip_mreq *mreq) -{ - if (pico_socket_setoption_pre_validation(mreq) < 0) - return NULL; - - if (pico_ipv4_is_unicast(mreq->mcast_group_addr.ip4.addr)) - return NULL; - - return get_mcast_link((union pico_address *)&mreq->mcast_link_addr); -} - -#ifdef PICO_SUPPORT_IPV6 -static struct pico_ipv6_link *pico_socket_setoption_validate_mreq_ipv6(struct pico_ip_mreq *mreq) -{ - if (pico_socket_setoption_pre_validation_ipv6(mreq) < 0) - return NULL; - - if (pico_ipv6_is_unicast((struct pico_ip6 *)&mreq->mcast_group_addr)) - return NULL; - - return get_mcast_link_ipv6((union pico_address *)&mreq->mcast_link_addr); -} -#endif - -static int pico_socket_setoption_pre_validation_s(struct pico_ip_mreq_source *mreq) -{ - if (!mreq) - return -1; - - if (!mreq->mcast_group_addr.ip4.addr) - return -1; - - return 0; -} -#ifdef PICO_SUPPORT_IPV6 -static int pico_socket_setoption_pre_validation_s_ipv6(struct pico_ip_mreq_source *mreq) -{ - if (!mreq) - return -1; - - if (pico_ipv6_is_null_address((struct pico_ip6 *)&mreq->mcast_group_addr)) - return -1; - - return 0; -} -#endif - -static struct pico_ipv4_link *pico_socket_setoption_validate_s_mreq(struct pico_ip_mreq_source *mreq) -{ - if (pico_socket_setoption_pre_validation_s(mreq) < 0) - return NULL; - - if (pico_ipv4_is_unicast(mreq->mcast_group_addr.ip4.addr)) - 
return NULL; - - if (!pico_ipv4_is_unicast(mreq->mcast_source_addr.ip4.addr)) - return NULL; - - return get_mcast_link((union pico_address *)&mreq->mcast_link_addr); -} -#ifdef PICO_SUPPORT_IPV6 -static struct pico_ipv6_link *pico_socket_setoption_validate_s_mreq_ipv6(struct pico_ip_mreq_source *mreq) -{ - if (pico_socket_setoption_pre_validation_s_ipv6(mreq) < 0) { - return NULL; - } - - if (pico_ipv6_is_unicast((struct pico_ip6 *)&mreq->mcast_group_addr)) { - return NULL; - } - - if (!pico_ipv6_is_unicast((struct pico_ip6 *)&mreq->mcast_source_addr)) { - return NULL; - } - - return get_mcast_link_ipv6(&mreq->mcast_link_addr); -} -#endif - -static struct pico_ipv4_link *setop_multicast_link_search(void *value, int bysource) -{ - - struct pico_ip_mreq *mreq = NULL; - struct pico_ipv4_link *mcast_link = NULL; - struct pico_ip_mreq_source *mreq_src = NULL; - if (!bysource) { - mreq = (struct pico_ip_mreq *)value; - mcast_link = pico_socket_setoption_validate_mreq(mreq); - if (!mcast_link) - return NULL; - - if (!mreq->mcast_link_addr.ip4.addr) - mreq->mcast_link_addr.ip4.addr = mcast_link->address.addr; - } else { - mreq_src = (struct pico_ip_mreq_source *)value; - if (!mreq_src) { - return NULL; - } - - mcast_link = pico_socket_setoption_validate_s_mreq(mreq_src); - if (!mcast_link) { - return NULL; - } - - if (!mreq_src->mcast_link_addr.ip4.addr) - mreq_src->mcast_link_addr.ip4 = mcast_link->address; - } - - return mcast_link; -} -#ifdef PICO_SUPPORT_IPV6 -static struct pico_ipv6_link *setop_multicast_link_search_ipv6(void *value, int bysource) -{ - struct pico_ip_mreq *mreq = NULL; - struct pico_ipv6_link *mcast_link = NULL; - struct pico_ip_mreq_source *mreq_src = NULL; - if (!bysource) { - mreq = (struct pico_ip_mreq *)value; - mcast_link = pico_socket_setoption_validate_mreq_ipv6(mreq); - if (!mcast_link) { - return NULL; - } - - if (pico_ipv6_is_null_address(&mreq->mcast_link_addr.ip6)) - mreq->mcast_link_addr.ip6 = mcast_link->address; - } else { - mreq_src = 
(struct pico_ip_mreq_source *)value; - if (!mreq_src) { - return NULL; - } - - mcast_link = pico_socket_setoption_validate_s_mreq_ipv6(mreq_src); - if (!mcast_link) { - return NULL; - } - - if (pico_ipv6_is_null_address(&mreq_src->mcast_link_addr.ip6)) - mreq_src->mcast_link_addr.ip6 = mcast_link->address; - } - - return mcast_link; -} -#endif -static int setop_verify_listen_tree(struct pico_socket *s, int alloc) -{ - if(!alloc) - return -1; - - if( IS_SOCK_IPV4(s)) { - - s->MCASTListen = PICO_ZALLOC(sizeof(struct pico_tree)); - if (!s->MCASTListen) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - s->MCASTListen->root = &LEAF; - s->MCASTListen->compare = mcast_listen_cmp; - return 0; - } - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) { - s->MCASTListen_ipv6 = PICO_ZALLOC(sizeof(struct pico_tree)); - if (!s->MCASTListen_ipv6) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - s->MCASTListen_ipv6->root = &LEAF; - s->MCASTListen_ipv6->compare = mcast_listen_cmp_ipv6; - return 0; - - } -#endif - return -1; -} - - -static void *setopt_multicast_check(struct pico_socket *s, void *value, int alloc, int bysource) -{ - void *mcast_link = NULL; - struct pico_tree *listen_tree = mcast_get_listen_tree(s); - if (!value) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - if(IS_SOCK_IPV4(s)) - mcast_link = setop_multicast_link_search(value, bysource); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) - mcast_link = setop_multicast_link_search_ipv6(value, bysource); -#endif - if (!mcast_link) { - pico_err = PICO_ERR_EINVAL; - return NULL; - } - - if (!listen_tree) { /* No RBTree allocated yet */ - if (setop_verify_listen_tree(s, alloc) < 0) { - return NULL; - } - } - - return mcast_link; -} - -void pico_multicast_delete(struct pico_socket *s) -{ - int filter_mode; - struct pico_tree_node *index = NULL, *_tmp = NULL, *index2 = NULL, *_tmp2 = NULL; - struct pico_mcast_listen *listen = NULL; - union pico_address *source = NULL; - struct pico_tree *tree, 
*listen_tree; - struct pico_mcast mcast; - listen_tree = mcast_get_listen_tree(s); - if(listen_tree) { - pico_tree_delete(&MCASTSockets, s); - pico_tree_foreach_safe(index, listen_tree, _tmp) - { - listen = index->keyValue; - mcast.listen = listen; - tree = mcast_get_src_tree(s, &mcast); - if (tree) { - pico_tree_foreach_safe(index2, tree, _tmp2) - { - source = index2->keyValue; - pico_tree_delete(tree, source); - PICO_FREE(source); - } - } - - filter_mode = pico_socket_aggregate_mcastfilters((union pico_address *)&listen->mcast_link, (union pico_address *)&listen->mcast_group); - if (filter_mode >= 0) { - if(IS_SOCK_IPV4(s)) - pico_ipv4_mcast_leave(&listen->mcast_link.ip4, &listen->mcast_group.ip4, 1, (uint8_t)filter_mode, &MCASTFilter); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) - pico_ipv6_mcast_leave(&listen->mcast_link.ip6, &listen->mcast_group.ip6, 1, (uint8_t)filter_mode, &MCASTFilter_ipv6); -#endif - } - - pico_tree_delete(listen_tree, listen); - PICO_FREE(listen); - } - PICO_FREE(listen_tree); - mcast_set_listen_tree_p_null(s); - } -} - - -int pico_getsockopt_mcast(struct pico_socket *s, int option, void *value) -{ - switch(option) { - case PICO_IP_MULTICAST_IF: - pico_err = PICO_ERR_EOPNOTSUPP; - return -1; - - case PICO_IP_MULTICAST_TTL: - if (s->proto->proto_number == PICO_PROTO_UDP) { - pico_udp_get_mc_ttl(s, (uint8_t *) value); - } else { - *(uint8_t *)value = 0; - pico_err = PICO_ERR_EINVAL; - return -1; - } - - break; - - case PICO_IP_MULTICAST_LOOP: - if (s->proto->proto_number == PICO_PROTO_UDP) { - *(uint8_t *)value = (uint8_t)PICO_SOCKET_GETOPT(s, PICO_SOCKET_OPT_MULTICAST_LOOP); - } else { - *(uint8_t *)value = 0; - pico_err = PICO_ERR_EINVAL; - return -1; - } - - break; - default: - pico_err = PICO_ERR_EINVAL; - return -1; - } - - return 0; -} - -static int mcast_so_loop(struct pico_socket *s, void *value) -{ - uint8_t val = (*(uint8_t *)value); - if (val == 0u) { - PICO_SOCKET_SETOPT_DIS(s, PICO_SOCKET_OPT_MULTICAST_LOOP); - 
return 0; - } else if (val == 1u) { - PICO_SOCKET_SETOPT_EN(s, PICO_SOCKET_OPT_MULTICAST_LOOP); - return 0; - } - - pico_err = PICO_ERR_EINVAL; - return -1; -} -static int mcast_get_param(struct pico_mcast *mcast, struct pico_socket *s, void *value, int alloc, int by_source) -{ - if(by_source) - mcast->mreq_s = (struct pico_ip_mreq_source *)value; - else - mcast->mreq = (struct pico_ip_mreq *)value; - - mcast->mcast_link = setopt_multicast_check(s, value, alloc, by_source); - if (!mcast->mcast_link) - return -1; - - mcast->address = pico_mcast_get_link_address(s, mcast->mcast_link); - if(by_source) - mcast->listen = listen_find(s, &(mcast->mreq_s)->mcast_link_addr, &mcast->mreq_s->mcast_group_addr); - else - mcast->listen = listen_find(s, &(mcast->mreq)->mcast_link_addr, &mcast->mreq->mcast_group_addr); - - return 0; -} -static int mcast_so_addm(struct pico_socket *s, void *value) -{ - int filter_mode = 0; - struct pico_mcast mcast; - struct pico_tree *tree, *listen_tree; - if(mcast_get_param(&mcast, s, value, 1, 0) < 0) - return -1; - - if (mcast.listen) { - if (mcast.listen->filter_mode != PICO_IP_MULTICAST_EXCLUDE) { - so_mcast_dbg("pico_socket_setoption: ERROR any-source multicast (exclude) on source-specific multicast (include)\n"); - } else { - so_mcast_dbg("pico_socket_setoption: ERROR duplicate PICO_IP_ADD_MEMBERSHIP\n"); - } - - pico_err = PICO_ERR_EINVAL; - return -1; - } - - mcast.listen = PICO_ZALLOC(sizeof(struct pico_mcast_listen)); - if (!mcast.listen) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - mcast.listen->filter_mode = PICO_IP_MULTICAST_EXCLUDE; - mcast.listen->mcast_link = mcast.mreq->mcast_link_addr; - mcast.listen->mcast_group = mcast.mreq->mcast_group_addr; - mcast.listen->proto = s->net->proto_number; - - tree = mcast_get_src_tree(s, &mcast); - listen_tree = mcast_get_listen_tree(s); -#ifdef PICO_SUPPORT_IPV6 - if( IS_SOCK_IPV6(s)) - mcast.listen->proto = PICO_PROTO_IPV6; - -#endif - tree->root = &LEAF; - if 
(pico_tree_insert(listen_tree, mcast.listen)) { - PICO_FREE(mcast.listen); - return -1; - } - - if (pico_tree_insert(&MCASTSockets, s) == &LEAF) { - pico_tree_delete(listen_tree, mcast.listen); - PICO_FREE(mcast.listen); - return -1; - } - - filter_mode = pico_socket_aggregate_mcastfilters(mcast.address, &mcast.mreq->mcast_group_addr); - if (filter_mode < 0) - return -1; - - so_mcast_dbg("PICO_IP_ADD_MEMBERSHIP - success, added %p\n", s); - if(IS_SOCK_IPV4(s)) - return pico_ipv4_mcast_join((struct pico_ip4*)&mcast.mreq->mcast_link_addr, (struct pico_ip4*) &mcast.mreq->mcast_group_addr, 1, (uint8_t)filter_mode, &MCASTFilter); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) { - return pico_ipv6_mcast_join((struct pico_ip6*)&mcast.mreq->mcast_link_addr, (struct pico_ip6*)&mcast.mreq->mcast_group_addr, 1, (uint8_t)filter_mode, &MCASTFilter_ipv6); - } -#endif - return -1; -} - -static int mcast_so_dropm(struct pico_socket *s, void *value) -{ - int filter_mode = 0; - union pico_address *source = NULL; - struct pico_tree_node *_tmp, *index; - struct pico_mcast mcast; - struct pico_tree *listen_tree, *tree; - if(mcast_get_param(&mcast, s, value, 0, 0) < 0) - return -1; - - if (!mcast.listen) { - so_mcast_dbg("pico_socket_setoption: ERROR PICO_IP_DROP_MEMBERSHIP before PICO_IP_ADD_MEMBERSHIP/SOURCE_MEMBERSHIP\n"); - pico_err = PICO_ERR_EADDRNOTAVAIL; - return -1; - } - - tree = mcast_get_src_tree(s, &mcast); - listen_tree = mcast_get_listen_tree(s); - - pico_tree_foreach_safe(index, tree, _tmp) - { - source = index->keyValue; - pico_tree_delete(tree, source); - } - pico_tree_delete(listen_tree, mcast.listen); - PICO_FREE(mcast.listen); - if (pico_tree_empty(listen_tree)) { - PICO_FREE(listen_tree); - mcast_set_listen_tree_p_null(s); - pico_tree_delete(&MCASTSockets, s); - } - - filter_mode = pico_socket_aggregate_mcastfilters(mcast.address, &mcast.mreq->mcast_group_addr); - if (filter_mode < 0) - return -1; - - if(IS_SOCK_IPV4(s)) - return 
pico_ipv4_mcast_leave((struct pico_ip4*) &mcast.mreq->mcast_link_addr, (struct pico_ip4 *) &mcast.mreq->mcast_group_addr, 1, (uint8_t)filter_mode, &MCASTFilter); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) { } - return pico_ipv6_mcast_leave((struct pico_ip6*)&mcast.mreq->mcast_link_addr, (struct pico_ip6*)&mcast.mreq->mcast_group_addr, 1, (uint8_t)filter_mode, &MCASTFilter_ipv6); -#endif - return -1; -} - -static int mcast_so_unblock_src(struct pico_socket *s, void *value) -{ - int filter_mode = 0; - union pico_address stest, *source = NULL; - struct pico_mcast mcast; - if(mcast_get_param(&mcast, s, value, 0, 1) < 0) - return -1; - - memset(&stest, 0, sizeof(union pico_address)); - if (!mcast.listen) { - so_mcast_dbg("pico_socket_setoption: ERROR PICO_IP_UNBLOCK_SOURCE before PICO_IP_ADD_MEMBERSHIP\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (mcast.listen->filter_mode != PICO_IP_MULTICAST_EXCLUDE) { - so_mcast_dbg("pico_socket_setoption: ERROR any-source multicast (exclude) on source-specific multicast (include)\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - stest = mcast.mreq_s->mcast_source_addr; - if( IS_SOCK_IPV4(s)) - source = pico_tree_findKey(&mcast.listen->MCASTSources, &stest); - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) - source = pico_tree_findKey(&mcast.listen->MCASTSources_ipv6, &stest); -#endif - if (!source) { - so_mcast_dbg("pico_socket_setoption: ERROR address to unblock not in source list\n"); - pico_err = PICO_ERR_EADDRNOTAVAIL; - return -1; - } - - if( IS_SOCK_IPV4(s)) - pico_tree_delete(&mcast.listen->MCASTSources, source); - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) - pico_tree_delete(&mcast.listen->MCASTSources_ipv6, source); -#endif - - filter_mode = pico_socket_aggregate_mcastfilters(mcast.address, &mcast.mreq_s->mcast_group_addr); - if (filter_mode < 0) - return -1; - - if(IS_SOCK_IPV4(s)) - return pico_ipv4_mcast_leave((struct pico_ip4 *)&mcast.mreq_s->mcast_link_addr, (struct 
pico_ip4*) &mcast.mreq_s->mcast_group_addr, 0, (uint8_t)filter_mode, &MCASTFilter); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) { } - return pico_ipv6_mcast_leave((struct pico_ip6*)&mcast.mreq_s->mcast_link_addr, (struct pico_ip6*)&mcast.mreq_s->mcast_group_addr, 0, (uint8_t)filter_mode, &MCASTFilter_ipv6); -#endif - return -1; -} - -static int mcast_so_block_src(struct pico_socket *s, void *value) -{ - int filter_mode = 0; - union pico_address stest, *source = NULL; - struct pico_mcast mcast; - if(mcast_get_param(&mcast, s, value, 0, 1) < 0) - return -1; - - memset(&stest, 0, sizeof(union pico_address)); - if (!mcast.listen) { - dbg("pico_socket_setoption: ERROR PICO_IP_BLOCK_SOURCE before PICO_IP_ADD_MEMBERSHIP\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - if (mcast.listen->filter_mode != PICO_IP_MULTICAST_EXCLUDE) { - so_mcast_dbg("pico_socket_setoption: ERROR any-source multicast (exclude) on source-specific multicast (include)\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - stest = mcast.mreq_s->mcast_source_addr; - if( IS_SOCK_IPV4(s)) - source = pico_tree_findKey(&mcast.listen->MCASTSources, &stest); - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) - source = pico_tree_findKey(&mcast.listen->MCASTSources_ipv6, &stest); -#endif - if (source) { - so_mcast_dbg("pico_socket_setoption: ERROR address to block already in source list\n"); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - source = PICO_ZALLOC(sizeof(union pico_address)); - if (!source) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - *source = mcast.mreq_s->mcast_source_addr; - if( IS_SOCK_IPV4(s)) { - if (pico_tree_insert(&mcast.listen->MCASTSources, source)) { - PICO_FREE(source); - return -1; - } - } - -#ifdef PICO_SUPPORT_IPV6 - else if( IS_SOCK_IPV6(s)) - if (pico_tree_insert(&mcast.listen->MCASTSources_ipv6, source)) { - PICO_FREE(source); - return -1; - } -#endif - - filter_mode = pico_socket_aggregate_mcastfilters(mcast.address, 
&mcast.mreq_s->mcast_group_addr); - if (filter_mode < 0) - return -1; - - if(IS_SOCK_IPV4(s)) - return pico_ipv4_mcast_join((struct pico_ip4 *) &mcast.mreq_s->mcast_link_addr, (struct pico_ip4*)&mcast.mreq_s->mcast_group_addr, 0, (uint8_t)filter_mode, &MCASTFilter); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) { } - return pico_ipv6_mcast_join((struct pico_ip6 *)&mcast.mreq_s->mcast_link_addr, (struct pico_ip6*)&mcast.mreq_s->mcast_group_addr, 0, (uint8_t)filter_mode, &MCASTFilter_ipv6); -#endif - return -1; -} - -static int mcast_so_addsrcm(struct pico_socket *s, void *value) -{ - int filter_mode = 0, reference_count = 0; - union pico_address stest, *source = NULL; - struct pico_mcast mcast; - struct pico_tree *tree, *listen_tree; - if(mcast_get_param(&mcast, s, value, 1, 1) < 0) - return -1; - - memset(&stest, 0, sizeof(union pico_address)); - listen_tree = mcast_get_listen_tree(s); - if (mcast.listen) { - tree = mcast_get_src_tree(s, &mcast); - if (mcast.listen->filter_mode != PICO_IP_MULTICAST_INCLUDE) { - so_mcast_dbg("pico_socket_setoption: ERROR source-specific multicast (include) on any-source multicast (exclude)\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - stest = mcast.mreq_s->mcast_source_addr; - source = pico_tree_findKey(tree, &stest); - if (source) { - so_mcast_dbg("pico_socket_setoption: ERROR source address to allow already in source list\n"); - pico_err = PICO_ERR_EADDRNOTAVAIL; - return -1; - } - - source = PICO_ZALLOC(sizeof(union pico_address)); - if (!source) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - *source = mcast.mreq_s->mcast_source_addr; - if (pico_tree_insert(tree, source)) { - PICO_FREE(source); - return -1; - } - - } else { - mcast.listen = PICO_ZALLOC(sizeof(struct pico_mcast_listen)); - if (!mcast.listen) { - pico_err = PICO_ERR_ENOMEM; - return -1; - } - - tree = mcast_get_src_tree(s, &mcast); - mcast.listen->filter_mode = PICO_IP_MULTICAST_INCLUDE; - mcast.listen->mcast_link = 
mcast.mreq_s->mcast_link_addr; - mcast.listen->mcast_group = mcast.mreq_s->mcast_group_addr; - tree->root = &LEAF; - source = PICO_ZALLOC(sizeof(union pico_address)); - if (!source) { - PICO_FREE(mcast.listen); - pico_err = PICO_ERR_ENOMEM; - return -1; - } - -#ifdef PICO_SUPPORT_IPV6 - if( IS_SOCK_IPV6(s)) - mcast.listen->proto = PICO_PROTO_IPV6; - -#endif - *source = mcast.mreq_s->mcast_source_addr; - if (pico_tree_insert(tree, source)) { - PICO_FREE(mcast.listen); - PICO_FREE(source); - return -1; - } - - if (pico_tree_insert(listen_tree, mcast.listen)) { - pico_tree_delete(tree, source); - PICO_FREE(source); - PICO_FREE(mcast.listen); - return -1; - } - reference_count = 1; - } - - if (pico_tree_insert(&MCASTSockets, s) == &LEAF) { - return -1; - } - - filter_mode = pico_socket_aggregate_mcastfilters(mcast.address, &mcast.mreq_s->mcast_group_addr); - if (filter_mode < 0) - return -1; - - if(IS_SOCK_IPV4(s)) - return pico_ipv4_mcast_join((struct pico_ip4 *)&mcast.mreq_s->mcast_link_addr, (struct pico_ip4*)&mcast.mreq_s->mcast_group_addr, (uint8_t)reference_count, (uint8_t)filter_mode, &MCASTFilter); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) { } - return pico_ipv6_mcast_join((struct pico_ip6 *) &mcast.mreq_s->mcast_link_addr, (struct pico_ip6*)&mcast.mreq_s->mcast_group_addr, (uint8_t)reference_count, (uint8_t)filter_mode, &MCASTFilter_ipv6); -#endif - return -1; -} - -static int mcast_so_dropsrcm(struct pico_socket *s, void *value) -{ - int filter_mode = 0, reference_count = 0; - union pico_address stest, *source = NULL; - struct pico_mcast mcast; - struct pico_tree *tree, *listen_tree; - if(mcast_get_param(&mcast, s, value, 0, 1) < 0) - return -1; - - memset(&stest, 0, sizeof(union pico_address)); - listen_tree = mcast_get_listen_tree(s); - if (!mcast.listen) { - so_mcast_dbg("pico_socket_setoption: ERROR PICO_IP_DROP_SOURCE_MEMBERSHIP before PICO_IP_ADD_SOURCE_MEMBERSHIP\n"); - pico_err = PICO_ERR_EADDRNOTAVAIL; - return -1; - } - - if 
(mcast.listen->filter_mode != PICO_IP_MULTICAST_INCLUDE) { - so_mcast_dbg("pico_socket_setoption: ERROR source-specific multicast (include) on any-source multicast (exclude)\n"); - pico_err = PICO_ERR_EINVAL; - return -1; - } - - tree = mcast_get_src_tree(s, &mcast); - stest = mcast.mreq_s->mcast_source_addr; - source = pico_tree_findKey(tree, &stest); - if (!source) { - so_mcast_dbg("pico_socket_setoption: ERROR address to drop not in source list\n"); - pico_err = PICO_ERR_EADDRNOTAVAIL; - return -1; - } - - pico_tree_delete(tree, source); - if (pico_tree_empty(tree)) { /* 1 if empty, 0 otherwise */ - reference_count = 1; - pico_tree_delete(listen_tree, mcast.listen); - PICO_FREE(mcast.listen); - if (pico_tree_empty(listen_tree)) { - PICO_FREE(listen_tree); - mcast_set_listen_tree_p_null(s); - pico_tree_delete(&MCASTSockets, s); - } - } - - filter_mode = pico_socket_aggregate_mcastfilters(mcast.address, &mcast.mreq_s->mcast_group_addr); - if (filter_mode < 0) - return -1; - - if(IS_SOCK_IPV4(s)) - return pico_ipv4_mcast_leave((struct pico_ip4 *) &mcast.mreq_s->mcast_link_addr, (struct pico_ip4*)&mcast.mreq_s->mcast_group_addr, (uint8_t)reference_count, (uint8_t)filter_mode, &MCASTFilter); - -#ifdef PICO_SUPPORT_IPV6 - else if(IS_SOCK_IPV6(s)) { } - return pico_ipv6_mcast_leave((struct pico_ip6 *)&mcast.mreq_s->mcast_link_addr, (struct pico_ip6*)&mcast.mreq_s->mcast_group_addr, (uint8_t)reference_count, (uint8_t)filter_mode, &MCASTFilter_ipv6); -#endif - return -1; -} - - -struct pico_setsockopt_mcast_call -{ - int option; - int (*call)(struct pico_socket *, void *); -}; - -static const struct pico_setsockopt_mcast_call mcast_so_calls[1 + PICO_IP_DROP_SOURCE_MEMBERSHIP - PICO_IP_MULTICAST_IF] = -{ - { PICO_IP_MULTICAST_IF, NULL }, - { PICO_IP_MULTICAST_TTL, pico_udp_set_mc_ttl }, - { PICO_IP_MULTICAST_LOOP, mcast_so_loop }, - { PICO_IP_ADD_MEMBERSHIP, mcast_so_addm }, - { PICO_IP_DROP_MEMBERSHIP, mcast_so_dropm }, - { PICO_IP_UNBLOCK_SOURCE, mcast_so_unblock_src }, 
- { PICO_IP_BLOCK_SOURCE, mcast_so_block_src }, - { PICO_IP_ADD_SOURCE_MEMBERSHIP, mcast_so_addsrcm }, - { PICO_IP_DROP_SOURCE_MEMBERSHIP, mcast_so_dropsrcm } -}; - - -static int mcast_so_check_socket(struct pico_socket *s) -{ - pico_err = PICO_ERR_EINVAL; - if (!s) - return -1; - - if (!s->proto) - return -1; - - if (s->proto->proto_number != PICO_PROTO_UDP) - return -1; - - pico_err = PICO_ERR_NOERR; - return 0; -} - -int pico_setsockopt_mcast(struct pico_socket *s, int option, void *value) -{ - int arrayn = option - PICO_IP_MULTICAST_IF; - if (option < PICO_IP_MULTICAST_IF || option > PICO_IP_DROP_SOURCE_MEMBERSHIP) { - pico_err = PICO_ERR_EOPNOTSUPP; - return -1; - } - - if (mcast_so_check_socket(s) < 0) - return -1; - - if (!mcast_so_calls[arrayn].call) { - pico_err = PICO_ERR_EOPNOTSUPP; - return -1; - } - - return (mcast_so_calls[arrayn].call(s, value)); -} - -int pico_udp_set_mc_ttl(struct pico_socket *s, void *_ttl) -{ - struct pico_socket_udp *u; - uint8_t ttl = *(uint8_t *)_ttl; - if(!s) { - pico_err = PICO_ERR_EINVAL; - return -1; - } - - u = (struct pico_socket_udp *) s; - u->mc_ttl = ttl; - return 0; -} - -int pico_udp_get_mc_ttl(struct pico_socket *s, uint8_t *ttl) -{ - struct pico_socket_udp *u; - if(!s) - return -1; - - u = (struct pico_socket_udp *) s; - *ttl = u->mc_ttl; - return 0; -} -#else -int pico_udp_set_mc_ttl(struct pico_socket *s, void *_ttl) -{ - IGNORE_PARAMETER(s); - IGNORE_PARAMETER(_ttl); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -int pico_udp_get_mc_ttl(struct pico_socket *s, uint8_t *ttl) -{ - IGNORE_PARAMETER(s); - IGNORE_PARAMETER(ttl); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -int pico_socket_mcast_filter(struct pico_socket *s, union pico_address *mcast_group, union pico_address *src) -{ - IGNORE_PARAMETER(s); - IGNORE_PARAMETER(mcast_group); - IGNORE_PARAMETER(src); - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -void pico_multicast_delete(struct pico_socket *s) -{ - (void)s; -} - -int 
pico_getsockopt_mcast(struct pico_socket *s, int option, void *value) -{ - (void)s; - (void)option; - (void)value; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; -} - -int pico_setsockopt_mcast(struct pico_socket *s, int option, void *value) -{ - (void)s; - (void)option; - (void)value; - pico_err = PICO_ERR_EPROTONOSUPPORT; - return -1; - -} -#endif /* PICO_SUPPORT_MCAST */ - diff --git a/kernel/picotcp/stack/pico_stack.c b/kernel/picotcp/stack/pico_stack.c deleted file mode 100644 index 007cd40..0000000 --- a/kernel/picotcp/stack/pico_stack.c +++ /dev/null @@ -1,962 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - . - - Authors: Daniele Lacamera - *********************************************************************/ - - -#include "pico_config.h" -#include "pico_frame.h" -#include "pico_device.h" -#include "pico_protocol.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_dns_client.h" - -#include "pico_6lowpan_ll.h" -#include "pico_ethernet.h" -#include "pico_6lowpan.h" -#include "pico_olsr.h" -#include "pico_aodv.h" -#include "pico_eth.h" -#include "pico_arp.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_icmp4.h" -#include "pico_icmp6.h" -#include "pico_igmp.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_socket.h" -#include "heap.h" - -/* Mockables */ -#if defined UNIT_TEST -# define MOCKABLE __attribute__((weak)) -#else -# define MOCKABLE -#endif - - -volatile pico_time pico_tick; -volatile pico_err_t pico_err; - -static uint32_t _rand_seed; - -void WEAK pico_rand_feed(uint32_t feed) -{ - if (!feed) - return; - - _rand_seed *= 1664525; - _rand_seed += 1013904223; - _rand_seed ^= ~(feed); -} - -uint32_t WEAK pico_rand(void) -{ - pico_rand_feed((uint32_t)pico_tick); - return _rand_seed; -} - -void pico_to_lowercase(char 
*str) -{ - int i = 0; - if (!str) - return; - - while(str[i]) { - if ((str[i] <= 'Z') && (str[i] >= 'A')) - str[i] = (char) (str[i] - (char)('A' - 'a')); - - i++; - } -} - -/* NOTIFICATIONS: distributed notifications for stack internal errors. - */ - -int pico_notify_socket_unreachable(struct pico_frame *f) -{ - if (0) {} - -#ifdef PICO_SUPPORT_ICMP4 - else if (IS_IPV4(f)) { - pico_icmp4_port_unreachable(f); - } -#endif -#ifdef PICO_SUPPORT_ICMP6 - else if (IS_IPV6(f)) { - pico_icmp6_port_unreachable(f); - } -#endif - - return 0; -} - -int pico_notify_proto_unreachable(struct pico_frame *f) -{ - if (0) {} - -#ifdef PICO_SUPPORT_ICMP4 - else if (IS_IPV4(f)) { - pico_icmp4_proto_unreachable(f); - } -#endif -#ifdef PICO_SUPPORT_ICMP6 - else if (IS_IPV6(f)) { - pico_icmp6_proto_unreachable(f); - } -#endif - return 0; -} - -int pico_notify_dest_unreachable(struct pico_frame *f) -{ - if (0) {} - -#ifdef PICO_SUPPORT_ICMP4 - else if (IS_IPV4(f)) { - pico_icmp4_dest_unreachable(f); - } -#endif -#ifdef PICO_SUPPORT_ICMP6 - else if (IS_IPV6(f)) { - pico_icmp6_dest_unreachable(f); - } -#endif - return 0; -} - -int pico_notify_ttl_expired(struct pico_frame *f) -{ - if (0) {} - -#ifdef PICO_SUPPORT_ICMP4 - else if (IS_IPV4(f)) { - pico_icmp4_ttl_expired(f); - } -#endif -#ifdef PICO_SUPPORT_ICMP6 - else if (IS_IPV6(f)) { - pico_icmp6_ttl_expired(f); - } -#endif - return 0; -} - -int pico_notify_frag_expired(struct pico_frame *f) -{ - if (0) {} - -#ifdef PICO_SUPPORT_ICMP4 - else if (IS_IPV4(f)) { - pico_icmp4_frag_expired(f); - } -#endif -#ifdef PICO_SUPPORT_ICMP6 - else if (IS_IPV6(f)) { - pico_icmp6_frag_expired(f); - } -#endif - return 0; -} - -int pico_notify_pkt_too_big(struct pico_frame *f) -{ - if (0) {} - -#ifdef PICO_SUPPORT_ICMP4 - else if (IS_IPV4(f)) { - pico_icmp4_mtu_exceeded(f); - } -#endif -#ifdef PICO_SUPPORT_ICMP6 - else if (IS_IPV6(f)) { - pico_icmp6_pkt_too_big(f); - } -#endif - return 0; -} - 
-/******************************************************************************* - * TRANSPORT LAYER - ******************************************************************************/ - -MOCKABLE int32_t pico_transport_receive(struct pico_frame *f, uint8_t proto) -{ - int32_t ret = -1; - switch (proto) { - -#ifdef PICO_SUPPORT_ICMP4 - case PICO_PROTO_ICMP4: - ret = pico_enqueue(pico_proto_icmp4.q_in, f); - break; -#endif - -#ifdef PICO_SUPPORT_ICMP6 - case PICO_PROTO_ICMP6: - ret = pico_enqueue(pico_proto_icmp6.q_in, f); - break; -#endif - - -#if defined(PICO_SUPPORT_IGMP) && defined(PICO_SUPPORT_MCAST) - case PICO_PROTO_IGMP: - ret = pico_enqueue(pico_proto_igmp.q_in, f); - break; -#endif - -#ifdef PICO_SUPPORT_UDP - case PICO_PROTO_UDP: - ret = pico_enqueue(pico_proto_udp.q_in, f); - break; -#endif - -#ifdef PICO_SUPPORT_TCP - case PICO_PROTO_TCP: - ret = pico_enqueue(pico_proto_tcp.q_in, f); - break; -#endif - - default: - /* Protocol not available */ - dbg("pkt: no such protocol (%d)\n", proto); - pico_notify_proto_unreachable(f); - pico_frame_discard(f); - ret = -1; - } - return ret; -} - -/******************************************************************************* - * NETWORK LAYER - ******************************************************************************/ - -MOCKABLE int32_t pico_network_receive(struct pico_frame *f) -{ - if (0) {} - -#ifdef PICO_SUPPORT_IPV4 - else if (IS_IPV4(f)) { - pico_enqueue(pico_proto_ipv4.q_in, f); - } -#endif -#ifdef PICO_SUPPORT_IPV6 - else if (IS_IPV6(f)) { - pico_enqueue(pico_proto_ipv6.q_in, f); - } -#endif - else { - dbg("Network not found.\n"); - pico_frame_discard(f); - return -1; - } - return (int32_t)f->buffer_len; -} - -/// Interface towards socket for frame sending -int32_t pico_network_send(struct pico_frame *f) -{ - if (!f || !f->sock || !f->sock->net) { - pico_frame_discard(f); - return -1; - } - - return f->sock->net->push(f->sock->net, f); -} - -int pico_source_is_local(struct pico_frame *f) -{ - if (0) { 
} - -#ifdef PICO_SUPPORT_IPV4 - else if (IS_IPV4(f)) { - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *)f->net_hdr; - if (hdr->src.addr == PICO_IPV4_INADDR_ANY) - return 1; - - if (pico_ipv4_link_find(&hdr->src)) - return 1; - } -#endif -#ifdef PICO_SUPPORT_IPV6 - else if (IS_IPV6(f)) { - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_unspecified(hdr->src.addr) || pico_ipv6_link_find(&hdr->src)) - return 1; - } -#endif - return 0; -} - -void pico_store_network_origin(void *src, struct pico_frame *f) -{ - #ifdef PICO_SUPPORT_IPV4 - struct pico_ip4 *ip4; - #endif - - #ifdef PICO_SUPPORT_IPV6 - struct pico_ip6 *ip6; - #endif - - #ifdef PICO_SUPPORT_IPV4 - if (IS_IPV4(f)) { - struct pico_ipv4_hdr *hdr; - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - ip4 = (struct pico_ip4 *) src; - ip4->addr = hdr->src.addr; - } - - #endif - #ifdef PICO_SUPPORT_IPV6 - if (IS_IPV6(f)) { - struct pico_ipv6_hdr *hdr; - hdr = (struct pico_ipv6_hdr *) f->net_hdr; - ip6 = (struct pico_ip6 *) src; - memcpy(ip6->addr, hdr->src.addr, PICO_SIZE_IP6); - } - - #endif -} - -int pico_address_compare(union pico_address *a, union pico_address *b, uint16_t proto) -{ - #ifdef PICO_SUPPORT_IPV6 - if (proto == PICO_PROTO_IPV6) { - return pico_ipv6_compare(&a->ip6, &b->ip6); - } - - #endif - #ifdef PICO_SUPPORT_IPV4 - if (proto == PICO_PROTO_IPV4) { - return pico_ipv4_compare(&a->ip4, &b->ip4); - } - - #endif - return 0; - -} - -int pico_frame_dst_is_unicast(struct pico_frame *f) -{ - if (0) { - return 0; - } - -#ifdef PICO_SUPPORT_IPV4 - if (IS_IPV4(f)) { - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *)f->net_hdr; - if (pico_ipv4_is_multicast(hdr->dst.addr) || pico_ipv4_is_broadcast(hdr->dst.addr)) - return 0; - - return 1; - } - -#endif - -#ifdef PICO_SUPPORT_IPV6 - if (IS_IPV6(f)) { - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->net_hdr; - if (pico_ipv6_is_multicast(hdr->dst.addr) || pico_ipv6_is_unspecified(hdr->dst.addr)) - return 0; - - return 
1; - } - -#endif - else return 0; -} - -/******************************************************************************* - * DATALINK LAYER - ******************************************************************************/ - -int pico_datalink_receive(struct pico_frame *f) -{ - if (f->dev->eth) { - /* If device has stack with datalink-layer pass frame through it */ - switch (f->dev->mode) { - #ifdef PICO_SUPPORT_802154 - case LL_MODE_IEEE802154: - f->datalink_hdr = f->buffer; - return pico_enqueue(pico_proto_6lowpan_ll.q_in, f); - #endif - default: - #ifdef PICO_SUPPORT_ETH - f->datalink_hdr = f->buffer; - return pico_enqueue(pico_proto_ethernet.q_in,f); - #else - return -1; - #endif - } - } else { - /* If device handles raw IP-frames send it straight to network-layer */ - f->net_hdr = f->buffer; - pico_network_receive(f); - } - - return 0; -} - -MOCKABLE int pico_datalink_send(struct pico_frame *f) -{ - if (f->dev->eth) { - switch (f->dev->mode) { - #ifdef PICO_SUPPORT_802154 - case LL_MODE_IEEE802154: - return pico_enqueue(pico_proto_6lowpan.q_out, f); - #endif - default: - #ifdef PICO_SUPPORT_ETH - return pico_enqueue(pico_proto_ethernet.q_out, f); - #else - return -1; - #endif - } - } else { - /* non-ethernet: no post-processing needed */ - return pico_sendto_dev(f); - } -} - -/******************************************************************************* - * PHYSICAL LAYER - ******************************************************************************/ - -struct pico_frame *pico_stack_recv_new_frame(struct pico_device *dev, uint8_t *buffer, uint32_t len) -{ - struct pico_frame *f; - if (len == 0) - return NULL; - - f = pico_frame_alloc(len); - if (!f) - { - dbg("Cannot alloc incoming frame!\n"); - return NULL; - } - - /* Association to the device that just received the frame. */ - f->dev = dev; - - /* Setup the start pointer, length. 
*/ - f->start = f->buffer; - f->len = f->buffer_len; - if (f->len > 8) { - uint32_t rand, mid_frame = (f->buffer_len >> 2) << 1; - mid_frame -= (mid_frame % 4); - memcpy(&rand, f->buffer + mid_frame, sizeof(uint32_t)); - pico_rand_feed(rand); - } - - memcpy(f->buffer, buffer, len); - return f; -} - -/* LOWEST LEVEL: interface towards devices. */ -/* Device driver will call this function which returns immediately. - * Incoming packet will be processed later on in the dev loop. - */ -int32_t pico_stack_recv(struct pico_device *dev, uint8_t *buffer, uint32_t len) -{ - struct pico_frame *f = pico_stack_recv_new_frame (dev, buffer, len); - int32_t ret; - - if (!f) - return -1; - - ret = pico_enqueue(dev->q_in, f); - if (ret <= 0) { - pico_frame_discard(f); - } - return ret; -} - -static int32_t _pico_stack_recv_zerocopy(struct pico_device *dev, uint8_t *buffer, uint32_t len, int ext_buffer, void (*notify_free)(uint8_t *)) -{ - struct pico_frame *f; - int ret; - if (len == 0) - return -1; - - f = pico_frame_alloc_skeleton(len, ext_buffer); - if (!f) - { - dbg("Cannot alloc incoming frame!\n"); - return -1; - } - - if (pico_frame_skeleton_set_buffer(f, buffer) < 0) - { - dbg("Invalid zero-copy buffer!\n"); - PICO_FREE(f->usage_count); - PICO_FREE(f); - return -1; - } - - if (notify_free) { - f->notify_free = notify_free; - } - - f->dev = dev; - ret = pico_enqueue(dev->q_in, f); - if (ret <= 0) { - pico_frame_discard(f); - } - - return ret; -} - -int32_t pico_stack_recv_zerocopy(struct pico_device *dev, uint8_t *buffer, uint32_t len) -{ - return _pico_stack_recv_zerocopy(dev, buffer, len, 0, NULL); -} - -int32_t pico_stack_recv_zerocopy_ext_buffer(struct pico_device *dev, uint8_t *buffer, uint32_t len) -{ - return _pico_stack_recv_zerocopy(dev, buffer, len, 1, NULL); -} - -int32_t pico_stack_recv_zerocopy_ext_buffer_notify(struct pico_device *dev, uint8_t *buffer, uint32_t len, void (*notify_free)(uint8_t *buffer)) -{ - return _pico_stack_recv_zerocopy(dev, buffer, len, 1, 
notify_free); -} - -int32_t pico_sendto_dev(struct pico_frame *f) -{ - if (!f->dev) { - pico_frame_discard(f); - return -1; - } else { - if (f->len > 8) { - uint32_t rand, mid_frame = (f->buffer_len >> 2) << 1; - mid_frame -= (mid_frame % 4); - memcpy(&rand, f->buffer + mid_frame, sizeof(uint32_t)); - pico_rand_feed(rand); - } - - return pico_enqueue(f->dev->q_out, f); - } -} - -struct pico_timer -{ - void *arg; - void (*timer)(pico_time timestamp, void *arg); -}; - - -static uint32_t tmr_id = 0u; -struct pico_timer_ref -{ - pico_time expire; - uint32_t id; - uint32_t hash; - struct pico_timer *tmr; -}; - -typedef struct pico_timer_ref pico_timer_ref; - -DECLARE_HEAP(pico_timer_ref, expire); - -static heap_pico_timer_ref *Timers; - -int32_t pico_seq_compare(uint32_t a, uint32_t b) -{ - uint32_t thresh = ((uint32_t)(-1)) >> 1; - - if (a > b) /* return positive number, if not wrapped */ - { - if ((a - b) > thresh) /* b wrapped */ - return -(int32_t)(b - a); /* b = very small, a = very big */ - else - return (int32_t)(a - b); /* a = biggest, b = a bit smaller */ - - } - - if (a < b) /* return negative number, if not wrapped */ - { - if ((b - a) > thresh) /* a wrapped */ - return (int32_t)(a - b); /* a = very small, b = very big */ - else - return -(int32_t)(b - a); /* b = biggest, a = a bit smaller */ - - } - - return 0; -} - -static void pico_check_timers(void) -{ - struct pico_timer *t; - struct pico_timer_ref tref_unused, *tref = heap_first(Timers); - pico_tick = PICO_TIME_MS(); - while((tref) && (tref->expire < pico_tick)) { - t = tref->tmr; - if (t && t->timer) - t->timer(pico_tick, t->arg); - - if (t) - { - PICO_FREE(t); - } - - heap_peek(Timers, &tref_unused); - tref = heap_first(Timers); - } -} - -void MOCKABLE pico_timer_cancel(uint32_t id) -{ - uint32_t i; - struct pico_timer_ref *tref; - if (id == 0u) - return; - - for (i = 1; i <= Timers->n; i++) { - tref = heap_get_element(Timers, i); - if (tref->id == id) { - if (tref->tmr) - { - PICO_FREE(tref->tmr); - 
tref->tmr = NULL; - tref->id = 0; - } - break; - } - } -} - -void pico_timer_cancel_hashed(uint32_t hash) -{ - uint32_t i; - struct pico_timer_ref *tref; - if (hash == 0u) - return; - - for (i = 1; i <= Timers->n; i++) { - tref = heap_get_element(Timers, i); - if (tref->hash == hash) { - if (tref->tmr) - { - PICO_FREE(tref->tmr); - tref->tmr = NULL; - tref[i].id = 0; - } - } - } -} - -#define PROTO_DEF_NR 11 -#define PROTO_DEF_AVG_NR 4 -#define PROTO_DEF_SCORE 32 -#define PROTO_MIN_SCORE 32 -#define PROTO_MAX_SCORE 128 -#define PROTO_LAT_IND 3 /* latency indication 0-3 (lower is better latency performance), x1, x2, x4, x8 */ -#define PROTO_MAX_LOOP (PROTO_MAX_SCORE << PROTO_LAT_IND) /* max global loop score, so per tick */ - -static int calc_score(int *score, int *index, int avg[][PROTO_DEF_AVG_NR], int *ret) -{ - int temp, i, j, sum; - int max_total = PROTO_MAX_LOOP, total = 0; - - /* dbg("USED SCORES> "); */ - - for (i = 0; i < PROTO_DEF_NR; i++) { - - /* if used looped score */ - if (ret[i] < score[i]) { - temp = score[i] - ret[i]; /* remaining loop score */ - - /* dbg("%3d - ",temp); */ - - if (index[i] >= PROTO_DEF_AVG_NR) - index[i] = 0; /* reset index */ - - j = index[i]; - avg[i][j] = temp; - - index[i]++; - - if (ret[i] == 0 && ((score[i] * 2) <= PROTO_MAX_SCORE) && ((total + (score[i] * 2)) < max_total)) { /* used all loop score -> increase next score directly */ - score[i] *= 2; - total += score[i]; - continue; - } - - sum = 0; - for (j = 0; j < PROTO_DEF_AVG_NR; j++) - sum += avg[i][j]; /* calculate sum */ - - sum /= 4; /* divide by 4 to get average used score */ - - /* criterion to increase next loop score */ - if (sum > (score[i] - (score[i] / 4)) && ((score[i] * 2) <= PROTO_MAX_SCORE) && ((total + (score[i] / 2)) < max_total)) { /* > 3/4 */ - score[i] *= 2; /* double loop score */ - total += score[i]; - continue; - } - - /* criterion to decrease next loop score */ - if ((sum < (score[i] / 4)) && ((score[i] / 2) >= PROTO_MIN_SCORE)) { /* < 1/4 */ - 
score[i] /= 2; /* half loop score */ - total += score[i]; - continue; - } - - /* also add non-changed scores */ - total += score[i]; - } - else if (ret[i] == score[i]) { - /* no used loop score - gradually decrease */ - - /* dbg("%3d - ",0); */ - - if (index[i] >= PROTO_DEF_AVG_NR) - index[i] = 0; /* reset index */ - - j = index[i]; - avg[i][j] = 0; - - index[i]++; - - sum = 0; - for (j = 0; j < PROTO_DEF_AVG_NR; j++) - sum += avg[i][j]; /* calculate sum */ - - sum /= 2; /* divide by 4 to get average used score */ - - if ((sum == 0) && ((score[i] / 2) >= PROTO_MIN_SCORE)) { - score[i] /= 2; /* half loop score */ - total += score[i]; - for (j = 0; j < PROTO_DEF_AVG_NR; j++) - avg[i][j] = score[i]; - } - - } - } - /* dbg("\n"); */ - - return 0; -} - -void pico_stack_tick(void) -{ - static int score[PROTO_DEF_NR] = { - PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE, PROTO_DEF_SCORE - }; - static int index[PROTO_DEF_NR] = { - 0, 0, 0, 0, 0, 0 - }; - static int avg[PROTO_DEF_NR][PROTO_DEF_AVG_NR]; - static int ret[PROTO_DEF_NR] = { - 0 - }; - - pico_check_timers(); - - /* dbg("LOOP_SCORES> %3d - %3d - %3d - %3d - %3d - %3d - %3d - %3d - %3d - %3d - %3d\n",score[0],score[1],score[2],score[3],score[4],score[5],score[6],score[7],score[8],score[9],score[10]); */ - - /* score = pico_protocols_loop(100); */ - - ret[0] = pico_devices_loop(score[0], PICO_LOOP_DIR_IN); - pico_rand_feed((uint32_t)ret[0]); - - ret[1] = pico_protocol_datalink_loop(score[1], PICO_LOOP_DIR_IN); - pico_rand_feed((uint32_t)ret[1]); - - ret[2] = pico_protocol_network_loop(score[2], PICO_LOOP_DIR_IN); - pico_rand_feed((uint32_t)ret[2]); - - ret[3] = pico_protocol_transport_loop(score[3], PICO_LOOP_DIR_IN); - pico_rand_feed((uint32_t)ret[3]); - - - ret[5] = score[5]; -#if defined (PICO_SUPPORT_IPV4) || defined (PICO_SUPPORT_IPV6) -#if defined (PICO_SUPPORT_TCP) || defined 
(PICO_SUPPORT_UDP) - ret[5] = pico_sockets_loop(score[5]); /* swapped */ - pico_rand_feed((uint32_t)ret[5]); -#endif -#endif - - ret[4] = pico_protocol_socket_loop(score[4], PICO_LOOP_DIR_IN); - pico_rand_feed((uint32_t)ret[4]); - - - ret[6] = pico_protocol_socket_loop(score[6], PICO_LOOP_DIR_OUT); - pico_rand_feed((uint32_t)ret[6]); - - ret[7] = pico_protocol_transport_loop(score[7], PICO_LOOP_DIR_OUT); - pico_rand_feed((uint32_t)ret[7]); - - ret[8] = pico_protocol_network_loop(score[8], PICO_LOOP_DIR_OUT); - pico_rand_feed((uint32_t)ret[8]); - - ret[9] = pico_protocol_datalink_loop(score[9], PICO_LOOP_DIR_OUT); - pico_rand_feed((uint32_t)ret[9]); - - ret[10] = pico_devices_loop(score[10], PICO_LOOP_DIR_OUT); - pico_rand_feed((uint32_t)ret[10]); - - /* calculate new loop scores for next iteration */ - calc_score(score, index, (int (*)[])avg, ret); -} - -void pico_stack_loop(void) -{ - while(1) { - pico_stack_tick(); - PICO_IDLE(); - } -} - -static uint32_t -pico_timer_ref_add(pico_time expire, struct pico_timer *t, uint32_t id, uint32_t hash) -{ - struct pico_timer_ref tref; - - tref.expire = PICO_TIME_MS() + expire; - tref.tmr = t; - tref.id = id; - tref.hash = hash; - - if (heap_insert(Timers, &tref) < 0) { - dbg("Error: failed to insert timer(ID %u) into heap\n", id); - PICO_FREE(t); - pico_err = PICO_ERR_ENOMEM; - return 0; - } - if (Timers->n > PICO_MAX_TIMERS) { - dbg("Warning: I have %d timers\n", (int)Timers->n); - } - - return tref.id; -} - -static struct pico_timer * -pico_timer_create(void (*timer)(pico_time, void *), void *arg) -{ - struct pico_timer *t = PICO_ZALLOC(sizeof(struct pico_timer)); - - if (!t) { - pico_err = PICO_ERR_ENOMEM; - return NULL; - } - - t->arg = arg; - t->timer = timer; - - return t; -} - -MOCKABLE uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - struct pico_timer *t = pico_timer_create(timer, arg); - - /* zero is guard for timers */ - if (tmr_id == 0u) { - tmr_id++; - } - - if (!t) - 
return 0; - - return pico_timer_ref_add(expire, t, tmr_id++, 0); -} - -uint32_t pico_timer_add_hashed(pico_time expire, void (*timer)(pico_time, void *), void *arg, uint32_t hash) -{ - struct pico_timer *t = pico_timer_create(timer, arg); - - /* zero is guard for timers */ - if (tmr_id == 0u) { - tmr_id++; - } - - if (!t) - return 0; - - return pico_timer_ref_add(expire, t, tmr_id++, hash); -} /* Static path count: 4 */ - -int MOCKABLE pico_stack_init(void) -{ -#ifdef PICO_SUPPORT_ETH - pico_protocol_init(&pico_proto_ethernet); -#endif - -#ifdef PICO_SUPPORT_6LOWPAN - pico_protocol_init(&pico_proto_6lowpan); - pico_protocol_init(&pico_proto_6lowpan_ll); -#endif - -#ifdef PICO_SUPPORT_IPV4 - pico_protocol_init(&pico_proto_ipv4); -#endif - -#ifdef PICO_SUPPORT_IPV6 - pico_protocol_init(&pico_proto_ipv6); -#endif - -#ifdef PICO_SUPPORT_ICMP4 - pico_protocol_init(&pico_proto_icmp4); -#endif - -#ifdef PICO_SUPPORT_ICMP6 - pico_protocol_init(&pico_proto_icmp6); -#endif - -#if defined(PICO_SUPPORT_IGMP) && defined(PICO_SUPPORT_MCAST) - pico_protocol_init(&pico_proto_igmp); -#endif - -#ifdef PICO_SUPPORT_UDP - pico_protocol_init(&pico_proto_udp); -#endif - -#ifdef PICO_SUPPORT_TCP - pico_protocol_init(&pico_proto_tcp); -#endif - -#ifdef PICO_SUPPORT_DNS_CLIENT - pico_dns_client_init(); -#endif - - pico_rand_feed(123456); - - /* Initialize timer heap */ - Timers = heap_init(); - if (!Timers) - return -1; - -#if ((defined PICO_SUPPORT_IPV4) && (defined PICO_SUPPORT_ETH)) - /* Initialize ARP module */ - pico_arp_init(); -#endif - -#ifdef PICO_SUPPORT_IPV6 - /* Initialize Neighbor discovery module */ - pico_ipv6_nd_init(); -#endif - -#ifdef PICO_SUPPORT_OLSR - pico_olsr_init(); -#endif -#ifdef PICO_SUPPORT_AODV - pico_aodv_init(); -#endif -#ifdef PICO_SUPPORT_6LOWPAN - if (pico_6lowpan_init()) - return -1; -#endif - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - return 0; -} - diff --git a/kernel/picotcp/stack/pico_tree.c b/kernel/picotcp/stack/pico_tree.c 
deleted file mode 100644 index 8955696..0000000 --- a/kernel/picotcp/stack/pico_tree.c +++ /dev/null @@ -1,565 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - - Author: Andrei Carp - *********************************************************************/ - -#include "pico_tree.h" -#include "pico_config.h" -#include "pico_protocol.h" -#include "pico_mm.h" - -#define RED 0 -#define BLACK 1 - -/* By default the null leafs are black */ -struct pico_tree_node LEAF = { - NULL, /* key */ - &LEAF, &LEAF, &LEAF, /* parent, left,right */ - BLACK, /* color */ -}; - -#define IS_LEAF(x) (x == &LEAF) -#define IS_NOT_LEAF(x) (x != &LEAF) -#define INIT_LEAF (&LEAF) - -#define AM_I_LEFT_CHILD(x) (x == x->parent->leftChild) -#define AM_I_RIGHT_CHILD(x) (x == x->parent->rightChild) - -#define PARENT(x) (x->parent) -#define GRANPA(x) (x->parent->parent) - -/* - * Local Functions - */ -static struct pico_tree_node *create_node(struct pico_tree *tree, void *key, uint8_t allocator); -static void rotateToLeft(struct pico_tree*tree, struct pico_tree_node*node); -static void rotateToRight(struct pico_tree*root, struct pico_tree_node*node); -static void fix_insert_collisions(struct pico_tree*tree, struct pico_tree_node*node); -static void fix_delete_collisions(struct pico_tree*tree, struct pico_tree_node *node); -static void switchNodes(struct pico_tree*tree, struct pico_tree_node*nodeA, struct pico_tree_node*nodeB); -void *pico_tree_insert_implementation(struct pico_tree *tree, void *key, uint8_t allocator); -void *pico_tree_delete_implementation(struct pico_tree *tree, void *key, uint8_t allocator); - -#ifdef PICO_SUPPORT_MM -/* The memory manager also uses the pico_tree to keep track of all the different slab sizes it has. 
- * These nodes should be placed in the manager page which is in a different memory region then the nodes - * which are used for the pico stack in general. - * Therefore the following 2 functions are created so that pico_tree can use them to to put these nodes - * into the correct memory regions. - * If pico_tree_insert is called from the memory manager module, then create_node should use - * pico_mem_page0_zalloc to create a node. The same for pico_tree_delete. - */ -extern void*pico_mem_page0_zalloc(size_t len); -extern void pico_mem_page0_free(void*ptr); -#endif /* PICO_SUPPORT_MM */ - -/* - * Exported functions - */ - -struct pico_tree_node *pico_tree_firstNode(struct pico_tree_node *node) -{ - while(IS_NOT_LEAF(node->leftChild)) - node = node->leftChild; - return node; -} - -struct pico_tree_node *pico_tree_lastNode(struct pico_tree_node *node) -{ - while(IS_NOT_LEAF(node->rightChild)) - node = node->rightChild; - return node; -} - -struct pico_tree_node *pico_tree_next(struct pico_tree_node *node) -{ - if (!node) - return NULL; - - if(IS_NOT_LEAF(node->rightChild)) - { - node = node->rightChild; - while(IS_NOT_LEAF(node->leftChild)) - node = node->leftChild; - } - else - { - if (IS_NOT_LEAF(node->parent) && AM_I_LEFT_CHILD(node)) - node = node->parent; - else { - while (IS_NOT_LEAF(node->parent) && AM_I_RIGHT_CHILD(node)) - node = node->parent; - node = node->parent; - } - } - - return node; -} - -struct pico_tree_node *pico_tree_prev(struct pico_tree_node *node) -{ - if (IS_NOT_LEAF(node->leftChild)) { - node = node->leftChild; - while (IS_NOT_LEAF(node->rightChild)) - node = node->rightChild; - } else { - if (IS_NOT_LEAF(node->parent) && AM_I_RIGHT_CHILD(node)) - node = node->parent; - else { - while (IS_NOT_LEAF(node) && AM_I_LEFT_CHILD(node)) - node = node->parent; - node = node->parent; - } - } - - return node; -} - -/* The memory manager also uses the pico_tree to keep track of all the different slab sizes it has. 
- * These nodes should be placed in the manager page which is in a different memory region then the nodes - * which are used for the pico stack in general. - * Therefore the following wrapper for pico_tree_insert is created. - * The actual implementation can be found in pico_tree_insert_implementation. - */ -void *pico_tree_insert(struct pico_tree *tree, void *key) -{ - return pico_tree_insert_implementation(tree, key, USE_PICO_ZALLOC); -} - -static void pico_tree_insert_node(struct pico_tree *tree, struct pico_tree_node *insert) -{ - struct pico_tree_node *temp = tree->root; - struct pico_tree_node *last_node = INIT_LEAF; - int result = 0; - - /* search for the place to insert the new node */ - while(IS_NOT_LEAF(temp)) - { - last_node = temp; - result = tree->compare(insert->keyValue, temp->keyValue); - - temp = (result < 0) ? (temp->leftChild) : (temp->rightChild); - } - /* make the needed connections */ - insert->parent = last_node; - - if(IS_LEAF(last_node)) - tree->root = insert; - else{ - result = tree->compare(insert->keyValue, last_node->keyValue); - if(result < 0) - last_node->leftChild = insert; - else - last_node->rightChild = insert; - } -} - -void *pico_tree_insert_implementation(struct pico_tree *tree, void *key, uint8_t allocator) -{ - struct pico_tree_node *insert; - void *LocalKey; - - LocalKey = (IS_NOT_LEAF(tree->root) ? 
pico_tree_findKey(tree, key) : NULL); - - /* if node already in, bail out */ - if(LocalKey) { - pico_err = PICO_ERR_EEXIST; - return LocalKey; - } - - insert = create_node(tree, key, allocator); - - if(!insert) - { - pico_err = PICO_ERR_ENOMEM; - /* to let the user know that it couldn't insert */ - return (void *)&LEAF; - } - - pico_tree_insert_node(tree, insert); - - /* fix colour issues */ - fix_insert_collisions(tree, insert); - - return NULL; -} - -struct pico_tree_node *pico_tree_findNode(struct pico_tree *tree, void *key) -{ - struct pico_tree_node *found; - - found = tree->root; - - while(IS_NOT_LEAF(found)) - { - int result; - result = tree->compare(found->keyValue, key); - if(result == 0) - return found; - else if(result < 0) - found = found->rightChild; - else - found = found->leftChild; - } - return NULL; -} - -void *pico_tree_findKey(struct pico_tree *tree, void *key) -{ - struct pico_tree_node *found; - - found = pico_tree_findNode(tree, key); - if (found == NULL) - return NULL; - return found->keyValue; -} - -void *pico_tree_first(struct pico_tree *tree) -{ - return pico_tree_firstNode(tree->root)->keyValue; -} - -void *pico_tree_last(struct pico_tree *tree) -{ - return pico_tree_lastNode(tree->root)->keyValue; -} - -static uint8_t pico_tree_delete_node(struct pico_tree *tree, struct pico_tree_node *d, struct pico_tree_node **temp) -{ - struct pico_tree_node *min; - struct pico_tree_node *ltemp = d; - uint8_t nodeColor; - min = pico_tree_firstNode(d->rightChild); - nodeColor = min->color; - - *temp = min->rightChild; - if(min->parent == ltemp && IS_NOT_LEAF(*temp)) - (*temp)->parent = min; - else{ - switchNodes(tree, min, min->rightChild); - min->rightChild = ltemp->rightChild; - if(IS_NOT_LEAF(min->rightChild)) min->rightChild->parent = min; - } - - switchNodes(tree, ltemp, min); - min->leftChild = ltemp->leftChild; - - if(IS_NOT_LEAF(min->leftChild)) - min->leftChild->parent = min; - - min->color = ltemp->color; - return nodeColor; -} - -static 
uint8_t pico_tree_delete_check_switch(struct pico_tree *tree, struct pico_tree_node *delete, struct pico_tree_node **temp) -{ - struct pico_tree_node *ltemp = delete; - uint8_t nodeColor = delete->color; - if(IS_LEAF(delete->leftChild)) - { - *temp = ltemp->rightChild; - switchNodes(tree, ltemp, ltemp->rightChild); - } - else - if(IS_LEAF(delete->rightChild)) - { - struct pico_tree_node *_ltemp = delete; - *temp = _ltemp->leftChild; - switchNodes(tree, _ltemp, _ltemp->leftChild); - } - else{ - nodeColor = pico_tree_delete_node(tree, delete, temp); - } - - return nodeColor; - -} - -/* The memory manager also uses the pico_tree to keep track of all the different slab sizes it has. - * These nodes should be placed in the manager page which is in a different memory region then the nodes - * which are used for the pico stack in general. - * Therefore the following wrapper for pico_tree_delete is created. - * The actual implementation can be found in pico_tree_delete_implementation. - */ -void *pico_tree_delete(struct pico_tree *tree, void *key) -{ - return pico_tree_delete_implementation(tree, key, USE_PICO_ZALLOC); -} - -static inline void if_nodecolor_black_fix_collisions(struct pico_tree *tree, struct pico_tree_node *temp, uint8_t nodeColor) -{ - /* deleted node is black, this will mess up the black path property */ - if(nodeColor == BLACK) - fix_delete_collisions(tree, temp); -} - -void *pico_tree_delete_implementation(struct pico_tree *tree, void *key, uint8_t allocator) -{ - struct pico_tree_node *temp; - uint8_t nodeColor; /* keeps the color of the node to be deleted */ - void *lkey; /* keeps a copy of the key which will be removed */ - struct pico_tree_node *delete; /* keeps a copy of the node to be extracted */ - if (!key) - return NULL; - - delete = pico_tree_findNode(tree, key); - - /* this key isn't in the tree, bail out */ - if(!delete) - return NULL; - - lkey = delete->keyValue; - nodeColor = pico_tree_delete_check_switch(tree, delete, &temp); - - 
if_nodecolor_black_fix_collisions(tree, temp, nodeColor); - - if(allocator == USE_PICO_ZALLOC) - PICO_FREE(delete); - -#ifdef PICO_SUPPORT_MM - else - pico_mem_page0_free(delete); -#endif - return lkey; -} - -int pico_tree_empty(struct pico_tree *tree) -{ - return (!tree->root || IS_LEAF(tree->root)); -} - -/* - * Private functions - */ -static void rotateToLeft(struct pico_tree*tree, struct pico_tree_node*node) -{ - struct pico_tree_node*temp; - - temp = node->rightChild; - - if(temp == &LEAF) return; - - node->rightChild = temp->leftChild; - - if(IS_NOT_LEAF(temp->leftChild)) - temp->leftChild->parent = node; - - temp->parent = node->parent; - - if(IS_LEAF(node->parent)) - tree->root = temp; - else - if(node == node->parent->leftChild) - node->parent->leftChild = temp; - else - node->parent->rightChild = temp; - - temp->leftChild = node; - node->parent = temp; -} - - -static void rotateToRight(struct pico_tree *tree, struct pico_tree_node *node) -{ - struct pico_tree_node*temp; - - temp = node->leftChild; - node->leftChild = temp->rightChild; - - if(temp == &LEAF) return; - - if(IS_NOT_LEAF(temp->rightChild)) - temp->rightChild->parent = node; - - temp->parent = node->parent; - - if(IS_LEAF(node->parent)) - tree->root = temp; - else - if(node == node->parent->rightChild) - node->parent->rightChild = temp; - else - node->parent->leftChild = temp; - - temp->rightChild = node; - node->parent = temp; - return; -} - -static struct pico_tree_node *create_node(struct pico_tree *tree, void*key, uint8_t allocator) -{ - struct pico_tree_node *temp = NULL; - IGNORE_PARAMETER(tree); - if(allocator == USE_PICO_ZALLOC) - temp = (struct pico_tree_node *)PICO_ZALLOC(sizeof(struct pico_tree_node)); - -#ifdef PICO_SUPPORT_MM - else - temp = (struct pico_tree_node *)pico_mem_page0_zalloc(sizeof(struct pico_tree_node)); -#endif - - if(!temp) - return NULL; - - temp->keyValue = key; - temp->parent = &LEAF; - temp->leftChild = &LEAF; - temp->rightChild = &LEAF; - /* by default every 
new node is red */ - temp->color = RED; - return temp; -} - -/* - * This function fixes the possible collisions in the tree. - * Eg. if a node is red his children must be black ! - */ -static void fix_insert_collisions(struct pico_tree*tree, struct pico_tree_node*node) -{ - struct pico_tree_node*temp; - - while(node->parent->color == RED && IS_NOT_LEAF(GRANPA(node))) - { - if(AM_I_RIGHT_CHILD(node->parent)) - { - temp = GRANPA(node)->leftChild; - if(temp->color == RED) { - node->parent->color = BLACK; - temp->color = BLACK; - GRANPA(node)->color = RED; - node = GRANPA(node); - } - else if(temp->color == BLACK) { - if(AM_I_LEFT_CHILD(node)) { - node = node->parent; - rotateToRight(tree, node); - } - - node->parent->color = BLACK; - GRANPA(node)->color = RED; - rotateToLeft(tree, GRANPA(node)); - } - } - else if(AM_I_LEFT_CHILD(node->parent)) - { - temp = GRANPA(node)->rightChild; - if(temp->color == RED) { - node->parent->color = BLACK; - temp->color = BLACK; - GRANPA(node)->color = RED; - node = GRANPA(node); - } - else if(temp->color == BLACK) { - if(AM_I_RIGHT_CHILD(node)) { - node = node->parent; - rotateToLeft(tree, node); - } - - node->parent->color = BLACK; - GRANPA(node)->color = RED; - rotateToRight(tree, GRANPA(node)); - } - } - } - /* make sure that the root of the tree stays black */ - tree->root->color = BLACK; -} - -static void switchNodes(struct pico_tree*tree, struct pico_tree_node*nodeA, struct pico_tree_node*nodeB) -{ - - if(IS_LEAF(nodeA->parent)) - tree->root = nodeB; - else - if(IS_NOT_LEAF(nodeA)) - { - if(AM_I_LEFT_CHILD(nodeA)) - nodeA->parent->leftChild = nodeB; - else - nodeA->parent->rightChild = nodeB; - } - - if(IS_NOT_LEAF(nodeB)) nodeB->parent = nodeA->parent; - -} - -/* - * This function fixes the possible collisions in the tree. - * Eg. if a node is red his children must be black ! - * In this case the function fixes the constant black path property. 
- */ -static void fix_delete_collisions(struct pico_tree*tree, struct pico_tree_node *node) -{ - struct pico_tree_node*temp; - - while( node != tree->root && node->color == BLACK && IS_NOT_LEAF(node)) - { - if(AM_I_LEFT_CHILD(node)) { - - temp = node->parent->rightChild; - if(temp->color == RED) - { - temp->color = BLACK; - node->parent->color = RED; - rotateToLeft(tree, node->parent); - temp = node->parent->rightChild; - } - - if(temp->leftChild->color == BLACK && temp->rightChild->color == BLACK) - { - temp->color = RED; - node = node->parent; - } - else - { - if(temp->rightChild->color == BLACK) - { - temp->leftChild->color = BLACK; - temp->color = RED; - rotateToRight(tree, temp); - temp = temp->parent->rightChild; - } - - temp->color = node->parent->color; - node->parent->color = BLACK; - temp->rightChild->color = BLACK; - rotateToLeft(tree, node->parent); - node = tree->root; - } - } - else{ - temp = node->parent->leftChild; - if(temp->color == RED) - { - temp->color = BLACK; - node->parent->color = RED; - rotateToRight(tree, node->parent); - temp = node->parent->leftChild; - } - - if(temp->rightChild->color == BLACK && temp->leftChild->color == BLACK) - { - temp->color = RED; - node = node->parent; - } - else{ - if(temp->leftChild->color == BLACK) - { - temp->rightChild->color = BLACK; - temp->color = RED; - rotateToLeft(tree, temp); - temp = temp->parent->leftChild; - } - - temp->color = node->parent->color; - node->parent->color = BLACK; - temp->leftChild->color = BLACK; - rotateToRight(tree, node->parent); - node = tree->root; - } - } - } - node->color = BLACK; -} diff --git a/kernel/picotcp/test/Makefile b/kernel/picotcp/test/Makefile deleted file mode 100644 index 038d1e6..0000000 --- a/kernel/picotcp/test/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -CC=gcc -CFLAGS=-ggdb -I../build/include/ -L../build/lib/ -lpicotcp -lvdeplug -APPNAME=test_tftp_app_client - -.PHONY: clean -all: test_tftp_app_client -test_tftp_app_client: test_tftp_app_client.o - $(CC) -o 
test_tftp_app_client ../build/modules/pico_dev_vde.o $^ $(CFLAGS) -clean: - rm -f ${APPNAME}.o diff --git a/kernel/picotcp/test/README.md b/kernel/picotcp/test/README.md deleted file mode 100644 index 0e284ff..0000000 --- a/kernel/picotcp/test/README.md +++ /dev/null @@ -1,6 +0,0 @@ -To run these tests on your linux system, you will have to install these dependencies: -* vde2 -* libvdeplug2-dev -* libpcap0.8-dev - -This will allow you to compile the 'make test' and run the tests diff --git a/kernel/picotcp/test/autotest.sh b/kernel/picotcp/test/autotest.sh deleted file mode 100755 index 407b042..0000000 --- a/kernel/picotcp/test/autotest.sh +++ /dev/null @@ -1,360 +0,0 @@ -#!/bin/bash - -TFTP_EXEC_DIR="$(pwd)/build/test" -TFTP_WORK_DIR="${TFTP_EXEC_DIR}/tmp" -TFTP_WORK_SUBDIR="${TFTP_WORK_DIR}/subdir" -TFTP_WORK_FILE="test.img" - - - -function tftp_setup() { - dd if=/dev/urandom bs=1000 count=10 of=${1}/$TFTP_WORK_FILE -} - -function tftp_cleanup() { - echo CLEANUP - pwd;ls - killall -wq picoapp.elf - rm -rf $TFTP_WORK_DIR - if [ $1 ]; then - exit $1 - fi -} - -if ! [ -x "$(command -v vde_switch)" ]; then - echo 'VDE Switch is not installed.' >&2 -fi - -if [ ! -e test/vde_sock_start_user.sh ]; then - echo "VDE SOCK START FILE NOT FOUND. NO VDE SETUP. EXITING" - exit 1 -else - echo "VDE SOCK START SCRIPT STARTED." - ./test/vde_sock_start_user.sh -fi - -rm -f /tmp/pico-mem-report-* -sleep 2 -ulimit -c unlimited -killall -wq picoapp.elf -killall -wq picoapp6.elf - - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ 6LoWPAN PING 1HOP (1500B) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(build/test/picoapp6.elf -6 0,0,0) & -pids="$! " -sleep 1 -(build/test/picoapp6.elf -6 1,2,1 -a noop) & -pids+="$! 
" -sleep 1 -build/test/picoapp6.elf -6 2,1,0 -a ping,2aaa:abcd:0000:0000:0200:00aa:ab00:0001,1500,0,1 || exit 1 -#TODO roll out this check for all "daemon" processes -for pid in $pids; do ps -o pid= -p $pid || exit 1; done # check whether daemon processes didn't die from e.g. ASAN -killall -w picoapp6.elf -s SIGQUIT - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ 6LoWPAN UDP 1HOP (1400B) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -#TODO are these "daemon" processes that need to be killed, or are they intended to halt on their own, giving a status code? -(build/test/picoapp6.elf -6 0,0,0) & -sleep 1 -(build/test/picoapp6.elf -6 1,2,1 -a udpecho,::0,6667,) & -sleep 1 -build/test/picoapp6.elf -6 2,1,0 -a udpclient,2aaa:abcd:0000:0000:0200:00aa:ab00:0001,6667,6667,1400,10,1, || exit 1 -killall -w picoapp6.elf -s SIGQUIT - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ MULTICAST6 TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic1,/tmp/pic0.ctl,aaaa::2,ffff::, -a mcastreceive_ipv6,aaaa::2,ff00::e007:707,6667,6667,) & -(./build/test/picoapp6.elf --vde pic2,/tmp/pic0.ctl,aaaa::3,ffff::, -a mcastreceive_ipv6,aaaa::3,ff00::e007:707,6667,6667,) & -(./build/test/picoapp6.elf --vde pic3,/tmp/pic0.ctl,aaaa::4,ffff::, -a mcastreceive_ipv6,aaaa::4,ff00::e007:707,6667,6667,) & -sleep 2 - ./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::1,ffff::, -a mcastsend_ipv6,aaaa::1,ff00::e007:707,6667,6667,|| exit 1 -killall -w picoapp6.elf -s SIGQUIT - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ PING6 LOCALHOST TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -./build/test/picoapp6.elf --loop -a ping,::1,,,, || exit 1 -killall -w picoapp6.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ PING6 TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::1,ffff::,,,,) & -./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::2,ffff::,,, -a 
ping,aaaa::1,,,, || exit 1 -killall -w picoapp6.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ PING6 TEST (aborted in 4 seconds...) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::1,ffff::,,,,) & -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::2,ffff::,,, -a ping,aaaa::1,64,4,,) & -sleep 7 -killall -w picoapp6.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TCP6 TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::1,ffff::,,, -a tcpbench,r,6667,,) & -./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::2,ffff::,,, -a tcpbench,t,aaaa::1,6667,, || exit 1 -killall -w picoapp6.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TCP6 TEST (with 2% packet loss on both directions) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::1,ffff::,,2,2, -a tcpbench,r,6667,,) & -./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::2,ffff::,,, -a tcpbench,t,aaaa::1,6667,, || exit 1 -killall -w picoapp6.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TCP6 TEST (nagle) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::1,ffff::,,, -a tcpbench,r,6667,n,) & -./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::2,ffff::,,, -a tcpbench,t,aaaa::1,6667,n, || exit 1 -killall -w picoapp6.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ UDP6 TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::1,ffff::,,, -a udpecho,::0,6667,) & -pids="$! 
" -./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,aaaa::2,ffff::,,, -a udpclient,aaaa::1,6667,6667,1400,100,10, || exit 1 -wait $pids || exit 1 -killall -w picoapp6.elf - -echo -echo -echo -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ IPV6 FWD TCP TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp6.elf --vde pic0,/tmp/pic1.ctl,2001:aabb::2,ffff:ffff::,2001:aabb::ff,, -a tcpbench,r,6667,,) & -(./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,2001:aaaa::ff,ffff:ffff::,,, --vde pic1,/tmp/pic1.ctl,2001:aabb::ff,ffff:ffff::,,, -a noop,) & -./build/test/picoapp6.elf --vde pic0,/tmp/pic0.ctl,2001:aaaa::1,ffff:ffff::,2001:aaaa::ff,, -a tcpbench,t,2001:aabb::2,6667,, || exit 1 -sleep 2 -killall -w picoapp6.elf - - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ MULTICAST TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic1:/tmp/pic0.ctl:10.40.0.3:255.255.0.0: -a mcastreceive:10.40.0.3:224.7.7.7:6667:6667:) & -(./build/test/picoapp.elf --vde pic2:/tmp/pic0.ctl:10.40.0.4:255.255.0.0: -a mcastreceive:10.40.0.4:224.7.7.7:6667:6667:) & -(./build/test/picoapp.elf --vde pic3:/tmp/pic0.ctl:10.40.0.5:255.255.0.0: -a mcastreceive:10.40.0.5:224.7.7.7:6667:6667:) & -sleep 2 -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.0.0: -a mcastsend:10.40.0.2:224.7.7.7:6667:6667: || exit 1 -killall -w picoapp.elf - -echo -echo -echo -echo -echo - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ IPV4 tests! 
~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ PING LOCALHOST TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -./build/test/picoapp.elf --loop -a ping:127.0.0.1:::: || exit 1 - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ PING TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0:::) & -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a ping:10.40.0.8:::: || exit 1 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ PING TEST -- Aborted in 4 seconds ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0:::) & -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a ping:10.40.0.8:64:4::) & -sleep 7 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TCP TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0:::: -a tcpbench:r:6667::) & -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a tcpbench:t:10.40.0.8:6667:: || exit 1 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TCP TEST (with global route) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0:::: --vde pic1:/tmp/pic1.ctl:10.50.0.10:255.255.0.0:10.50.0.1: -a tcpbench:r:6667::) & -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a tcpbench:t:10.40.0.8:6667:: || exit 1 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TCP TEST (with 2% packet loss on both directions) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0::2:2: -a tcpbench:r:6667::) & 
-./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a tcpbench:t:10.40.0.8:6667:: || exit 1 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TCP TEST (nagle) ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0::: -a tcpbench:r:6667:n:) & -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a tcpbench:t:10.40.0.8:6667:n: || exit 1 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ UDP TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0::: -a udpecho:10.40.0.8:6667:) & -pids="$! " -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a udpclient:10.40.0.8:6667:6667:1400:100:10: || exit 1 -wait $pids || exit 1 - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ UDP TEST with fragmentation ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0::: -a udpecho:10.40.0.8:6667:) & -pids="$! 
" -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0::: -a udpclient:10.40.0.8:6667:6667:4500:100:10: || exit 1 -wait $pids || exit 1 - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ NAT TCP TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.10:255.255.0.0::: --vde pic1:/tmp/pic1.ctl:10.50.0.10:255.255.0.0: -a natbox:10.50.0.10) & -sleep 2 -(./build/test/picoapp.elf --vde pic0:/tmp/pic1.ctl:10.50.0.8:255.255.0.0::: -a tcpbench:r:6667:) & -sleep 2 -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0:10.40.0.10::: -a tcpbench:t:10.50.0.8:6667: || exit 1 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ NAT UDP TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.10:255.255.0.0::: --vde pic1:/tmp/pic1.ctl:10.50.0.10:255.255.0.0::: -a natbox:10.50.0.10) & -(./build/test/picoapp.elf --vde pic0:/tmp/pic1.ctl:10.50.0.8:255.255.0.0::: -a udpecho:10.50.0.8:6667:) & -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0:10.40.0.10::: -a udpclient:10.50.0.8:6667:6667:1400:100:10: || exit 1 -#sometimes udpecho finishes before reaching wait %2 -#wait %2 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ MULTICAST TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic1:/tmp/pic0.ctl:10.40.0.3:255.255.0.0::: -a mcastreceive:10.40.0.3:224.7.7.7:6667:6667:) & -(./build/test/picoapp.elf --vde pic2:/tmp/pic0.ctl:10.40.0.4:255.255.0.0::: -a mcastreceive:10.40.0.4:224.7.7.7:6667:6667:) & -(./build/test/picoapp.elf --vde pic3:/tmp/pic0.ctl:10.40.0.5:255.255.0.0::: -a mcastreceive:10.40.0.5:224.7.7.7:6667:6667:) & -sleep 2 -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.0.0::: -a mcastsend:10.40.0.2:224.7.7.7:6667:6667: || exit 1 -killall -w picoapp.elf - -killall -w picoapp.elf - 
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ DHCP TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.1:255.255.0.0::: -a dhcpserver:pic0:10.40.0.1:255.255.255.0:64:128:) & -./build/test/picoapp.elf --barevde pic0:/tmp/pic0.ctl: -a dhcpclient:pic0 || exit 1 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ DHCP DUAL TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.0.0::: -a dhcpserver:pic0:10.40.0.2:255.255.255.0:64:128:) & -(./build/test/picoapp.elf --vde pic1:/tmp/pic1.ctl:10.50.0.2:255.255.0.0::: -a dhcpserver:pic1:10.50.0.2:255.255.255.0:64:128:) & -./build/test/picoapp.elf --barevde pic0:/tmp/pic0.ctl: --barevde pic1:/tmp/pic1.ctl: -a dhcpclient:pic0:pic1: || exit 1 -killall -w picoapp.elf - -#TO DO: the ping address 169.254.22.5 is hardcoded in the slaacv4 test. Nice to pass that by parameter -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ SLAACV4 TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:169.254.22.5:255.255.0.0:::) & -./build/test/picoapp.elf --barevde pic0:/tmp/pic0.ctl: -a slaacv4:pic0 || exit 1 -killall -w picoapp.elf - - -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.0.0:10.40.0.1::: -a udpdnsclient:www.google.be:173.194.67.94:: & -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.0.0:10.40.0.1::: -a udpdnsclient:ipv6.google.be:doesntmatter:ipv6: & -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.2:255.255.0.0:10.50.0.1::: -a sntp:0.europe.pool.ntp.org & -sleep 20 -killall -w picoapp.elf - - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ MDNS TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -#retrieve a local mdns host name from the host -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.2:255.255.255.0:10.50.0.1: --app 
mdns:hostfoo.local:hostbar.local:) & -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.3:255.255.255.0:10.50.0.1: --app mdns:hostbar.local:hostfoo.local:) & -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.2:255.255.255.0:10.50.0.1: --app mdns:hostfoobar.local:nonexisting.local:) & -sleep 10 -killall -w picoapp.elf - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ DNS_SD TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -#register a service -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.2:255.255.255.0:10.50.0.1: --app dns_sd:host.local:WebServer) & -(./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.3:255.255.255.0:10.50.0.1: --app dns_sd:host.local:WebServer) & -sleep 30 -killall -w picoapp.elf - -sleep 1 -sync - - -# TFTP TEST BEGINS... - -if [ ! -d $TFTP_WORK_DIR ]; then - mkdir $TFTP_WORK_DIR || exit 1 -fi -if [ ! -d ${TFTP_WORK_SUBDIR}/server ]; then - mkdir $TFTP_WORK_SUBDIR || exit 1 -fi - -pushd $TFTP_WORK_DIR - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TFTP GET TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -tftp_setup $TFTP_WORK_DIR -(${TFTP_EXEC_DIR}/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.2:255.255.255.0:10.50.0.1: --app tftp:S:) & -cd $TFTP_WORK_SUBDIR -sleep 2 -${TFTP_EXEC_DIR}/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.3:255.255.255.0:10.50.0.1: --app tftp:R:${TFTP_WORK_FILE}:10.50.0.2: || tftp_cleanup 1 -sleep 3 -killall -w picoapp.elf - -sleep 1 - -rm $TFTP_WORK_FILE - -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "~~~ TFTP PUT TEST ~~~" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -(${TFTP_EXEC_DIR}/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.2:255.255.255.0:10.50.0.1: --app tftp:S:) & -cd $TFTP_WORK_DIR -tftp_setup $TFTP_WORK_DIR -sleep 2 -${TFTP_EXEC_DIR}/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.3:255.255.255.0:10.50.0.1: --app tftp:T:${TFTP_WORK_FILE}:10.50.0.2: || tftp_cleanup 1 -sleep 3 - -tftp_cleanup -popd -# TFTP TEST ENDS. 
- -MAXMEM=`cat /tmp/pico-mem-report-* | sort -r -n |head -1` -echo -echo -echo -echo "MAX memory used: $MAXMEM" -rm -f /tmp/pico-mem-report-* - -./test/vde_sock_start_user.sh stop -echo "SUCCESS!" diff --git a/kernel/picotcp/test/coverage.sh b/kernel/picotcp/test/coverage.sh deleted file mode 100755 index f326bc7..0000000 --- a/kernel/picotcp/test/coverage.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -./test/units.sh || exit 1 -./test/autotest.sh || exit 2 -exit 0 diff --git a/kernel/picotcp/test/dummy.c b/kernel/picotcp/test/dummy.c deleted file mode 100644 index 7420581..0000000 --- a/kernel/picotcp/test/dummy.c +++ /dev/null @@ -1,12 +0,0 @@ -#include "pico_stack.h" - -#if defined(PICO_SUPPORT_RTOS) || defined (PICO_SUPPORT_PTHREAD) -volatile uint32_t pico_ms_tick; -#endif - -int main(void) -{ - pico_stack_init(); - pico_stack_tick(); - return 0; -} diff --git a/kernel/picotcp/test/examples/Makefile b/kernel/picotcp/test/examples/Makefile deleted file mode 100644 index 3434891..0000000 --- a/kernel/picotcp/test/examples/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -PREFIX?=../../build -CFLAGS+=-I../../include/ -I../../modules -I../../build/include -I . 
-ggdb - - -$(PREFIX)/examples/%.o: %.c - @mkdir -p $(PREFIX)/examples - @echo -e "\t[CC] $@" - @$(CC) -c $(CFLAGS) -o $@ $< - -OBJS:= \ -$(PREFIX)/examples/dhcp_client.o \ -$(PREFIX)/examples/dhcp_server.o \ -$(PREFIX)/examples/dns_sd.o \ -$(PREFIX)/examples/dnsclient.o \ -$(PREFIX)/examples/mdns.o \ -$(PREFIX)/examples/multicast_recv.o \ -$(PREFIX)/examples/multicast_ip6_recv.o \ -$(PREFIX)/examples/multicast_send.o \ -$(PREFIX)/examples/multicast_ip6_send.o \ -$(PREFIX)/examples/natbox.o \ -$(PREFIX)/examples/noop.o \ -$(PREFIX)/examples/ping.o \ -$(PREFIX)/examples/slaacv4.o \ -$(PREFIX)/examples/sntp.o \ -$(PREFIX)/examples/tcpbench.o \ -$(PREFIX)/examples/tcpclient.o \ -$(PREFIX)/examples/tcpecho.o \ -$(PREFIX)/examples/tftp.o \ -$(PREFIX)/examples/udp_client.o \ -$(PREFIX)/examples/udp_echo.o \ -$(PREFIX)/examples/udpnat.o \ -$(PREFIX)/examples/udp_sendto_test.o \ -$(PREFIX)/examples/iperfc.o \ - - -all: $(OBJS) diff --git a/kernel/picotcp/test/examples/dhcp_client.c b/kernel/picotcp/test/examples/dhcp_client.c deleted file mode 100644 index bb0ee20..0000000 --- a/kernel/picotcp/test/examples/dhcp_client.c +++ /dev/null @@ -1,114 +0,0 @@ -#include "utils.h" -#include -#include -#include -#include -#include -/*** START DHCP Client ***/ -#ifdef PICO_SUPPORT_DHCPC - -/* This must stay global, its lifetime is the same as the dhcp negotiation */ -uint32_t dhcpclient_xid; - - -static uint8_t dhcpclient_devices = 0; - -void ping_callback_dhcpclient(struct pico_icmp4_stats *s) -{ - char host[30] = { }; - - pico_ipv4_to_string(host, s->dst.addr); - if (s->err == 0) { - dbg("DHCP client: %lu bytes from %s: icmp_req=%lu ttl=64 time=%lu ms\n", - s->size, host, s->seq, (long unsigned int)s->time); - if (s->seq >= 3) { - dbg("DHCP client: TEST SUCCESS!\n"); - if (--dhcpclient_devices <= 0) - exit(0); - } - } else { - dbg("DHCP client: ping %lu to %s error %d\n", s->seq, host, s->err); - dbg("DHCP client: TEST FAILED!\n"); - exit(1); - } -} - -void callback_dhcpclient(void 
*arg, int code) -{ - struct pico_ip4 address = ZERO_IP4, gateway = ZERO_IP4; - char s_address[16] = { }, s_gateway[16] = { }; - - printf("DHCP client: callback happened with code %d!\n", code); - if (code == PICO_DHCP_SUCCESS) { - address = pico_dhcp_get_address(arg); - gateway = pico_dhcp_get_gateway(arg); - pico_ipv4_to_string(s_address, address.addr); - pico_ipv4_to_string(s_gateway, gateway.addr); - printf("DHCP client: got IP %s assigned with cli %p\n", s_address, arg); -#ifdef PICO_SUPPORT_PING - pico_icmp4_ping(s_gateway, 3, 1000, 5000, 32, ping_callback_dhcpclient); - /* optional test to check routing when links get added and deleted */ - /* do { - char *new_arg = NULL, *p = NULL; - new_arg = calloc(1, strlen(s_address) + strlen(":224.7.7.7:6667:6667") + 1); - p = strcat(new_arg, s_address); - p = strcat(p + strlen(s_address), ":224.7.7.7:6667:6667"); - app_mcastsend(new_arg); - } while (0); - */ -#endif - } -} - -void app_dhcp_client(char *arg) -{ - char *sdev = NULL; - char *nxt = arg; - struct pico_device *dev = NULL; - - if (!nxt) - goto out; - - while (nxt) { - if (nxt) { - nxt = cpy_arg(&sdev, nxt); - if(!sdev) { - goto out; - } - } - - dev = pico_get_device(sdev); - if(dev == NULL) { - if (sdev) - free(sdev); - - printf("%s: error getting device %s: %s\n", __FUNCTION__, dev->name, strerror(pico_err)); - exit(255); - } - - printf("Starting negotiation\n"); - - if (pico_dhcp_initiate_negotiation(dev, &callback_dhcpclient, &dhcpclient_xid) < 0) { - printf("%s: error initiating negotiation: %s\n", __FUNCTION__, strerror(pico_err)); - if (sdev) - free(sdev); - - exit(255); - } - - if (sdev) - free(sdev); - - dhcpclient_devices++; - } - return; - -out: - fprintf(stderr, "dhcpclient expects the following format: dhcpclient:dev_name:[dev_name]\n"); - if (sdev) - free(sdev); - - exit(255); -} -#endif -/*** END DHCP Client ***/ diff --git a/kernel/picotcp/test/examples/dhcp_server.c b/kernel/picotcp/test/examples/dhcp_server.c deleted file mode 100644 index 
63ff8fe..0000000 --- a/kernel/picotcp/test/examples/dhcp_server.c +++ /dev/null @@ -1,99 +0,0 @@ -#include "utils.h" -#include -#include -#include - -/*** START DHCP Server ***/ -#ifdef PICO_SUPPORT_DHCPD -/* ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.1:255.255.0.0: -a dhcpserver:pic0:10.40.0.1:255.255.255.0:64:128 - * ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.10:255.255.255.0: --vde pic1:/tmp/pic1.ctl:10.50.0.10:255.255.255.0: \ - * -a dhcpserver:pic0:10.40.0.10:255.255.255.0:64:128:pic1:10.50.0.10:255.255.255.0:64:128 - */ -void app_dhcp_server(char *arg) -{ - struct pico_device *dev = NULL; - struct pico_dhcp_server_setting s = { - 0 - }; - int pool_start = 0, pool_end = 0; - char *s_name = NULL, *s_addr = NULL, *s_netm = NULL, *s_pool_start = NULL, *s_pool_end = NULL; - char *nxt = arg; - - if (!nxt) - goto out; - - while (nxt) { - if (nxt) { - nxt = cpy_arg(&s_name, nxt); - if (!s_name) { - goto out; - } - } else { - goto out; - } - - if (nxt) { - nxt = cpy_arg(&s_addr, nxt); - if (s_addr) { - pico_string_to_ipv4(s_addr, &s.server_ip.addr); - } else { - goto out; - } - } else { - goto out; - } - - if (nxt) { - nxt = cpy_arg(&s_netm, nxt); - if (s_netm) { - pico_string_to_ipv4(s_netm, &s.netmask.addr); - } else { - goto out; - } - } else { - goto out; - } - - if (nxt) { - nxt = cpy_arg(&s_pool_start, nxt); - if (s_pool_start && atoi(s_pool_start)) { - pool_start = atoi(s_pool_start); - } else { - goto out; - } - } else { - goto out; - } - - if (nxt) { - nxt = cpy_arg(&s_pool_end, nxt); - if (s_pool_end && atoi(s_pool_end)) { - pool_end = atoi(s_pool_end); - } else { - goto out; - } - } else { - goto out; - } - - dev = (struct pico_device *)pico_get_device(s_name); - if (dev == NULL) { - fprintf(stderr, "No device with name %s found\n", s_name); - exit(255); - } - - s.dev = dev; - s.pool_start = (s.server_ip.addr & s.netmask.addr) | long_be(pool_start); - s.pool_end = (s.server_ip.addr & s.netmask.addr) | long_be(pool_end); - - 
pico_dhcp_server_initiate(&s); - } - return; - -out: - fprintf(stderr, "dhcpserver expects the following format: dhcpserver:dev_name:dev_addr:dev_netm:pool_start:pool_end\n"); - exit(255); - -} -#endif -/*** END DHCP Server ***/ diff --git a/kernel/picotcp/test/examples/dns_sd.c b/kernel/picotcp/test/examples/dns_sd.c deleted file mode 100644 index dd2e96f..0000000 --- a/kernel/picotcp/test/examples/dns_sd.c +++ /dev/null @@ -1,112 +0,0 @@ -#include "utils.h" -#include -#include -#include -#include -#include - -/*** START DNS_SD ***/ -#ifdef PICO_SUPPORT_DNS_SD - -#define TTL 30 -#define SECONDS 10 - -static int fully_initialized = 0; -static char *service_name = NULL; - -void dns_sd_claimed_callback( pico_mdns_rtree *tree, - char *str, - void *arg ) -{ - printf("DONE - Registering DNS-SD Service\n"); - - IGNORE_PARAMETER(tree); - IGNORE_PARAMETER(str); - IGNORE_PARAMETER(arg); -} - -void dns_sd_init_callback( pico_mdns_rtree *tree, - char *str, - void *arg ) -{ - PICO_DNS_SD_KV_VECTOR_DECLARE(key_value_pair_vector); - - IGNORE_PARAMETER(str); - IGNORE_PARAMETER(arg); - IGNORE_PARAMETER(tree); - - pico_dns_sd_kv_vector_add(&key_value_pair_vector, "key", "value"); - - printf("DONE - Initialising DNS Service Discovery module.\n"); - - if (pico_dns_sd_register_service(service_name, - "_http._tcp", 80, - &key_value_pair_vector, - TTL, dns_sd_claimed_callback, NULL) < 0) { - printf("Registering service failed!\n"); - } - - fully_initialized = 1; -} - -void app_dns_sd(char *arg, struct pico_ip4 address) -{ - char *hostname; - char *nxt = arg; - uint64_t starttime = 0; - int once = 0; - - if (!nxt) { - exit(255); - } - - nxt = cpy_arg(&hostname, nxt); - if(!hostname) { - exit(255); - } - - if(!nxt) { - printf("Not enough args supplied!\n"); - exit(255); - } - - nxt = cpy_arg(&service_name, nxt); - if(!service_name) { - exit(255); - } - - printf("\nStarting DNS Service Discovery module...\n"); - if (pico_dns_sd_init(hostname, address, &dns_sd_init_callback, NULL) != 0) { - 
printf("Initialisation returned with Error!\n"); - exit(255); - } - - printf("\nTry reinitialising DNS-SD\n"); - if (pico_dns_sd_init(hostname, address, &dns_sd_init_callback, NULL)) { - printf("Initialisation returned with Error!\n"); - exit(255); - } - - printf("DONE - Re-initialising DNS-SD module.\n"); - - starttime = PICO_TIME_MS(); - printf("Starting time: %d\n", starttime); - - while(1) { - pico_stack_tick(); - usleep(2000); - - if (((PICO_TIME_MS() - starttime) > SECONDS * 1000) && fully_initialized && !once) { - printf("\nTry reinitialising DNS-SD (a second time)\n"); - if (pico_dns_sd_init(hostname, address, &dns_sd_init_callback, NULL)) { - printf("Initialisation returned with Error!\n"); - exit(255); - } - once = 1; - printf("DONE - Re-initialising mDNS module. (a second time)\n"); - } - } -} - -#endif -/*** END DNS_SD ***/ diff --git a/kernel/picotcp/test/examples/dnsclient.c b/kernel/picotcp/test/examples/dnsclient.c deleted file mode 100644 index 0a1d276..0000000 --- a/kernel/picotcp/test/examples/dnsclient.c +++ /dev/null @@ -1,120 +0,0 @@ -#include -#include -#include -#include -#include "utils.h" -extern int IPV6_MODE; - -/*** START UDP DNS CLIENT ***/ -/* - ./test/vde_sock_start.sh - echo 1 > /proc/sys/net/ipv4/ip_forward - iptables -t nat -A POSTROUTING -o wlan0 -j MASQUERADE - iptables -A FORWARD -i pic0 -o wlan0 -j ACCEPT - iptables -A FORWARD -i wlan0 -o pic0 -j ACCEPT - ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.0.0:10.40.0.1: -a udpdnsclient:www.google.be:173.194.67.94 - */ -void cb_udpdnsclient_getaddr(char *ip, void *arg) -{ - uint8_t *id = (uint8_t *) arg; - - if (!ip) { - picoapp_dbg("%s: ERROR occured! (id: %u)\n", __FUNCTION__, *id); - return; - } - - picoapp_dbg("%s: ip %s (id: %u)\n", __FUNCTION__, ip, *id); - if (arg) - PICO_FREE(arg); -} - -void cb_udpdnsclient_getname(char *name, void *arg) -{ - uint8_t *id = (uint8_t *) arg; - - if (!name) { - picoapp_dbg("%s: ERROR occured! 
(id: %u)\n", __FUNCTION__, *id); - return; - } - - picoapp_dbg("%s: name %s (id: %u)\n", __FUNCTION__, name, *id); - if (arg) - PICO_FREE(arg); -} - -void app_udpdnsclient(char *arg) -{ - struct pico_ip4 nameserver; - char *dname, *daddr; - char *nxt; - char *ipver; - int v = 4; - uint8_t *getaddr_id, *getname_id, *getaddr6_id, *getname6_id; - - nxt = cpy_arg(&dname, arg); - if (!dname || !nxt) { - picoapp_dbg(" udpdnsclient expects the following format: udpdnsclient:dest_name:dest_ip:[ipv6]\n"); - exit(255); - } - - nxt = cpy_arg(&daddr, nxt); - if (!daddr || !nxt) { - picoapp_dbg(" udpdnsclient expects the following format: udpdnsclient:dest_name:dest_ip:[ipv6]\n"); - exit(255); - } - - nxt = cpy_arg(&ipver, nxt); - if (!ipver || strcmp("ipv6", ipver) != 0) - v = 4; - else - v = 6; - - picoapp_dbg("UDP DNS client started.\n"); - - picoapp_dbg("----- Deleting non existant nameserver -----\n"); - pico_string_to_ipv4("127.0.0.1", &nameserver.addr); - pico_dns_client_nameserver(&nameserver, PICO_DNS_NS_DEL); - picoapp_dbg("----- Adding 8.8.8.8 nameserver -----\n"); - pico_string_to_ipv4("8.8.8.8", &nameserver.addr); - pico_dns_client_nameserver(&nameserver, PICO_DNS_NS_ADD); - picoapp_dbg("----- Deleting 8.8.8.8 nameserver -----\n"); - pico_string_to_ipv4("8.8.8.8", &nameserver.addr); - pico_dns_client_nameserver(&nameserver, PICO_DNS_NS_DEL); - picoapp_dbg("----- Adding 8.8.8.8 nameserver -----\n"); - pico_string_to_ipv4("8.8.8.8", &nameserver.addr); - pico_dns_client_nameserver(&nameserver, PICO_DNS_NS_ADD); - picoapp_dbg("----- Adding 8.8.4.4 nameserver -----\n"); - pico_string_to_ipv4("8.8.4.4", &nameserver.addr); - pico_dns_client_nameserver(&nameserver, PICO_DNS_NS_ADD); - if (!IPV6_MODE) { - if (v == 4) { - picoapp_dbg("Mode: IPv4\n"); - getaddr_id = calloc(1, sizeof(uint8_t)); - *getaddr_id = 1; - picoapp_dbg(">>>>> DNS GET ADDR OF %s\n", dname); - pico_dns_client_getaddr(dname, &cb_udpdnsclient_getaddr, getaddr_id); - - getname_id = calloc(1, 
sizeof(uint8_t)); - *getname_id = 2; - picoapp_dbg(">>>>> DNS GET NAME OF %s\n", daddr); - pico_dns_client_getname(daddr, &cb_udpdnsclient_getname, getname_id); - return; - } - - picoapp_dbg("Mode: IPv6\n"); - -#ifdef PICO_SUPPORT_IPV6 - getaddr6_id = calloc(1, sizeof(uint8_t)); - *getaddr6_id = 3; - picoapp_dbg(">>>>> DNS GET ADDR6 OF %s\n", dname); - pico_dns_client_getaddr6(dname, &cb_udpdnsclient_getaddr, getaddr6_id); - getname6_id = calloc(1, sizeof(uint8_t)); - *getname6_id = 4; - picoapp_dbg(">>>>> DNS GET NAME OF ipv6 addr 2a00:1450:400c:c06::64\n"); - pico_dns_client_getname6("2a00:1450:400c:c06::64", &cb_udpdnsclient_getname, getname6_id); -#endif - } - - return; -} -/*** END UDP DNS CLIENT ***/ diff --git a/kernel/picotcp/test/examples/iperfc.c b/kernel/picotcp/test/examples/iperfc.c deleted file mode 100644 index fbe7e67..0000000 --- a/kernel/picotcp/test/examples/iperfc.c +++ /dev/null @@ -1,142 +0,0 @@ -#include -#include -#include -#include "pico_ipv6.h" -#include "pico_stack.h" -#include "pico_socket.h" -#include "utils.h" - -#define DURATION 30 - -struct iperf_hdr { - int32_t flags; /* 0 */ - int32_t numThreads; /* 1 */ - int32_t mPort; /* 5001 */ - int32_t bufferlen; /* 0 */ - int32_t mWinBand; /* 0 */ - int32_t mAmount; /* 0xfffffc18 */ -}; - -#define IPERF_PORT 5001 -#define MTU 1444 -#define SEND_BUF_SIZ (1024 * 2048) - -char *cpy_arg(char **dst, char *str); -extern int IPV6_MODE; - -static pico_time deadline; - -static void panic(void) -{ - for(;; ) ; -} - -static char buf[MTU] = {}; - -static void buf_paint(void) -{ - char paint[11] = "0123456789"; - int i; - for (i = 0; i < MTU; i++) { - buf[i] = paint[i % 10]; - } -} - -static void send_hdr(struct pico_socket *s) -{ - struct iperf_hdr hdr = {}; - hdr.numThreads = long_be(1); - hdr.mPort = long_be(5001); - hdr.mAmount = long_be(0xfffffc18); - pico_socket_write(s, &hdr, sizeof(hdr)); - deadline = PICO_TIME_MS() + DURATION * 1000; -} - -static void iperf_cb(uint16_t ev, struct pico_socket *s) 
-{ - int r; - static int end = 0; - if (ev & PICO_SOCK_EV_CONN) { - send_hdr(s); - return; - } - - if ((!end) && (ev & PICO_SOCK_EV_WR)) { - if (PICO_TIME_MS() > deadline) { - pico_socket_close(s); - if (!pico_timer_add(2000, deferred_exit, NULL)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - end++; - } - - pico_socket_write(s, buf, MTU); - } - - if (!(end) && (ev & (PICO_SOCK_EV_FIN | PICO_SOCK_EV_CLOSE))) { - if (!pico_timer_add(2000, deferred_exit, NULL)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - end++; - } -} - -static void iperfc_socket_setup(union pico_address *addr, uint16_t family) -{ - int yes = 1; - uint16_t send_port = 0; - struct pico_socket *s = NULL; - uint32_t bufsize = SEND_BUF_SIZ; - send_port = short_be(5001); - s = pico_socket_open(family, PICO_PROTO_TCP, &iperf_cb); - pico_socket_setoption(s, PICO_SOCKET_OPT_SNDBUF, &bufsize); - pico_socket_connect(s, addr, send_port); -} - -void app_iperfc(char *arg) -{ - struct pico_ip4 my_eth_addr, netmask; - struct pico_device *pico_dev_eth; - char *daddr = NULL, *dport = NULL; - char *nxt = arg; - uint16_t send_port = 0, listen_port = short_be(5001); - int i = 0, ret = 0, yes = 1; - struct pico_socket *s = NULL; - uint16_t family = PICO_PROTO_IPV4; - union pico_address dst = { - .ip4 = {0}, .ip6 = {{0}} - }; - union pico_address inaddr_any = { - .ip4 = {0}, .ip6 = {{0}} - }; - - /* start of argument parsing */ - if (nxt) { - nxt = cpy_arg(&daddr, arg); - if (daddr) { - if (!IPV6_MODE) - pico_string_to_ipv4(daddr, &dst.ip4.addr); - - #ifdef PICO_SUPPORT_IPV6 - else { - pico_string_to_ipv6(daddr, dst.ip6.addr); - family = PICO_PROTO_IPV6; - } - #endif - } else { - goto out; - } - } else { - /* missing dest_addr */ - goto out; - } - - iperfc_socket_setup(&dst, family); - return; -out: - dbg("Error parsing options!\n"); - exit(1); -} - diff --git a/kernel/picotcp/test/examples/mdns.c b/kernel/picotcp/test/examples/mdns.c deleted file mode 100644 index 
853448f..0000000 --- a/kernel/picotcp/test/examples/mdns.c +++ /dev/null @@ -1,83 +0,0 @@ -#include "utils.h" -#include "pico_dns_common.h" -#include "pico_mdns.h" -#include "pico_ipv4.h" -#include "pico_addressing.h" - -/*** START MDNS ***/ - -#ifdef PICO_SUPPORT_MDNS - -#define SECONDS 10 - -static int fully_initialized = 0; - -void mdns_init_callback( pico_mdns_rtree *rtree, - char *str, - void *arg ) -{ - printf("\nInitialised with hostname: %s\n\n", str); - - fully_initialized = 1; -} - -void app_mdns(char *arg, struct pico_ip4 address) -{ - char *hostname, *peername; - char *nxt = arg; - uint64_t starttime = 0; - int once = 0; - - if (!nxt) - exit(255); - - nxt = cpy_arg(&hostname, nxt); - if(!hostname) { - exit(255); - } - - if(!nxt) { - printf("Not enough args supplied!\n"); - exit(255); - } - - nxt = cpy_arg(&peername, nxt); - if(!peername) { - exit(255); - } - - printf("\nStarting mDNS module...\n"); - if (pico_mdns_init(hostname, address, &mdns_init_callback, NULL)) { - printf("Initialisation returned with Error!\n"); - exit(255); - } - - printf("\nTry reinitialising mDNS\n"); - if (pico_mdns_init(hostname, address, &mdns_init_callback, NULL)) { - printf("Initialisation returned with Error!\n"); - exit(255); - } - - printf("DONE - Re-initialising mDNS module.\n"); - - starttime = PICO_TIME_MS(); - printf("Starting time: %d\n", starttime); - - while(1) { - pico_stack_tick(); - usleep(2000); - - if (((PICO_TIME_MS() - starttime) > SECONDS * 1000) && fully_initialized && !once) { - printf("\nTry reinitialising mDNS (a second time)\n"); - if (pico_mdns_init(hostname, address, &mdns_init_callback, NULL)) { - printf("Initialisation returned with Error!\n"); - exit(255); - } - once = 1; - printf("DONE - Re-initialising mDNS module. 
(a second time)\n"); - } - - } -} -#endif -/*** END MDNS ***/ diff --git a/kernel/picotcp/test/examples/multicast_ip6_recv.c b/kernel/picotcp/test/examples/multicast_ip6_recv.c deleted file mode 100644 index 22055fc..0000000 --- a/kernel/picotcp/test/examples/multicast_ip6_recv.c +++ /dev/null @@ -1,182 +0,0 @@ -#include "utils.h" -#include -#include - -extern void app_udpecho(char *arg); - -/*** START Multicast RECEIVE + ECHO ***/ -/* - * multicast receive expects the following format: mcastreceive:link_addr:mcast_addr:listen_port:sendto_port - * link_addr: mcastreceive picoapp IP address - * mcast_addr: multicast IP address to receive - * listen_port: port number on which the mcastreceive listens - * sendto_port: port number to echo multicast traffic to (echo to originating IP address) - * - * f.e.: ./build/test/picoapp.elf --vde pic1:/tmp/pic0.ctl:10.40.0.3:255.255.0.0: -a mcastreceive:10.40.0.3:224.7.7.7:6667:6667 - */ -extern struct udpclient_pas *udpclient_pas; -extern struct udpecho_pas *udpecho_pas; -#ifdef PICO_SUPPORT_MCAST -void app_mcastreceive_ipv6(char *arg) -{ - char *new_arg = NULL, *p = NULL, *nxt = arg; - char *laddr = NULL, *maddr = NULL, *lport = NULL, *sport = NULL; - uint16_t listen_port = 0; - union pico_address inaddr_link = { - 0 - }, inaddr_mcast = { - 0 - }, src[5] = { - {.ip6 = { 0xfe, 0x80, 0x00, 0x00, 0x00, 0, 0, 0, 0, 0, 0, 0, 0xac, 0x10, 0x01, 0 }}, - {.ip6 = { 0xfe, 0x80, 0x00, 0x00, 0x00, 0, 0, 0, 0, 0, 0, 0, 0xac, 0x10, 0x01, 0x10}}, - {.ip6 = { 0xfe, 0x80, 0x00, 0x00, 0x00, 0, 0, 0, 0, 0, 0, 0, 0xac, 0x10, 0x01, 0x01 }}, - {.ip6 = { 0xff, 0x00, 0x00, 0x00, 0x00, 0, 0, 0, 0, 0, 0, 0, 0xe0, 0x01, 0x01, 0x01 }}, - }; - struct pico_ip_mreq mreq = ZERO_MREQ_IP6; - struct pico_ip_mreq_source mreq_source = ZERO_MREQ_SRC_IP6; - /* start of parameter parsing */ - if (nxt) { - nxt = cpy_arg(&laddr, nxt); - if (laddr) { - pico_string_to_ipv6(laddr, &inaddr_link.ip6.addr[0]); - } else { - goto out; - } - } else { - /* no arguments */ - goto 
out; - } - - if (nxt) { - nxt = cpy_arg(&maddr, nxt); - if (maddr) { - pico_string_to_ipv6(maddr, &inaddr_mcast.ip6.addr[0]); - } else { - goto out; - } - } else { - /* missing multicast address */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&lport, nxt); - if (lport && atoi(lport)) { - listen_port = short_be(atoi(lport)); - } else { - /* incorrect listen_port */ - goto out; - } - } else { - /* missing listen_port */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&sport, nxt); - if (sport && atoi(sport)) { - /* unused at this moment */ - /* send_port = short_be(atoi(sport)); */ - } else { - /* incorrect send_port */ - goto out; - } - } else { - /* missing send_port */ - goto out; - } - - /* end of parameter parsing */ - - printf("\n%s: multicast receive started. Receiving packets on [%s]:%d\n\n", __FUNCTION__, maddr, short_be(listen_port)); - - /* udpecho:bind_addr:listen_port[:sendto_port:datasize] */ - new_arg = calloc(1, strlen(laddr) + 1 + strlen(lport) + 1 + strlen(sport) + strlen(",64:") + 1); - p = strcat(new_arg, laddr); - p = strcat(p + strlen(laddr), ","); - p = strcat(p + 1, lport); - p = strcat(p + strlen(lport), ","); - p = strcat(p + 1, sport); - p = strcat(p + strlen(sport), ",64,"); - - /* DAD needs to verify the link address before we can continue */ - while(!pico_ipv6_link_get(&inaddr_link.ip6)) { - pico_stack_tick(); - usleep(2000); - } - app_udpecho(new_arg); - - memcpy(&mreq.mcast_group_addr, &inaddr_mcast, sizeof(struct pico_ip6)); - memcpy( &mreq_source.mcast_group_addr, &inaddr_mcast, sizeof(struct pico_ip6)); - memcpy(&mreq.mcast_link_addr, &inaddr_link, sizeof(struct pico_ip6)); - memcpy(&mreq_source.mcast_link_addr, &inaddr_link, sizeof(struct pico_ip6)); - memcpy(&mreq_source.mcast_source_addr, &src[0], sizeof(struct pico_ip6)); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - 
if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_DROP_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_BLOCK_SOURCE, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_BLOCK_SOURCE failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_UNBLOCK_SOURCE, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_UNBLOCK_SOURCE failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_DROP_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_DROP_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - memcpy(&mreq_source.mcast_source_addr, &src[1], sizeof(struct pico_ip6)); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption 
PICO_IP_DROP_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - memcpy(&mreq_source.mcast_source_addr, &src[2], sizeof(struct pico_ip6)); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - memcpy(&mreq_source.mcast_group_addr, &src[3], sizeof(struct pico_ip6)); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - return; - -out: - fprintf(stderr, "mcastreceive expects the following format: mcastreceive:link_addr:mcast_addr:listen_port[:send_port]\n"); - exit(255); -} -#else -void app_mcastreceive_ipv6(char *arg) -{ - printf("ERROR: PICO_SUPPORT_MCAST disabled\n"); - return; -} -#endif -/*** END Multicast RECEIVE + ECHO ***/ diff --git a/kernel/picotcp/test/examples/multicast_ip6_send.c b/kernel/picotcp/test/examples/multicast_ip6_send.c deleted file mode 100644 index 1b2615b..0000000 --- a/kernel/picotcp/test/examples/multicast_ip6_send.c +++ /dev/null @@ -1,140 +0,0 @@ -#include "utils.h" -#include -#include -#include - -extern void app_udpclient(char *arg); -/*** START Multicast SEND ***/ -/* - * multicast send expects the following format: mcastsend:link_addr:mcast_addr:sendto_port:listen_port - * link_addr: mcastsend picoapp IP address - * mcast_addr: multicast IP address to send to - * sendto_port: port number to send multicast traffic to - * listen_port: port number on which the mcastsend can receive data - * - * f.e.: ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.255.0: -a mcastsend:10.40.0.2:224.7.7.7:6667:6667 - */ -extern struct udpclient_pas *udpclient_pas; -#ifdef PICO_SUPPORT_MCAST -void app_mcastsend_ipv6(char *arg) -{ - int retval = 0; - char *maddr = NULL, *laddr = NULL, *lport = NULL, *sport = NULL; - 
uint16_t sendto_port = 0; - struct pico_ip6 inaddr_link = { - 0 - }, inaddr_mcast = { - 0 - }; - char *new_arg = NULL, *p = NULL, *nxt = arg; - struct pico_ip_mreq mreq = ZERO_MREQ_IP6; - - /* start of parameter parsing */ - if (nxt) { - nxt = cpy_arg(&laddr, nxt); - if (laddr) { - pico_string_to_ipv6(laddr, &inaddr_link.addr[0]); - } else { - goto out; - } - } else { - /* no arguments */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&maddr, nxt); - if (maddr) { - pico_string_to_ipv6(maddr, &inaddr_mcast.addr[0]); - } else { - goto out; - } - } else { - /* missing multicast address */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&sport, nxt); - if (sport && atoi(sport)) { - sendto_port = short_be(atoi(sport)); - } else { - /* incorrect send_port */ - goto out; - } - } else { - /* missing send_port */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&lport, nxt); - if (lport && atoi(lport)) { - /* unused at this moment */ - /* listen_port = short_be(atoi(lport)); */ - } else { - /* incorrect listen_port */ - goto out; - } - } else { - /* missing listen_port */ - goto out; - } - - picoapp_dbg("\n%s: mcastsend started. 
Sending packets to %s:%u\n\n", __FUNCTION__, maddr, short_be(sendto_port)); - - /* udpclient:dest_addr:sendto_port[:listen_port:datasize:loops:subloops] */ - new_arg = calloc(1, strlen(maddr) + 1 + strlen(sport) + 1 + strlen(lport) + strlen(",64,10,5,") + 1); - p = strcat(new_arg, maddr); - p = strcat(p + strlen(maddr), ","); - p = strcat(p + 1, sport); - p = strcat(p + strlen(sport), ","); - p = strcat(p + 1, lport); - p = strcat(p + strlen(lport), ",64,10,5,"); - - /* DAD needs to verify the link address before we can continue */ - while(!pico_ipv6_link_get(&inaddr_link)) { - pico_stack_tick(); - usleep(2000); - } - app_udpclient(new_arg); - - memcpy(&mreq.mcast_group_addr, &inaddr_mcast, sizeof(struct pico_ip6)); - memcpy(&mreq.mcast_link_addr, &inaddr_link, sizeof(struct pico_ip6)); - if(pico_socket_setoption(udpclient_pas->s, PICO_IP_ADD_MEMBERSHIP, &mreq) < 0) { - picoapp_dbg("%s: socket_setoption PICO_IP_ADD_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - retval = 1; - } - - if (new_arg) - free(new_arg); - - if (lport) - free(lport); - - if (maddr) - free(maddr); - - if (sport) - free(sport); - - if (laddr) - free(laddr); - - if (retval) - exit(retval); - - return; - -out: - picoapp_dbg("mcastsend expects the following format: mcastsend:link_addr:mcast_addr:sendto_port:listen_port\n"); - exit(255); -} -#else -void app_mcastsend_ipv6(char *arg) -{ - picoapp_dbg("ERROR: PICO_SUPPORT_MCAST disabled\n"); - return; -} -#endif -/*** END Multicast SEND ***/ diff --git a/kernel/picotcp/test/examples/multicast_recv.c b/kernel/picotcp/test/examples/multicast_recv.c deleted file mode 100644 index 8e95e26..0000000 --- a/kernel/picotcp/test/examples/multicast_recv.c +++ /dev/null @@ -1,171 +0,0 @@ -#include "utils.h" -#include -#include - -extern void app_udpecho(char *arg); - -/*** START Multicast RECEIVE + ECHO ***/ -/* - * multicast receive expects the following format: mcastreceive:link_addr:mcast_addr:listen_port:sendto_port - * link_addr: 
mcastreceive picoapp IP address - * mcast_addr: multicast IP address to receive - * listen_port: port number on which the mcastreceive listens - * sendto_port: port number to echo multicast traffic to (echo to originating IP address) - * - * f.e.: ./build/test/picoapp.elf --vde pic1:/tmp/pic0.ctl:10.40.0.3:255.255.0.0: -a mcastreceive:10.40.0.3:224.7.7.7:6667:6667 - */ -extern struct udpclient_pas *udpclient_pas; -extern struct udpecho_pas *udpecho_pas; -#ifdef PICO_SUPPORT_MCAST -void app_mcastreceive(char *arg) -{ - char *new_arg = NULL, *p = NULL, *nxt = arg; - char *laddr = NULL, *maddr = NULL, *lport = NULL, *sport = NULL; - uint16_t listen_port = 0; - union pico_address inaddr_link = { - 0 - }, inaddr_mcast = { - 0 - }; - struct pico_ip_mreq mreq = ZERO_MREQ; - struct pico_ip_mreq_source mreq_source = ZERO_MREQ_SRC; - - /* start of parameter parsing */ - if (nxt) { - nxt = cpy_arg(&laddr, nxt); - if (laddr) { - pico_string_to_ipv4(laddr, &inaddr_link.ip4.addr); - } else { - goto out; - } - } else { - /* no arguments */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&maddr, nxt); - if (maddr) { - pico_string_to_ipv4(maddr, &inaddr_mcast.ip4.addr); - } else { - goto out; - } - } else { - /* missing multicast address */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&lport, nxt); - if (lport && atoi(lport)) { - listen_port = short_be(atoi(lport)); - } else { - /* incorrect listen_port */ - goto out; - } - } else { - /* missing listen_port */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&sport, nxt); - if (sport && atoi(sport)) { - /* unused at this moment */ - /* send_port = short_be(atoi(sport)); */ - } else { - /* incorrect send_port */ - goto out; - } - } else { - /* missing send_port */ - goto out; - } - - /* end of parameter parsing */ - - printf("\n%s: multicast receive started. 
Receiving packets on %s:%d\n\n", __FUNCTION__, maddr, short_be(listen_port)); - - /* udpecho:bind_addr:listen_port[:sendto_port:datasize] */ - new_arg = calloc(1, strlen(laddr) + 1 + strlen(lport) + 1 + strlen(sport) + strlen(":64:") + 1); - p = strcat(new_arg, laddr); - p = strcat(p + strlen(laddr), ":"); - p = strcat(p + 1, lport); - p = strcat(p + strlen(lport), ":"); - p = strcat(p + 1, sport); - p = strcat(p + strlen(sport), ":64:"); - - app_udpecho(new_arg); - - mreq.mcast_group_addr = mreq_source.mcast_group_addr = inaddr_mcast; - mreq.mcast_link_addr = mreq_source.mcast_link_addr = inaddr_link; - mreq_source.mcast_source_addr.ip4.addr = long_be(0XAC100101); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_DROP_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_BLOCK_SOURCE, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_BLOCK_SOURCE failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_UNBLOCK_SOURCE, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_UNBLOCK_SOURCE failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_DROP_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", 
__FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_DROP_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - mreq_source.mcast_source_addr.ip4.addr = long_be(0XAC10010A); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_DROP_MEMBERSHIP, &mreq) < 0) { - printf("%s: socket_setoption PICO_IP_DROP_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - } - - mreq_source.mcast_source_addr.ip4.addr = long_be(0XAC100101); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - mreq_source.mcast_group_addr.ip4.addr = long_be(0XE0010101); - if(pico_socket_setoption(udpecho_pas->s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source) < 0) { - printf("%s: socket_setoption PICO_IP_ADD_SOURCE_MEMBERSHIP: %s\n", __FUNCTION__, strerror(pico_err)); - } - - return; - -out: - fprintf(stderr, "mcastreceive expects the following format: mcastreceive:link_addr:mcast_addr:listen_port[:send_port]\n"); - exit(255); -} -#else -void app_mcastreceive(char *arg) -{ - printf("ERROR: PICO_SUPPORT_MCAST disabled\n"); - return; -} -#endif -/*** END Multicast RECEIVE + ECHO ***/ diff --git a/kernel/picotcp/test/examples/multicast_send.c b/kernel/picotcp/test/examples/multicast_send.c deleted file mode 100644 index a0cd435..0000000 --- a/kernel/picotcp/test/examples/multicast_send.c +++ /dev/null @@ -1,129 
+0,0 @@ -#include "utils.h" -#include -#include - -extern void app_udpclient(char *arg); -/*** START Multicast SEND ***/ -/* - * multicast send expects the following format: mcastsend:link_addr:mcast_addr:sendto_port:listen_port - * link_addr: mcastsend picoapp IP address - * mcast_addr: multicast IP address to send to - * sendto_port: port number to send multicast traffic to - * listen_port: port number on which the mcastsend can receive data - * - * f.e.: ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.255.0: -a mcastsend:10.40.0.2:224.7.7.7:6667:6667 - */ -extern struct udpclient_pas *udpclient_pas; -#ifdef PICO_SUPPORT_MCAST -void app_mcastsend(char *arg) -{ - char *maddr = NULL, *laddr = NULL, *lport = NULL, *sport = NULL; - uint16_t sendto_port = 0; - union pico_address inaddr_link = { - 0 - }, inaddr_mcast = { - 0 - }; - char *new_arg = NULL, *p = NULL, *nxt = arg; - struct pico_ip_mreq mreq = ZERO_MREQ; - - /* start of parameter parsing */ - if (nxt) { - nxt = cpy_arg(&laddr, nxt); - if (laddr) { - pico_string_to_ipv4(laddr, &inaddr_link.ip4.addr); - } else { - goto out; - } - } else { - /* no arguments */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&maddr, nxt); - if (maddr) { - pico_string_to_ipv4(maddr, &inaddr_mcast.ip4.addr); - } else { - goto out; - } - } else { - /* missing multicast address */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&sport, nxt); - if (sport && atoi(sport)) { - sendto_port = short_be(atoi(sport)); - } else { - /* incorrect send_port */ - goto out; - } - } else { - /* missing send_port */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&lport, nxt); - if (lport && atoi(lport)) { - /* unused at this moment */ - /* listen_port = short_be(atoi(lport)); */ - } else { - /* incorrect listen_port */ - goto out; - } - } else { - /* missing listen_port */ - goto out; - } - - picoapp_dbg("\n%s: mcastsend started. 
Sending packets to %08X:%u\n\n", __FUNCTION__, long_be(inaddr_mcast.addr), short_be(sendto_port)); - - /* udpclient:dest_addr:sendto_port[:listen_port:datasize:loops:subloops] */ - new_arg = calloc(1, strlen(maddr) + 1 + strlen(sport) + 1 + strlen(lport) + strlen(":64:10:5:") + 1); - p = strcat(new_arg, maddr); - p = strcat(p + strlen(maddr), ":"); - p = strcat(p + 1, sport); - p = strcat(p + strlen(sport), ":"); - p = strcat(p + 1, lport); - p = strcat(p + strlen(lport), ":64:10:5:"); - - app_udpclient(new_arg); - free(new_arg); - - mreq.mcast_group_addr = inaddr_mcast; - mreq.mcast_link_addr = inaddr_link; - if(pico_socket_setoption(udpclient_pas->s, PICO_IP_ADD_MEMBERSHIP, &mreq) < 0) { - picoapp_dbg("%s: socket_setoption PICO_IP_ADD_MEMBERSHIP failed: %s\n", __FUNCTION__, strerror(pico_err)); - exit(1); - } - - /* free strdups */ - if (maddr) - free(maddr); - - if (laddr) - free(laddr); - - if (lport) - free(lport); - - if (sport) - free(sport); - - return; - -out: - picoapp_dbg("mcastsend expects the following format: mcastsend:link_addr:mcast_addr:sendto_port:listen_port\n"); - exit(255); -} -#else -void app_mcastsend(char *arg) -{ - picoapp_dbg("ERROR: PICO_SUPPORT_MCAST disabled\n"); - return; -} -#endif -/*** END Multicast SEND ***/ diff --git a/kernel/picotcp/test/examples/natbox.c b/kernel/picotcp/test/examples/natbox.c deleted file mode 100644 index 1f54ecb..0000000 --- a/kernel/picotcp/test/examples/natbox.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "utils.h" -#include -#include - -/*** START NATBOX ***/ -void app_natbox(char *arg) -{ - char *dest = NULL; - struct pico_ip4 ipdst, pub_addr, priv_addr; - struct pico_ipv4_link *link; - - cpy_arg(&dest, arg); - if (!dest) { - fprintf(stderr, "natbox needs the following format: natbox:dst_addr\n"); - exit(255); - } - - pico_string_to_ipv4(dest, &ipdst.addr); - link = pico_ipv4_link_get(&ipdst); - if (!link) { - fprintf(stderr, "natbox: Destination not found.\n"); - exit(255); - } - - 
pico_ipv4_nat_enable(link); - pico_string_to_ipv4("10.50.0.10", &pub_addr.addr); - pico_string_to_ipv4("10.40.0.08", &priv_addr.addr); - pico_ipv4_port_forward(pub_addr, short_be(5555), priv_addr, short_be(6667), PICO_PROTO_UDP, PICO_NAT_PORT_FORWARD_ADD); - fprintf(stderr, "natbox: started.\n"); -} -/*** END NATBOX ***/ diff --git a/kernel/picotcp/test/examples/noop.c b/kernel/picotcp/test/examples/noop.c deleted file mode 100644 index d7dd5a4..0000000 --- a/kernel/picotcp/test/examples/noop.c +++ /dev/null @@ -1,11 +0,0 @@ -/* NOOP */ -#include -void app_noop(void) -{ - while(1) { - pico_stack_tick(); - usleep(2000); - } -} - -/* END NOOP */ diff --git a/kernel/picotcp/test/examples/ping.c b/kernel/picotcp/test/examples/ping.c deleted file mode 100644 index 7d33709..0000000 --- a/kernel/picotcp/test/examples/ping.c +++ /dev/null @@ -1,138 +0,0 @@ -#include "utils.h" -#include -#include -#include -#include -/*** START PING ***/ -#ifdef PICO_SUPPORT_PING -#define NUM_PING 10 - -void cb_ping(struct pico_icmp4_stats *s) -{ - char host[30]; - pico_ipv4_to_string(host, s->dst.addr); - if (s->err == 0) { - dbg("%lu bytes from %s: icmp_req=%lu ttl=%lu time=%lu ms\n", s->size, host, s->seq, - s->ttl, (long unsigned int)s->time); - if (s->seq >= NUM_PING) - exit(0); - } else { - dbg("PING %lu to %s: Error %d\n", s->seq, host, s->err); - exit(1); - } -} - -#ifdef PICO_SUPPORT_IPV6 -void cb_ping6(struct pico_icmp6_stats *s) -{ - char host[50]; - pico_ipv6_to_string(host, s->dst.addr); - if (s->err == 0) { - dbg("%lu bytes from %s: icmp_req=%lu ttl=%lu time=%lu ms\n", s->size, host, s->seq, - s->ttl, (long unsigned int)s->time); - if (s->seq >= NUM_PING) - exit(0); - } else { - dbg("PING %lu to %s: Error %d\n", s->seq, host, s->err); - exit(1); - } -} -#endif - -void ping_abort_timer(pico_time now, void *_id) -{ - int *id = (int *) _id; - printf("Ping: aborting...\n"); - if (!IPV6_MODE) - pico_icmp4_ping_abort(*id); - -#ifdef PICO_SUPPORT_IPV6 - else - 
pico_icmp6_ping_abort(*id); -#endif -} - -void app_ping(char *arg) -{ - char *dest = NULL; - char *next = NULL; - char *abort = NULL; - char *delay = NULL; - char *asize = NULL; - int initial_delay = 0; - struct pico_ip6 dst; - static int id; - int timeout = 0; - int size = 64; - - next = cpy_arg(&dest, arg); - if (!dest) { - fprintf(stderr, "ping needs the following format: ping:dst_addr:[size:[abort after N sec:[wait N sec before start]]]\n"); - exit(255); - } - pico_string_to_ipv6(dest, dst.addr); - if (next) { - next = cpy_arg(&asize, next); - size = atoi(asize); - free(asize); - if (size <= 0) { - size = 64; /* Default */ - } - } - - if (next) { - next = cpy_arg(&abort, next); - if (strlen(abort) > 0) { - printf("Got arg: '%s'\n", abort); - timeout = atoi(abort); - if (timeout < 0) { - fprintf(stderr, "ping needs the following format: ping:dst_addr:[size:[abort after N sec:[wait N sec before start]]]\n"); - exit(255); - } - printf("Aborting ping after %d seconds\n", timeout); - } - } - - if (next) { - next = cpy_arg(&delay, next); - if (strlen(delay) > 0) { - initial_delay = atoi(delay); - if (initial_delay > 0) { - printf("Initial delay: %d seconds\n", initial_delay); - initial_delay = PICO_TIME_MS() + initial_delay * 1000; - while (PICO_TIME_MS() < initial_delay) { - pico_stack_tick(); - usleep(10000); - } - } - } - free(delay); - } - printf("Starting ping.\n"); - - if (!IPV6_MODE) - id = pico_icmp4_ping(dest, NUM_PING, 1000, 10000, size, cb_ping); - -#ifdef PICO_SUPPORT_IPV6 - else - id = pico_icmp6_ping(dest, NUM_PING, 1000, 10000, size, cb_ping6, pico_ipv6_source_dev_find(&dst)); -#endif - if (timeout > 0) { - printf("Adding abort timer after %d seconds for id %d\n", timeout, id); - if (!pico_timer_add(timeout * 1000, ping_abort_timer, &id)) { - printf("Failed to set ping abort timeout, aborting ping\n"); - ping_abort_timer((pico_time)0, &id); - exit(1); - } - } - - /* free copied args */ - if (dest) - free(dest); - - if (abort) - free(abort); -} -#endif 
-/*** END PING ***/ - diff --git a/kernel/picotcp/test/examples/slaacv4.c b/kernel/picotcp/test/examples/slaacv4.c deleted file mode 100644 index a4ca62d..0000000 --- a/kernel/picotcp/test/examples/slaacv4.c +++ /dev/null @@ -1,72 +0,0 @@ -#include "utils.h" -#include -#include -/*** START SLAACV4 ***/ - -void ping_callback_slaacv4(struct pico_icmp4_stats *s) -{ - char host[30] = { }; - - pico_ipv4_to_string(host, s->dst.addr); - if (s->err == 0) { - dbg("SLAACV4: %lu bytes from %s: icmp_req=%lu ttl=64 time=%lu ms\n", s->size, host, - s->seq, (long unsigned int)s->time); - if (s->seq >= 3) { - dbg("SLAACV4: TEST SUCCESS!\n"); - pico_slaacv4_unregisterip(); - exit(0); - } - } else { - dbg("SLAACV4: ping %lu to %s error %d\n", s->seq, host, s->err); - dbg("SLAACV4: TEST FAILED!\n"); - exit(1); - } -} - -void slaacv4_cb(struct pico_ip4 *ip, uint8_t code) -{ - char dst[16] = "169.254.22.5"; - printf("SLAACV4 CALLBACK ip:0x%X code:%d \n", ip->addr, code); - if (code == 0) - { -#ifdef PICO_SUPPORT_PING - pico_icmp4_ping(dst, 3, 1000, 5000, 32, ping_callback_slaacv4); -#else - exit(0); -#endif - } - else - { - exit(255); - } - -} - - -void app_slaacv4(char *arg) -{ - char *sdev = NULL; - char *nxt = arg; - struct pico_device *dev = NULL; - - if (!nxt) - exit(255); - - while (nxt) { - if (nxt) { - nxt = cpy_arg(&sdev, nxt); - if(!sdev) { - exit(255); - } - } - } - dev = pico_get_device(sdev); - free(sdev); - if(dev == NULL) { - printf("%s: error getting device %s: %s\n", __FUNCTION__, dev->name, strerror(pico_err)); - exit(255); - } - - pico_slaacv4_claimip(dev, slaacv4_cb); -} -/*** END SLAACv4 ***/ diff --git a/kernel/picotcp/test/examples/sntp.c b/kernel/picotcp/test/examples/sntp.c deleted file mode 100644 index 4a55dd3..0000000 --- a/kernel/picotcp/test/examples/sntp.c +++ /dev/null @@ -1,59 +0,0 @@ -#include "utils.h" -#include -/*** START SNTP ***/ - -#ifdef PICO_SUPPORT_SNTP_CLIENT - -void sntp_timeout(pico_time __attribute__((unused)) now, void *arg) -{ - struct 
pico_timeval ptv; - struct timeval tv; - pico_sntp_gettimeofday(&ptv); - gettimeofday(&tv, NULL); - printf("Linux sec: %u, msec: %u\n", (unsigned int)tv.tv_sec, (unsigned int)tv.tv_usec / 1000); - printf("Picotcp sec: %u, msec: %u\n", (unsigned int)ptv.tv_sec, (unsigned int)ptv.tv_msec); - printf("SNTP test succesfull!\n"); - exit(0); -} - -void cb_synced(pico_err_t status) -{ - if(status == PICO_ERR_ENETDOWN) { - printf("SNTP: Cannot resolve ntp server name\n"); - exit(1); - } else if (status == PICO_ERR_ETIMEDOUT) { - printf("SNTP: Timed out, did not receive ntp packet from server\n"); - exit(1); - } else if (status == PICO_ERR_EINVAL) { - printf("SNTP: Conversion error\n"); - exit(1); - } else if (status == PICO_ERR_ENOTCONN) { - printf("SNTP: Socket error\n"); - exit(1); - } else if (status == PICO_ERR_NOERR) { - if (!pico_timer_add(2000, sntp_timeout, NULL)) { - printf("SNTP: Failed to start timeout timer, exiting program \n"); - exit(1); - } - } else { - printf("SNTP: Invalid status received in cb_synced\n"); - exit(1); - } -} - -void app_sntp(char *servername) -{ - struct pico_timeval tv; - printf("Starting SNTP query towards %s\n", servername); - if(pico_sntp_gettimeofday(&tv) == 0) - printf("Wrongly succesfull gettimeofday\n"); - else - printf("Unsuccesfull gettimeofday (not synced)\n"); - - if(pico_sntp_sync(servername, &cb_synced) == 0) - printf("Succesfull sync call!\n"); - else - printf("Error in sync\n"); -} -#endif -/*** END SNTP ***/ diff --git a/kernel/picotcp/test/examples/tcpbench.c b/kernel/picotcp/test/examples/tcpbench.c deleted file mode 100644 index 39b29a4..0000000 --- a/kernel/picotcp/test/examples/tcpbench.c +++ /dev/null @@ -1,320 +0,0 @@ -#include "utils.h" -#include -#include -#include -/*** START TCP BENCH ***/ -#define TCP_BENCH_TX 1 -#define TCP_BENCH_RX 2 -#define TCP_BENCH_TX_FOREVER 3 -static char *buffer1; -static char *buffer0; - -int tcpbench_mode = 0; -struct pico_socket *tcpbench_sock = NULL; -static pico_time 
tcpbench_time_start, tcpbench_time_end; - -void cb_tcpbench(uint16_t ev, struct pico_socket *s) -{ - static int closed = 0; - static unsigned long count = 0; - uint8_t recvbuf[1500]; - uint16_t port; - char peer[200]; - /* struct pico_socket *sock_a; */ - - static int tcpbench_wr_size = 0; - static int tcpbench_rd_size = 0; - int tcpbench_w = 0; - int tcpbench_r = 0; - double tcpbench_time = 0; - - count++; - - if (ev & PICO_SOCK_EV_RD) { - do { - /* read data, but discard */ - tcpbench_r = pico_socket_read(s, recvbuf, 1500); - if (tcpbench_r > 0) { - tcpbench_rd_size += tcpbench_r; - } - } while (tcpbench_r > 0); - if (tcpbench_time_start == 0) - tcpbench_time_start = PICO_TIME_MS(); - - printf("tcpbench_rd_size = %d \r", tcpbench_rd_size); - } - - if (ev & PICO_SOCK_EV_CONN) { - if (!IPV6_MODE) { - struct pico_ip4 orig; - if (tcpbench_mode == TCP_BENCH_TX || tcpbench_mode == TCP_BENCH_TX_FOREVER) { - printf("tcpbench> Connection established with server.\n"); - } else if (tcpbench_mode == TCP_BENCH_RX) { - /* sock_a = pico_socket_accept(s, &orig, &port); */ - pico_socket_accept(s, &orig, &port); - pico_ipv4_to_string(peer, orig.addr); - printf("tcpbench> Connection established with %s:%d.\n", peer, short_be(port)); - } - } else { - struct pico_ip6 orig; - if (tcpbench_mode == TCP_BENCH_TX || tcpbench_mode == TCP_BENCH_TX_FOREVER) { - printf("tcpbench> Connection established with server.\n"); - } else if (tcpbench_mode == TCP_BENCH_RX) { - /* sock_a = pico_socket_accept(s, &orig, &port); */ - pico_socket_accept(s, &orig, &port); -#ifdef PICO_SUPPORT_IPV6 - pico_ipv6_to_string(peer, orig.addr); - printf("tcpbench> Connection established with [%s]:%d.\n", peer, short_be(port)); -#endif - } - } - } - - if (ev & PICO_SOCK_EV_FIN) { - printf("tcpbench> Socket closed. Exit normally. 
\n"); - if (tcpbench_mode == TCP_BENCH_RX) { - tcpbench_time_end = PICO_TIME_MS(); - tcpbench_time = (tcpbench_time_end - tcpbench_time_start) / 1000.0; /* get number of seconds */ - printf("tcpbench> received %d bytes in %lf seconds\n", tcpbench_rd_size, tcpbench_time); - printf("tcpbench> average read throughput %lf kbit/sec\n", ((tcpbench_rd_size * 8.0) / tcpbench_time) / 1000); - pico_socket_shutdown(s, PICO_SHUT_WR); - printf("tcpbench> Called shutdown write, ev = %d\n", ev); - } - - if (!pico_timer_add(5000, deferred_exit, NULL)) { - printf("tcpbench> Failed to start exit timer, exiting now\n"); - exit(1); - } - } - - if (ev & PICO_SOCK_EV_ERR) { - printf("tcpbench> ---- Socket Error received: %s. Bailing out.\n", strerror(pico_err)); - if (!pico_err == PICO_ERR_ECONNRESET) { - if (pico_timer_add(5000, deferred_exit, NULL)) { - printf("tcpbench> Failed to start exit timer, exiting now\n"); - exit(1); - } - } - else { - printf("tcpbench> ---- Socket Error: '%s'. Was unexpected! Something went wrong.\n", strerror(pico_err)); - exit(2); - } - } - - if (ev & PICO_SOCK_EV_CLOSE) { - printf("tcpbench> event close\n"); - if (tcpbench_mode == TCP_BENCH_RX) { - pico_socket_close(s); - printf("tcpbench> Called shutdown write, ev = %d\n", ev); - } else if (tcpbench_mode == TCP_BENCH_TX || tcpbench_mode == TCP_BENCH_TX_FOREVER) { - pico_socket_close(s); - return; - } - } - - if (ev & PICO_SOCK_EV_WR) { - if (((tcpbench_wr_size < TCPSIZ) && (tcpbench_mode == TCP_BENCH_TX)) || tcpbench_mode == TCP_BENCH_TX_FOREVER) { - do { - tcpbench_w = pico_socket_write(tcpbench_sock, buffer0 + (tcpbench_wr_size % TCPSIZ), TCPSIZ - (tcpbench_wr_size % TCPSIZ)); - if (tcpbench_w > 0) { - tcpbench_wr_size += tcpbench_w; - /* printf("tcpbench> SOCKET WRITTEN - %d\n",tcpbench_w); */ - } else { - /* printf("pico_socket_write returned %d\n", tcpbench_w); */ - } - - if (tcpbench_time_start == 0) - tcpbench_time_start = PICO_TIME_MS(); - } while(tcpbench_w > 0); - printf("tcpbench_wr_size = %d 
\r", tcpbench_wr_size); - } else { - if (!closed && tcpbench_mode == TCP_BENCH_TX) { - tcpbench_time_end = PICO_TIME_MS(); - pico_socket_shutdown(s, PICO_SHUT_WR); - printf("tcpbench> TCPSIZ written\n"); - printf("tcpbench> Called shutdown()\n"); - tcpbench_time = (tcpbench_time_end - tcpbench_time_start) / 1000.0; /* get number of seconds */ - printf("tcpbench> Transmitted %u bytes in %lf seconds\n", TCPSIZ, tcpbench_time); - printf("tcpbench> average write throughput %lf kbit/sec\n", ((TCPSIZ * 8.0) / tcpbench_time) / 1000); - closed = 1; - } - } - } -} - -void app_tcpbench(char *arg) -{ - struct pico_socket *s; - char *dport = NULL; - char *dest = NULL; - char *mode = NULL; - char *nagle = NULL; - int port = 0, i; - uint16_t port_be = 0; - char *nxt; - char *sport = NULL; - int nagle_off = 1; - union { - struct pico_ip4 ip4; - struct pico_ip6 ip6; - } inaddr_any = { - .ip4 = {0}, .ip6 = {{0}} - }; - - nxt = cpy_arg(&mode, arg); - - if ((*mode == 't') || (*mode == 'f')) { /* TEST BENCH SEND MODE */ - if (*mode == 't') - tcpbench_mode = TCP_BENCH_TX; - else - tcpbench_mode = TCP_BENCH_TX_FOREVER; - - printf("tcpbench> TX\n"); - - nxt = cpy_arg(&dest, nxt); - if (!dest) { - fprintf(stderr, "tcpbench send needs the following format: tcpbench:tx:dst_addr[:dport][:n] -- 'n' is for nagle\n"); - exit(255); - } - - printf ("+++ Dest is %s\n", dest); - if (nxt) { - printf("Next arg: %s\n", nxt); - nxt = cpy_arg(&dport, nxt); - printf("Dport: %s\n", dport); - } - - if (nxt) { - printf("Next arg: %s\n", nxt); - nxt = cpy_arg(&nagle, nxt); - printf("nagle: %s\n", nagle); - if (strlen(nagle) == 1 && nagle[0] == 'n') { - nagle_off = 0; - printf("Nagle algorithm enabled\n"); - } - } - - if (dport) { - port = atoi(dport); - port_be = short_be((uint16_t)port); - } - - if (port == 0) { - port_be = short_be(5555); - } - - buffer0 = malloc(TCPSIZ); - buffer1 = malloc(TCPSIZ); - printf("Buffer1 (%p)\n", buffer1); - for (i = 0; i < TCPSIZ; i++) { - char c = (i % 26) + 'a'; - 
buffer0[i] = c; - } - memset(buffer1, 'a', TCPSIZ); - printf("tcpbench> Connecting to: %s:%d\n", dest, short_be(port_be)); - - if (!IPV6_MODE) { - struct pico_ip4 server_addr; - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, &cb_tcpbench); - if (!s) - exit(1); - - pico_socket_setoption(s, PICO_TCP_NODELAY, &nagle_off); - - /* NOTE: used to set a fixed local port and address - local_port = short_be(6666); - pico_string_to_ipv4("10.40.0.11", &local_addr.addr); - pico_socket_bind(s, &local_addr, &local_port);*/ - - pico_string_to_ipv4(dest, &server_addr.addr); - pico_socket_connect(s, &server_addr, port_be); - } else { - struct pico_ip6 server_addr; - s = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_TCP, &cb_tcpbench); - if (!s) - exit(1); - - pico_socket_setoption(s, PICO_TCP_NODELAY, &nagle_off); - - /* NOTE: used to set a fixed local port and address - local_port = short_be(6666); - pico_string_to_ipv4("10.40.0.11", &local_addr.addr); - pico_socket_bind(s, &local_addr, &local_port);*/ -#ifdef PICO_SUPPORT_IPV6 - pico_string_to_ipv6(dest, server_addr.addr); - pico_socket_connect(s, &server_addr, port_be); -#endif - - } - - } else if (*mode == 'r') { /* TEST BENCH RECEIVE MODE */ - int ret; - tcpbench_mode = TCP_BENCH_RX; - printf("tcpbench> RX\n"); - - cpy_arg(&sport, nxt); - if (!sport) { - fprintf(stderr, "tcpbench receive needs the following format: tcpbench:rx[:dport]\n"); - exit(255); - } - - if (sport) { - printf("s-port is %s\n", sport); - port = atoi(sport); - port_be = short_be((uint16_t)port); - printf("tcpbench> Got port %d\n", port); - free(sport); - } - - if (port == 0) { - port_be = short_be(5555); - } - - printf("tcpbench> OPEN\n"); - if (!IPV6_MODE) - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, &cb_tcpbench); - else - s = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_TCP, &cb_tcpbench); - - if (!s) - exit(1); - - printf("tcpbench> BIND\n"); - if (!IPV6_MODE) - ret = pico_socket_bind(s, &inaddr_any.ip4, &port_be); - else - ret = 
pico_socket_bind(s, &inaddr_any.ip6, &port_be); - - if (ret < 0) { - printf("tcpbench> BIND failed because %s\n", strerror(pico_err)); - exit(1); - } - - printf("tcpbench> LISTEN\n"); - if (pico_socket_listen(s, 40) != 0) - exit(1); - - printf("tcpbench> listening port %u ...\n", short_be(port_be)); - } else { - printf("tcpbench> wrong mode argument\n"); - exit(1); - } - - tcpbench_sock = s; - - /* free strdups */ - if (dport) - free(dport); - - if (dest) - free (dest); - - if (mode) - free (mode); - - if (nagle) - free (nagle); - - return; -} -/*** END TCP BENCH ***/ diff --git a/kernel/picotcp/test/examples/tcpclient.c b/kernel/picotcp/test/examples/tcpclient.c deleted file mode 100644 index e81bddd..0000000 --- a/kernel/picotcp/test/examples/tcpclient.c +++ /dev/null @@ -1,197 +0,0 @@ -#include "utils.h" -#include -#include -#include -/*** START TCP CLIENT ***/ -static char *buffer1; -static char *buffer0; - -void compare_results(pico_time __attribute__((unused)) now, void __attribute__((unused)) *arg) -{ -#ifdef CONSISTENCY_CHECK /* TODO: Enable */ - int i; - printf("Calculating result.... 
(%p)\n", buffer1); - - if (memcmp(buffer0, buffer1, TCPSIZ) == 0) - exit(0); - - for (i = 0; i < TCPSIZ; i++) { - if (buffer0[i] != buffer1[i]) { - fprintf(stderr, "Error at byte %d - %c!=%c\n", i, buffer0[i], buffer1[i]); - exit(115); - } - } -#endif - exit(0); - -} - -void cb_tcpclient(uint16_t ev, struct pico_socket *s) -{ - static int w_size = 0; - static int r_size = 0; - static int closed = 0; - int r, w; - static unsigned long count = 0; - - count++; - picoapp_dbg("tcpclient> wakeup %lu, event %u\n", count, ev); - - if (ev & PICO_SOCK_EV_RD) { - do { - r = pico_socket_read(s, buffer1 + r_size, TCPSIZ - r_size); - if (r > 0) { - r_size += r; - picoapp_dbg("SOCKET READ - %d\n", r_size); - } - - if (r < 0) - exit(5); - } while(r > 0); - } - - if (ev & PICO_SOCK_EV_CONN) { - printf("Connection established with server.\n"); - } - - if (ev & PICO_SOCK_EV_FIN) { - printf("Socket closed. Exit normally. \n"); - if (!pico_timer_add(2000, compare_results, NULL)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - } - - if (ev & PICO_SOCK_EV_ERR) { - printf("Socket error received: %s. 
Bailing out.\n", strerror(pico_err)); - exit(1); - } - - if (ev & PICO_SOCK_EV_CLOSE) { - printf("Socket received close from peer - Wrong case if not all client data sent!\n"); - pico_socket_close(s); - return; - } - - if (ev & PICO_SOCK_EV_WR) { - if (w_size < TCPSIZ) { - do { - w = pico_socket_write(s, buffer0 + w_size, TCPSIZ - w_size); - if (w > 0) { - w_size += w; - picoapp_dbg("SOCKET WRITTEN - %d\n", w_size); - if (w < 0) - exit(5); - } - } while(w > 0); - } else { -#ifdef INFINITE_TCPTEST - w_size = 0; - return; -#endif - if (!closed) { - pico_socket_shutdown(s, PICO_SHUT_WR); - printf("Called shutdown()\n"); - closed = 1; - } - } - } -} - -void app_tcpclient(char *arg) -{ - char *daddr = NULL, *dport = NULL; - char *nxt = arg; - uint16_t send_port = 0, listen_port = short_be(5555); - int i = 0, ret = 0, yes = 1; - struct pico_socket *s = NULL; - union pico_address dst = { - .ip4 = {0}, .ip6 = {{0}} - }; - union pico_address inaddr_any = { - .ip4 = {0}, .ip6 = {{0}} - }; - - /* start of argument parsing */ - if (nxt) { - nxt = cpy_arg(&daddr, arg); - if (daddr) { - if (!IPV6_MODE) - pico_string_to_ipv4(daddr, &dst.ip4.addr); - - #ifdef PICO_SUPPORT_IPV6 - else - pico_string_to_ipv6(daddr, dst.ip6.addr); - #endif - } else { - goto out; - } - } else { - /* missing dest_addr */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&dport, nxt); - if (dport && atoi(dport)) { - send_port = short_be(atoi(dport)); - } else { - goto out; - } - } else { - /* missing send_port */ - goto out; - } - - /* end of argument parsing */ - - buffer0 = malloc(TCPSIZ); - buffer1 = malloc(TCPSIZ); - printf("Buffer1 (%p)\n", buffer1); - for (i = 0; i < TCPSIZ; i++) { - char c = (i % 26) + 'a'; - buffer0[i] = c; - } - memset(buffer1, 'a', TCPSIZ); - - printf("Connecting to: %s:%d\n", daddr, short_be(send_port)); - - if (!IPV6_MODE) - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, &cb_tcpclient); - else - s = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_TCP, &cb_tcpclient); - - if 
(!s) { - printf("%s: error opening socket: %s\n", __FUNCTION__, strerror(pico_err)); - exit(1); - } - - pico_socket_setoption(s, PICO_TCP_NODELAY, &yes); - - if (!IPV6_MODE) - ret = pico_socket_bind(s, &inaddr_any.ip4, &listen_port); - else - ret = pico_socket_bind(s, &inaddr_any.ip6, &listen_port); - - if (ret < 0) { - printf("%s: error binding socket to port %u: %s\n", __FUNCTION__, short_be(listen_port), strerror(pico_err)); - exit(1); - } - - if (!IPV6_MODE) - ret = pico_socket_connect(s, &dst.ip4, send_port); - else - ret = pico_socket_connect(s, &dst.ip6, send_port); - - if (ret < 0) { - printf("%s: error connecting to %s:%u: %s\n", __FUNCTION__, daddr, short_be(send_port), strerror(pico_err)); - exit(1); - } - - return; - -out: - fprintf(stderr, "tcpclient expects the following format: tcpclient:dest_addr:dest_port\n"); - exit(255); -} -/*** END TCP CLIENT ***/ diff --git a/kernel/picotcp/test/examples/tcpecho.c b/kernel/picotcp/test/examples/tcpecho.c deleted file mode 100644 index b109d9c..0000000 --- a/kernel/picotcp/test/examples/tcpecho.c +++ /dev/null @@ -1,178 +0,0 @@ -#include "utils.h" -#include -#include -/*** START TCP ECHO ***/ -#define BSIZE (1024 * 10) -static char recvbuf[BSIZE]; -static int pos = 0, len = 0; -static int flag = 0; - -int send_tcpecho(struct pico_socket *s) -{ - int w, ww = 0; - if (len > pos) { - do { - w = pico_socket_write(s, recvbuf + pos, len - pos); - if (w > 0) { - pos += w; - ww += w; - if (pos >= len) { - pos = 0; - len = 0; - } - } - } while((w > 0) && (pos < len)); - } - - return ww; -} - -void cb_tcpecho(uint16_t ev, struct pico_socket *s) -{ - int r = 0; - - picoapp_dbg("tcpecho> wakeup ev=%u\n", ev); - - if (ev & PICO_SOCK_EV_RD) { - if (flag & PICO_SOCK_EV_CLOSE) - printf("SOCKET> EV_RD, FIN RECEIVED\n"); - - while (len < BSIZE) { - r = pico_socket_read(s, recvbuf + len, BSIZE - len); - if (r > 0) { - len += r; - flag &= ~(PICO_SOCK_EV_RD); - } else { - flag |= PICO_SOCK_EV_RD; - break; - } - } - if (flag & 
PICO_SOCK_EV_WR) { - flag &= ~PICO_SOCK_EV_WR; - send_tcpecho(s); - } - } - - if (ev & PICO_SOCK_EV_CONN) { - uint32_t ka_val = 0; - struct pico_socket *sock_a = { - 0 - }; - struct pico_ip4 orig = { - 0 - }; - uint16_t port = 0; - char peer[30] = { - 0 - }; - int yes = 1; - - sock_a = pico_socket_accept(s, &orig, &port); - pico_ipv4_to_string(peer, orig.addr); - printf("Connection established with %s:%d.\n", peer, short_be(port)); - pico_socket_setoption(sock_a, PICO_TCP_NODELAY, &yes); - /* Set keepalive options */ - ka_val = 5; - pico_socket_setoption(sock_a, PICO_SOCKET_OPT_KEEPCNT, &ka_val); - ka_val = 30000; - pico_socket_setoption(sock_a, PICO_SOCKET_OPT_KEEPIDLE, &ka_val); - ka_val = 5000; - pico_socket_setoption(sock_a, PICO_SOCKET_OPT_KEEPINTVL, &ka_val); - } - - if (ev & PICO_SOCK_EV_FIN) { - printf("Socket closed. Exit normally. \n"); - if (!pico_timer_add(2000, deferred_exit, NULL)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - } - - if (ev & PICO_SOCK_EV_ERR) { - printf("Socket error received: %s. 
Bailing out.\n", strerror(pico_err)); - exit(1); - } - - if (ev & PICO_SOCK_EV_CLOSE) { - printf("Socket received close from peer.\n"); - if (flag & PICO_SOCK_EV_RD) { - pico_socket_shutdown(s, PICO_SHUT_WR); - printf("SOCKET> Called shutdown write, ev = %d\n", ev); - } - } - - if (ev & PICO_SOCK_EV_WR) { - r = send_tcpecho(s); - if (r == 0) - flag |= PICO_SOCK_EV_WR; - else - flag &= (~PICO_SOCK_EV_WR); - } -} - -void app_tcpecho(char *arg) -{ - char *nxt = arg; - char *lport = NULL; - uint16_t listen_port = 0; - int ret = 0, yes = 1; - struct pico_socket *s = NULL; - union { - struct pico_ip4 ip4; - struct pico_ip6 ip6; - } inaddr_any = { - .ip4 = {0}, .ip6 = {{0}} - }; - - /* start of argument parsing */ - if (nxt) { - nxt = cpy_arg(&lport, nxt); - if (lport && atoi(lport)) { - listen_port = short_be(atoi(lport)); - } else { - goto out; - } - } else { - /* missing listen_port */ - goto out; - } - - /* end of argument parsing */ - - if (!IPV6_MODE) - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, &cb_tcpecho); - else - s = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_TCP, &cb_tcpecho); - - if (!s) { - printf("%s: error opening socket: %s\n", __FUNCTION__, strerror(pico_err)); - exit(1); - } - - pico_socket_setoption(s, PICO_TCP_NODELAY, &yes); - - - - if (!IPV6_MODE) - ret = pico_socket_bind(s, &inaddr_any.ip4, &listen_port); - else - ret = pico_socket_bind(s, &inaddr_any.ip6, &listen_port); - - if (ret < 0) { - printf("%s: error binding socket to port %u: %s\n", __FUNCTION__, short_be(listen_port), strerror(pico_err)); - exit(1); - } - - if (pico_socket_listen(s, 40) != 0) { - printf("%s: error listening on port %u\n", __FUNCTION__, short_be(listen_port)); - exit(1); - } - - printf("Launching PicoTCP echo server\n"); - return; - -out: - fprintf(stderr, "tcpecho expects the following format: tcpecho:listen_port\n"); - exit(255); -} -/*** END TCP ECHO ***/ diff --git a/kernel/picotcp/test/examples/tftp.c b/kernel/picotcp/test/examples/tftp.c deleted file 
mode 100644 index d691ca4..0000000 --- a/kernel/picotcp/test/examples/tftp.c +++ /dev/null @@ -1,485 +0,0 @@ -#include "utils.h" -#include -#include -#include -#include -#include -#include -#include -#include - -/* Let's use linux fs */ -#include - -#include - -/*** START TFTP ***/ -#ifdef PICO_SUPPORT_TFTP -#define TFTP_MODE_SRV 0 -#define TFTP_MODE_CLI 1 -#define TFTP_MODE_PSH 2 -#define TFTP_TX_COUNT 2000 -#define TFTP_PAYLOAD_SIZE 512 -unsigned char tftp_txbuf[TFTP_PAYLOAD_SIZE]; -static uint16_t family; - -struct command_t { - char operation; - char *filename; - union pico_address server_address; - struct command_t *next; -}; - -struct note_t { - char *filename; - int fd; - char direction; - int32_t filesize; - struct note_t *next; -}; - -struct note_t *clipboard = NULL; - -struct note_t *add_note(const char *filename, int fd, char direction) -{ - struct note_t *note = PICO_ZALLOC(sizeof(struct note_t)); - - note->filename = strdup(filename); - note->fd = fd; - note->direction = direction; - note->filesize = 0; - note->next = clipboard; - clipboard = note; - return note; -} - -void del_note(struct note_t *note) -{ - struct note_t *prev; - - if (note == clipboard) - { - clipboard = clipboard->next; - if (note->filename) - free (note->filename); - - PICO_FREE(note); - } else { - for (prev = clipboard; prev->next; prev = prev->next) - if (prev->next == note) { - prev->next = note->next; - if (note->filename) - free (note->filename); - - PICO_FREE(note); - break; - } - - } -} - -struct command_t *add_command(struct command_t *commands, char operation, - char *filename, union pico_address *server_address) -{ - struct command_t *command = PICO_ZALLOC(sizeof(struct command_t)); - - command->operation = operation; - command->filename = filename; - memcpy(&command->server_address, server_address, sizeof(union pico_address)); - command->next = commands; - return command; -} - -int32_t get_filesize(const char *filename) -{ - int ret; - struct stat buf; - - ret = 
stat(filename, &buf); - if (ret) - return -1; - - return buf.st_size; -} - -struct note_t *setup_transfer(char operation, const char *filename) -{ - int fd; - - printf("operation %c\n", operation); - fd = open(filename, (toupper(operation) == 'T') ? O_RDONLY : O_WRONLY | O_EXCL | O_CREAT, 0666); - if (fd < 0) { - perror("open"); - fprintf(stderr, "Unable to handle file %s\n", filename); - return NULL; - } - - return add_note(filename, fd, operation); -} - -int cb_tftp_tx(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg) -{ - struct note_t *note = (struct note_t *) arg; - - if (event != PICO_TFTP_EV_OK) { - fprintf(stderr, "TFTP: Error %" PRIu16 ": %s\n", event, block); - exit(1); - } - - len = read(note->fd, tftp_txbuf, PICO_TFTP_PAYLOAD_SIZE); - - if (len >= 0) { - note->filesize += len; - pico_tftp_send(session, tftp_txbuf, len); - if (len < PICO_TFTP_PAYLOAD_SIZE) { - printf("TFTP: file %s (%" PRId32 " bytes) TX transfer complete!\n", note->filename, note->filesize); - close(note->fd); - del_note(note); - } - } else { - perror("read"); - fprintf(stderr, "Filesystem error reading file %s, cancelling current transfer\n", note->filename); - pico_tftp_abort(session, TFTP_ERR_EACC, "Error on read"); - del_note(note); - } - - if (!clipboard) { - if (!pico_timer_add(3000, deferred_exit, NULL)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - } - - return len; -} - -int cb_tftp_tx_opt(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg) -{ - int ret; - int32_t filesize; - - if (event == PICO_TFTP_EV_OPT) { - ret = pico_tftp_get_option(session, PICO_TFTP_OPTION_FILE, &filesize); - if (ret) - printf("TFTP: Option filesize is not used\n"); - else - printf("TFTP: We expect to transmit %" PRId32 " bytes\n", filesize); - - event = PICO_TFTP_EV_OK; - } - - return cb_tftp_tx(session, event, block, len, arg); -} - -int cb_tftp_rx(struct pico_tftp_session *session, uint16_t 
event, uint8_t *block, int32_t len, void *arg) -{ - struct note_t *note = (struct note_t *) arg; - int ret; - - if (event != PICO_TFTP_EV_OK) { - fprintf(stderr, "TFTP: Error %" PRIu16 ": %s\n", event, block); - exit(1); - } - - if (!note) - return 0; - - note->filesize += len; - if (write(note->fd, block, len) < 0) { - perror("write"); - fprintf(stderr, "Filesystem error writing file %s, cancelling current transfer\n", note->filename); - pico_tftp_abort(session, TFTP_ERR_EACC, "Error on write"); - del_note(note); - } else { - if (len != PICO_TFTP_PAYLOAD_SIZE) { - printf("TFTP: file %s (%" PRId32 " bytes) RX transfer complete!\n", note->filename, note->filesize); - close(note->fd); - del_note(note); - } - } - - if (!clipboard) { - if (!pico_timer_add(3000, deferred_exit, NULL)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - } - - return len; -} - -int cb_tftp_rx_opt(struct pico_tftp_session *session, uint16_t event, uint8_t *block, int32_t len, void *arg) -{ - int ret; - int32_t filesize; - - if (event == PICO_TFTP_EV_OPT) { - ret = pico_tftp_get_option(session, PICO_TFTP_OPTION_FILE, &filesize); - if (ret) - printf("TFTP: Option filesize is not used\n"); - else - printf("TFTP: We expect to receive %" PRId32 " bytes\n", filesize); - - return 0; - } - - return cb_tftp_rx(session, event, block, len, arg); -} - -struct pico_tftp_session *make_session_or_die(union pico_address *addr, uint16_t family) -{ - struct pico_tftp_session *session; - - session = pico_tftp_session_setup(addr, family); - if (!session) { - fprintf(stderr, "TFTP: Error in session setup\n"); - exit(3); - } - - return session; -} - -struct note_t *transfer_prepare(struct pico_tftp_session **psession, char operation, const char *filename, union pico_address *addr, uint16_t family) -{ - struct note_t *note; - - note = setup_transfer(operation, filename); - *psession = make_session_or_die(addr, family); - return note; -} - -void start_rx(struct pico_tftp_session *session, 
const char *filename, uint16_t port, - int (*rx_callback)(struct pico_tftp_session *session, uint16_t err, uint8_t *block, int32_t len, void *arg), - struct note_t *note) -{ - if (pico_tftp_start_rx(session, port, filename, rx_callback, note)) { - fprintf(stderr, "TFTP: Error in initialization\n"); - exit(1); - } -} - -void start_tx(struct pico_tftp_session *session, const char *filename, uint16_t port, - int (*tx_callback)(struct pico_tftp_session *session, uint16_t err, uint8_t *block, int32_t len, void *arg), - struct note_t *note) -{ - if (pico_tftp_start_tx(session, port, filename, tx_callback, note)) { - fprintf(stderr, "TFTP: Error in initialization\n"); - exit(1); - } -} - -void tftp_listen_cb(union pico_address *addr, uint16_t port, uint16_t opcode, char *filename, int32_t len) -{ - struct note_t *note; - struct pico_tftp_session *session; - - printf("TFTP listen callback (BASIC) from remote port %" PRIu16 ".\n", short_be(port)); - if (opcode == PICO_TFTP_RRQ) { - printf("Received TFTP get request for %s\n", filename); - note = transfer_prepare(&session, 't', filename, addr, family); - start_tx(session, filename, port, cb_tftp_tx, note); - } else if (opcode == PICO_TFTP_WRQ) { - printf("Received TFTP put request for %s\n", filename); - note = transfer_prepare(&session, 'r', filename, addr, family); - start_rx(session, filename, port, cb_tftp_rx, note); - } -} - -void tftp_listen_cb_opt(union pico_address *addr, uint16_t port, uint16_t opcode, char *filename, int32_t len) -{ - struct note_t *note; - struct pico_tftp_session *session; - int options; - uint8_t timeout; - int32_t filesize; - int ret; - - printf("TFTP listen callback (OPTIONS) from remote port %" PRIu16 ".\n", short_be(port)); - /* declare the options we want to support */ - ret = pico_tftp_parse_request_args(filename, len, &options, &timeout, &filesize); - if (ret) - pico_tftp_reject_request(addr, port, TFTP_ERR_EOPT, "Malformed request"); - - if (opcode == PICO_TFTP_RRQ) { - printf("Received 
TFTP get request for %s\n", filename); - note = transfer_prepare(&session, 'T', filename, addr, family); - - if (options & PICO_TFTP_OPTION_TIME) - pico_tftp_set_option(session, PICO_TFTP_OPTION_TIME, timeout); - - if (options & PICO_TFTP_OPTION_FILE) { - ret = get_filesize(filename); - if (ret < 0) { - pico_tftp_reject_request(addr, port, TFTP_ERR_ENOENT, "File not found"); - return; - } - - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, ret); - } - - start_tx(session, filename, port, cb_tftp_tx_opt, note); - } else { /* opcode == PICO_TFTP_WRQ */ - printf("Received TFTP put request for %s\n", filename); - - note = transfer_prepare(&session, 'R', filename, addr, family); - if (options & PICO_TFTP_OPTION_TIME) - pico_tftp_set_option(session, PICO_TFTP_OPTION_TIME, timeout); - - if (options & PICO_TFTP_OPTION_FILE) - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, filesize); - - start_rx(session, filename, port, cb_tftp_rx_opt, note); - } -} - -void print_usage(int exit_code) -{ - printf("\nUsage: tftp:OPTION:[OPTION]...\n" - "\nOtions can be repeated. 
Every option may be one of the following:\n" - "\ts\t\t\t starts the basic server (RFC1350)\n" - "\tS\t\t\t starts the server with option handling capability\n" - "\tt:file:ip\t\t PUT request (without options) for file to server ip\n" - "\tT:file:ip\t\t PUT request for file to server ip\n" - "\tr:file:ip\t\t GET request (without options) for file to server ip\n" - "\tR:file:ip\t\t GET request for file to server ip\n" - "Example:\n" - "\t\t tftp:S:T:firstFile:10.40.0.2:R:another.file:10.40.0.5:T:secondFile:10.40.0.2\n\n"); - exit(exit_code); -} - -struct command_t *parse_arguments_recursive(struct command_t *commands, char *arg) -{ - char *next; - char *operation; - char *filename; - char *address; - static union pico_address remote_address; - int ret; - struct command_t *new_cmd = NULL; - - if (!arg) - return commands; - - next = cpy_arg(&operation, arg); - switch (*operation) { - case 'S': - case 's': - filename = address = NULL; - break; - case 'T': - case 'R': - case 't': - case 'r': - if (!next) { - fprintf(stderr, "Incomplete client command %s (filename componet is missing)\n", arg); - return NULL; - } - - next = cpy_arg(&filename, next); - if (!next) { - fprintf(stderr, "Incomplete client command %s (address component is missing)\n", arg); - return NULL; - } - - next = cpy_arg(&address, next); - if (!IPV6_MODE) - ret = pico_string_to_ipv4(address, &remote_address.ip4.addr); - else - ret = pico_string_to_ipv6(address, remote_address.ip6.addr); - - if (ret < 0) { - fprintf(stderr, "Invalid IP address %s\n", address); - print_usage(2); - } - - if (address) - free(address); - - break; - default: - fprintf(stderr, "Invalid command %s\n", operation); - return NULL; - }; - - new_cmd = add_command(commands, *operation, filename, &remote_address); - free(operation); - return parse_arguments_recursive(new_cmd, next); -} - -struct command_t *parse_arguments(char *arg) -{ - struct command_t *reversed = parse_arguments_recursive(NULL, arg); - struct command_t *commands = 
NULL; - struct command_t *current; - - if (!reversed) { - fprintf(stderr, "Wrong command line!\n"); - print_usage(1); - } - - while (reversed) { - current = reversed; - reversed = reversed->next; - current->next = commands; - commands = current; - } - return commands; -} - -void app_tftp(char *arg) -{ - struct command_t *commands, *old_cmd; - struct note_t *note; - struct pico_tftp_session *session; - int is_server_enabled = 0; - int filesize; - - family = IPV6_MODE ? PICO_PROTO_IPV6 : PICO_PROTO_IPV4; - - commands = parse_arguments(arg); - while (commands) { - - if (toupper(commands->operation) != 'S') - note = transfer_prepare(&session, commands->operation, commands->filename, &commands->server_address, family); - - switch (commands->operation) { - case 'S': - case 's': - if (!is_server_enabled) { - pico_tftp_listen(PICO_PROTO_IPV4, (commands->operation == 'S') ? tftp_listen_cb_opt : tftp_listen_cb); - is_server_enabled = 1; - } - - break; - case 'T': - filesize = get_filesize(commands->filename); - if (filesize < 0) { - fprintf(stderr, "TFTP: unable to read size of file %s\n", commands->filename); - exit(3); - } - - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, filesize); - start_tx(session, commands->filename, short_be(PICO_TFTP_PORT), cb_tftp_tx_opt, note); - break; - case 't': - start_tx(session, commands->filename, short_be(PICO_TFTP_PORT), cb_tftp_tx, note); - break; - case 'R': - pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, 0); - start_rx(session, commands->filename, short_be(PICO_TFTP_PORT), cb_tftp_rx_opt, note); - break; - case 'r': - start_rx(session, commands->filename, short_be(PICO_TFTP_PORT), cb_tftp_rx, note); - } - old_cmd = commands; - commands = commands->next; - if (old_cmd->filename) - free(old_cmd->filename); - - /* commands are allocated using PICO_ZALLOC, so use PICO_FREE */ - PICO_FREE(old_cmd); - } -} - -#endif -/* END TFTP */ diff --git a/kernel/picotcp/test/examples/udp_client.c 
b/kernel/picotcp/test/examples/udp_client.c deleted file mode 100644 index ded23bf..0000000 --- a/kernel/picotcp/test/examples/udp_client.c +++ /dev/null @@ -1,290 +0,0 @@ -#include "utils.h" -#include -#include -#include - -/*** START UDP CLIENT ***/ -/* - * udpclient expects the following format: udpclient:dest_addr:sendto_port[:listen_port:datasize:loops:subloops] - * dest_addr: IP address to send datagrams to - * sendto_port: port number to send datagrams to - * listen_port [OPTIONAL]: port number on which the udpclient listens - * datasize [OPTIONAL]: size of the data given to the socket in one go - * loops [OPTIONAL]: number of intervals in which data is send - * subloops [OPTIONAL]: number of sends in one interval - * - * REMARK: once an optional parameter is given, all optional parameters need a value! - * - * f.e.: ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.2:255.255.255.0: -a udpclient:10.40.0.3:6667:6667:1400:100:10 - */ - -struct udpclient_pas *udpclient_pas; - -static int exit_retry = 0; - -static void request_exit_echo(pico_time now, void *arg) -{ - struct pico_socket *s = (struct pico_socket *)arg; - char end[4] = "end"; - pico_socket_send(s, end, 4); - if (exit_retry++ > 3) { - if (!pico_timer_add(1000, deferred_exit, udpclient_pas)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - } else { - if (!pico_timer_add(1000, request_exit_echo, s)) { - printf("Failed to start request_exit_echo timer, sending request now\n"); - request_exit_echo((pico_time)0, NULL); - exit(1); - } - printf("%s: requested exit of echo\n", __FUNCTION__); - } -} - -void udpclient_send(pico_time __attribute__((unused)) now, void __attribute__((unused)) *arg) -{ - struct pico_socket *s = udpclient_pas->s; - char *buf = NULL; - int i = 0, w = 0; - static uint16_t loop = 0; - - if (++loop > udpclient_pas->loops) { - if (!pico_timer_add(1000, request_exit_echo, s)) { - printf("Failed to start request_exit_echo timer, sending request now\n"); - 
request_exit_echo((pico_time)0, NULL); - exit(1); - } - return; - } else { - buf = calloc(1, udpclient_pas->datasize); - if (!buf) { - printf("%s: no memory available\n", __FUNCTION__); - return; - } - - memset(buf, '1', udpclient_pas->datasize); - picoapp_dbg("%s: performing loop %u\n", __FUNCTION__, loop); - for (i = 0; i < udpclient_pas->subloops; i++) { - w = pico_socket_send(s, buf, udpclient_pas->datasize); - if (w <= 0) - break; - } - picoapp_dbg("%s: written %u byte(s) in each of %u subloops\n", __FUNCTION__, udpclient_pas->datasize, i); - free(buf); - } - - if (!pico_timer_add(100, udpclient_send, NULL)) { - printf("Failed to start send timer, sending exit request to echo and exiting\n"); - request_exit_echo((pico_time)0, NULL); - exit(1); - } -} - -void cb_udpclient(uint16_t ev, struct pico_socket *s) -{ - char *recvbuf = NULL; - int r = 0; - - if (ev & PICO_SOCK_EV_RD) { - recvbuf = calloc(1, udpclient_pas->datasize); - if (!recvbuf) { - printf("%s: no memory available\n", __FUNCTION__); - return; - } - - do { - r = pico_socket_recv(s, recvbuf, udpclient_pas->datasize); - } while ( r > 0); - free(recvbuf); - } - - if (ev == PICO_SOCK_EV_ERR) { - printf("Socket Error received. 
Bailing out.\n"); - free(udpclient_pas); - exit(7); - } -} - -void app_udpclient(char *arg) -{ - char *daddr = NULL, *lport = NULL, *sport = NULL, *s_datasize = NULL, *s_loops = NULL, *s_subloops = NULL; - char *nxt = arg; - char sinaddr_any[40] = { - 0 - }; - uint16_t listen_port = 0; - int ret = 0; - - udpclient_pas = calloc(1, sizeof(struct udpclient_pas)); - if (!udpclient_pas) { - printf("%s: no memory available\n", __FUNCTION__); - exit(255); - } - - udpclient_pas->s = NULL; - udpclient_pas->loops = 100; - udpclient_pas->subloops = 10; - udpclient_pas->datasize = 1400; - - /* start of argument parsing */ - if (nxt) { - nxt = cpy_arg(&daddr, arg); - if (daddr) { - if (!IPV6_MODE) - pico_string_to_ipv4(daddr, &udpclient_pas->dst.ip4.addr); - - #ifdef PICO_SUPPORT_IPV6 - else - pico_string_to_ipv6(daddr, udpclient_pas->dst.ip6.addr); - #endif - } else { - goto out; - } - } else { - /* missing dest_addr */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&sport, nxt); - if (sport && atoi(sport)) { - udpclient_pas->sport = short_be(atoi(sport)); - } else { - goto out; - } - } else { - /* missing send_port */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&lport, nxt); - if (lport && atoi(lport)) { - listen_port = short_be(atoi(lport)); - } else { - goto out; - } - } else { - /* missing listen_port, use default */ - listen_port = 0; - } - - if (nxt) { - nxt = cpy_arg(&s_datasize, nxt); - if (s_datasize && atoi(s_datasize)) { - udpclient_pas->datasize = atoi(s_datasize); - } else { - goto out; - } - } else { - /* missing datasize, incomplete optional parameters? -> exit */ - if (lport) - goto out; - } - - if (nxt) { - nxt = cpy_arg(&s_loops, nxt); - if (s_loops && atoi(s_loops)) { - udpclient_pas->loops = atoi(s_loops); - } else { - goto out; - } - } else { - /* missing loops, incomplete optional parameters? 
-> exit */ - if (s_datasize) - goto out; - } - - if (nxt) { - nxt = cpy_arg(&s_subloops, nxt); - if (s_subloops && atoi(s_subloops)) { - udpclient_pas->subloops = atoi(s_subloops); - } else { - goto out; - } - } else { - /* missing subloops, incomplete optional parameters? -> exit */ - if (s_loops) - goto out; - } - - /* end of argument parsing */ - - if (!IPV6_MODE) - pico_ipv4_to_string(sinaddr_any, inaddr_any.addr); - - #ifdef PICO_SUPPORT_IPV6 - else - pico_ipv6_to_string(sinaddr_any, inaddr6_any.addr); - #endif - - if (!IPV6_MODE) - udpclient_pas->s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &cb_udpclient); - else - udpclient_pas->s = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_UDP, &cb_udpclient); - - if (!udpclient_pas->s) { - printf("%s: error opening socket: %s\n", __FUNCTION__, strerror(pico_err)); - free(udpclient_pas); - exit(1); - } - - if (!IPV6_MODE) - ret = pico_socket_bind(udpclient_pas->s, &inaddr_any, &listen_port); - else - ret = pico_socket_bind(udpclient_pas->s, &inaddr6_any, &listen_port); - - if (ret < 0) { - free(udpclient_pas); - printf("%s: error binding socket to %s:%u: %s\n", __FUNCTION__, sinaddr_any, short_be(listen_port), strerror(pico_err)); - exit(1); - } - - if (!IPV6_MODE) - ret = pico_socket_connect(udpclient_pas->s, &udpclient_pas->dst.ip4, udpclient_pas->sport); - else - ret = pico_socket_connect(udpclient_pas->s, &udpclient_pas->dst.ip6, udpclient_pas->sport); - - if (ret < 0) { - printf("%s: error connecting to [%s]:%u: %s\n", __FUNCTION__, daddr, short_be(udpclient_pas->sport), strerror(pico_err)); - free(udpclient_pas); - exit(1); - } - - printf("\n%s: UDP client launched. 
Sending packets of %u bytes in %u loops and %u subloops to %s:%u\n\n", - __FUNCTION__, udpclient_pas->datasize, udpclient_pas->loops, udpclient_pas->subloops, daddr, short_be(udpclient_pas->sport)); - - if (!pico_timer_add(100, udpclient_send, NULL)) { - printf("Failed to start send timer, sending exit request to echo and exiting\n"); - request_exit_echo((pico_time)0, NULL); - exit(1); - } - - /* free strdups */ - if (daddr) - free (daddr); - - if (lport) - free (lport); - - if (sport) - free (sport); - - if (s_datasize) - free (s_datasize); - - if (s_loops) - free (s_loops); - - if (s_subloops) - free (s_subloops); - - return; - -out: - fprintf(stderr, "udpclient expects the following format: udpclient:dest_addr:dest_port[:listen_port:datasize:loops:subloops]\n"); - free(udpclient_pas); - exit(255); -} -/*** END UDP CLIENT ***/ diff --git a/kernel/picotcp/test/examples/udp_echo.c b/kernel/picotcp/test/examples/udp_echo.c deleted file mode 100644 index 2a9ce3c..0000000 --- a/kernel/picotcp/test/examples/udp_echo.c +++ /dev/null @@ -1,216 +0,0 @@ -#include "utils.h" -#include -#include -#include - -/**** START UDP ECHO ****/ -/* - * udpecho expects the following format: udpecho:bind_addr:listen_port[:sendto_port:datasize] - * bind_addr: IP address to bind to - * listen_port: port number on which the udpecho listens - * sendto_port [OPTIONAL]: port number to echo datagrams to (echo to originating IP address) - * datasize [OPTIONAL]: max size of the data red from the socket in one go - * - * REMARK: once an optional parameter is given, all optional parameters need a value! 
- * - * f.e.: ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.3:255.255.255.0: -a udpecho:10.40.0.3:6667:6667:1400 - */ -static int udpecho_exit = 0; - -struct udpecho_pas *udpecho_pas; - -void cb_udpecho(uint16_t ev, struct pico_socket *s) -{ - char *recvbuf = NULL; - uint16_t port = 0; - int r = 0; - union { - struct pico_ip4 ip4; - struct pico_ip6 ip6; - } peer; - if (udpecho_exit) - return; - - if (ev == PICO_SOCK_EV_RD) { - recvbuf = calloc(1, udpecho_pas->datasize); - if (!recvbuf) { - printf("%s: no memory available\n", __FUNCTION__); - return; - } - - do { - r = pico_socket_recvfrom(s, recvbuf, udpecho_pas->datasize, IPV6_MODE ? (void *)peer.ip6.addr : (void *)&peer.ip4.addr, &port); - /* printf("UDP recvfrom returned %d\n", r); */ - if (r > 0) { - if (strncmp(recvbuf, "end", 3) == 0) { - printf("Client requested to exit... test successful.\n"); - if (!pico_timer_add(1000, deferred_exit, udpecho_pas)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - udpecho_exit++; - } - - pico_socket_sendto(s, recvbuf, r, IPV6_MODE ? (void *)peer.ip6.addr : (void *)&peer.ip4.addr, port); - } - } while (r > 0); - free(recvbuf); - } - - if (ev == PICO_SOCK_EV_ERR) { - printf("Socket Error received. 
Bailing out.\n"); - free(udpecho_pas); - exit(7); - } - - picoapp_dbg("%s: received packet from %08X:%u\n", __FUNCTION__, long_be(peer), short_be(port)); -} - -void app_udpecho(char *arg) -{ - char *baddr = NULL, *lport = NULL, *sport = NULL, *s_datasize = NULL; - char *nxt = arg; - uint16_t listen_port = 0; - struct pico_ip4 inaddr_bind = { }; - struct pico_ip6 inaddr_bind6 = { }; - int ret = 0; - - udpecho_pas = calloc(1, sizeof(struct udpecho_pas)); - if (!udpecho_pas) { - printf("%s: no memory available\n", __FUNCTION__); - exit(255); - } - - udpecho_pas->s = NULL; - udpecho_pas->sendto_port = 0; - udpecho_pas->datasize = 5000; - - /* start of argument parsing */ - if (nxt) { - nxt = cpy_arg(&baddr, nxt); - if (baddr) { - if (!IPV6_MODE) - pico_string_to_ipv4(baddr, &inaddr_bind.addr); - - #ifdef PICO_SUPPORT_IPV6 - else - pico_string_to_ipv6(baddr, inaddr_bind6.addr); - #endif - } else { - goto out; - } - } else { - /* missing bind_addr */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&lport, nxt); - if (lport && atoi(lport)) { - listen_port = short_be(atoi(lport)); - } else { - listen_port = short_be(5555); - } - } else { - /* missing listen_port */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&sport, nxt); - if (sport && atoi(sport)) { - udpecho_pas->sendto_port = atoi(sport); - } else { - /* incorrect send_port */ - goto out; - } - } else { - /* missing send_port, use default */ - } - - if (nxt) { - nxt = cpy_arg(&s_datasize, nxt); - if (s_datasize && atoi(s_datasize)) { - udpecho_pas->datasize = atoi(s_datasize); - } else { - /* incorrect datasize */ - goto out; - } - } else { - /* missing datasize, incomplete optional parameters? 
-> exit */ - if (sport) - goto out; - } - - /* end of argument parsing */ - if (!IPV6_MODE) - udpecho_pas->s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &cb_udpecho); - else - udpecho_pas->s = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_UDP, &cb_udpecho); - - if (!udpecho_pas->s) { - printf("%s: error opening socket: %s\n", __FUNCTION__, strerror(pico_err)); - free(udpecho_pas); - exit(1); - } - - if (!IPV6_MODE) - ret = pico_socket_bind(udpecho_pas->s, &inaddr_bind, &listen_port); - else { - ret = pico_socket_bind(udpecho_pas->s, &inaddr_bind6, &listen_port); - printf("udpecho> Bound to [%s]:%d.\n", baddr, short_be(listen_port)); - } - - if (ret != 0) { - free(udpecho_pas); - if (!IPV6_MODE) - printf("%s: error binding socket to %08X:%u: %s\n", __FUNCTION__, long_be(inaddr_bind.addr), short_be(listen_port), strerror(pico_err)); - else - printf("%s: error binding socket to [%s]:%u: %s\n", __FUNCTION__, baddr, short_be(listen_port), strerror(pico_err)); - - exit(1); - } - -#ifdef PICOAPP_IPFILTER - { - struct pico_ip4 address, in_addr_netmask, in_addr; - /* struct pico_ipv4_link *link; */ - int ret = 0; - address.addr = 0x0800280a; - in_addr_netmask.addr = 0x00FFFFFF; - in_addr.addr = 0x0000320a; - /* link = pico_ipv4_link_get(&address); */ - - printf("udpecho> IPFILTER ENABLED\n"); - - /*Adjust your IPFILTER*/ - ret |= pico_ipv4_filter_add(NULL, 17, NULL, NULL, &in_addr, &in_addr_netmask, 0, 5555, 0, 0, FILTER_DROP); - - if (ret < 0) - printf("Filter_add invalid argument\n"); - } -#endif - - printf("\n%s: UDP echo launched. 
Receiving packets of %u bytes on port %u\n\n", __FUNCTION__, udpecho_pas->datasize, short_be(listen_port)); - - /* free strdups */ - if (baddr) - free (baddr); - - if (lport) - free (lport); - - if (sport) - free (sport); - - if (s_datasize) - free (s_datasize); - - return; - -out: - fprintf(stderr, "udpecho expects the following format: udpecho:bind_addr:listen_port[:sendto_port:datasize]\n"); - free(udpecho_pas); - exit(255); -} -/*** END UDP ECHO ***/ diff --git a/kernel/picotcp/test/examples/udp_sendto_test.c b/kernel/picotcp/test/examples/udp_sendto_test.c deleted file mode 100644 index 3d8e520..0000000 --- a/kernel/picotcp/test/examples/udp_sendto_test.c +++ /dev/null @@ -1,88 +0,0 @@ -#include "utils.h" -#include -#include - -/**** START UDP ECHO ****/ -/* - * udpecho expects the following format: udpecho:bind_addr:listen_port[:sendto_port:datasize] - * bind_addr: IP address to bind to - * listen_port: port number on which the udpecho listens - * sendto_port [OPTIONAL]: port number to echo datagrams to (echo to originating IP address) - * datasize [OPTIONAL]: max size of the data red from the socket in one go - * - * REMARK: once an optional parameter is given, all optional parameters need a value! 
- * - * f.e.: ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.3:255.255.255.0: -a udpecho:10.40.0.3:6667:6667:1400 - */ - -void dummy_cb(uint16_t __attribute__((unused)) ev, struct pico_socket __attribute__((unused)) *s) -{ - -} - -void app_sendto_test(char *arg) -{ - char *nxt = arg; - char *dstaddr = NULL; - char *dstport = NULL; - struct pico_ip4 inaddr_dst = {}; - struct pico_ip6 inaddr_dst6 = {}; - uint16_t dport; - struct pico_socket *sock; - int ret; - - /* start of argument parsing */ - if (nxt) { - nxt = cpy_arg(&dstaddr, nxt); - if (dstaddr) { - if (!IPV6_MODE) - pico_string_to_ipv4(dstaddr, &inaddr_dst.addr); - - #ifdef PICO_SUPPORT_IPV6 - else - pico_string_to_ipv6(dstaddr, inaddr_dst6.addr); - #endif - } else { - goto out; - } - } else { - /* missing bind_addr */ - goto out; - } - - if (nxt) { - nxt = cpy_arg(&dstport, nxt); - if (dstport && atoi(dstport)) { - dport = short_be(atoi(dstport)); - } else { - dport = short_be(5555); - } - } else { - /* missing listen_port */ - goto out; - } - - if (!IPV6_MODE) - sock = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &dummy_cb); - else - sock = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_UDP, &dummy_cb); - - ret = pico_socket_sendto(sock, "Testing", 7u, ((IPV6_MODE) ? (void *)(&inaddr_dst6) : (void *)(&inaddr_dst)), dport); - if (ret < 0) - printf("Failure in first pico_socket_send\n"); - - ret = pico_socket_sendto(sock, "Testing", 7u, ((IPV6_MODE) ? (void *)(&inaddr_dst6) : (void *)(&inaddr_dst)), dport); - if (ret < 0) - printf("Failure in second pico_socket_send\n"); - - ret = pico_socket_close(sock); - if (ret) - printf("Failure in pico_socket_close\n"); - - printf("\n%s: UDP sendto test launched. 
Sending packets to ip %s port %u\n\n", __FUNCTION__, dstaddr, short_be(dport)); - return; - -out: - fprintf(stderr, "udp_sendto_test expects the following format: udp_sendto_test:dest_addr:[dest_por]t\n"); - exit(255); -} diff --git a/kernel/picotcp/test/examples/udpnat.c b/kernel/picotcp/test/examples/udpnat.c deleted file mode 100644 index b8b72f3..0000000 --- a/kernel/picotcp/test/examples/udpnat.c +++ /dev/null @@ -1,151 +0,0 @@ -#include "utils.h" -#include -#include - -/*** START UDP NAT CLIENT ***/ -/* ./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0:10.40.0.10: -a udpnatclient:10.50.0.8:6667: */ -static struct pico_ip4 udpnatclient_inaddr_dst; -static uint16_t udpnatclient_port_be; - -void udpnatclient_send(pico_time __attribute__((unused)) now, void *arg) -{ - int i, w; - struct pico_socket *s = (struct pico_socket *)arg; - char buf[1400] = { }; - char end[4] = "end"; - static int loop = 0; - - for ( i = 0; i < 3; i++) { - w = pico_socket_send(s, buf, 1400); - } - if (++loop > 1000) { - udpnatclient_port_be = 0; - for (i = 0; i < 3; i++) { - w = pico_socket_send(s, end, 4); - if (w <= 0) - break; - - printf("End!\n"); - } - if (!pico_timer_add(1000, deferred_exit, NULL)) { - printf("Failed to start exit timer, exiting now\n"); - exit(1); - } - return; - } -} - -void cb_udpnatclient(uint16_t ev, struct pico_socket *s) -{ - char recvbuf[1400]; - int r = 0; - - if (ev & PICO_SOCK_EV_RD) { - do { - r = pico_socket_recv(s, recvbuf, 1400); - } while(r > 0); - } - - if (ev == PICO_SOCK_EV_ERR) { - printf("Socket Error received. 
Bailing out.\n"); - exit(7); - } - - /* Not closing to test port check */ - /* pico_socket_close(s); */ -} - -void udpnatclient_open_socket(pico_time __attribute__((unused)) now, void __attribute__((unused)) *arg) -{ - struct pico_socket *s = NULL; - static int loop; - - if (!udpnatclient_port_be) - return; - - loop++; - picoapp_dbg(">>>>> Loop %d\n", loop); - if (!(loop % 100)) - printf("Created %d sockets\n", loop); - - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &cb_udpnatclient); - if (!s) - exit(1); - - if (pico_socket_connect(s, &udpnatclient_inaddr_dst, udpnatclient_port_be) != 0) - { - printf("Error connecting\n"); - exit(1); - } - - picoapp_dbg("New socket with port %u\n", s->local_port); - - if (!pico_timer_add(25, udpnatclient_send, s)) { - printf("Failed to start send timer, exiting now\n"); - exit(1); - } - - if (!pico_timer_add(25, udpnatclient_open_socket, 0)) { - printf("Failed to start open_socket timer, exiting now\n"); - exit(1); - } -} - -void app_udpnatclient(char *arg) -{ - struct pico_socket *s; - char *daddr, *dport; - int port = 0; - uint16_t port_be = 0; - struct pico_ip4 inaddr_dst = ZERO_IP4; - char *nxt; - - nxt = cpy_arg(&daddr, arg); - if (!daddr) { - fprintf(stderr, " udpnatclient expects the following format: udpnatclient:dest_addr[:dest_port]\n"); - exit(255); - } - - if (nxt) { - nxt = cpy_arg(&dport, nxt); - if (dport) { - port = atoi(dport); - if (port > 0) - port_be = short_be(port); - } - } - - if (port == 0) { - port_be = short_be(5555); - } - - printf("UDP NAT client started. 
Sending packets to %s:%d\n", daddr, short_be(port_be)); - - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &cb_udpnatclient); - if (!s) - exit(1); - - pico_string_to_ipv4(daddr, &inaddr_dst.addr); - - if (pico_socket_connect(s, &inaddr_dst, port_be) != 0) - { - printf("Error binding the port \n"); - exit(1); - } - - picoapp_dbg("New socket with port %u\n", s->local_port); - - udpnatclient_inaddr_dst = inaddr_dst; - udpnatclient_port_be = port_be; - - if (!pico_timer_add(100, udpnatclient_send, s)) { - printf("Failed to start send timer, exiting now\n"); - exit(1); - } - - if (!pico_timer_add(1000, udpnatclient_open_socket, 0)) { - printf("Failed to start open_socket timer, exiting now\n"); - exit(1); - } -} -/*** END UDP NAT CLIENT ***/ diff --git a/kernel/picotcp/test/examples/utils.h b/kernel/picotcp/test/examples/utils.h deleted file mode 100644 index a606318..0000000 --- a/kernel/picotcp/test/examples/utils.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef PICO_EXAMPLES_UTILS_H -#define PICO_EXAMPLES_UTILS_H -#include -#define TCPSIZ (1024 * 1024 * 5) -extern struct pico_ip4 ZERO_IP4; -extern struct pico_ip_mreq ZERO_MREQ; -extern struct pico_ip_mreq_source ZERO_MREQ_SRC; -extern struct pico_ip6 ZERO_IP6; -extern struct pico_ip_mreq ZERO_MREQ_IP6; -extern struct pico_ip_mreq_source ZERO_MREQ_SRC_IP6; -#define picoapp_dbg(...) 
do {} while(0) -/* #define picoapp_dbg printf */ -extern int IPV6_MODE; - - -extern struct pico_ip4 inaddr_any; -extern struct pico_ip6 inaddr6_any; - -extern char *cpy_arg(char **dst, char *str); - -extern void deferred_exit(pico_time now, void *arg); - -struct udpclient_pas { - struct pico_socket *s; - uint8_t loops; - uint8_t subloops; - uint16_t datasize; - uint16_t sport; - union pico_address dst; -}; /* per application struct */ - -struct udpecho_pas { - struct pico_socket *s; - uint16_t sendto_port; /* big-endian */ - uint16_t datasize; -}; /* per application struct */ - - -#endif diff --git a/kernel/picotcp/test/mkunits.sh b/kernel/picotcp/test/mkunits.sh deleted file mode 100755 index ee51e35..0000000 --- a/kernel/picotcp/test/mkunits.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash -# By Daniele. -#set -x -filename=$1 -if [ [x$1] == [x] ]; then - echo USAGE: $0 filename.c - exit 4 -fi - -#CMOCK="../CMock/lib/cmock.rb" - -bname=`basename $filename` -cat $filename |grep static|grep \( | grep \) >/tmp/$bname - -if (test -f ./test/unit/modunit_$bname); then - echo The destination file ./test/unit/modunit_$bname already exists. Exiting... - exit 0 -fi - -cat $filename |grep "\#include " > ./test/unit/modunit_$bname -MYSELF=`echo $bname | cut -d"." 
-f1`.h -INCLUDES=`cat $filename |grep "\#include \"" |grep -v $MYSELF| cut -d '"' -f 2` - -echo includes are: -echo $INCLUDES -echo "#include \"$filename\"" >>./test/unit/modunit_$bname -echo "#include \"check.h\"" >>./test/unit/modunit_$bname -echo >> ./test/unit/modunit_$bname -echo >> ./test/unit/modunit_$bname - -while read fn ; do - fname=`echo $fn | cut -d "(" -f 1| cut -d" " -f 3` - echo "START_TEST(tc_$fname)" >>./test/unit/modunit_$bname - echo "{" >>./test/unit/modunit_$bname - echo " /* TODO: test this: $fn */" >>./test/unit/modunit_$bname - echo "}" >>./test/unit/modunit_$bname - echo "END_TEST" >>./test/unit/modunit_$bname -done > ./test/unit/modunit_$bname -echo >> ./test/unit/modunit_$bname -echo "Suite *pico_suite(void) -{ - Suite *s = suite_create(\"PicoTCP\"); -" >> ./test/unit/modunit_$bname - -while read fn ; do - fname=`echo $fn | cut -d "(" -f 1| cut -d" " -f 3` - echo " TCase *TCase_$fname = tcase_create(\"Unit test for $fname\");" >> ./test/unit/modunit_$bname -done > ./test/unit/modunit_$bname -echo >> ./test/unit/modunit_$bname - -while read fn ; do - fname=`echo $fn | cut -d "(" -f 1| cut -d" " -f 3` - echo " tcase_add_test(TCase_$fname, tc_$fname);" >> ./test/unit/modunit_$bname - echo " suite_add_tcase(s, TCase_$fname);" >> ./test/unit/modunit_$bname -done > ./test/unit/modunit_$bname -echo "}">> ./test/unit/modunit_$bname - -echo " -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -}" >>./test/unit/modunit_$bname - - -echo Gernerated test ./test/unit/modunit_$bname -#echo Generating mocks... -#mkdir -p mocks -# -#CFILES="" -#for i in $INCLUDES; do -# ii=`find -name $i | grep -v build` -# ruby $CMOCK $ii -# CFILE=`basename $ii |cut -d "." 
-f 1`.c -# CFILES="$CFILES mocks/Mock$CFILE" -#done -ELF=`echo build/test/modunit_$bname | sed -e "s/\.c/.elf/g"` - -echo -echo - -MOCKS=$(gcc -I include/ -I modules/ -I. test/unit/modunit_$bname $CFILES -lcheck -pthread -lm -lrt -o $ELF 2>&1 |grep "undefined reference to" | sed -e "s/.*\`//g" | sed -e "s/'.*$//g" |sort | uniq) - -for m in $MOCKS; do - decl=`grep -R $m * |grep -v ");" | grep -v Binary | cut -d ":" -f 2` - echo $decl >> ./test/unit/modunit_$bname - echo "{" >> ./test/unit/modunit_$bname - echo "/* TODO: MOCK ME! */">> ./test/unit/modunit_$bname - echo "}" >> ./test/unit/modunit_$bname -done -gcc -I include/ -I modules/ -I. test/unit/modunit_$bname $CFILES -lcheck -pthread -lm -lrt -o $ELF && echo "Successfully compiled $ELF" - -#echo " /* TODO: MOCKS NEEDED: $MOCKS */ " >>./test/unit/modunit_$bname diff --git a/kernel/picotcp/test/olsr_test.sh b/kernel/picotcp/test/olsr_test.sh deleted file mode 100755 index 597b233..0000000 --- a/kernel/picotcp/test/olsr_test.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash - -sudo vde_switch -t pic0 -s /tmp/pic0.ctl -d -sudo vde_switch -s /tmp/pic1.ctl -d -sudo vde_switch -s /tmp/pic2.ctl -d -sudo vde_switch -s /tmp/pic3.ctl -d - -sudo ifconfig pic0 10.40.0.254/16 up - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic0.ctl:10.40.0.8:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic0.ctl:10.40.0.9:255.255.0.0: \ - --vde pic1:/tmp/pic1.ctl:10.41.0.9:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic1.ctl:10.41.0.1:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic1.ctl:10.41.0.2:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic1.ctl:10.41.0.3:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic1.ctl:10.41.0.10:255.255.0.0: \ - --vde pic1:/tmp/pic2.ctl:10.42.0.10:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic2.ctl:10.42.0.1:255.255.0.0: \ - -a 
olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic2.ctl:10.42.0.2:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic1.ctl:10.42.0.3:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic2.ctl:10.42.0.11:255.255.0.0: \ - --vde pic1:/tmp/pic3.ctl:10.43.0.11:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic3.ctl:10.43.0.1:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic3.ctl:10.43.0.2:255.255.0.0: \ - -a olsr: & - -./build/test/picoapp.elf \ - --vde pic0:/tmp/pic3.ctl:10.43.0.3:255.255.0.0: \ - -a olsr: & - -sleep 5 -sudo killall olsrd -sudo olsrd -i pic0 - diff --git a/kernel/picotcp/test/perf.sh b/kernel/picotcp/test/perf.sh deleted file mode 100755 index c9debbe..0000000 --- a/kernel/picotcp/test/perf.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -THRESHOLD=300 -sh ./test/vde_sock_start_user.sh -sleep 2 - -(iperf -s >/tmp/iperf.log)& -./build/test/picoapp.elf --vde pic0:/tmp/pic0.ctl:10.50.0.2:255.255.255.0:10.50.0.1: --app iperfc:10.50.0.1: &>/dev/null -killall iperf -RES=`cat /tmp/iperf.log |grep Mbits |sed -e "s/.*Bytes//g" |sed -e "s/^[ ]*//g"` -SPEED=`echo $RES | cut -d " " -f 1` -UNITS=`echo $RES | cut -d " " -f 2` - -if [ ["$UNITS"] != ["Mbits/sec"] ]; then - echo "Wrong test result units: expected Mbits/sec, got $UNITS" - exit 1 -fi - -if (test $SPEED -lt $THRESHOLD); then - echo "Speed too low: expected $THRESHOLD MBits/s, got $SPEED $UNITS" - exit 2 -fi - -echo Test result: $SPEED $UNITS - -rm -f /tmp/iperf.log -exit 0 diff --git a/kernel/picotcp/test/pico_faulty.c b/kernel/picotcp/test/pico_faulty.c deleted file mode 100644 index c9a4405..0000000 --- a/kernel/picotcp/test/pico_faulty.c +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include -#include -#include "pico_faulty.h" - -/* #warning "COMPILING for MEMORY TESTS!" 
*/ - -uint32_t mm_failure_count = 0; -uint32_t cur_mem, max_mem; - -static int called_atexit = 0; - - -void memory_stats(void) -{ - fprintf(stderr, " ################ MAX MEMORY USED in this test: %u\n", max_mem); - -} - -int pico_set_mm_failure(uint32_t nxt) -{ - if (!called_atexit) { - atexit(memory_stats); - called_atexit++; - } - - mm_failure_count = nxt; - return 0; -} diff --git a/kernel/picotcp/test/pico_faulty.h b/kernel/picotcp/test/pico_faulty.h deleted file mode 100644 index 45f7956..0000000 --- a/kernel/picotcp/test/pico_faulty.h +++ /dev/null @@ -1,134 +0,0 @@ -/********************************************************************* - PicoTCP. Copyright (c) 2012-2017 Altran Intelligent Systems. Some rights reserved. - See COPYING, LICENSE.GPLv2 and LICENSE.GPLv3 for usage. - Do not redistribute without a written permission by the Copyright - holders. - *********************************************************************/ - - -/* This is a test implementation, with a faulty memory manager, - * intended to increase test coverage - * Warning: not intended for production! - * - */ - - -#ifndef PICO_SUPPORT_POSIX -#define PICO_SUPPORT_POSIX - -#define PICO_FAULTY - -#define MEM_LIMIT (0) - -#include -#include -#include -#include -#include -#include - -extern uint32_t mm_failure_count; -int pico_set_mm_failure(uint32_t nxt); -extern uint32_t max_mem; -extern uint32_t cur_mem; - -/* - #define TIME_PRESCALE - */ -#define dbg printf - -#define stack_fill_pattern(...) do {} while(0) -#define stack_count_free_words(...) 
do {} while(0) -#define stack_get_free_words() (0) - -static inline void mem_stat_store(void) -{ - char fname_mod[] = "/tmp/pico-mem-report-%hu.txt"; - char fname[200]; - char buffer[20]; - int fd; - snprintf(fname, 200, fname_mod, getpid()); - fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0660); - if (fd < 0) { - return; - } - - snprintf(buffer, 20, "%d\n", max_mem); - write(fd, buffer, strlen(buffer)); - close(fd); -} - - -static inline void *pico_zalloc(size_t x) -{ - uint32_t *ptr; - if (mm_failure_count > 0) { - if (--mm_failure_count == 0) { - fprintf(stderr, "Malloc failed, for test purposes\n"); - return NULL; - } - } - - ptr = (uint32_t *)calloc(x + sizeof(uint32_t), 1); - *ptr = (uint32_t)x; /* store size of alloc */ - cur_mem += (uint32_t)x; - -#ifndef DISABLE_MM_STATS - if (cur_mem > max_mem) { - max_mem = cur_mem; - if ((MEM_LIMIT > 0) && (max_mem > MEM_LIMIT)) - abort(); - - mem_stat_store(); - } - -#endif - return (void*)(ptr + 1); -} - -static inline void pico_free(void *x) -{ - uint32_t *ptr = (uint32_t*)(((uint8_t *)x) - sizeof(uint32_t)); /* fetch size of the alloc */ - cur_mem -= *ptr; - free(ptr); -} - -/* time prescaler */ -#ifdef TIME_PRESCALE -extern int32_t prescale_time; -#endif - -static inline uint32_t PICO_TIME(void) -{ - struct timeval t; - gettimeofday(&t, NULL); - #ifdef TIME_PRESCALE - return (prescale_time < 0) ? (uint32_t)(t.tv_sec / 1000 << (-prescale_time)) : \ - (uint32_t)(t.tv_sec / 1000 >> prescale_time); - #else - return (uint32_t)t.tv_sec; - #endif -} - -static inline uint32_t PICO_TIME_MS(void) -{ - struct timeval t; - gettimeofday(&t, NULL); - #ifdef TIME_PRESCALER - uint32_t tmp = ((t.tv_sec * 1000) + (t.tv_usec / 1000)); - return (prescale_time < 0) ? 
(uint32_t)(tmp / 1000 << (-prescale_time)) : \ - (uint32_t)(tmp / 1000 >> prescale_time); - #else - return (uint32_t)((t.tv_sec * 1000) + (t.tv_usec / 1000)); - #endif -} - -static inline void PICO_IDLE(void) -{ - usleep(5000); -} - -void memory_stats(void); - -#endif /* PICO_SUPPORT_POSIX */ - diff --git a/kernel/picotcp/test/picoapp.c b/kernel/picotcp/test/picoapp.c deleted file mode 100644 index 58b6ede..0000000 --- a/kernel/picotcp/test/picoapp.c +++ /dev/null @@ -1,757 +0,0 @@ -/* PicoTCP Test application */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "utils.h" - -#include "pico_stack.h" -#include "pico_config.h" -#include "pico_dev_vde.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_socket.h" -#include "pico_dev_tun.h" -#include "pico_dev_tap.h" -#include "pico_nat.h" -#include "pico_icmp4.h" -#include "pico_icmp6.h" -#include "pico_dns_client.h" -#include "pico_dev_loop.h" -#include "pico_dhcp_client.h" -#include "pico_dhcp_server.h" -#include "pico_ipfilter.h" -#include "pico_olsr.h" -#include "pico_sntp_client.h" -#include "pico_mdns.h" -#include "pico_tftp.h" -#include "pico_dev_radiotest.h" -#include "pico_dev_radio_mgr.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef FAULTY -#include "pico_faulty.h" -#endif - -void app_udpecho(char *args); -void app_tcpecho(char *args); -void app_udpclient(char *args); -void app_tcpclient(char *args); -void app_tcpbench(char *args); -void app_natbox(char *args); -void app_udpdnsclient(char *args); -void app_udpnatclient(char *args); -void app_mcastsend(char *args); -void app_mcastreceive_ipv6(char *args); -void app_mcastsend_ipv6(char *args); -void app_mcastreceive(char *args); -void app_ping(char *args); -void app_dhcp_server(char *args); -void app_dhcp_client(char *args); -void app_dns_sd(char *arg, struct pico_ip4 addr); -void app_mdns(char *arg, struct pico_ip4 
addr); -void app_sntp(char *args); -void app_tftp(char *args); -void app_slaacv4(char *args); -void app_udpecho(char *args); -void app_sendto_test(char *args); -void app_noop(void); - -struct pico_ip4 ZERO_IP4 = { - 0 -}; -struct pico_ip_mreq ZERO_MREQ = { - .mcast_group_addr = {{0}}, - .mcast_link_addr = {{0}} -}; -struct pico_ip_mreq_source ZERO_MREQ_SRC = { - .mcast_group_addr.ip4 = {0}, - .mcast_link_addr.ip4 = {0}, - .mcast_source_addr.ip4 = {0} -}; -struct pico_ip6 ZERO_IP6 = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } -}; -struct pico_ip_mreq ZERO_MREQ_IP6 = { - .mcast_group_addr.ip6 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}, - .mcast_link_addr.ip6 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }} -}; -struct pico_ip_mreq_source ZERO_MREQ_SRC_IP6 = { - .mcast_group_addr.ip6 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}, - .mcast_link_addr.ip6 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}, - .mcast_source_addr.ip6 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }} -}; - -/* #define INFINITE_TCPTEST */ -#define picoapp_dbg(...) 
do {} while(0) -/* #define picoapp_dbg printf */ - -/* #define PICOAPP_IPFILTER 1 */ - -int IPV6_MODE; - - -struct pico_ip4 inaddr_any = { - 0 -}; -struct pico_ip6 inaddr6_any = {{0}}; - -char *cpy_arg(char **dst, char *str); - -void deferred_exit(pico_time __attribute__((unused)) now, void *arg) -{ - if (arg) { - free(arg); - arg = NULL; - } - - printf("%s: quitting\n", __FUNCTION__); - exit(0); -} - - - -/** From now on, parsing the command line **/ -#define NXT_MAC(x) ++ x[5] - -/* Copy a string until the separator, - terminate it and return the next index, - or NULL if it encounters a EOS */ -char *cpy_arg(char **dst, char *str) -{ - char *p, *nxt = NULL; - char *start = str; - char *end = start + strlen(start); - char sep = ':'; - - if (IPV6_MODE) - sep = ','; - - p = str; - while (p) { - if ((*p == sep) || (*p == '\0')) { - *p = (char)0; - nxt = p + 1; - if ((*nxt == 0) || (nxt >= end)) - nxt = 0; - - printf("dup'ing %s\n", start); - *dst = strdup(start); - break; - } - - p++; - } - return nxt; -} - -static void __wakeup(uint16_t __attribute__((unused)) ev, struct pico_socket __attribute__((unused)) *s) -{ - -} - - -static void usage(char *arg0) -{ - printf("Usage: %s [--vde name:sock:address:netmask[:gateway]] [--vde ...] [--tun name:address:netmask[:gateway]] [--tun ...] [--app name[:args]]\n\n\n", arg0); - printf("\tall arguments can be repeated, e.g. 
to run on multiple links or applications\n"); - printf("\t* --app arguments must be at the end *\n"); - exit(255); -} - -#define IF_APPNAME(x) if(strcmp(x, name) == 0) - -int main(int argc, char **argv) -{ - unsigned char macaddr[6] = { - 0, 0, 0, 0xa, 0xb, 0x0 - }; - uint16_t *macaddr_low = (uint16_t *) (macaddr + 2); - struct pico_device *dev = NULL; - struct pico_ip4 addr4 = { - 0 - }; - struct pico_ip4 bcastAddr = ZERO_IP4; - - struct option long_options[] = { - {"help", 0, 0, 'h'}, - {"vde", 1, 0, 'v'}, - {"barevde", 1, 0, 'b'}, - {"tun", 1, 0, 't'}, - {"tap", 1, 0, 'T'}, - {"route", 1, 0, 'r'}, - {"app", 1, 0, 'a'}, - {"dns", 1, 0, 'd'}, - {"loop", 0, 0, 'l'}, - {0, 0, 0, 0} - }; - int option_idx = 0; - int c; - char *app = NULL, *p = argv[0]; - /* parse till we find the name of the executable */ - while (p) { - if (*p == '/') - app = p + 1; - else if (*p == '\0') - break; - else - {} /* do nothing */ - - p++; - } - if (strcmp(app, "picoapp6.elf") == 0) - IPV6_MODE = 1; - - *macaddr_low = (uint16_t)(*macaddr_low ^ (uint16_t)((uint16_t)getpid() & (uint16_t)0xFFFFU)); - printf("My macaddr base is: %02x %02x\n", macaddr[2], macaddr[3]); - printf("My macaddr is: %02x %02x %02x %02x %02x %02x\n", macaddr[0], macaddr[1], macaddr[2], macaddr[3], macaddr[4], macaddr[5]); - -#ifdef PICO_SUPPORT_MM - pico_mem_init(128 * 1024); -#endif - pico_stack_init(); - /* Parse args */ - while(1) { - c = getopt_long(argc, argv, "6:v:b:t:T:a:r:hl", long_options, &option_idx); - if (c < 0) - break; - - switch(c) { - case 'h': - usage(argv[0]); - break; - case 'T': - { - char *nxt, *name = NULL, *addr = NULL, *nm = NULL, *gw = NULL; - struct pico_ip4 ipaddr, netmask, gateway, zero = ZERO_IP4; - do { - nxt = cpy_arg(&name, optarg); - if (!nxt) break; - - nxt = cpy_arg(&addr, nxt); - if (!nxt) break; - - nxt = cpy_arg(&nm, nxt); - if (!nxt) break; - - cpy_arg(&gw, nxt); - } while(0); - if (!nm) { - fprintf(stderr, "Tun: bad configuration...\n"); - exit(1); - } - - dev = 
pico_tap_create(name); - if (!dev) { - perror("Creating tap"); - exit(1); - } - - pico_string_to_ipv4(addr, &ipaddr.addr); - pico_string_to_ipv4(nm, &netmask.addr); - pico_ipv4_link_add(dev, ipaddr, netmask); - bcastAddr.addr = (ipaddr.addr) | (~netmask.addr); - if (gw && *gw) { - pico_string_to_ipv4(gw, &gateway.addr); - printf("Adding default route via %08x\n", gateway.addr); - pico_ipv4_route_add(zero, zero, gateway, 1, NULL); - } - -#ifdef PICO_SUPPORT_IPV6 - if (IPV6_MODE) { - struct pico_ip6 ipaddr6 = {{0}}, netmask6 = {{0}}, gateway6 = {{0}}, zero6 = {{0}}; - pico_string_to_ipv6(addr, ipaddr6.addr); - pico_string_to_ipv6(nm, netmask6.addr); - pico_ipv6_link_add(dev, ipaddr6, netmask6); - if (gw && *gw) { - pico_string_to_ipv6(gw, gateway6.addr); - pico_ipv6_route_add(zero6, zero6, gateway6, 1, NULL); - } - - pico_ipv6_dev_routing_enable(dev); - } - -#endif - } - break; - case 't': - { - char *nxt, *name = NULL, *addr = NULL, *nm = NULL, *gw = NULL; - struct pico_ip4 ipaddr, netmask, gateway, zero = ZERO_IP4; - do { - nxt = cpy_arg(&name, optarg); - if (!nxt) break; - - nxt = cpy_arg(&addr, nxt); - if (!nxt) break; - - nxt = cpy_arg(&nm, nxt); - if (!nxt) break; - - cpy_arg(&gw, nxt); - } while(0); - if (!nm) { - fprintf(stderr, "Tun: bad configuration...\n"); - exit(1); - } - - dev = pico_tun_create(name); - if (!dev) { - perror("Creating tun"); - exit(1); - } - - pico_string_to_ipv4(addr, &ipaddr.addr); - pico_string_to_ipv4(nm, &netmask.addr); - pico_ipv4_link_add(dev, ipaddr, netmask); - bcastAddr.addr = (ipaddr.addr) | (~netmask.addr); - if (gw && *gw) { - pico_string_to_ipv4(gw, &gateway.addr); - printf("Adding default route via %08x\n", gateway.addr); - pico_ipv4_route_add(zero, zero, gateway, 1, NULL); - } - -#ifdef PICO_SUPPORT_IPV6 - if (IPV6_MODE) { - struct pico_ip6 ipaddr6 = {{0}}, netmask6 = {{0}}, gateway6 = {{0}}, zero6 = {{0}}; - pico_string_to_ipv6(addr, ipaddr6.addr); - pico_string_to_ipv6(nm, netmask6.addr); - pico_ipv6_link_add(dev, 
ipaddr6, netmask6); - if (gw && *gw) { - pico_string_to_ipv6(gw, gateway6.addr); - pico_ipv6_route_add(zero6, zero6, gateway6, 1, NULL); - } - - pico_ipv6_dev_routing_enable(dev); - } - -#endif - } - break; - case 'v': - { - char *nxt, *name = NULL, *sock = NULL, *addr = NULL, *nm = NULL, *gw = NULL, *addr6 = NULL, *nm6 = NULL, *gw6 = NULL, *loss_in = NULL, *loss_out = NULL; - struct pico_ip4 ipaddr, netmask, gateway, zero = ZERO_IP4; - uint32_t i_pc = 0, o_pc = 0; - printf("+++ OPTARG %s\n", optarg); - do { - nxt = cpy_arg(&name, optarg); - if (!nxt) break; - - nxt = cpy_arg(&sock, nxt); - if (!nxt) break; - - if (!IPV6_MODE) { - nxt = cpy_arg(&addr, nxt); - if (!nxt) break; - - nxt = cpy_arg(&nm, nxt); - if (!nxt) break; - - nxt = cpy_arg(&gw, nxt); - if (!nxt) break; - - nxt = cpy_arg(&loss_in, nxt); - if (!nxt) break; - - nxt = cpy_arg(&loss_out, nxt); - if (!nxt) break; - } else { - nxt = cpy_arg(&addr6, nxt); - if (!nxt) break; - - printf("addr6: %s\n", addr6); - - nxt = cpy_arg(&nm6, nxt); - if (!nxt) break; - - nxt = cpy_arg(&gw6, nxt); - if (!nxt) break; - - nxt = cpy_arg(&loss_in, nxt); - if (!nxt) break; - - nxt = cpy_arg(&loss_out, nxt); - if (!nxt) break; - } - } while(0); - if (!nm && !nm6) { - fprintf(stderr, "Vde: bad configuration...\n"); - exit(1); - } - - macaddr[4] ^= (uint8_t)(getpid() >> 8); - macaddr[5] ^= (uint8_t) (getpid() & 0xFF); - dev = pico_vde_create(sock, name, macaddr); - NXT_MAC(macaddr); - if (!dev) { - perror("Creating vde"); - exit(1); - } - - printf("Vde created.\n"); - - if (!IPV6_MODE) { - pico_string_to_ipv4(addr, &ipaddr.addr); - pico_string_to_ipv4(nm, &netmask.addr); - pico_ipv4_link_add(dev, ipaddr, netmask); - addr4 = ipaddr; - bcastAddr.addr = (ipaddr.addr) | (~netmask.addr); - if (gw && *gw) { - pico_string_to_ipv4(gw, &gateway.addr); - pico_ipv4_route_add(zero, zero, gateway, 1, NULL); - } - } - -#ifdef PICO_SUPPORT_IPV6 - if (IPV6_MODE) { - struct pico_ip6 ipaddr6 = {{0}}, netmask6 = {{0}}, gateway6 = {{0}}, zero6 = 
{{0}}; - printf("SETTING UP IPV6 ADDRESS\n"); - pico_string_to_ipv6(addr6, ipaddr6.addr); - pico_string_to_ipv6(nm6, netmask6.addr); - pico_ipv6_link_add(dev, ipaddr6, netmask6); - if (gw6 && *gw6) { - pico_string_to_ipv6(gw6, gateway6.addr); - pico_ipv6_route_add(zero6, zero6, gateway6, 1, NULL); - } - - pico_ipv6_dev_routing_enable(dev); - } - -#endif - if (loss_in && (strlen(loss_in) > 0)) { - i_pc = (uint32_t)atoi(loss_in); - } - - if (loss_out && (strlen(loss_out) > 0)) { - o_pc = (uint32_t)atoi(loss_out); - } - - if (i_pc || o_pc) { - printf(" ---------- >Setting vde packet loss %u:%u\n", i_pc, o_pc); - pico_vde_set_packetloss(dev, i_pc, o_pc); - } - - - } - break; - - case '6': - { - char *nxt, *name = NULL, *area0 = NULL, *area1 = NULL, *dump = NULL; - const char pan_addr[] = "2aaa:abcd::0"; - uint8_t n_id, n_area0, n_area1; - struct pico_ip6 pan; - - /* Copy required command line arguments */ - nxt = cpy_arg(&name, optarg); - if (!nxt) - goto check; - nxt = cpy_arg(&area0, nxt); - if (!nxt) - goto check; - nxt = cpy_arg(&area1, nxt); - if (!nxt) - goto check; - - /* Check required arguments */ -check: if (!name || !area0 || !area1) { - fprintf(stderr, "Usage: -6,id,area\n"); - exit(1); - } - - n_id = (uint8_t) atoi(name); - n_area0 = (uint8_t) atoi(area0); - n_area1 = (uint8_t) atoi(area1); - - if (nxt) { - nxt = cpy_arg(&dump, nxt); - } - - printf("%d:%d:%d\n", n_id, n_area0, n_area1); - - if (!n_id) { - printf("Starting radio-network...\n"); - pico_radio_mgr_start(); - } else { - dev = pico_radiotest_create(n_id, n_area0, n_area1, 0, dump); - if (!dev) { - exit(1); - } - - printf("Radiotest created.\n"); - - /* Add a routable link */ - pico_string_to_ipv6(pan_addr, pan.addr); - pico_ipv6_link_add_local(dev, &pan); - - /* Enable routing on first device */ - if (n_id == 1) { - pico_ipv6_dev_routing_enable(dev); - } - } - break; - } - case 'b': - { - char *nxt, *name = NULL, *sock = NULL; - printf("+++ OPTARG %s\n", optarg); - do { - nxt = cpy_arg(&name, 
optarg); - if (!nxt) break; - - nxt = cpy_arg(&sock, nxt); - } while(0); - if (!sock) { - fprintf(stderr, "Vde: bad configuration...\n"); - exit(1); - } - - macaddr[4] ^= (uint8_t)(getpid() >> 8); - macaddr[5] ^= (uint8_t)(getpid() & 0xFF); - dev = pico_vde_create(sock, name, macaddr); - NXT_MAC(macaddr); - if (!dev) { - if (sock) - free(sock); - - if (name) - free(name); - - perror("Creating vde"); - exit(1); - } - - if (sock) - free(sock); - - if (name) - free(name); - - printf("Vde created.\n"); - } - break; - case 'l': - { - struct pico_ip4 ipaddr, netmask; - - dev = pico_loop_create(); - if (!dev) { - perror("Creating loop"); - exit(1); - } - - pico_string_to_ipv4("127.0.0.1", &ipaddr.addr); - pico_string_to_ipv4("255.0.0.0", &netmask.addr); - pico_ipv4_link_add(dev, ipaddr, netmask); - printf("Loopback created\n"); -#ifdef PICO_SUPPORT_IPV6 - if (IPV6_MODE) { - struct pico_ip6 ipaddr6 = {{0}}, netmask6 = {{0}}; - pico_string_to_ipv6("::1", ipaddr6.addr); - pico_string_to_ipv6("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", netmask6.addr); - pico_ipv6_link_add(dev, ipaddr6, netmask6); - } - - pico_ipv6_dev_routing_enable(dev); - -#endif - } - break; - case 'r': - { - char *nxt, *addr, *nm, *gw; - struct pico_ip4 ipaddr, netmask, gateway; - /* XXX adjust for IPv6 */ - addr = NULL, nm = NULL, gw = NULL; - printf("+++ ROUTEOPTARG %s\n", optarg); - do { - nxt = cpy_arg(&addr, optarg); - if (!nxt) break; - - nxt = cpy_arg(&nm, nxt); - if (!nxt) break; - - nxt = cpy_arg(&gw, nxt); - } while(0); - if (!addr || !nm || !gw) { - fprintf(stderr, "--route expects addr:nm:gw:\n"); - usage(argv[0]); - } - - pico_string_to_ipv4(addr, &ipaddr.addr); - pico_string_to_ipv4(nm, &netmask.addr); - pico_string_to_ipv4(gw, &gateway.addr); - if (pico_ipv4_route_add(ipaddr, netmask, gateway, 1, NULL) == 0) - fprintf(stderr, "ROUTE ADDED *** to %s via %s\n", addr, gw); - else - fprintf(stderr, "ROUTE ADD: ERROR %s \n", strerror(pico_err)); - - break; - } - case 'd': - { - /* Add a DNS 
nameserver IP address */ - char *straddr; - struct pico_ip4 ipaddr; - printf("DNS nameserver address = %s\n", optarg); - cpy_arg(&straddr, optarg); - pico_string_to_ipv4(straddr, &ipaddr.addr); - pico_dns_client_nameserver(&ipaddr, PICO_DNS_NS_ADD); - break; - } - case 'a': - { - char *name = NULL, *args = NULL; - printf("+++ OPTARG %s\n", optarg); - args = cpy_arg(&name, optarg); - - printf("+++ NAME: %s ARGS: %s\n", name, args); - IF_APPNAME("udpecho") { - app_udpecho(args); - } else IF_APPNAME("tcpecho") { - app_tcpecho(args); - } else IF_APPNAME("udpclient") { - app_udpclient(args); - } else IF_APPNAME("tcpclient") { - app_tcpclient(args); - } else IF_APPNAME("tcpbench") { - app_tcpbench(args); - } else IF_APPNAME("natbox") { - app_natbox(args); - } else IF_APPNAME("udpdnsclient") { - app_udpdnsclient(args); - } else IF_APPNAME("udpnatclient") { - app_udpnatclient(args); - } else IF_APPNAME("mcastsend") { -#ifndef PICO_SUPPORT_MCAST - return 0; -#endif - app_mcastsend(args); - } else IF_APPNAME("mcastreceive") { -#ifndef PICO_SUPPORT_MCAST - return 0; -#endif - app_mcastreceive(args); - } - else IF_APPNAME("mcastsend_ipv6") { -#ifndef PICO_SUPPORT_MCAST - return 0; -#endif - app_mcastsend_ipv6(args); - } else IF_APPNAME("mcastreceive_ipv6") { -#ifndef PICO_SUPPORT_MCAST - return 0; -#endif - app_mcastreceive_ipv6(args); - } - -#ifdef PICO_SUPPORT_PING - else IF_APPNAME("ping") { - app_ping(args); - } -#endif - else IF_APPNAME("dhcpserver") { -#ifndef PICO_SUPPORT_DHCPD - return 0; -#else - app_dhcp_server(args); -#endif - } else IF_APPNAME("dhcpclient") { -#ifndef PICO_SUPPORT_DHCPC - return 0; -#else - app_dhcp_client(args); -#endif - } else IF_APPNAME("dns_sd") { -#ifndef PICO_SUPPORT_DNS_SD - return 0; -#else - app_dns_sd(args, addr4); -#endif - } else IF_APPNAME("mdns") { -#ifndef PICO_SUPPORT_MDNS - return 0; -#else - app_mdns(args, addr4); -#endif -#ifdef PICO_SUPPORT_SNTP_CLIENT - } else IF_APPNAME("sntp") { - app_sntp(args); -#endif - } else 
IF_APPNAME("bcast") { - struct pico_ip4 any = { - .addr = 0xFFFFFFFFu - }; - - struct pico_socket *s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, &__wakeup); - pico_socket_sendto(s, "abcd", 5u, &any, 1000); - - pico_socket_sendto(s, "abcd", 5u, &bcastAddr, 1000); -#ifdef PICO_SUPPORT_TFTP - } else IF_APPNAME("tftp") { - app_tftp(args); -#endif - } else IF_APPNAME("noop") { - app_noop(); -#ifdef PICO_SUPPORT_OLSR - } else IF_APPNAME("olsr") { - dev = pico_get_device("pic0"); - if(dev) { - pico_olsr_add(dev); - } - - dev = pico_get_device("pic1"); - if(dev) { - pico_olsr_add(dev); - } - - app_noop(); -#endif - } else IF_APPNAME("slaacv4") { -#ifndef PICO_SUPPORT_SLAACV4 - return 0; -#else - app_slaacv4(args); -#endif - } else IF_APPNAME("udp_sendto_test") { - app_sendto_test(args); - } else { - fprintf(stderr, "Unknown application %s\n", name); - usage(argv[0]); - } - } - break; - } - } - if (!dev) { - printf("nodev"); - usage(argv[0]); - } - -#ifdef FAULTY - atexit(memory_stats); -#endif - printf("%s: launching PicoTCP loop\n", __FUNCTION__); - while(1) { - pico_stack_tick(); - usleep(2000); - } -} diff --git a/kernel/picotcp/test/ppp.c b/kernel/picotcp/test/ppp.c deleted file mode 100644 index 5a95b3c..0000000 --- a/kernel/picotcp/test/ppp.c +++ /dev/null @@ -1,193 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef PICO_SUPPORT_POLARSSL -#include -#endif -#ifdef PICO_SUPPORT_CYASSL -#include -#endif -#define MODEM "/dev/ttyUSB0" -#define SPEED 236800 -/* #define APN "gprs.base.be" */ -#define APN "web.be" -#define PASSWD "web" -#define USERNAME "altran" -/* #define DEBUG_FLOW */ -static int fd = -1; -static int idx; -static int ping_on = 0; -static struct pico_device *ppp = NULL; - -static void sigusr1_hdl(int signo) -{ - fprintf(stderr, "SIGUSR1: Connecting!\n"); - if (ppp) - pico_ppp_connect(ppp); -} - -static void sigusr2_hdl(int signo) -{ - fprintf(stderr, "SIGUSR2/SIGINT: 
Disconnecting!\n"); - if (ppp) - pico_ppp_disconnect(ppp); - - if (signo == SIGINT) - exit(0); -} - -#ifdef PICO_SUPPORT_POLARSSL -static void md5sum(uint8_t *dst, const uint8_t *src, size_t len) -{ - md5(src, len, dst); -} -#endif - -#ifdef PICO_SUPPORT_CYASSL -static void md5sum(uint8_t *dst, const uint8_t *src, size_t len) -{ - Md5 md5; - InitMd5(&md5); - Md5Update(&md5, src, len); - Md5Final(&md5, dst); -} -#endif - -int modem_read(struct pico_device *dev, void *data, int len) -{ - int r; - r = read(fd, data, len); -#ifdef DEBUG_FLOW - if (r > 0) { - printf(" <<< "); - for(idx = 0; idx < r; idx++) { - printf(" %02x", ((uint8_t*)data)[idx]); - } - printf("\n"); - } - -#endif - - return r; -} - -int modem_write(struct pico_device *dev, const void *data, int len) -{ - int r; -#ifdef DEBUG_FLOW - printf(" >>> "); - for(idx = 0; idx < len; idx++) { - printf(" %02x", ((uint8_t*)data)[idx]); - } - printf("\n"); -#endif - r = write(fd, data, len); - return r; -} - -int modem_set_speed(struct pico_device *dev, uint32_t speed) -{ - struct termios term; - if (tcgetattr(fd, &term) != 0) - return 6; - - if (cfsetspeed(&term, B115200) != 0) - return 7; - - if (tcsetattr(fd, TCSANOW, &term) != 0) - return 8; - - printf("Speed set to 115200.\n"); - return 0; -} - -void cb_ping(struct pico_icmp4_stats *s) -{ - char host[30]; - pico_ipv4_to_string(host, s->dst.addr); - if (s->err == 0) { - dbg("%lu bytes from %s: icmp_req=%lu ttl=64 time=%lu ms\n", s->size, host, s->seq, s->time); - } else { - dbg("PING %lu to %s: Error %d\n", s->seq, host, s->err); - } -} - -static void cb_sock(uint16_t ev, struct pico_socket *s) -{ - -} - -static void ping(void) -{ - struct pico_socket *s; - struct pico_ip4 dst; - - pico_string_to_ipv4("80.68.95.85", &dst.addr); - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, cb_sock); - pico_socket_connect(s, &dst, short_be(80)); - pico_icmp4_ping("80.68.95.85", 10, 1000, 4000, 8, cb_ping); -} - - -int main(int argc, const char *argv[]) -{ - const 
char *path = MODEM; - const char *apn = APN; - const char *passwd = PASSWD; - const char *username = USERNAME; - - if (argc > 1) - path = argv[1]; - - if (argc > 2) - apn = argv[2]; - - if (argc > 3) - passwd = argv[3]; - - fd = open(path, O_RDWR); - if (fd < 0) - return 1; - - fcntl(fd, F_SETFL, O_NONBLOCK); - - signal(SIGUSR1, sigusr1_hdl); - signal(SIGUSR2, sigusr2_hdl); - signal(SIGINT, sigusr2_hdl); - - pico_stack_init(); - -#if defined PICO_SUPPORT_POLARSSL || defined PICO_SUPPORT_CYASSL - pico_register_md5sum(md5sum); -#endif - - ppp = pico_ppp_create(); - if (!ppp) - return 2; - - pico_ppp_set_serial_read(ppp, modem_read); - pico_ppp_set_serial_write(ppp, modem_write); - pico_ppp_set_serial_set_speed(ppp, modem_set_speed); - - pico_ppp_set_apn(ppp, apn); - pico_ppp_set_password(ppp, passwd); - pico_ppp_set_username(ppp, username); - - pico_ppp_connect(ppp); - - while(1 < 2) { - pico_stack_tick(); - usleep(1000); - if (ppp->link_state(ppp) && !ping_on) { - ping_on++; - ping(); - } - } -} diff --git a/kernel/picotcp/test/python/dhcp.py b/kernel/picotcp/test/python/dhcp.py deleted file mode 100755 index 3891914..0000000 --- a/kernel/picotcp/test/python/dhcp.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python -# dhcp.py -# Butterfly-like topology test for dhcp -# One DHCP server, serving on two interface -# Eigth DHCP clients, four on each network -# -# s1----@ @----r1 -# s2-----\__ DHCP __/-----r2 -# s3-----/ \-----r3 -# s4----@ @----r4 -# - - -from topology import * - -T = Topology() -net1 = Network(T, "pyt1") -net2 = Network(T, "pyt2") - -server = Host(T, net1, net2, args="dhcpserver:eth1:172.16.1.2:255.255.255.0:64:128:eth2:172.16.2.2:255.255.255.0:64:128") - -s1 = Host(T, net1, args="dhcpclient:eth1") -s2 = Host(T, net1, args="dhcpclient:eth1") -s3 = Host(T, net1, args="dhcpclient:eth1") -s4 = Host(T, net1, args="dhcpclient:eth1") -r1 = Host(T, net2, args="dhcpclient:eth1") -r2 = Host(T, net2, args="dhcpclient:eth1") -r3 = Host(T, net2, 
args="dhcpclient:eth1") -r4 = Host(T, net2, args="dhcpclient:eth1") - -raw_input("Press enter to continue ...") -start(T) - -wait(server) - -cleanup() diff --git a/kernel/picotcp/test/python/fairness.py b/kernel/picotcp/test/python/fairness.py deleted file mode 100755 index d78b85e..0000000 --- a/kernel/picotcp/test/python/fairness.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/python -# fairness.py -# A complex test for butterly-like topology, -# using 3 TCP connections and 3 ping flows. -# -# s1---. .---r1 -# s2----\ / -# s3-----\__.R1---R2.__/__.--r2 -# s4-----/ \ -# s5----/ \_.--r3 -# s6---^ -# - -from topology import * - -T = Topology() -net1 = Network(T) -net2 = Network(T) -net3 = Network(T) - -#router1 = Host(T, net1, net2, "natbox:172.16.2.1:") -#router2 = Host(T, net2, net3, "natbox:172.16.3.1:") -router1 = Host(T, net1, net2) -router2 = Host(T, net2, net3) - -send1 = Host(T, net1, args="tcpbench:t:172.16.3.2:") -send2 = Host(T, net1, args="tcpbench:t:172.16.3.3:") -send3 = Host(T, net1, args="tcpbench:t:172.16.3.4:") - -send4 = Host(T, net1, args="ping:172.16.3.2:") -send5 = Host(T, net1, args="ping:172.16.3.3:") -send6 = Host(T, net1, args="ping:172.16.3.4:") - - -recv1 = Host(T, net3, args="tcpbench:r:") -recv2 = Host(T, net3, args="tcpbench:r:") -recv3 = Host(T, net3, args="tcpbench:r:") -recv4 = Host(T, net3, args="tcpbench:r:") - - -sleep(1) -start(T) - -wait(send1) -wait(send2) -wait(send3) - -cleanup() diff --git a/kernel/picotcp/test/python/fairness_bottleneck.py b/kernel/picotcp/test/python/fairness_bottleneck.py deleted file mode 100755 index a88cb32..0000000 --- a/kernel/picotcp/test/python/fairness_bottleneck.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/python -# fairness.py -# A complex test for butterly-like topology, -# using 3 TCP connections and 3 ping flows. -# -# Bottleneck of 4 Mbit/300 ms overall delay is added. -# -# s1---. 
.---r1 -# s2----\ / -# s3-----\__.R1---R2.__/__.--r2 -# s4-----/ \ -# s5----/ \_.--r3 -# s6---^ -# - -from topology import * - -T = Topology() -net1 = Network(T) -net2 = Network(T) -net3 = Network(T) - -router1 = Host(T, net1, net2, delay2="150", bw2="20M") -router2 = Host(T, net2, net3) - -send1 = Host(T, net1, args="tcpbench:t:172.16.3.2:") -send2 = Host(T, net1, args="tcpbench:t:172.16.3.3:") -send3 = Host(T, net1, args="tcpbench:t:172.16.3.4:") - -send4 = Host(T, net1, args="ping:172.16.3.2:") -send5 = Host(T, net1, args="ping:172.16.3.3:") -send6 = Host(T, net1, args="ping:172.16.3.4:") - - -recv1 = Host(T, net3, args="tcpbench:r:") -recv2 = Host(T, net3, args="tcpbench:r:") -recv3 = Host(T, net3, args="tcpbench:r:") - - -sleep(1) -start(T) - -wait(send1) -wait(send2) -wait(send3) - -cleanup() diff --git a/kernel/picotcp/test/python/fairness_bottleneck_linux.py b/kernel/picotcp/test/python/fairness_bottleneck_linux.py deleted file mode 100755 index 3da270f..0000000 --- a/kernel/picotcp/test/python/fairness_bottleneck_linux.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/python -# fairness.py -# A complex test for butterly-like topology, -# using 3 TCP connections and 3 ping flows. -# -# Bottleneck of 4 Mbit/300 ms overall delay is added. -# -# s1---. 
.---r1 -# s2----\ / -# s3-----\__.R1---R2.__/__.--r2 -# s4-----/ \ -# s5----/ \_.--r3 -# s6---^ -# - -from topology import * - -T = Topology() -net1 = Network(T) -net2 = Network(T, "pyt0") -net3 = Network(T) - -router1 = Host(T, net1, net2, delay2="150", bw2="4M") -router2 = Host(T, net2, net3) - -send1 = Host(T, net1, args="tcpbench:t:172.16.3.2:") -send2 = Host(T, net1, args="tcpbench:t:172.16.3.3:") -send3 = Host(T, net1, args="tcpbench:t:172.16.3.4:") - -send4 = Host(T, net1, args="ping:172.16.3.2:") -send5 = Host(T, net1, args="ping:172.16.3.3:") -send6 = Host(T, net1, args="ping:172.16.3.4:") - - -recv1 = Host(T, net3, args="tcpbench:r:") -recv2 = Host(T, net3, args="tcpbench:r:") -recv3 = Host(T, net3, args="tcpbench:r:") - - -sleep(1) -start(T) - -wait(send1) -wait(send2) -wait(send3) - -cleanup() diff --git a/kernel/picotcp/test/python/fragmentation.py b/kernel/picotcp/test/python/fragmentation.py deleted file mode 100755 index 72a5929..0000000 --- a/kernel/picotcp/test/python/fragmentation.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/python -# -# fragmentation.py -# -# Fragmentation test with PicoTCP sending and Linux receiving -# -# (sender) (Receiver) -# PicoTCP ------------------------ Linux -# -# An udpclient is started which will give DATASIZE bytes in one go -# to the socket. This data will be fragmented and send over to the -# Linux, where it is reassembled and received in one piece. 
-# - -from topology import * -import socket, random, string - -SRC_ADDR = '' -DST_ADDR = '172.16.1.1' -SRC_PORT = 6667 -SENDTO_PORT = 6667 -LISTEN_PORT = 6667 -DATASIZE = 4000 -LOOPS = 4 -SUBLOOPS = 1 -UDPCLIENT = "udpclient:" + str(DST_ADDR) + ":" + str(SENDTO_PORT) + ":" + str(LISTEN_PORT) + ":" + str(DATASIZE) + ":" + str(LOOPS) + ":" + str(SUBLOOPS) - -print UDPCLIENT - -T = Topology() -net1 = Network(T, "pyt0") -h1 = Host(T, net1, args=UDPCLIENT) - -s_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) -s_udp.bind((SRC_ADDR, SRC_PORT)) -s_udp.settimeout(5); - -raw_input("Press enter to continue ...") -start(T) - -while True: - data, addr = s_udp.recvfrom(DATASIZE) - #print data - if len(data) == DATASIZE: - print '\n\n' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '+++++ fragmentation test IS successful +++++' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '\n\n' - cleanup() - exit(0) - -print '\n\n' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '+++++ fragmentation test NOT successful ++++' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '\n\n' -cleanup() -exit(1) - diff --git a/kernel/picotcp/test/python/howto.py b/kernel/picotcp/test/python/howto.py deleted file mode 100644 index 7499886..0000000 --- a/kernel/picotcp/test/python/howto.py +++ /dev/null @@ -1,135 +0,0 @@ -#PicoTCP topology test environment. -#Guidelines to prepare test scenarios. -# -#The interface is simple, it has three objects: -# * Topology -# * Network -# * Host -# -#And a handful of helping routines, such as: -# * start() -# * loop() -# * sleep() -# * wait() -# -# -######################################################################## -#== Create a test scenario ==# -######################################################################## -# Every script file will start with: "#!/usr/bin/python" in the first -# line, and will have execution permissions. 
This script is an exception -# because it is not intended to be run, as it is in fact a walkthrough to -# all the functionalities. - -# Importing the topology objects is mandatory, so add: -from topology import * - -# A Topology must be created to use all other objects: -T = Topology() - -# Now, we can create "Network" objects. The networks will have address -# 172.16.X.0/24, where 'X' is the order of creation, starting from 1. -# - -network1 = Network(T) -network2 = Network(T) -# The two networks are separated and using different address pools: -# -# ## ### ## ## ## ### ## ## -# # network1 # # network2 # -# # 172.16.1.0 # # 172.16.2.0 # -# ## ## ###### ## ## ###### -# - -# If you are running your test as root, you can also add a tun-tap connection -# to the network, which will be automatically configured: -networkLocal = Network(T,'tap0') - - -# In the same way ad networks, you can create a PicoTCP Host that connects to a -# network as follows: -host1_1 = Host(T, network1) - -# Also, you can specify a role for the application/host, by using picoapp's -# args format for '--app'. For example, the machine below will ping the previously -# created one: -host1_2 = Host(T, network1, args ="ping:172.16.1.1:") -# -# ## ### ## ## ## ### ## ## -# host1.1--# network1 # # network2 # -# # 172.16.1.0 # # 172.16.2.0 # -# ## ## ###### ## ## ###### -# / -# host1.2___/ -# (ping host1.1) -# - -# At this point, a picoTCP host with two network cards can connect -# the two networks like this: -router1 = Host(T, network1, network2) -# -# ## ### ## ## router1 ## ### ## ## -# host1.1--# network1 #__/ \__ # network2 # -# # 172.16.1.0 # # 172.16.2.0 # -# ## ## ###### ## ## ###### -# / -# host1.2___/ -# (ping host1.1) - -# Now, we can attach an host to the second network too: -# Connection to the host can be an emulated channel, i.e. 
-# it is possible to add bidirectional delay and limited -# bandwidth in the link between the host and the network: -# - -host2_2 = Host(network2, delay1="100", bw1="500K") -# -# ## ### ## ## router1 ## ### ## ## -# host1.1--# network1 #__/ \__ # network2 # -# # 172.16.1.0 # # 172.16.2.0 # -# ## ## ###### ## ## ###### -# / * -# host1.2.__/ \._*_*_host2.2 -# (ping host1.1) - -## Since the routes will be automatically added before the test starts, -# all the hosts in the networks will be reachable to each other: -# all the picoapps will have their static routes populated automatically -# by the topology tool, no matter how complex the network is. The only -# requirement is that all the networks share at least one router. -# -# For this reason, we can create a host that pings across the network: -host1_4 = Host(T, network1, args="ping:172.16.2.2:") -# -# host1.4. -# (ping 2.2) \ -# \## ### ## ## router1 ## ### ## ## -# host1.1--# network1 #__/ \__ # network2 # -# # 172.16.1.0 # # 172.16.2.0 # -# ## ## ###### ## ## ###### -# / * -# host1.2.__/ \._*_*_host2.2 -# (ping host1.1) - -######################################################################## -#== Start the test ==# -######################################################################## -# All the host will be connected and activated when you call: -start() - -# At this point you may want to define your exit strategy. Valid commands -# are: - -loop() # Loop forever, until the test is interrupted (e.g. 
by ctrl+c) - -sleep(N) # Sleep N seconds - -wait(host1_4) # Wait for application running on host 1.4, and return only if - # it has terminated - - -######################################################################## -#== End the test ==# -######################################################################## -# Always call: -cleanup() diff --git a/kernel/picotcp/test/python/http_server_linux.py b/kernel/picotcp/test/python/http_server_linux.py deleted file mode 100755 index 8f72845..0000000 --- a/kernel/picotcp/test/python/http_server_linux.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T, "pyt0") - -h2 = Host(T, net1, args="httpd") - -sleep(1) -start(T) - -wait(h2) -cleanup() diff --git a/kernel/picotcp/test/python/multicast_recv.py b/kernel/picotcp/test/python/multicast_recv.py deleted file mode 100755 index 534dc32..0000000 --- a/kernel/picotcp/test/python/multicast_recv.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/python -# multicast_recv.py -# -# Multicast test with PicoTCP receiving and Linux sending -# -# (sender) (Receiver) -# Linux ------------------------ PicoTCP -# mcast to 224.7.7.7 -# -from topology import * -import socket, random, string - -IF_ADDR = '172.16.1.1' -LINK_ADDR = '172.16.1.2' -MCAST_ADDR = '224.7.7.7' -SRC_PORT = 5555 -LISTEN_PORT = 6667 -SENDTO_PORT = 6667 -MCASTRECV = "mcastreceive:" + str(LINK_ADDR) + ":" + str(MCAST_ADDR) + ":" + str(LISTEN_PORT) + ":" + str(SENDTO_PORT) - -print MCASTRECV - -T = Topology() -net1 = Network(T, "pyt0") -h1 = Host(T, net1, args=MCASTRECV) - -# sending socket -s_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) -s_udp.bind((IF_ADDR, SRC_PORT)) -s_udp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) -s_udp.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_TTL, 2) -s_udp.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_IF, socket.inet_aton(str(IF_ADDR))) - -# receiving socket -s_udp_recv = socket.socket(socket.AF_INET, 
socket.SOCK_DGRAM) -s_udp_recv.bind((IF_ADDR, LISTEN_PORT)) -s_udp_recv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) -s_udp_recv.settimeout(5); - -raw_input("Press enter to continue ...") -start(T) -sleep(1) - -while True: - s_udp.sendto("multicast test succeeded", (str(MCAST_ADDR), LISTEN_PORT)) - data = s_udp_recv.recv(4096) - #print data - if 'succeeded' in data: - print '\n\n' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '+++++ multicast_recv test IS successful +++++' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '\n\n' - cleanup() - exit(0) - -print '\n\n' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '+++++ multicast_recv test NOT successful ++++' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '\n\n' -cleanup() -exit(1) - diff --git a/kernel/picotcp/test/python/multicast_send.py b/kernel/picotcp/test/python/multicast_send.py deleted file mode 100755 index 76ec581..0000000 --- a/kernel/picotcp/test/python/multicast_send.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python -# -# multicast_send.py -# -# Multicast test with PicoTCP sending and Linux receiving -# -# (sender) (Receiver) -# PicoTCP ------------------------ Linux -# mcast to 224.7.7.7 -# - -from topology import * -import socket, random, string, struct - -IF_ADDR = '172.16.1.1' -LINK_ADDR = '172.16.1.2' -MCAST_ADDR = '224.7.7.7' -LISTEN_PORT = 6667 -SENDTO_PORT = 6667 -MCASTSEND = "mcastsend:" + str(LINK_ADDR) + ":" + str(MCAST_ADDR) + ":" + str(SENDTO_PORT) + ":" + str(LISTEN_PORT) - -print MCASTSEND - -T = Topology() -net1 = Network(T, "pyt0") -h1 = Host(T, net1, args=MCASTSEND) - -s_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) -s_udp.bind((MCAST_ADDR, LISTEN_PORT)) -s_udp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) -s_udp.settimeout(5); - -mreq = struct.pack("=4s4s", socket.inet_aton(str(MCAST_ADDR)), socket.inet_aton(str(IF_ADDR))) -s_udp.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) - 
-raw_input("Press enter to continue ...") -start(T) -sleep(1) - -while True: - data = s_udp.recv(4096) - #print data - if 'end' in data: - print '\n\n' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '+++++ multicast_send test IS successful +++++' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '\n\n' - cleanup() - exit(0) - -print '\n\n' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '+++++ multicast_send test NOT successful ++++' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '\n\n' -cleanup() -exit(1) diff --git a/kernel/picotcp/test/python/noop.py b/kernel/picotcp/test/python/noop.py deleted file mode 100755 index 9d4d456..0000000 --- a/kernel/picotcp/test/python/noop.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T, "pyt0") - -#h1 = Host(T, net1) -h2 = Host(T, net1, args="noop") - -sleep(1) -start(T) - -wait(h2) -cleanup() diff --git a/kernel/picotcp/test/python/ping.py b/kernel/picotcp/test/python/ping.py deleted file mode 100755 index a7283ce..0000000 --- a/kernel/picotcp/test/python/ping.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T) - -h1 = Host(T, net1) -h2 = Host(T, net1, args="ping:172.16.1.1:") - -sleep(1) -start(T) - -wait(h2) -cleanup() diff --git a/kernel/picotcp/test/python/ping_delay.py b/kernel/picotcp/test/python/ping_delay.py deleted file mode 100755 index 7aa7d7f..0000000 --- a/kernel/picotcp/test/python/ping_delay.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T) - -h1 = Host(T, net1, delay1="200") -h2 = Host(T, net1, args="ping:172.16.1.1:") - -sleep(1) -start(T) - -wait(h2) -cleanup() diff --git a/kernel/picotcp/test/python/ping_linux.py b/kernel/picotcp/test/python/ping_linux.py deleted file mode 100755 index b79158e..0000000 --- a/kernel/picotcp/test/python/ping_linux.py +++ /dev/null 
@@ -1,14 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T, "pyt0") - -#h1 = Host(T, net1) -h2 = Host(T, net1, args="ping:172.16.1.1:") - -sleep(1) -start(T) - -wait(h2) -cleanup() diff --git a/kernel/picotcp/test/python/ping_nat.py b/kernel/picotcp/test/python/ping_nat.py deleted file mode 100755 index 336ce8f..0000000 --- a/kernel/picotcp/test/python/ping_nat.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T, 'nat0') -net2 = Network(T) - - -h1 = Host(T, net1, args="ping:172.16.2.1:") -h2 = Host(T, net2) -router1 = Host(T, net1, net2, args="natbox:172.16.2.2:") - -sleep(1) -start(T) - -wait(h1) -cleanup() diff --git a/kernel/picotcp/test/python/reassembly.py b/kernel/picotcp/test/python/reassembly.py deleted file mode 100755 index 82a4f27..0000000 --- a/kernel/picotcp/test/python/reassembly.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/python -# -# reassemby.py -# -# Reassemly test with PicoTCP receiving and Linux sending -# -# (receiver) (Sender) -# PicoTCP ------------------------ Linux -# -# An udpecho is started which will receive DATASIZE bytes in one go -# from the socket. The Linux will send DATASIZE bytes in one go to the -# udpecho, this data will be sent fragmented. The udpecho is to reassemble -# this data and echo it back. 
-# - -from topology import * -import socket, random, string - -SRC_ADDR = '' -LINK_ADDR = '172.16.1.2' -SRC_PORT = 5555 -LISTEN_PORT = 6667 -SENDTO_PORT = 5555 -DATASIZE = 3400 -UDPECHO = "udpecho:" + str(LINK_ADDR) + ":" + str(LISTEN_PORT) + ":" + str(SENDTO_PORT) + ":" + str(DATASIZE) - -print UDPECHO - -T = Topology() -net1 = Network(T, "pyt0") -h1 = Host(T, net1, args=UDPECHO) - -str_send = ''.join(random.choice(string.ascii_lowercase) for x in range(DATASIZE)) -#print str_send -s_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) -s_udp.bind((SRC_ADDR, SRC_PORT)) -s_udp.settimeout(5); - -raw_input("Press enter to continue ...") -start(T) - -while True: - s_udp.sendto(str_send, (LINK_ADDR, LISTEN_PORT)) - data = s_udp.recv(DATASIZE) - #print len(data) - if len(data) == DATASIZE: - print '\n\n' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '+++++ reassembly test IS successful +++++' - print '+++++++++++++++++++++++++++++++++++++++++++++' - print '\n\n' - cleanup() - exit(0) - -print '\n\n' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '+++++ reassembly test NOT successful ++++' -print '+++++++++++++++++++++++++++++++++++++++++++++' -print '\n\n' -cleanup() -exit(1) - diff --git a/kernel/picotcp/test/python/tcpbench-delay.py b/kernel/picotcp/test/python/tcpbench-delay.py deleted file mode 100755 index cf46681..0000000 --- a/kernel/picotcp/test/python/tcpbench-delay.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/python -# - -from topology import * - -T = Topology() -net1 = Network(T, "vde0") - -send1 = Host(T, net1, args="tcpbench:t:172.16.1.3:7770:") -recv1 = Host(T, net1, args="tcpbench:r:7770:", delay1="30", loss1="1") - - -sleep(1) -raw_input("Press enter to continue ...") - -start(T) -wait(send1) -cleanup() diff --git a/kernel/picotcp/test/python/tcpbench-tap.py b/kernel/picotcp/test/python/tcpbench-tap.py deleted file mode 100755 index 6e30dae..0000000 --- a/kernel/picotcp/test/python/tcpbench-tap.py +++ /dev/null @@ 
-1,17 +0,0 @@ -#!/usr/bin/python -# - -from topology import * - -T = Topology() -net1 = Network(T, "pyt0") -send1 = Host(T, net1, args="tcpbench:t:172.16.1.3:7770:") -recv1 = Host(T, net1, args="tcpbench:r:7770:") - - -sleep(1) -raw_input("Press enter to continue ...") - -start(T) -wait(send1) -cleanup() diff --git a/kernel/picotcp/test/python/tcpbench.py b/kernel/picotcp/test/python/tcpbench.py deleted file mode 100755 index 58e4768..0000000 --- a/kernel/picotcp/test/python/tcpbench.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/python -# - -from topology import * - -T = Topology() -net1 = Network(T) -send1 = Host(T, net1, args="tcpbench:t:172.16.1.2:7770:") -recv1 = Host(T, net1, args="tcpbench:r:7770:") - - -sleep(1) -#raw_input("Press enter to continue ...") - -start(T) -wait(send1) -cleanup() diff --git a/kernel/picotcp/test/python/tcpbench_rx_linux.py b/kernel/picotcp/test/python/tcpbench_rx_linux.py deleted file mode 100755 index d4fa02e..0000000 --- a/kernel/picotcp/test/python/tcpbench_rx_linux.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T, "pyt0") - -h2 = Host(T, net1, args="tcpbench:r:6660:") - -sleep(1) -raw_input("Press enter to continue ...") -start(T) - -wait(h2) -cleanup() diff --git a/kernel/picotcp/test/python/tcpbench_tx_linux.py b/kernel/picotcp/test/python/tcpbench_tx_linux.py deleted file mode 100755 index 3bee3d7..0000000 --- a/kernel/picotcp/test/python/tcpbench_tx_linux.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T, "pyt0") - -#h1 = Host(T, net1) -h3 = Host(T, net1, args="tcpbench:t:172.16.1.1:6660:") - -sleep(1) -raw_input("Press enter to continue ...") -start(T) - -wait(h3) -cleanup() diff --git a/kernel/picotcp/test/python/tcpecho_linux.py b/kernel/picotcp/test/python/tcpecho_linux.py deleted file mode 100755 index 39065be..0000000 --- a/kernel/picotcp/test/python/tcpecho_linux.py +++ /dev/null @@ -1,16 
+0,0 @@ -#!/usr/bin/python -from topology import * - -T = Topology() -net1 = Network(T, "pyt0") - -#h1 = Host(T, net1) -h2 = Host(T, net1, args="tcpecho:8888", delay1="20", loss1="0.01") -#h3 = Host(T, net1, args="tcpbench:t:172.16.1.1:") - -sleep(1) -start(T) - -#wait(h3) -wait(h2) -cleanup() diff --git a/kernel/picotcp/test/python/topology.py b/kernel/picotcp/test/python/topology.py deleted file mode 100755 index a9df735..0000000 --- a/kernel/picotcp/test/python/topology.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/python -# Python classes definition for the picoTCP -# topology test environment -# -# Copyright (c) 2013-2017 Altran Intelligent Systems. See LICENSE for usage. - -import sys, os, subprocess, time, re - -def test_tuntap(): - if not os.geteuid()==0: - sys.exit("\nOnly root can use real devices contained in this script\n") - -class Topology: - def __init__(self): - self.nets = [] - self.nodes = [] - self.nextn = 1 - self.hosts = [] - - -class Network: - def __init__(self, topology, real=''): - self.n = topology.nextn - topology.nextn += 1 - self.nodes = [] - self.topology = topology - self.topology.nets.append(self) - self.sock = "/tmp/topology/net"+`self.n` - self.nextn = 1 - vdecmd = ["vde_switch", "-x" , "-s", self.sock, "-m", self.sock+".mgmt"] - if real != '': - test_tuntap() - vdecmd.append('-t') - vdecmd.append(real) - vdecmd.append('-x') - self.pop = subprocess.Popen(vdecmd, stdin=subprocess.PIPE) - self.hosts = [] - print "" - print vdecmd - print "Created network "+self.sock - if real != '': - subprocess.call(["ifconfig",real,"172.16."+`self.n`+".1", "netmask", "255.255.255.0", "up"]) - self.nextn = 2 - -class Node: - def __init__(self,topology, network = None): - if (network is None): - network = Network(topology) - self.net = network - self.n = network.nextn - network.nextn += 1 - self.net.nodes.append(self) - self.topology = topology - self.topology.nodes.append(self) - -class Host: - def add_routes(self, topology): - for eth in 
[self.eth1, self.eth2]: - if eth and not self.args.startswith("dhcpclient"): - net = eth.net - for h in topology.hosts: - if h.eth1 and h.eth2: - dst="" - gw="" - routing=False - if (h.eth2.net.n == net.n) and (self not in h.eth1.net.hosts): - if h.eth1.net.n > net.n or h.nat == False: - print "FOUND route to net "+`h.eth1.net.n` - dst_net = h.eth1.net.n - gw_net = h.eth2.net.n - gw_n = h.eth2.n - routing=True - elif (h.eth1.net.n == net.n) and (self not in h.eth2.net.hosts): - if h.eth2.net.n > net.n or h.nat == False: - print "FOUND route to net "+`h.eth2.net.n` - dst_net = h.eth2.net.n - gw_net = h.eth1.net.n - gw_n = h.eth1.n - routing=True - - if (routing): - dst = "172.16."+`dst_net`+".0" - gw = "172.16."+`gw_net`+"."+`gw_n` - self.routes.append("-r") - self.routes.append(dst+":255.255.255.0:"+gw+":") - if (routing and gw_net > dst_net and h.nat == False): - dst_net -= 1 - while(dst_net > 0): - dst = "172.16."+`dst_net`+".0" - self.routes.append("-r") - self.routes.append(dst+":255.255.255.0:"+gw+":") - dst_net -= 1 - elif (routing and gw_net != None and gw_net < dst_net): - dst_net += 1 - while(dst_net < net.topology.nextn): - dst = "172.16."+`dst_net`+".0" - self.routes.append("-r") - self.routes.append(dst+":255.255.255.0:"+gw+":") - dst_net += 1 - def parse_options(self, eth, delay, bw, loss): - if (delay != "" or bw != ""): - mysock = eth.net.sock + "__" + `eth.n` - wirecmd = ['wirefilter', '-v'] - wirecmd.append(mysock +":" + eth.net.sock) - if (delay != ''): - wirecmd.append("-d") - wirecmd.append(delay) - if (bw != ''): - wirecmd.append("-b") - wirecmd.append(bw) - if (loss != ''): - wirecmd.append("-l") - wirecmd.append(loss) - print wirecmd - subprocess.Popen(['vde_switch', '-s', mysock], stdin=subprocess.PIPE) - subprocess.Popen(wirecmd, stdin=subprocess.PIPE) - else: - mysock = eth.net.sock - return mysock - - def __init__(self, topology, net1=None, net2=None, gw=None, args="tcpecho:5555", delay1="", bw1="", delay2="", bw2="", loss1="", loss2=""): 
- if net1: - self.eth1 = Node(topology, net1) - net1.hosts.append(self) - else: - self.eth1 = None - if net2: - self.eth2 = Node(topology, net2) - net2.hosts.append(self) - else: - self.eth2 = None - self.cmd = ["./build/test/picoapp.elf"] - self.gw = gw - if args.startswith("nat"): - self.nat = True - else: - self.nat = False - - - if (net1): - mysock = self.parse_options(self.eth1, delay1, bw1, loss1) - if (args.startswith("dhcpclient")): - self.cmd.append("--barevde") - vdeline = "eth1:"+mysock+':' - else: - self.cmd.append("--vde") - vdeline = "eth1:"+mysock+':'+"172.16."+`self.eth1.net.n`+"."+`self.eth1.n`+":255.255.255.0:" - if (self.gw and re.search("172\.16\."+`self.eth1.net`, self.gw)): - vdeline +=self.gw+":" - self.cmd.append(vdeline) - if (net2): - mysock = self.parse_options(self.eth2, delay2, bw2, loss2) - if (args.startswith("dhcpclient")): - self.cmd.append("--barevde") - vdeline = "eth2:"+mysock+':' - else: - self.cmd.append("--vde") - vdeline = "eth2:"+mysock+':'+"172.16."+`self.eth2.net.n`+"."+`self.eth2.n`+":255.255.255.0:" - if (self.gw and re.search("172\.16\."+`self.eth2.net`+".", self.gw)): - vdeline +=self.gw+":" - self.cmd.append(vdeline) - self.args = args - self.pop = None - topology.hosts.append(self) - self.routes = [] - - - def start(self): - if self.pop: - return - for r in self.routes: - self.cmd.append(r) - self.cmd.append("-a") - self.cmd.append(self.args) - print self.cmd - self.pop = subprocess.Popen(self.cmd) - - - -def cleanup(): - try: - subprocess.call(["killall","vde_switch"]) - subprocess.call(["killall","picoapp.elf"]) - subprocess.call(["killall","wirefilter"]) - os.unlink("/tmp/topology") - except: - pass - - - -def loop(): - while(True): - time.sleep(1) - sys.exit(0) - -def sleep(n): - time.sleep(n) - -def wait(x): - if (x is None): - print("start failed: "+x.cmd) - sys.exit(1) - while (x.pop.poll() == None): - time.sleep(1) - print "Goodbye" - sys.exit(0) - -def start(T): - print "Calculating routes.." 
- for n in T.nets: - for h in n.hosts: - h.add_routes(T) - print "Done!" - print "Starting up..." - for n in T.nets: - for h in n.hosts: - h.start() - -try: - os.mkdir("/tmp/topology/") -except: - pass -cleanup() diff --git a/kernel/picotcp/test/python/traceroute_from_linux.py b/kernel/picotcp/test/python/traceroute_from_linux.py deleted file mode 100755 index 4f2f944..0000000 --- a/kernel/picotcp/test/python/traceroute_from_linux.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/python -from topology import * - -''' -Add route to 172.16.0.0/16 gw 172.16.1.2 on your host machine. - -Should result in something like: -~$ traceroute 172.16.8.2 -traceroute to 172.16.8.2 (172.16.8.2), 30 hops max, 60 byte packets - 1 172.16.1.2 (172.16.1.2) 0.481 ms 0.473 ms 0.467 ms - 2 172.16.2.2 (172.16.2.2) 4.680 ms 4.702 ms 4.700 ms - 3 172.16.3.2 (172.16.3.2) 8.759 ms 8.768 ms 8.766 ms - 4 172.16.4.2 (172.16.4.2) 10.791 ms 10.789 ms 10.786 ms - 5 172.16.5.2 (172.16.5.2) 12.826 ms 12.825 ms 12.821 ms - 6 172.16.6.2 (172.16.6.2) 14.844 ms 17.858 ms 17.857 ms - 7 172.16.7.2 (172.16.7.2) 17.858 ms 14.000 ms 13.999 ms - 8 172.16.8.2 (172.16.8.2) 18.032 ms 18.029 ms 18.023 ms - -''' - - -T = Topology() -net1 = Network(T, 'nat0') -net2 = Network(T) -net3 = Network(T) -net4 = Network(T) -net5 = Network(T) -net6 = Network(T) -net7 = Network(T) -net8 = Network(T) - -router1 = Host(T, net1, net2) -router2 = Host(T, net2, net3) -router3 = Host(T, net3, net4) -router4 = Host(T, net4, net5) -router5 = Host(T, net5, net6) -router6 = Host(T, net6, net7) -router7 = Host(T, net7, net8) - -h1 = Host(T, net8) - -sleep(1) -start(T) -loop() -cleanup() diff --git a/kernel/picotcp/test/python/traceroute_nat_from_linux.py b/kernel/picotcp/test/python/traceroute_nat_from_linux.py deleted file mode 100755 index d88f42a..0000000 --- a/kernel/picotcp/test/python/traceroute_nat_from_linux.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/python -from topology import * - -''' -Add route to 172.16.0.0/16 gw 172.16.1.2 
on your host machine. - -Should result in something like: -~$ traceroute 172.16.8.2 -traceroute to 172.16.8.2 (172.16.8.2), 30 hops max, 60 byte packets - 1 172.16.1.2 (172.16.1.2) 0.481 ms 0.473 ms 0.467 ms - 2 172.16.2.2 (172.16.2.2) 4.680 ms 4.702 ms 4.700 ms - 3 172.16.3.2 (172.16.3.2) 8.759 ms 8.768 ms 8.766 ms - 4 172.16.4.2 (172.16.4.2) 10.791 ms 10.789 ms 10.786 ms - 5 172.16.5.2 (172.16.5.2) 12.826 ms 12.825 ms 12.821 ms - 6 172.16.6.2 (172.16.6.2) 14.844 ms 17.858 ms 17.857 ms - 7 172.16.7.2 (172.16.7.2) 17.858 ms 14.000 ms 13.999 ms - 8 172.16.8.2 (172.16.8.2) 18.032 ms 18.029 ms 18.023 ms - -''' - - -T = Topology() -net1 = Network(T, 'nat0') -net2 = Network(T) -net3 = Network(T) -net4 = Network(T) -net5 = Network(T) -net6 = Network(T) -net7 = Network(T) -net8 = Network(T) - -router1 = Host(T, net1, net2, args="natbox:172.16.2.1") -router2 = Host(T, net2, net3, args="natbox:172.16.3.1") -router3 = Host(T, net3, net4, args="natbox:172.16.4.1") -router4 = Host(T, net4, net5, args="natbox:172.16.5.1") -router5 = Host(T, net5, net6, args="natbox:172.16.6.1") -router6 = Host(T, net6, net7, args="natbox:172.16.7.1") -router7 = Host(T, net7, net8, args="natbox:172.16.8.1") - -h1 = Host(T, net8) - -sleep(1) -start(T) -loop() -cleanup() diff --git a/kernel/picotcp/test/python/udpecho.py b/kernel/picotcp/test/python/udpecho.py deleted file mode 100755 index 6ca15aa..0000000 --- a/kernel/picotcp/test/python/udpecho.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/python -# - -from topology import * - -T = Topology() -net1 = Network(T,"udp0") -echo = Host(T, net1, args="udpecho:172.16.1.2:7770:7770:1400:") - - -sleep(1) -raw_input("Press enter to continue ...") - -start(T) -wait(echo) -cleanup() diff --git a/kernel/picotcp/test/python/zmq_linux.py b/kernel/picotcp/test/python/zmq_linux.py deleted file mode 100755 index 643175a..0000000 --- a/kernel/picotcp/test/python/zmq_linux.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/python -from topology import * -import zmq 
-import sys - -T = Topology() -net1 = Network(T, "pyt0") - -#h1 = Host(T, net1) -h2 = Host(T, net1, args="zeromq_prod:") - -sleep(1) -raw_input("Press enter to continue ...") -start(T) - -# Zeromq part -ctx = zmq.Context() -z = ctx.socket(zmq.SUB) -z.setsockopt(zmq.SUBSCRIBE, "") -z.connect("tcp://172.16.1.2:1207") -print "In the loop..." -for i in range(20): - if z.poll(20000) == 0: - print "Timeout!!!" - cleanup() - sys.exit(1) - else: - msg = z.recv() - print "Recvd msg len=%d content: %s" % (len(msg), msg) - - - - -cleanup() diff --git a/kernel/picotcp/test/test_tftp_app_client.c b/kernel/picotcp/test/test_tftp_app_client.c deleted file mode 100644 index dd5ece3..0000000 --- a/kernel/picotcp/test/test_tftp_app_client.c +++ /dev/null @@ -1,244 +0,0 @@ -#include -#include -#include -#include -#include -#include "pico_stack.h" -#include "pico_config.h" -#include "pico_ipv4.h" -#include "pico_icmp4.h" -#include "pico_socket.h" -#include "pico_stack.h" -#include "pico_device.h" -#include "pico_dev_vde.h" -#include "pico_tftp.h" - -static struct pico_device *pico_dev; - -int32_t get_filesize(const char *filename) -{ - int ret; - struct stat buf; - - ret = stat(filename, &buf); - if (ret) - return -1; - - return buf.st_size; -} - -void start_rx(struct pico_tftp_session *session, int *synchro, const char *filename, int options) -{ - int ret; - int fd; - int32_t len; - uint8_t buf[PICO_TFTP_PAYLOAD_SIZE]; - int left = 1000; - int countdown = 0; - - printf("Start receiving file %s with options set to %d\n", filename, options); - - if (options) { - ret = pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, 0); - if (ret) { - fprintf(stderr, "Error in pico_tftp_set_option\n"); - exit(1); - } - } - - ret = pico_tftp_app_start_rx(session, filename); - if (ret) { - fprintf(stderr, "Error in pico_tftp_app_start_rx\n"); - exit(1); - } - - fd = open(filename, O_WRONLY | O_EXCL | O_CREAT, 0664); - if (!fd) { - fprintf(stderr, "Error in open\n"); - countdown = 1; - } - - for(; 
left; left -= countdown) { - usleep(2000); /* PICO_IDLE(); */ - pico_stack_tick(); - if (countdown) - continue; - - if (*synchro) { - len = pico_tftp_get(session, buf, PICO_TFTP_PAYLOAD_SIZE); - if (len < 0) { - fprintf(stderr, "Failure in pico_tftp_get\n"); - close(fd); - countdown = 1; - continue; - } - - ret = write(fd, buf, len); - if (ret < 0) { - fprintf(stderr, "Error in write\n"); - pico_tftp_abort(session, TFTP_ERR_EXCEEDED, "File write error"); - close(fd); - countdown = 1; - continue; - } - - printf("Written %" PRId32 " bytes to file (synchro=%d)\n", len, *synchro); - - if (len != PICO_TFTP_PAYLOAD_SIZE) { - close(fd); - printf("Transfer complete!\n"); - countdown = 1; - } - } - } -} - -void start_tx(struct pico_tftp_session *session, int *synchro, const char *filename, int options) -{ - int ret; - int fd; - int32_t len; - uint8_t buf[PICO_TFTP_PAYLOAD_SIZE]; - int left = 1000; - int countdown = 0; - - printf("Start sending file %s with options set to %d\n", filename, options); - - if (options) { - ret = get_filesize(filename); - if (ret < 0) { - fprintf(stderr, "Error in get_filesize\n"); - exit(1); - } - - ret = pico_tftp_set_option(session, PICO_TFTP_OPTION_FILE, ret); - if (ret) { - fprintf(stderr, "Error in pico_tftp_set_option\n"); - exit(1); - } - } - - ret = pico_tftp_app_start_tx(session, filename); - if (ret) { - fprintf(stderr, "Error in pico_tftp_app_start_rx\n"); - exit(1); - } - - fd = open(filename, O_RDONLY, 0444); - if (!fd) { - fprintf(stderr, "Error in open\n"); - pico_tftp_abort(session, TFTP_ERR_EACC, "Error opening file"); - countdown = 1; - } - - for(; left; left -= countdown) { - usleep(2000); /* PICO_IDLE(); */ - pico_stack_tick(); - if (countdown) - continue; - - if (*synchro) { - ret = read(fd, buf, PICO_TFTP_PAYLOAD_SIZE); - if (ret < 0) { - fprintf(stderr, "Error in read\n"); - pico_tftp_abort(session, TFTP_ERR_EACC, "File read error"); - close(fd); - countdown = 1; - continue; - } - - printf("Read %" PRId32 " bytes from file 
(synchro=%d)\n", len, *synchro); - - len = pico_tftp_put(session, buf, ret); - if (len < 0) { - fprintf(stderr, "Failure in pico_tftp_put\n"); - close(fd); - countdown = 1; - continue; - } - - if (len != PICO_TFTP_PAYLOAD_SIZE) { - close(fd); - printf("Transfer complete!\n"); - countdown = 1; - } - } - } -} - -void usage(const char *text) -{ - fprintf(stderr, "%s\nArguments must be \n" - " can be:\n" - "\tg => GET request without options\n" - "\tG => GET request WITH options\n" - "\tp => PUT request without options\n" - "\tP => PUT request WITH options\n\n", - text); - exit(1); -} - -int main(int argc, char**argv) -{ - struct pico_ip4 my_ip; - union pico_address server_address; - struct pico_ip4 netmask; - struct pico_tftp_session *session; - int synchro; - int options = 0; - void (*operation)(struct pico_tftp_session *session, int *synchro, const char *filename, int options); - - unsigned char macaddr[6] = { - 0, 0, 0, 0xa, 0xb, 0x0 - }; - - uint16_t *macaddr_low = (uint16_t *) (macaddr + 2); - *macaddr_low = *macaddr_low ^ (uint16_t)((uint16_t)getpid() & (uint16_t)0xFFFFU); - macaddr[4] ^= (uint8_t)(getpid() >> 8); - macaddr[5] ^= (uint8_t) (getpid() & 0xFF); - - pico_string_to_ipv4("10.40.0.10", &my_ip.addr); - pico_string_to_ipv4("255.255.255.0", &netmask.addr); - pico_string_to_ipv4("10.40.0.2", &server_address.ip4.addr); - - if (argc != 3) { - usage("Invalid number or arguments"); - } - - switch (argv[2][0]) { - case 'G': - options = 1; - case 'g': - operation = start_rx; - break; - case 'P': - options = 1; - case 'p': - operation = start_tx; - break; - default: - usage("Invalid mode"); - } - - printf("%s start!\n", argv[0]); - pico_stack_init(); - pico_dev = (struct pico_device *) pico_vde_create("/tmp/vde_switch", "tap0", macaddr); - - if(!pico_dev) { - fprintf(stderr, "Error creating pico device, got enough privileges? 
Exiting...\n"); - exit(1); - } - - pico_ipv4_link_add(pico_dev, my_ip, netmask); - printf("Starting picoTCP loop\n"); - - session = pico_tftp_app_setup(&server_address, short_be(PICO_TFTP_PORT), PICO_PROTO_IPV4, &synchro); - if (!session) { - fprintf(stderr, "Error in pico_tftp_app_setup\n"); - exit(1); - } - - printf("synchro %d\n", synchro); - - operation(session, &synchro, argv[1], options); -} diff --git a/kernel/picotcp/test/unit/modunit_pico_6lowpan.c b/kernel/picotcp/test/unit/modunit_pico_6lowpan.c deleted file mode 100644 index 166dd76..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_6lowpan.c +++ /dev/null @@ -1,1634 +0,0 @@ -#include "pico_addressing.h" -#include "pico_ipv6_nd.h" -#include "pico_stack.h" -#include "pico_frame.h" -#include "pico_ipv6.h" -#include "pico_dev_radiotest.c" -#include "modules/pico_6lowpan_ll.c" -#include "modules/pico_6lowpan.c" -#include "pico_6lowpan_ll.h" -#include "check.h" - -#include "pico_config.h" -#include "pico_frame.h" -#include "pico_device.h" -#include "pico_protocol.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_dns_client.h" - -#include "pico_ethernet.h" -#include "pico_6lowpan.h" -#include "pico_802154.h" -#include "pico_olsr.h" -#include "pico_aodv.h" -#include "pico_eth.h" -#include "pico_arp.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_icmp4.h" -#include "pico_icmp6.h" -#include "pico_igmp.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_socket.h" -#include "heap.h" - -#include -#include -#include - -/******************************************************************************* -* MACROS -******************************************************************************/ - -#define STARTING() \ - printf("*********************** STARTING %s ***\n", __func__); \ - fflush(stdout) -#define TRYING(s, ...) 
\ - printf("\n=== TRYING %s: " s, __func__, ##__VA_ARGS__); \ - fflush(stdout) -#define OUTPUT() \ - do { \ - printf("\n> OUTPUT:\n"); \ - } while (0) -#define RESULTS() \ - do { \ - printf("\n> RESULTS:\n"); \ - } while (0) -#define FAIL_UNLESS(cond, i, s, ...) \ - do { \ - char str[80] = { 0 }; \ - snprintf(str, 80, "TEST %2d: "s"...", (i)++, ##__VA_ARGS__); \ - printf("%s",str); \ - if (cond) { \ - printf("%-*s %s\n", (int)(80 - strlen(str) - 12), "", "[SUCCESS]"); \ - } else { \ - printf("%-*s %s\n", (int)(80 - strlen(str) - 12), "", "[FAILED]"); \ - } \ - fflush(stdout); \ - fail_unless((int)(intptr_t)cond, s, ##__VA_ARGS__); \ - }while(0) -#define FAIL_IF(cond, i, s, ...) }\ - do { \ - char str[80] = { 0 }; \ - snprintf(str, 80, "TEST %2d: "s"...", (i)++, ##__VA_ARGS__); \ - printf(str); \ - if (!cond) { \ - printf("%-*s %s\n", (int)(80 - strlen(str) - 12), "", "[SUCCESS]"); \ - } else { \ - printf("%-*s %s\n", (int)(80 - strlen(str) - 12), "", "[FAILED]"); \ - } \ - fflush(stdout); \ - fail_if((int)(intptr_t)(cond), s, ##__VA_ARGS__); \ - }while(0) -#define ENDING(i) \ - printf("*********************** ENDING %s *** NUMBER OF TESTS: %d\n",\ - __func__, ((i)-1)); \ - fflush(stdout) -#define DBG(s, ...) 
\ - printf(s, ##__VA_ARGS__); \ - fflush(stdout) -static void -dbg_buffer(uint8_t *buf, size_t len) -{ - int i = 0; - printf("Buffer:"); - for (i = 0; i < (int)len; i++) { - if (i % 8 != 0) - printf("0x%02x, ", buf[i]); - else { - printf("\n0x%02x, ", buf[i]); - } - } - printf("\n"); -} - -/******************************************************************************* -* CTX -******************************************************************************/ - -START_TEST(tc_compare_prefix) -{ - int test = 1, ret = 0; - struct pico_ip6 a, b, c; - pico_string_to_ipv6("2aaa:1234:5678:9123:0:0ff:fe00:0105", a.addr); - pico_string_to_ipv6("2aaa:1234:5678:9143:0102:0304:0506:0708", b.addr); - pico_string_to_ipv6("2aaa:1234:5678:9156:0102:0304:0506:0708", c.addr); - - STARTING(); - - TRYING("With 2 equal prexixes\n"); - ret = compare_prefix(a.addr, b.addr, 54); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Prefixes are equal, should've returned 0, ret = %d",ret); - - TRYING("With b > a\n"); - ret = compare_prefix(a.addr, b.addr, 60); - RESULTS(); - FAIL_UNLESS(ret, test, "Prefixes are not equal, shouldn't have returned 0, ret = %d",ret); - - TRYING("With c > b\n"); - ret = compare_prefix(b.addr, c.addr, 64); - RESULTS(); - FAIL_UNLESS(ret, test, "Prefixes are not equal, shouldn't have returned 0, ret = %d",ret); - - ENDING(test); -} -END_TEST - -START_TEST(tc_compare_ctx) -{ - int test = 1, ret = 0; - struct pico_ip6 a, b, c; - struct iphc_ctx ca, cb, cc; - pico_string_to_ipv6("2aaa:1234:5678:9123:0:0ff:fe00:0105", a.addr); - pico_string_to_ipv6("2aaa:1234:5678:9145:0102:0304:0506:0708", b.addr); - pico_string_to_ipv6("2aaa:1234:5678:9156:0102:0304:0506:0708", c.addr); - ca.prefix = a; - ca.size = 54; - cb.prefix = b; - cc.prefix = c; - - STARTING(); - - TRYING("With 2 equal ctx's\n"); - ret = compare_ctx(&ca, &cb); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Prefixes are equal, should've returned 0, ret = %d", ret); - - ca.size = 60; - TRYING("With b > a\n"); - ret = 
compare_ctx(&ca, &cb); - RESULTS(); - FAIL_UNLESS(ret, test, "Prefixes are not equal, shouln'r return 0, ret = %d", ret); - - cb.size = 64; - TRYING("With b > c\n"); - ret = compare_ctx(&cb, &cc); - RESULTS(); - FAIL_UNLESS(ret, test, "Prefixes are not equal, shouldn't return 0, ret = %d", ret); - - ENDING(test); -} -END_TEST - -START_TEST(tc_ctx_lookup) -{ - int test = 1, ret = 0; - struct pico_ip6 a, b; - struct iphc_ctx *found = NULL; - pico_string_to_ipv6("2aaa:1234:5678:9123:0:0ff:fe00:0105", a.addr); - pico_string_to_ipv6("2aaa:1234:5678:9145:0102:0304:0506:0708", b.addr); - - STARTING(); - pico_stack_init(); - - TRYING("To find a prefix in the context tree\n"); - ret = ctx_insert(a, 13, 54, 0, PICO_IPHC_CTX_COMPRESS, NULL); - found = ctx_lookup(b); - RESULTS(); - FAIL_UNLESS(!ret, test, "Inserting should've succeeded, return 0. ret = %d", ret); - FAIL_UNLESS(found, test, "Should've found the context"); - FAIL_UNLESS(found->id == 13, test, "Should've found the correct ctx, ID = %d", ret); - - ENDING(test); -} -END_TEST - -/******************************************************************************* -* IPHC -******************************************************************************/ - -#ifdef PICO_6LOWPAN_IPHC_ENABLED - -START_TEST(tc_compressor_vtf) -{ - int test = 1, ret = 0; - uint8_t ori_fl[] = {0x64,0x00,0x00,0x00}; - uint8_t ori_dscp[] = {0x62,0x00,0x00,0x00}; - uint8_t ori_notc[] = {0x60,0x0f,0xed,0xcb}; - uint8_t ori_inline[] = {0x6f,0xaf,0xed,0xcb}; - uint8_t comp_fl[] = {0x40}; - uint8_t comp_dscp[] = {0x20}; - uint8_t comp_notc[] = {0x0f,0xed,0xcb}; - uint8_t comp_inline[] = {0xfa,0x0f,0xed,0xcb}; - uint8_t comp[4] = {0, 0, 0, 0}; - uint8_t iphc[3] = {0, 0, 0}; - - STARTING(); - - TRYING("With ECN set. 
No matter DSCP, should elide flow label and reformat tc\n"); - ret = compressor_vtf(ori_fl, comp, iphc, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(comp, 4); - RESULTS(); - FAIL_UNLESS((iphc[0] & TF_ELIDED) == TF_ELIDED_FL, test, "Should've set the IPHC-bits correctly, %02X", iphc[0]); - FAIL_UNLESS(1 == ret, test, "Should've returned size of 1, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(comp_fl, comp, (size_t)ret), test, "inline formatting not correct"); - memset(comp, 0, 4); - memset(iphc, 0, 3); - - TRYING("With DSCP set. No matter ECN, should elide flow label and reformat tc\n"); - ret = compressor_vtf(ori_dscp, comp, iphc, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(comp, 4); - RESULTS(); - FAIL_UNLESS((iphc[0] & TF_ELIDED) == TF_ELIDED_FL, test, "Should've set the IPHC-bits correctly, %02X", iphc[0]); - FAIL_UNLESS(1 == ret, test, "Should've returned size of 1, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(comp_dscp, comp, (size_t)ret), test, "inline formatting not correct"); - memset(comp, 0, 4); - memset(iphc, 0, 3); - - TRYING("With FL set. If DSCP is not set, can be compressed to 3 bytes\n"); - ret = compressor_vtf(ori_notc, comp, iphc, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(comp, 4); - RESULTS(); - FAIL_UNLESS((iphc[0] & TF_ELIDED) == TF_ELIDED_DSCP, test, "Should've set the IPHC-bits correctly, %02X", iphc[0]); - FAIL_UNLESS(3 == ret, test, "Should've returned size of 3, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(comp_notc, comp, (size_t)ret), test, "inline formatting not correct"); - memset(comp, 0, 4); - memset(iphc, 0, 3); - - TRYING("With evt. set. 
Should elide nothing and reformat traffic class\n"); - ret = compressor_vtf(ori_inline, comp, iphc, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(comp, 4); - RESULTS(); - FAIL_UNLESS((iphc[0] & TF_ELIDED) == TF_INLINE, test, "Should've set the IPHC-bits correctly, %02X", iphc[0]); - FAIL_UNLESS(4 == ret, test, "Should've returned size of 4, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(comp_inline, comp, (size_t)ret), test, "inline formatting not correct"); - memset(comp, 0, 4); - memset(iphc, 0, 3); - - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_vtf) -{ - int test = 1, ret = 0; - uint8_t ori_fl[] = {0x64,0x00,0x00,0x00}; - uint8_t ori_dscp[] = {0x62,0x00,0x00,0x00}; - uint8_t ori_notc[] = {0x60,0x0f,0xed,0xcb}; - uint8_t ori_inline[] = {0x6f,0xaf,0xed,0xcb}; - uint8_t comp_fl[] = {0x40}; - uint8_t comp_dscp[] = {0x20}; - uint8_t comp_notc[] = {0x0f,0xed,0xcb}; - uint8_t comp_inline[] = {0xfa,0x0f,0xed,0xcb}; - uint8_t ori[4] = {0}; - uint8_t iphc_fl[3] = {TF_ELIDED_FL, 0,0}; - uint8_t iphc_dscp[3] = {TF_ELIDED_FL, 0,0}; - uint8_t iphc_notc[3] = {TF_ELIDED_DSCP, 0,0}; - uint8_t iphc_inline[3] = {TF_INLINE, 0,0}; - - STARTING(); - - TRYING("With flow label compressed\n"); - ret = decompressor_vtf(ori, comp_fl, iphc_fl, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(ori, 4); - RESULTS(); - FAIL_UNLESS(1 == ret, test, "Should've returned length of 1, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(ori_fl, ori, (size_t)4), test, "Should've formatted IPv6 VTF-field correctly"); - memset(ori, 0, 4); - - TRYING("With flow label compression but with IPHC inline\n"); - ret = decompressor_vtf(ori, comp_dscp, iphc_dscp, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(ori, 4); - RESULTS(); - FAIL_UNLESS(1 == ret, test, "Should've returned length of 1, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(ori_dscp, ori, (size_t)4), test, "Should've formatted IPv6 VTF-field correctly"); - memset(ori, 0, 4); - - TRYING("With flow label inline and DSCP compressed\n"); - ret = 
decompressor_vtf(ori, comp_notc, iphc_notc, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(ori, 4); - RESULTS(); - FAIL_UNLESS(3 == ret, test, "Should've returned length of 3, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(ori_notc, ori, (size_t)4), test, "Should've formatted IPv6 VTF-field correctly"); - memset(ori, 0, 4); - - TRYING("With evt. inline\n"); - ret = decompressor_vtf(ori, comp_inline, iphc_inline, NULL, NULL, NULL); - OUTPUT(); - dbg_buffer(ori, 4); - RESULTS(); - FAIL_UNLESS(4 == ret, test, "Should've returned length of 4, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(ori_inline, ori, (size_t)4), test, "Should've formatted IPv6 VTF-field correctly"); - memset(ori, 0, 4); - - ENDING(test); -} -END_TEST - -START_TEST(tc_compressor_nh) -{ - int test = 1; - uint8_t nxthdr = PICO_PROTO_UDP; - uint8_t iphc = 0; - uint8_t comp = 0; - int ret = 0; - - STARTING(); - - TRYING("With next header = UDP\n"); - ret = compressor_nh(&nxthdr, &comp, &iphc, NULL, NULL, NULL); - OUTPUT(); - printf("IPHC: %02X", iphc); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(iphc == NH_COMPRESSED, test, "Should've set the IPHC bits correctly"); - FAIL_UNLESS(0 == comp, test, "Shouldn't have changed compressed"); - - TRYING("With next header = EXT_HOPBYHOP\n"); - nxthdr = PICO_IPV6_EXTHDR_HOPBYHOP; - ret = compressor_nh(&nxthdr, &comp, &iphc, NULL, NULL, NULL); - OUTPUT(); - printf("IPHC: %02X", iphc); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(iphc == NH_COMPRESSED, test, "Should've set the IPHC bits correctly"); - FAIL_UNLESS(0 == comp, test, "Shouldn't have changed compressed"); - - TRYING("With next header = EXT_ROUTING\n"); - nxthdr = PICO_IPV6_EXTHDR_ROUTING; - ret = compressor_nh(&nxthdr, &comp, &iphc, NULL, NULL, NULL); - OUTPUT(); - printf("IPHC: %02X", iphc); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(iphc == NH_COMPRESSED, 
test, "Should've set the IPHC bits correctly"); - FAIL_UNLESS(0 == comp, test, "Shouldn't have changed compressed"); - - TRYING("With next header = EXT_FRAG\n"); - nxthdr = PICO_IPV6_EXTHDR_FRAG; - ret = compressor_nh(&nxthdr, &comp, &iphc, NULL, NULL, NULL); - OUTPUT(); - printf("IPHC: %02X", iphc); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(iphc == NH_COMPRESSED, test, "Should've set the IPHC bits correctly"); - FAIL_UNLESS(0 == comp, test, "Shouldn't have changed compressed"); - - TRYING("With next header = EXT_DSTOPT\n"); - nxthdr = PICO_IPV6_EXTHDR_DESTOPT; - ret = compressor_nh(&nxthdr, &comp, &iphc, NULL, NULL, NULL); - OUTPUT(); - printf("IPHC: %02X", iphc); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(iphc == NH_COMPRESSED, test, "Should've set the IPHC bits correctly"); - FAIL_UNLESS(0 == comp, test, "Shouldn't have changed compressed"); - - TRYING("With next header = TCP\n"); - nxthdr = PICO_PROTO_TCP; - ret = compressor_nh(&nxthdr, &comp, &iphc, NULL, NULL, NULL); - OUTPUT(); - printf("IPHC: %02X", iphc); - RESULTS(); - FAIL_UNLESS(1 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(iphc == 0, test, "Should've set the IPHC bits correctly"); - FAIL_UNLESS(PICO_PROTO_TCP == comp, test, "Shouldn't have changed compressed"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_nh) -{ - int test = 1; - uint8_t iphc = NH_COMPRESSED; - uint8_t ori = 0; - int8_t ret = 0; - uint8_t comp = PICO_PROTO_TCP; - - STARTING(); - - TRYING("With NH bit set\n"); - ret = decompressor_nh(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(0 == ori, test, "Should've filled ori with NH_COMPRESSED"); - - TRYING("With NH bit cleared\n"); - iphc = 0; - ret = decompressor_nh(&ori, &comp, &iphc, NULL, NULL, NULL); - FAIL_UNLESS(1 == ret, test, "Should've returned 1, 
ret = %d", ret); - FAIL_UNLESS(PICO_PROTO_TCP == ori, test, "Should've filled ori with PICO_PROTO_TCP"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_compressor_hl) -{ - int test = 1; - uint8_t iphc = 0; - uint8_t ori = 1; - uint8_t comp; - int ret = 0; - - STARTING(); - - TRYING("With HL set to 1\n"); - ret = compressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(HL_COMPRESSED_1 == iphc, test, "Should've set IPHC bits correctly"); - - TRYING("With HL set to 64\n"); - ori = 64; - ret = compressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(HL_COMPRESSED_64 == iphc, test, "Should've set IPHC bits correctly"); - - TRYING("With HL set to 255\n"); - ori = 255; - ret = compressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(HL_COMPRESSED_255 == iphc, test, "Should've set IPHC bits correctly"); - - TRYING("With random HL\n"); - ori = 153; - ret = compressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(1 == ret, test, "Should've returned 1, ret = %d",ret); - FAIL_UNLESS(0 == iphc, test, "Should've set IPHC bits correctly"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_hl) -{ - int test = 1; - uint8_t iphc = HL_COMPRESSED_1; - uint8_t ori = 0; - uint8_t comp = 0; - int ret = 0; - - STARTING(); - - TRYING("HL 1 compressed\n"); - ret = decompressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d",ret ); - FAIL_UNLESS(1 == ori, test, "Should filled in correct hop limit"); - - TRYING("HL 64 compressed\n"); - iphc = HL_COMPRESSED_64; - ret = decompressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d",ret ); - 
FAIL_UNLESS(64 == ori, test, "Should filled in correct hop limit"); - - TRYING("HL 255 compressed\n"); - iphc = HL_COMPRESSED_255; - ret = decompressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d",ret ); - FAIL_UNLESS(255 == ori, test, "Should filled in correct hop limit"); - - TRYING("HL not compressed\n"); - iphc = 0; - comp = 125; - ret = decompressor_hl(&ori, &comp, &iphc, NULL, NULL, NULL); - RESULTS(); - FAIL_UNLESS(1 == ret, test, "Should've returned 0, ret = %d",ret ); - FAIL_UNLESS(125 == ori, test, "Should filled in correct hop limit"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_addr_comp_mode) -{ - uint8_t iphc[3] = { 0 }; - int test = 1, ret = 0; - struct pico_ip6 ip; - struct pico_ip6 local; - struct pico_ip6 local2; - struct pico_ip6 local3; - union pico_ll_addr addr = { .pan = { .addr.data = {1,2,3,4,5,6,7,8}, .mode = AM_6LOWPAN_SHORT }}; - struct pico_device dev = { .mode = LL_MODE_IEEE802154 }; - pico_string_to_ipv6("ff00:0:0:0:0:0:e801:100", ip.addr); - pico_string_to_ipv6("fe80:0:0:0:0102:0304:0506:0708", local.addr); - pico_string_to_ipv6("fe80:0:0:0:0:0ff:fe00:0105", local3.addr); - pico_string_to_ipv6("fe80:0:0:0:0:0ff:fe00:0102", local2.addr); - - STARTING(); - - pico_stack_init(); - - TRYING("With MAC derived address\n"); - ret = addr_comp_mode(iphc, &local2, addr, &dev, SRC_SHIFT); - OUTPUT(); - dbg_buffer(iphc, 3); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 0, ret = %d", ret); - FAIL_UNLESS(iphc[1] & SRC_COMPRESSED, test, "Should've set IPHC bits correctly, %02X", iphc[1]); - memset(iphc, 0, 3); - - TRYING("With wrong device link layer mode\n"); - dev.mode = LL_MODE_ETHERNET; - ret = addr_comp_mode(iphc, &local2, addr, &dev, SRC_SHIFT); - RESULTS(); - FAIL_UNLESS(-1 == ret, test, "Shoudl've returned error (-1), ret = %d", ret); - memset(iphc, 0, 3); - - TRYING("With non MAC derived extended address\n"); - dev.mode = LL_MODE_IEEE802154; - ret = 
addr_comp_mode(iphc, &local, addr, &dev, SRC_SHIFT); - FAIL_UNLESS(8 == ret, test, "Should've return 8, ret = %d", ret); - FAIL_UNLESS(SRC_COMPRESSED_64 == iphc[1], test, "Should've set the IPHC bits correctly, iphc = %02X", iphc[1]); - memset(iphc, 0, 3); - - TRYING("With non MAC derived short address\n"); - ret = addr_comp_mode(iphc, &local3, addr, &dev, SRC_SHIFT); - FAIL_UNLESS(2 == ret, test, "should've returned 2, ret = %d", ret); - FAIL_UNLESS(SRC_COMPRESSED_16 == iphc[1], test, "Should've set the IPHC bits correctly, iphc = %02X", iphc[1]); - - ENDING(test); -} -END_TEST - -START_TEST(tc_addr_comp_prefix) -{ - int test = 1, ret = 0; - uint8_t iphc[3] = { 0 }; - struct pico_ip6 ip; - struct pico_ip6 local; - struct pico_ip6 local3; - pico_string_to_ipv6("ff00:0:0:0:0:0:e801:100", ip.addr); - pico_string_to_ipv6("fe80:0:0:0:0102:0304:0506:0708", local.addr); - pico_string_to_ipv6("2aaa:0:0:0:0:0ff:fe00:0105", local3.addr); - - STARTING(); - - pico_stack_init(); - - TRYING("With MCAST address\n"); - ret = addr_comp_prefix(iphc, &ip, 1); - RESULTS(); - FAIL_UNLESS(COMP_MULTICAST == ret, test, "Should've returned COMP_MULTICAST, ret = %d", ret); - FAIL_UNLESS(!iphc[1], test, "Shouldn't have set any IPHC bytes, iphc = %02X", iphc[1]); - memset(iphc, 0, 3); - - TRYING("With link local destination address\n"); - ret = addr_comp_prefix(iphc, &local, 0); - RESULTS(); - FAIL_UNLESS(COMP_LINKLOCAL == ret, test, "Should've returned COMP_LINKLOCAL, ret = %d", ret); - FAIL_UNLESS(!iphc[1], test, "Shouldn't have set any IPHC bytes, iphc = %02X", iphc[1]); - memset(iphc, 0, 3); - - TRYING("With a unicast address where there's no context available for\n"); - ret = addr_comp_prefix(iphc, &local3, 0); - RESULTS(); - FAIL_UNLESS(COMP_STATELESS == ret, test, "Should've return COMP_STATELESS, ret = %d", ret); - FAIL_UNLESS(!iphc[1], test, "Shouldn't have set any IPHC bytes, iphc = %02X", iphc[1]); - memset(iphc, 0,3); - - TRYING("With a unicast address where there's context 
available for\n"); - ctx_insert(local3, 13, 64, 0, PICO_IPHC_CTX_COMPRESS, NULL); - ret = addr_comp_prefix(iphc, &local3, 0); - FAIL_UNLESS(13 == ret, test, "Should've returned CTX ID of 13, ret = %d", ret); - FAIL_UNLESS(iphc[1] & DST_STATEFUL, test, "Should've set DAC correctly, iphc = %02X", iphc[1]); - FAIL_UNLESS(iphc[1] & CTX_EXTENSION, test, "Should've set CTX extension bit correctly, iphc = %02X", iphc[1]); - - ENDING(test); -} -END_TEST - -START_TEST(tc_compressor_src) -{ - int test = 1; - struct pico_ip6 unspec = {{ 0 }}; - struct pico_ip6 ll_mac = {{0xfe,0x80,0,0,0,0,0,0 ,1,2,3,4,5,6,7,8}}; - struct pico_ip6 ll_nmac_16 = {{0xfe,0x80,0,0,0,0,0,0 ,0,0,0,0xff,0xfe,0,0x12,0x34}}; - struct pico_ip6 ll_nmac_64 = {{0xfe,0x80,0,0,0,0,0,0 ,8,7,6,5,4,3,2,1}}; - struct pico_ip6 ip_ctx = {{0x2a,0xaa,0,0,0,0,0,0 ,1,2,3,4,5,6,7,8}}; - struct pico_ip6 ip_stateless = {{0x2a,0xbb,0,0,0,0,0,0 ,1,2,3,4,5,6,7,8}}; - union pico_ll_addr mac = { .pan = {.addr.data = {3,2,3,4,5,6,7,8}, .mode = AM_6LOWPAN_EXT } }; - struct pico_device dev = { 0 }; - int ret = 0; - - uint8_t iphc[3] = { 0, 0, 0 }; - uint8_t buf[PICO_SIZE_IP6] = { 0 }; - - dev.mode = LL_MODE_IEEE802154; - STARTING(); - pico_stack_init(); - - TRYING("With unspecified source address, should: set SAC, clear SAM\n"); - ret = compressor_src(unspec.addr, buf, iphc, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(iphc, 3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(16 == ret, test, "Shouldn't elide unspecified address, ret = %d", ret); - FAIL_UNLESS(iphc[1] & SRC_STATEFUL, test, "Should've set SAC"); - FAIL_UNLESS((iphc[1] & SRC_COMPRESSED) == 0, test, "Should've cleared SAM"); - - TRYING("With invalid device, should indicate error\n"); - dev.mode = LL_MODE_ETHERNET; - ret = compressor_src(ll_mac.addr, buf, iphc, &mac, NULL, &dev); - RESULTS(); - FAIL_UNLESS(-1 == ret, test, "Should've indicated error, invalid device, ret = %d",ret); - - TRYING("With mac derived address, should elide fully\n"); - dev.mode = 
LL_MODE_IEEE802154; - ret = compressor_src(ll_mac.addr, buf, iphc, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(iphc,3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned compressed size of 0, ret = %d", ret); - FAIL_UNLESS(!(iphc[1] & SRC_STATEFUL), test, "Shoudln't have set SAC"); - FAIL_UNLESS((iphc[1] & SRC_COMPRESSED) == SRC_COMPRESSED, test, "Should set SAM to '11', iphc = %02X", iphc[1]); - - TRYING("With non mac derived 16-bit derivable address\n"); - ret = compressor_src(ll_nmac_16.addr, buf, iphc, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(iphc,3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(2 == ret, test, "Should've returned compressed size of 2, ret = %d", ret); - FAIL_UNLESS(!(iphc[1] & SRC_STATEFUL), test, "Shouldn't have set SAC"); - FAIL_UNLESS((iphc[1] & SRC_COMPRESSED) == SRC_COMPRESSED_16, test, "Should've set SAM to '10', iphc = %02X", iphc[1]); - FAIL_UNLESS(0 == memcmp(buf, ll_nmac_16.addr + 14, 2), test, "Should've copied 16 bit of source address inline"); - - TRYING("With non mac derived 64-bit derivable address\n"); - ret = compressor_src(ll_nmac_64.addr, buf, iphc, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(iphc,3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(8 == ret, test, "Should've returned compressed size of 8, ret = %d", ret); - FAIL_UNLESS(!(iphc[1] & SRC_STATEFUL), test, "Shoudln't have set SAC"); - FAIL_UNLESS((iphc[1] & SRC_COMPRESSED) == SRC_COMPRESSED_64, test, "Should've set SAM to '01', iphc = %02X", iphc[1]); - FAIL_UNLESS(0 == memcmp(buf, ll_nmac_64.addr + 8, 8), test, "Should've copied IID of source address inline"); - - TRYING("With context derived address\n"); - pico_stack_init(); - ctx_insert(ip_ctx, 13, 64, 0, PICO_IPHC_CTX_COMPRESS, NULL); - ret = compressor_src(ip_ctx.addr, buf, iphc, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(iphc, 3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned 
compressed size of 0, ret = %d", ret); - FAIL_UNLESS((iphc[1] & SRC_STATEFUL), test, "Shoudl've set SAC"); - FAIL_UNLESS((iphc[1] & SRC_COMPRESSED) == SRC_COMPRESSED, test, "Shoudl've set SAM to '11', iphc = %02X", iphc[1]); - FAIL_UNLESS((iphc[2] >> SRC_SHIFT) == 13, test, "Should've filled in the context extension correctly, ctx = %02X", iphc[2]); - - TRYING("With stateless compression\n"); - ret = compressor_src(ip_stateless.addr, buf, iphc, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(iphc, 3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(PICO_SIZE_IP6 == ret, test, "Should've returned compressed size of 16, ret = %d",ret); - FAIL_UNLESS((iphc[1] & SRC_STATEFUL) == 0, test, "Shoudln't have set SAC"); - FAIL_UNLESS((iphc[1] & SRC_COMPRESSED) == 0, test, "Should've set SAM to '00', iphc = %02X", iphc[1]); - FAIL_UNLESS(0 == memcmp(buf, ip_stateless.addr, PICO_SIZE_IP6), test, "Should've copied the source address inline"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_src) -{ - int test = 1; - int ret = 0; - - union pico_ll_addr mac = { .pan = {.addr.data = {3,2,3,4,5,6,7,8}, .mode = AM_6LOWPAN_EXT } }; - struct pico_device dev; - - /* Stateless compression */ - uint8_t iphc1[] = {0x00, 0x00, 0x00}; - uint8_t buf1[] = {0x2a, 0xbb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}; - struct pico_ip6 ip1 = {{0x2a,0xbb,0,0,0,0,0,0 ,1,2,3,4,5,6,7,8}}; - - /* With context */ - uint8_t iphc2[] = {0x00, 0xf0, 0xd0}; - uint8_t buf2[] = {0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - struct pico_ip6 ip2 = {{0x2a,0xaa,0,0,0,0,0,0 ,1,2,3,4,5,6,7,8}}; - - /* Link-local non-mac 64-bit derivable address */ - uint8_t iphc4[] = {0x00, 0x10, 0x00}; - uint8_t buf4[] = {0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - struct pico_ip6 ip4 = {{0xfe,0x80,0,0,0,0,0,0 ,8,7,6,5,4,3,2,1}}; - - /* Link-local non-mac 16-bit 
derivable address */ - uint8_t iphc3[] = {0x00, 0x20, 0x00}; - uint8_t buf3[] = {0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - struct pico_ip6 ip3 = {{0xfe,0x80,0,0,0,0,0,0 ,0,0,0,0xff,0xfe,0,0x12,0x34}}; - - /* Link-local mac derivable address */ - uint8_t iphc5[] = {0x00, 0x30, 0x00}; - uint8_t buf5[] = {0}; - struct pico_ip6 ip5 = {{0xfe,0x80,0,0,0,0,0,0 ,1,2,3,4,5,6,7,8}}; - - /* Context non-mac 16-bit derivable address */ - uint8_t iphc6[] = {0x00, 0xE0, 0xd0}; - uint8_t buf6[] = {0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - struct pico_ip6 ip6 = {{0x2a,0xaa,0,0,0,0,0,0 ,0,0,0,0xff,0xfe,0,0x12,0x34}}; - - uint8_t buf[PICO_SIZE_IP6] = { 0 }; - dev.mode = LL_MODE_IEEE802154; - - pico_stack_init(); - STARTING(); - - TRYING("With statelessly compressed address\n"); - ret = decompressor_src(buf, buf1, iphc1, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(16 == ret, test, "Should've returned compressed size of 16, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(buf, ip1.addr, PICO_SIZE_IP6), test, "Should've correctly decompressed address"); - memset(buf, 0, PICO_SIZE_IP6); - - TRYING("With context\n"); - pico_stack_init(); - ctx_insert(ip2, 13, 64, 0, PICO_IPHC_CTX_COMPRESS, NULL); - ret = decompressor_src(buf, buf2, iphc2, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned compressed size of 0, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(buf, ip2.addr, PICO_SIZE_IP6), test, "Shoudld've correctly decompressed addresss"); - memset(buf, 0, PICO_SIZE_IP6); - - TRYING("With link-local non-mac 16-bit derivable address\n"); - ret = decompressor_src(buf, buf3, iphc3, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(2 == ret, test, "Shoudl've returned compressed size of 2, ret = %d", ret); - FAIL_UNLESS(0 == 
memcmp(buf, ip3.addr, PICO_SIZE_IP6), test, "Shoudld've correctly decompressed addresss"); - memset(buf, 0, PICO_SIZE_IP6); - - TRYING("With link-local non-mac 64-bit derivable address\n"); - ret = decompressor_src(buf, buf4, iphc4, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(8 == ret, test, "Should've returned compressed size of 8, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(buf, ip4.addr, PICO_SIZE_IP6), test, "Should've correctly decompressed address"); - memset(buf, 0, PICO_SIZE_IP6); - - TRYING("With link-local mac based address\n"); - ret = decompressor_src(buf, buf5, iphc5, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(0 == ret, test, "Should've returned compressed size of 0, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(buf, ip5.addr, PICO_SIZE_IP6), test, "Should've correctly decompressed address"); - memset(buf, 0, PICO_SIZE_IP6); - - TRYING("Context based non-mac 16-bit derivable address\n"); - ret = decompressor_src(buf, buf6, iphc6, &mac, NULL, &dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(2 == ret, test, "Should've returned compressed size of 2, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(buf, ip6.addr, PICO_SIZE_IP6), test, "Should've correctly decompressed addresss"); - memset(buf, 0, PICO_SIZE_IP6); - - ENDING(test); -} -END_TEST - -START_TEST(tc_compressor_dst) -{ - int test = 1; - int ret = 0; - - union pico_ll_addr mac = { .pan = {.addr.data = {3,2,3,4,5,6,7,8}, .mode = AM_6LOWPAN_EXT } }; - struct pico_device dev; - - /* Multicast 48-bit */ - struct pico_ip6 mcast1 = {{0xff,0x12,0,0,0,0,0,0 ,0,0,0,5,4,3,2,1}}; - uint8_t buf1[] = {0x12,5,4,3,2,1}; - - /* Multicast 32-bit */ - struct pico_ip6 mcast2 = {{0xFF,0x34,0,0,0,0,0,0 ,0,0,0,0,0,1,2,3}}; - uint8_t buf2[] = {0x34,1,2,3}; - - /* Multicast 8-bit */ - struct pico_ip6 mcast3 = {{0xFF,0x02,0,0,0,0,0,0 ,0,0,0,0,0,0,0,5}}; - uint8_t buf3 = 5; - - uint8_t iphc[3] = { 0 }; - 
uint8_t buf[PICO_SIZE_IP6] = { 0 }; - - dev.mode = LL_MODE_IEEE802154; - STARTING(); - pico_stack_init(); - - TRYING("48-bit derivable mcast address\n"); - ret = compressor_dst(mcast1.addr, buf, iphc, NULL, &mac, &dev); - OUTPUT(); - dbg_buffer(iphc, 3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(6 == ret, test, "Should've returned compressed length of 6, ret = %d", ret); - FAIL_UNLESS(iphc[1] & DST_MULTICAST, test, "Should've set IPHC mcast-flag"); - FAIL_UNLESS(!(iphc[1] & DST_STATEFUL), test, "Shouldn't have set stateful flag, iphc = %02X", iphc[1]); - FAIL_UNLESS((iphc[1] & DST_COMPRESSED) == DST_MCAST_48, test, "Should've set DAM to '01', iphc = %02X", iphc[1]); - FAIL_UNLESS(0 == memcmp(buf1, buf, 6), test, "Shoudl've correctly compressed MCAST 48 address"); - - TRYING("32-bit derivable mcast address\n"); - ret = compressor_dst(mcast2.addr, buf, iphc, NULL, &mac, &dev); - OUTPUT(); - dbg_buffer(iphc, 3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(4 == ret, test, "Should've returned compressed length of 4, ret = %d", ret); - FAIL_UNLESS(iphc[1] & DST_MULTICAST, test, "Should've set IPHC mcast-flag"); - FAIL_UNLESS(!(iphc[1] & DST_STATEFUL), test, "Shouldn't have set stateful flag, iphc = %02X", iphc[1]); - FAIL_UNLESS((iphc[1] & DST_COMPRESSED) == DST_MCAST_32, test, "Should've set DAM to '10', iphc = %02X", iphc[1]); - FAIL_UNLESS(0 == memcmp(buf2, buf, 4), test, "Shoudl've correctly compressed MCAST 32 address"); - - TRYING("8-bit derivable mcast address\n"); - ret = compressor_dst(mcast3.addr, buf, iphc, NULL, &mac, &dev); - OUTPUT(); - dbg_buffer(iphc, 3); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(1 == ret, test, "Should've returned compressed length of 1, ret = %d", ret); - FAIL_UNLESS(iphc[1] & DST_MULTICAST, test, "Should've set IPHC mcast-flag"); - FAIL_UNLESS(!(iphc[1] & DST_STATEFUL), test, "Shouldn't have set stateful flag, iphc = %02X", iphc[1]); - FAIL_UNLESS((iphc[1] & DST_COMPRESSED) == 
DST_MCAST_8, test, "Should've set DAM to '11', iphc = %02X", iphc[1]); - FAIL_UNLESS(buf[0] == buf3, test, "Shoudl've correctly compressed MCAST 32 address"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_dst) -{ - int test = 1; - int ret = 0; - - union pico_ll_addr mac = { .pan = {.addr.data = {3,2,3,4,5,6,7,8}, .mode = AM_6LOWPAN_EXT } }; - struct pico_device dev; - - /* Multicast 48-bit */ - uint8_t iphc1[3] = {0x00, 0x09, 0x00}; - struct pico_ip6 mcast1 = {{0xff,0x12,0,0,0,0,0,0 ,0,0,0,5,4,3,2,1}}; - uint8_t buf1[] = {0x12,5,4,3,2,1}; - - /* Multicast 32-bit */ - uint8_t iphc2[3] = {0x00, 0x0a, 0x00}; - struct pico_ip6 mcast2 = {{0xFF,0x34,0,0,0,0,0,0 ,0,0,0,0,0,1,2,3}}; - uint8_t buf2[] = {0x34,1,2,3}; - - /* Multicast 8-bit */ - uint8_t iphc3[3] = {0x00, 0x0b, 0x00}; - struct pico_ip6 mcast3 = {{0xFF,0x02,0,0,0,0,0,0 ,0,0,0,0,0,0,0,5}}; - uint8_t buf3[] = {5}; - - uint8_t buf[PICO_SIZE_IP6] = { 0 }; - - dev.mode = LL_MODE_IEEE802154; - STARTING(); - pico_stack_init(); - - TRYING("48-bit compressed address\n"); - ret = decompressor_dst(buf,buf1,iphc1,NULL, &mac,&dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(6 == ret, test, "Should've returned compressed length of 6, ret = %d", ret); - FAIL_UNLESS(0 == memcmp(mcast1.addr, buf, PICO_SIZE_IP6), test, "Should've correctly decompressed the mcast address"); - - TRYING("32-bit compressed address\n"); - ret = decompressor_dst(buf,buf2,iphc2,NULL, &mac,&dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(4 == ret, test, "Should've returned compressed length of 4, ret = %d",ret); - FAIL_UNLESS(0 == memcmp(mcast2.addr, buf, PICO_SIZE_IP6), test, "Should've correctly decompressed 32-bit mcast address"); - - TRYING("8-bit compressed address\n"); - ret = decompressor_dst(buf,buf3, iphc3, NULL, &mac, &dev); - OUTPUT(); - dbg_buffer(buf, PICO_SIZE_IP6); - RESULTS(); - FAIL_UNLESS(1 == ret, test, "Should've returned compressed length of 1, ret = %d", 
ret); - FAIL_UNLESS(0 == memcmp(mcast3.addr, buf, PICO_SIZE_IP6), test, "Should've correctly decompressed 8-bit mcast address"); - - ENDING(test); -} -END_TEST -static const unsigned char ipv6_frame[61] = { -0x60, 0x00, 0x00, 0x00, 0x00, 0x15, 0x3c, 0xff, /* `.....<. */ -0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ........ */ -0x02, 0x80, 0xe1, 0x03, 0x00, 0x00, 0x9d, 0x00, /* ........ */ -0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ........ */ -0x00, 0x00, 0x00, 0xff, 0xfe, 0x00, 0x65, 0x63, /* ......ec */ -0x11, 0x00, 0x1e, 0x00, 0x01, 0x02, 0x00, 0x00, /* ........ */ -0x4d, 0x4c, 0x4d, 0x4c, 0x00, 0x0d, 0x7b, 0x50, /* MLML..{P */ -0xff, 0x00, 0x01, 0x01, 0x08 /* ..... */ -}; - -static const unsigned char lowpan_frame[18] = { -0x7f, 0x33, 0xe7, 0x02, 0x1e, 0x00, 0xf0, -0x4d, 0x4c, 0x4d, 0x4c, 0x7b, 0x50, 0xff, 0x00, -0x01, 0x01, 0x08 -}; - -static const unsigned char comp_frame[22] = { -0x7f, 0x33, 0xe7, 0x06, 0x1e, 0x00, 0x01, 0x02, -0x00, 0x00, 0xf0, 0x4d, 0x4c, 0x4d, 0x4c, 0x7b, -0x50, 0xff, 0x00, 0x01, 0x01, 0x08 -}; - -START_TEST(tc_compressor_iphc) -{ - int test = 1; - struct pico_frame *f = pico_frame_alloc(61); - union pico_ll_addr src = { .pan = {.addr.data = {0x00,0x80,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_EXT } }; - union pico_ll_addr dst = { .pan = {.addr.data = {0x65,0x63,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_SHORT } }; - int compressed_len = 0; - struct pico_device dev; - uint8_t *buf = NULL; - uint8_t nh; - - dev.mode = LL_MODE_IEEE802154; - memcpy(f->buffer, ipv6_frame, 61); - f->net_hdr = f->buffer; - f->transport_hdr = f->buffer + 48; - f->dev = &dev; - f->src = src; - f->dst = dst; - - STARTING(); - pico_stack_init(); - - TRYING("To compress a IPv6 frame from a sample capture\n"); - buf = compressor_iphc(f, &compressed_len, &nh); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 42); - RESULTS(); - FAIL_UNLESS(2 == compressed_len, test, "Should have returned 
compressed_len of 2, compressed_len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, lowpan_frame, (size_t)compressed_len), test, "Should've compressed frame correctly"); - pico_frame_discard(f); - - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_iphc) -{ - int test = 1; - struct pico_frame *f = pico_frame_alloc(2); - union pico_ll_addr src = { .pan = {.addr.data = {0x00,0x80,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_EXT } }; - union pico_ll_addr dst = { .pan = {.addr.data = {0x65,0x63,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_SHORT } }; - struct pico_device dev; - int compressed_len = 0; - uint8_t *buf = NULL; - uint8_t hdr[40] = { - 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* `.....<. */ - 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ........ */ - 0x02, 0x80, 0xe1, 0x03, 0x00, 0x00, 0x9d, 0x00, /* ........ */ - 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ........ */ - 0x00, 0x00, 0x00, 0xff, 0xfe, 0x00, 0x65, 0x63 }; - dev.mode = LL_MODE_IEEE802154; - memcpy(f->buffer, lowpan_frame, 2); - f->net_hdr = f->buffer; - f->dev = &dev; - f->src = src; - f->dst = dst; - - STARTING(); - pico_stack_init(); - - TRYING("To decompress a 6LoWPAN frame from a sampel capture\n"); - buf = decompressor_iphc(f, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 40); - RESULTS(); - FAIL_UNLESS(2 == compressed_len, test, "Should've returned compressed_len of 2, compressed_len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, hdr, 40), test, "Should've correctly decompressed the 6LoWPAN frame"); - pico_frame_discard(f); - - ENDING(test); -} -END_TEST - -START_TEST(tc_compressor_nhc_udp) -{ - int test = 1; - struct pico_frame *f = pico_frame_alloc(8); - int compressed_len = 0; - uint8_t *buf = NULL; - - uint8_t udp1[8] = {0x4d, 0x4c, 0x4d, 0x4c, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp1[] = {0xf0, 0x4d, 0x4c, 0x4d, 0x4c, 0x7b, 0x50}; - - uint8_t udp2[8] = {0xF0, 
0xb1, 0xF0, 0xb2, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp2[] = {0xf3, 0x12, 0x7b, 0x50}; - - uint8_t udp3[8] = {0xF0, 0xb1, 0x4d, 0x4c, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp3[] = {0xf2, 0xb1, 0x4d, 0x4c, 0x7b, 0x50}; - - uint8_t udp4[8] = {0x4d, 0x4c, 0xF0, 0xb2, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp4[] = {0xf1, 0x4d, 0x4c, 0xb2, 0x7b, 0x50}; - - f->transport_hdr = f->buffer; - - STARTING(); - - TRYING("To compress a UDP header from a sample capture\n"); - memcpy(f->buffer, udp1, 8); - buf = compressor_nhc_udp(f, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 7); - RESULTS(); - FAIL_UNLESS(7 == compressed_len, test, "Should've returned compressed_len of 7, len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, comp1, 7), test, "Should've correctly compressed UDP header"); - - TRYING("To compress a UDP header from a sample capture with both compressible addresses\n"); - memcpy(f->buffer, udp2, 8); - buf = compressor_nhc_udp(f, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 4); - RESULTS(); - FAIL_UNLESS(4 == compressed_len, test, "Should've returned compressed_len of 4, len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, comp2, 4), test, "should've correctly compressed UDP header"); - - TRYING("To compress a UDP header from a sample capture with compressible source\n"); - memcpy(f->buffer, udp3, 8); - buf = compressor_nhc_udp(f, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 6); - RESULTS(); - FAIL_UNLESS(6 == compressed_len, test, "Should've returned compressed_len of 6, len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, comp3, 6), test, "should've correctly compressed UDP header"); - - TRYING("To compress a UDP header from a sample capture with compressible destination\n"); - memcpy(f->buffer, udp4, 8); - buf = compressor_nhc_udp(f, 
&compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 6); - RESULTS(); - FAIL_UNLESS(6 == compressed_len, test, "Should've returned compressed_len of 6, len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, comp4, 6), test, "should've correctly compressed UDP header"); - - - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_nhc_udp) -{ - int test = 1; - struct pico_frame *f = pico_frame_alloc(9); - int compressed_len = 0; - uint8_t *buf = NULL; - - uint8_t udp1[8] = {0x4d, 0x4c, 0x4d, 0x4c, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp1[] = {0xf0, 0x4d, 0x4c, 0x4d, 0x4c, 0x7b, 0x50}; - - uint8_t udp2[8] = {0xF0, 0xb1, 0xF0, 0xb2, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp2[] = {0xf3, 0x12, 0x7b, 0x50}; - - uint8_t udp3[8] = {0xF0, 0xb1, 0x4d, 0x4c, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp3[] = {0xf2, 0xb1, 0x4d, 0x4c, 0x7b, 0x50}; - - uint8_t udp4[8] = {0x4d, 0x4c, 0xF0, 0xb2, 0x00, 0x0d, 0x7b, 0x50}; - uint8_t comp4[] = {0xf1, 0x4d, 0x4c, 0xb2, 0x7b, 0x50}; - - f->transport_hdr = f->buffer; - f->net_len = PICO_SIZE_IP6HDR; - - STARTING(); - - TRYING("To decompress NH_UDP header with inline addresses\n"); - memcpy(f->buffer, comp1, 7); - f->len = 12; - buf = decompressor_nhc_udp(f, 0, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 8); - RESULTS(); - FAIL_UNLESS(7 == compressed_len, test, "Should've returned compressed_len of 7, len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, udp1, 8), test, "Should've correctly compressed UDP header"); - - TRYING("To decompress NHC_UDP header with both addresses compressed\n"); - memcpy(f->buffer, comp2, 4); - f->len = 9; - buf = decompressor_nhc_udp(f, 0, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 8); - RESULTS(); - FAIL_UNLESS(4 == compressed_len, test, "Should've returned compressed_len of 4, len = %d", compressed_len); - 
FAIL_UNLESS(0 == memcmp(buf, udp2, 8), test, "Should've correctly decompressed UDP header"); - - TRYING("To decompress NHC_UDP header with both addresses compressed\n"); - memcpy(f->buffer, comp3, 6); - f->len = 11; - buf = decompressor_nhc_udp(f, 0, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 8); - RESULTS(); - FAIL_UNLESS(6 == compressed_len, test, "Should've returned compressed_len of 6, len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, udp3, 8), test, "Should've correctly decompressed UDP header"); - - TRYING("To decompress NHC_UDP header with both addresses compressed\n"); - memcpy(f->buffer, comp4, 6); - f->len = 11; - buf = decompressor_nhc_udp(f, 0, &compressed_len); - FAIL_UNLESS(buf, test, "Should've at least returned a buffer"); - OUTPUT(); - dbg_buffer(buf, 8); - RESULTS(); - FAIL_UNLESS(6 == compressed_len, test, "Should've returned compressed_len of 6, len = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, udp4, 8), test, "Should've correctly decompressed UDP header"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_compressor_nhc_ext) -{ - int test = 1; - struct pico_frame *f = pico_frame_alloc(9); - uint8_t nh = PICO_IPV6_EXTHDR_DESTOPT; - int compressed_len = 0; - uint8_t *buf = NULL; - - uint8_t ext1[8] = {0x11, 0x00, 0x1e, 0x00, 0x01, 0x02, 0x00, 0x00}; - uint8_t nhc1[8] = {0xe7, 0x06, 0x1e, 0x00, 0x01, 0x02, 0x00, 0x00}; - - f->net_hdr = f->buffer; - - STARTING(); - - TRYING("With DSTOPT extension header\n"); - memcpy(f->buffer, ext1, 8); - buf = compressor_nhc_ext(f, &compressed_len, &nh); - FAIL_UNLESS(buf, test, "Should've at least returend a buffer"); - OUTPUT(); - dbg_buffer(buf, (size_t)compressed_len); - RESULTS(); - FAIL_UNLESS(8 == compressed_len, test, "Should've returned length of 8, ret = %d", compressed_len); - FAIL_UNLESS(PICO_PROTO_UDP == nh, test, "Should've updated next header to %02X, ret = %02X", PICO_PROTO_UDP, nh); - FAIL_UNLESS(0 == 
memcmp(buf, nhc1, (size_t)compressed_len), test, "Should've correctly compressed next header"); - - pico_frame_discard(f); - ENDING(test); -} -END_TEST - -START_TEST(tc_decompressor_nhc_ext) -{ - int test = 1; - struct pico_frame *f = pico_frame_alloc(9); - int compressed_len = 0, decomp; - uint8_t *buf = NULL; - - uint8_t ext1[8] = {0x11, 0x00, 0x1e, 0x00, 0x01, 0x02, 0x00, 0x00}; - uint8_t nhc1[8] = {0xe7, 0x02, 0x1e, 0x00, 0xf0 /* udp dispatch */}; - - f->net_hdr = f->buffer; - - STARTING(); - - TRYING("nhc_ext compressed header with dstopt extension header\n"); - memcpy(f->buffer, nhc1, 5); - buf = decompressor_nhc_ext(f, &compressed_len, &decomp); - FAIL_UNLESS(buf, test, "should've at least returend a buffer"); - OUTPUT(); - dbg_buffer(buf, 8); - RESULTS(); - FAIL_UNLESS(4 == compressed_len, test, "should've returned length of 4, ret = %d", compressed_len); - FAIL_UNLESS(0 == memcmp(buf, ext1, 8), test, "should've correctly decompressed next header"); - - pico_frame_discard(f); - ENDING(test); -} -END_TEST - -START_TEST(tc_pico_iphc_compress) -{ - int test = 1; - struct pico_frame *f = pico_frame_alloc(61); - union pico_ll_addr src = { .pan = {.addr.data = {0x00,0x80,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_EXT } }; - union pico_ll_addr dst = { .pan = {.addr.data = {0x65,0x63,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_SHORT } }; - struct pico_device dev; - struct pico_frame *new = NULL; - - dev.mode = LL_MODE_IEEE802154; - memcpy(f->buffer, ipv6_frame, 61); - f->net_hdr = f->buffer; - f->net_len = 48; - f->transport_hdr = f->buffer + 48; - f->transport_len = 8; - f->len = 61; - f->dev = &dev; - f->src = src; - f->dst = dst; - - STARTING(); - pico_stack_init(); - - TRYING("Trying to compress an IPv6 frame from an example capture\n"); - new = pico_iphc_compress(f); - FAIL_UNLESS(new, test, "Should've at least returned a frame"); - OUTPUT(); - dbg_buffer(new->net_hdr, new->len); - RESULTS(); - FAIL_UNLESS(22 == new->len, test, "Should have 
returned length of 22, len = %d", new->len); - FAIL_UNLESS(0 == memcmp(new->net_hdr, comp_frame, 22), test, "Should've compressed the frame correctly"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_pico_iphc_decompress) -{ - int test = 0; - struct pico_frame *f = pico_frame_alloc(61); - union pico_ll_addr src = { .pan = {.addr.data = {0x00,0x80,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_EXT } }; - union pico_ll_addr dst = { .pan = {.addr.data = {0x65,0x63,0xe1,0x03,0x00,0x00,0x9d,0x00}, .mode = AM_6LOWPAN_SHORT } }; - struct pico_device dev; - struct pico_frame *new = NULL; - - dev.mode = LL_MODE_IEEE802154; - memcpy(f->buffer, comp_frame, 22); - f->net_hdr = f->buffer; - f->net_len = 22; - f->len = 22; - f->dev = &dev; - f->src = src; - f->dst = dst; - - STARTING(); - pico_stack_init(); - - TRYING("Trying to decompress a 6LoWPAN frame from an example capture\n"); - new = pico_iphc_decompress(f); - FAIL_UNLESS(new, test, "Should've at least returned a frame"); - OUTPUT(); - dbg_buffer(new->net_hdr, new->len); - RESULTS(); - FAIL_UNLESS(61 == new->len, test, "Should've returned a length of 61, len = %d", new->len); - dbg_buffer(new->net_hdr, new->len); - FAIL_UNLESS(0 == memcmp(new->net_hdr, ipv6_frame, new->len), test, "Should've decompressed the frame correctly"); - - - ENDING(test); -} -END_TEST -#endif - -static struct pico_frame *rx = NULL; -static uint8_t tx[1500]; -static int rx_called = 0; -static int tx_called = 0; -static uint8_t tx_len = 0; - -int pico_datalink_send(struct pico_frame *f) { - dbg("Datalink_send called!\n"); - if (++tx_called == 2) { - memcpy(tx, f->start, f->len); - OUTPUT(); - dbg("tx: "); - dbg_buffer(tx, tx_len); - } - - if (f->dev->eth) { - /* If device has stack with datalink-layer pass frame through it */ - if (LL_MODE_IEEE802154 == f->dev->mode) { - return pico_enqueue(pico_proto_6lowpan.q_out, f); - } else { - return pico_enqueue(pico_proto_ethernet.q_out, f); - } - } else { - /* non-ethernet: no post-processing needed 
*/ - return pico_sendto_dev(f); - } -} - -int32_t pico_network_receive(struct pico_frame *f) -{ - dbg("Network_receive called!\n"); - if (++rx_called == 2) - rx = pico_frame_copy(f); - - printf("RCVD frame at network layer \n"); - dbg_buffer(f->buffer, f->buffer_len); - return (int32_t)f->buffer_len; -} - -#define NUM_PING 1 - -#ifdef PICO_SUPPORT_IPV6 -static void cb_ping6(struct pico_icmp6_stats *s) -{ - char host[50]; - pico_ipv6_to_string(host, s->dst.addr); - if (s->err == 0) { - dbg("%lu bytes from %s: icmp_req=%lu ttl=%lu time=%lu ms\n", s->size, host, s->seq, - s->ttl, (long unsigned int)s->time); - if (s->seq >= NUM_PING) - exit(0); - } else { - dbg("PING %lu to %s: Error %d\n", s->seq, host, s->err); - exit(1); - } -} -#endif - -static void ping_abort_timer(pico_time now, void *_id) -{ - int *id = (int *) _id; - IGNORE_PARAMETER(now); - printf("Ping: aborting...\n"); - pico_icmp6_ping_abort(*id); -} - -/* Copy a string until the separator, - terminate it and return the next index, - or NULL if it encounters a EOS */ -static char *cpy_arg(char **dst, char *str) -{ - char *p, *nxt = NULL; - char *start = str; - char *end = start + strlen(start); - char sep = ','; - - p = str; - while (p) { - if ((*p == sep) || (*p == '\0')) { - *p = (char)0; - nxt = p + 1; - if ((*nxt == 0) || (nxt >= end)) - nxt = 0; - - printf("dup'ing %s\n", start); - *dst = strdup(start); - break; - } - - p++; - } - return nxt; -} - -static void app_ping(char *arg) -{ - char *dest = NULL; - char *next = NULL; - char *abort = NULL; - char *delay = NULL; - char *asize = NULL; - static int id; - int timeout = 0; - int size = 64; - - next = cpy_arg(&dest, arg); - if (!dest) { - fprintf(stderr, "ping needs the following format: ping:dst_addr:[size:[abort after N sec:[wait N sec before start]]]\n"); - exit(255); - } - if (next) { - next = cpy_arg(&asize, next); - size = atoi(asize); - if (size <= 0) { - size = 64; /* Default */ - } - } - - if (next) { - next = cpy_arg(&abort, next); - if 
(strlen(abort) > 0) { - printf("Got arg: '%s'\n", abort); - timeout = atoi(abort); - if (timeout < 0) { - fprintf(stderr, "ping needs the following format: ping:dst_addr:[size:[abort after N sec:[wait N sec before start]]]\n"); - exit(255); - } - printf("Aborting ping after %d seconds\n", timeout); - } - } - - if (next) { - next = cpy_arg(&delay, next); - if (strlen(delay) > 0) { - uint32_t initial_delay = (uint32_t) atoi(delay); - if (initial_delay > 0) { - printf("Initial delay: %u seconds\n", initial_delay); - initial_delay = PICO_TIME_MS() + (initial_delay * 1000); - while (PICO_TIME_MS() < initial_delay) { - pico_stack_tick(); - usleep(10000); - } - } - } - } - printf("Starting ping.\n"); - - id = pico_icmp6_ping(dest, NUM_PING, 1000, 10000, size, cb_ping6, NULL); - if (timeout > 0) { - printf("Adding abort timer after %d seconds for id %d\n", timeout, id); - if (!pico_timer_add((pico_time)(timeout * 1000), ping_abort_timer, &id)) { - printf("Failed to set ping abort timeout, aborting ping\n"); - ping_abort_timer((pico_time)0, &id); - exit(1); - } - } - - /* free copied args */ - if (dest) - free(dest); - - if (abort) - free(abort); -} - -START_TEST(tc_tx_rx) -{ - int test = 0; - struct pico_device *dev = NULL; - uint8_t n_id, n_area0, n_area1; - struct pico_ip6 myaddr, pan, netmask; - const char pan_addr[] = "2aaa:abcd::0"; - const char pan_netmask[] = "ffff:ffff:ffff:ffff::0"; - - const char *id = "3"; - const char *area0 = "1"; - const char *area1 = "0"; - char *dump = (char *)strdup("build/test/unit_6lowpan.pcap"); - char *arg = (char *)strdup("2aaa:abcd:0000:0000:0200:00aa:ab00:0001,1450,0,1,"); - - STARTING(); - - n_id = (uint8_t) atoi(id); - n_area0 = (uint8_t) atoi(area0); - n_area1 = (uint8_t) atoi(area1); - - /* Initialize picoTCP */ - pico_stack_init(); - - pico_string_to_ipv6(pan_addr, myaddr.addr); - pico_string_to_ipv6(pan_addr, pan.addr); - pico_string_to_ipv6(pan_netmask, netmask.addr); - myaddr.addr[8] = 0x02; - myaddr.addr[11] = 0xaa; - 
myaddr.addr[12] = 0xab; - myaddr.addr[15] = n_id; - - printf("%d:%d:%d\n", n_id, n_area0, n_area1); - dev = pico_radiotest_create(n_id, n_area0, n_area1, 1, (char *)dump); - if (!dev) { - exit(1); - } - - printf("Radiotest created.\n"); - - /* Add a routable link */ - pico_ipv6_link_add(dev, myaddr, netmask); - - /* Start ping-application */ - app_ping((char *)arg); - - printf("%s: launching PicoTCP loop\n", __FUNCTION__); - while(!rx) { - pico_stack_tick(); - usleep(2000); - } - OUTPUT(); - dbg("RX: "); - dbg_buffer(rx->start, rx->len); - RESULTS(); - tx[0] |= 0x60; - FAIL_UNLESS(0 == memcmp(rx->start, tx, rx->len), test, "Should've received exactly the same frame as was transmitted"); - - ENDING(test); -} -END_TEST - -static Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_compare_prefix = tcase_create("Unit test for compare_prefix"); - TCase *TCase_compare_ctx = tcase_create("Unit test for compare_ctx"); - TCase *TCase_ctx_lookup = tcase_create("Unit test for ctx_lookup"); - -/******************************************************************************* - * IPHC - ******************************************************************************/ -#ifdef PICO_6LOWPAN_IPHC_ENABLED - TCase *TCase_compressor_vtf = tcase_create("Unit test for compressor_vtf"); - TCase *TCase_decompressor_vtf = tcase_create("Unit test for decompressor_vtf"); - TCase *TCase_compressor_nh = tcase_create("Unit test for compressor_nh"); - TCase *TCase_decompressor_nh = tcase_create("Unit test for decompressor_nh"); - TCase *TCase_compressor_hl = tcase_create("Unit test for compressor_hl"); - TCase *TCase_decompressor_hl = tcase_create("Unit test for decompressor_hl"); - TCase *TCase_addr_comp_prefix = tcase_create("Unit test for addr_comp_prefix"); - TCase *TCase_addr_comp_mode = tcase_create("Unit test for addr_comp_mode"); - TCase *TCase_compressor_src = tcase_create("Unit test for compressor_src"); - TCase *TCase_decompressor_src = tcase_create("Unit 
test for decompressor_src"); - TCase *TCase_compressor_dst = tcase_create("Unit test for compressor_dst"); - TCase *TCase_decompressor_dst = tcase_create("Unit test for decompressor_dst"); - TCase *TCase_compressor_iphc = tcase_create("Unit test for compressor_iphc"); - TCase *TCase_decompressor_iphc = tcase_create("Unit test for decompressor_iphc"); - TCase *TCase_compressor_nhc_udp = tcase_create("Unit test for compressor_nhc_udp"); - TCase *TCase_decompressor_nhc_udp = tcase_create("Unit test for decompressor_nhc_udp"); - TCase *TCase_compressor_nhc_ext = tcase_create("Unit test for compressor_nhc_ext"); - TCase *TCase_decompressor_nhc_ext = tcase_create("Unit test for decompressor_nhc_ext"); - TCase *TCase_pico_iphc_compress = tcase_create("Unit test for pico_iphc_compress"); - TCase *TCase_pico_iphc_decompress = tcase_create("Unit test for pico_iphc_decompress"); -#endif - - TCase *TCase_tx_rx = tcase_create("Unit test for tx_rx"); - - tcase_add_test(TCase_compare_prefix, tc_compare_prefix); - suite_add_tcase(s, TCase_compare_prefix); - tcase_add_test(TCase_compare_ctx ,tc_compare_ctx); - suite_add_tcase(s, TCase_compare_ctx); - tcase_add_test(TCase_ctx_lookup ,tc_ctx_lookup); - suite_add_tcase(s, TCase_ctx_lookup); - -/******************************************************************************* - * IPHC - ******************************************************************************/ -#ifdef PICO_6LOWPAN_IPHC_ENABLED - tcase_add_test(TCase_compressor_vtf, tc_compressor_vtf); - suite_add_tcase(s, TCase_compressor_vtf); - tcase_add_test(TCase_decompressor_vtf, tc_decompressor_vtf); - suite_add_tcase(s, TCase_decompressor_vtf); - tcase_add_test(TCase_compressor_nh, tc_compressor_nh); - suite_add_tcase(s, TCase_compressor_nh); - tcase_add_test(TCase_decompressor_nh, tc_decompressor_nh); - suite_add_tcase(s, TCase_decompressor_nh); - tcase_add_test(TCase_compressor_hl, tc_compressor_hl); - suite_add_tcase(s, TCase_compressor_hl); - 
tcase_add_test(TCase_decompressor_hl, tc_decompressor_hl); - suite_add_tcase(s, TCase_decompressor_hl); - tcase_add_test(TCase_addr_comp_prefix, tc_addr_comp_prefix); - suite_add_tcase(s, TCase_addr_comp_prefix); - tcase_add_test(TCase_addr_comp_mode, tc_addr_comp_mode); - suite_add_tcase(s, TCase_addr_comp_mode); - tcase_add_test(TCase_compressor_src, tc_compressor_src); - suite_add_tcase(s, TCase_compressor_src); - tcase_add_test(TCase_decompressor_src, tc_decompressor_src); - suite_add_tcase(s, TCase_decompressor_src); - tcase_add_test(TCase_compressor_dst, tc_compressor_dst); - suite_add_tcase(s, TCase_compressor_dst); - tcase_add_test(TCase_decompressor_dst, tc_decompressor_dst); - suite_add_tcase(s, TCase_decompressor_dst); - tcase_add_test(TCase_compressor_iphc, tc_compressor_iphc); - suite_add_tcase(s, TCase_compressor_iphc); - tcase_add_test(TCase_decompressor_iphc, tc_decompressor_iphc); - suite_add_tcase(s, TCase_decompressor_iphc); - tcase_add_test(TCase_compressor_nhc_udp, tc_compressor_nhc_udp); - suite_add_tcase(s, TCase_compressor_nhc_udp); - tcase_add_test(TCase_decompressor_nhc_udp, tc_decompressor_nhc_udp); - suite_add_tcase(s, TCase_decompressor_nhc_udp); - tcase_add_test(TCase_compressor_nhc_ext, tc_compressor_nhc_ext); - suite_add_tcase(s, TCase_compressor_nhc_ext); - tcase_add_test(TCase_decompressor_nhc_ext, tc_decompressor_nhc_ext); - suite_add_tcase(s, TCase_decompressor_nhc_ext); - tcase_add_test(TCase_pico_iphc_compress, tc_pico_iphc_compress); - suite_add_tcase(s, TCase_pico_iphc_compress); - tcase_add_test(TCase_pico_iphc_decompress, tc_pico_iphc_decompress); - suite_add_tcase(s, TCase_pico_iphc_decompress); -#endif - - tcase_add_test(TCase_tx_rx ,tc_tx_rx); - suite_add_tcase(s, TCase_tx_rx); - - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git 
a/kernel/picotcp/test/unit/modunit_pico_802154.c b/kernel/picotcp/test/unit/modunit_pico_802154.c deleted file mode 100644 index e7d6c84..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_802154.c +++ /dev/null @@ -1,556 +0,0 @@ -#include "pico_addressing.h" -#include "pico_ipv6_nd.h" -#include "pico_stack.h" -#include "pico_frame.h" -#include "pico_ipv6.h" -#include "pico_6lowpan.h" -#include "modules/pico_802154.c" -#include "check.h" - -#include -#include -#include - -/******************************************************************************* - * MACROS - ******************************************************************************/ - -#define STARTING() \ - printf("*********************** STARTING %s ***\n", __func__); \ - fflush(stdout) -#define TRYING(s, ...) \ - printf("Trying %s: " s, __func__, ##__VA_ARGS__); \ - fflush(stdout) -#define CHECKING(i) \ - printf("Checking the results of test %2d in %s...", (i)++, \ - __func__); \ - fflush(stdout) -#define FAIL_UNLESS(cond, s, ...) \ - if ((cond)) { \ - printf(" SUCCESS\n"); \ - } else { \ - printf(" FAILED\n"); \ - } \ - fail_unless((cond), s, ##__VA_ARGS__) -#define FAIL_IF(cond, s, ...) \ - if (!(cond)) { \ - printf(" SUCCESS\n"); \ - } else { \ - printf(" FAILED\n"); \ - } \ - fail_if((cond), s, ##__VA_ARGS__) -#define ENDING(i) \ - printf("*********************** ENDING %s *** NUMBER OF TESTS: %d\n",\ - __func__, ((i)-1)); \ - fflush(stdout) -#define DBG(s, ...) 
\ - printf(s, ##__VA_ARGS__); \ - fflush(stdout) - -/******************************************************************************* - * HELPER FUNCTIONS - ******************************************************************************/ - -static void dbg_addr_ext(const char *msg, uint8_t a[SIZE_6LOWPAN_EXT]) -{ - DBG("%s: (64-bit extended address): ", msg); - DBG("%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n", - a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]); -} - -/******************************************************************************* - * ADDRESSES - ******************************************************************************/ - -START_TEST(tc_swap) -{ - int test = 1; - uint8_t a = 5; - uint8_t b = 1; - - STARTING(); - - // TEST 1 - TRYING("With a = %d and b = %d\n", a, b); - pico_swap(&a, &b); - CHECKING(test); - FAIL_IF(1 != a && b != 5, "Failed swapping numbers\n"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_802154_to_ietf) -{ - int test = 1; - struct pico_802154 a = { - .addr.data = { 1,2,3,4,5,6,7,8 }, - .mode = AM_6LOWPAN_EXT - }; - uint8_t buf[] = {8,7,6,5,4,3,2,1}; - - STARTING(); - - // TEST 1 - TRYING("Extended address mode\n"); - addr_802154_to_ietf(&a); - dbg_addr_ext("After", a.addr.data); - CHECKING(test); - FAIL_UNLESS(0 == memcmp(a.addr.data, buf, SIZE_6LOWPAN_EXT), - "Failed converting to IETF endianness\n"); - - // TEST 2 - TRYING("Short address mode\n"); - a.mode = AM_6LOWPAN_SHORT; - addr_802154_to_ietf(&a); - dbg_addr_ext("After", a.addr.data); - CHECKING(test); - FAIL_UNLESS(a.addr._short.addr == short_be(0x0708), - "Failed converting short to IETF endianness\n"); - - // TEST 3 - TRYING("Wrong address mode\n"); - a.mode = AM_6LOWPAN_NONE; - addr_802154_to_ietf(&a); - dbg_addr_ext("After", a.addr.data); - buf[0] = 7; - buf[1] = 8; - CHECKING(test); - FAIL_UNLESS(0 == memcmp(a.addr.data, buf, SIZE_6LOWPAN_EXT), - "Should've done nothing\n"); - - ENDING(test); - -} -END_TEST - -START_TEST(tc_802154_ll_src) -{ - int test = 1; - struct 
pico_ip6 ip = { - .addr = {0,0,0,0,0,0,0,0, 3,2,3,4,5,6,7,8} - }; - struct pico_ip6 ip2 = { - .addr = {0,0,0,0,0,0,0,0, 0,0,0,0xff,0xfe,0,0x12,0x34} - }; - struct pico_6lowpan_info info = { - .addr_short.addr = short_be(0x1234), - .addr_ext.addr = {3,2,3,4,5,6,7,8} - }; - struct pico_device dev; - struct pico_802154 addr; - struct pico_frame *f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->buffer; - - STARTING(); - - dev.eth = (struct pico_ethdev *)&info; - f->net_hdr = f->buffer; - f->dev = &dev; - dev.hostvars.lowpan_flags = PICO_6LP_FLAG_LOWPAN; - - // TEST 3 - TRYING("With an IPv6 address that is derived from MAC short address\n"); - info.addr_short.addr = short_be(0x1234); - hdr->src = ip2; - addr = addr_802154_ll_src(f); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_SHORT == addr.mode, - "Should've returned device's short address \n"); - CHECKING(test); - FAIL_UNLESS(short_be(0x1234) == addr.addr._short.addr, - "Should've copied the short address from the device\n"); - - // TEST 4 - TRYING("With an IPv6 address that is derived from MAC extended address\n"); - ip.addr[8] = 1; - hdr->src = ip; - addr = addr_802154_ll_src(f); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_EXT == addr.mode, - "Should've returned device's extended address\n"); - CHECKING(test); - FAIL_UNLESS(0 == memcmp(info.addr_ext.addr, addr.addr._ext.addr, SIZE_6LOWPAN_EXT), - "Should've copied device's extended address\n"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_802154_ll_dst) -{ - int test = 1; - struct pico_ip6 ip; - struct pico_ip6 local; - struct pico_ip6 local2; - struct pico_802154 addr; - struct pico_frame *f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - struct pico_ipv6_hdr *hdr = (struct pico_ipv6_hdr *)f->buffer; - struct pico_device dev; - uint8_t buf[] = {3,2,3,4,5,6,7,8}; - pico_string_to_ipv6("ff00:0:0:0:0:0:e801:100", ip.addr); - pico_string_to_ipv6("fe80:0:0:0:0102:0304:0506:0708", local.addr); - 
pico_string_to_ipv6("fe80:0:0:0:0:0ff:fe00:1234", local2.addr); - - STARTING(); - - f->net_hdr = f->buffer; - f->dev = &dev; - dev.hostvars.lowpan_flags = PICO_6LP_FLAG_LOWPAN; - - // TEST 1 - TRYING("With a MCAST IPv6 address, should return 0xFFFF\n"); - hdr->dst = ip; - addr = addr_802154_ll_dst(f); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_SHORT == addr.mode, - "Should've set address mode to SHORT\n"); - CHECKING(test); - FAIL_UNLESS(short_be(ADDR_802154_BCAST) == addr.addr._short.addr, - "Should've set address to BCAST\n"); - - // TEST 2 - TRYING("With a link local IPv6 address derived from an extended L2 address\n"); - hdr->dst = local; - addr = addr_802154_ll_dst(f); - dbg_addr_ext("After:", addr.addr._ext.addr); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_EXT == addr.mode, - "Should've set address mode to EXTENDED\n"); - CHECKING(test); - FAIL_UNLESS(0 == memcmp(buf, addr.addr._ext.addr, SIZE_6LOWPAN_EXT), - "Should've copied the extended address from the IP address\n"); - - // TEST 3 - TRYING("With a link local IPv6 address derived from a short L2 address\n"); - hdr->dst = local2; - addr = addr_802154_ll_dst(f); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_SHORT == addr.mode, - "Should've set address mode to SHORT\n"); - CHECKING(test); - FAIL_UNLESS(short_be(0x1234) == addr.addr._short.addr, - "Should've copied the short address from the IP address\n"); - - /* TODO: Test getting address from neighbour table */ - - ENDING(test); -} -END_TEST - -/******************************************************************************* - * FRAME - ******************************************************************************/ - -/* Frame (123 bytes) */ -static uint8_t pkt[] = { -0x41, 0xcc, 0xa6, 0xff, 0xff, 0x8a, /* A..... */ -0x18, 0x00, 0xff, 0xff, 0xda, 0x1c, 0x00, 0x88, /* ........ */ -0x18, 0x00, 0xff, 0xff, 0xda, 0x1c, 0x00, 0xc1, /* ........ */ -0x09, 0x00, 0x02, 0x42, 0xfa, 0x40, 0x04, 0x01, /* ...B.@.. 
*/ -0xf0, 0xb1, 0x01, 0x06, 0x6f, 0xaf, 0x48, 0x65, /* ....o.He */ -0x6c, 0x6c, 0x6f, 0x20, 0x30, 0x30, 0x36, 0x20, /* llo 006 */ -0x30, 0x78, 0x46, 0x46, 0x33, 0x43, 0x0a, 0x00, /* 0xFF3C.. */ -0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, /* ........ */ -0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, /* ...... ! */ -0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, /* "#$%&'() */ -0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, /* *+,-./01 */ -0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, /* 23456789 */ -0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, /* :;<=>?@A */ -0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, /* BCDEFGHI */ -0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, /* JKLMNOPQ */ -0x52, 0x53, 0x54, 0x68, 0x79 /* RSThy */ -}; - -START_TEST(tc_dst_am) -{ - int test = 1; - int ret = 0; - - STARTING(); - - // TEST 1 - TRYING("Trying to determine AM of destination addr from buffer \n"); - ret = dst_am((struct pico_802154_hdr *)pkt); - DBG("ret = %d\n", ret); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_EXT == ret, - "Should've returned the AM of an extended address\n"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_src_am) -{ - int test = 1; - int ret = 0; - - STARTING(); - - // TEST 1 - TRYING("Trying to determine AM of source addr from buffer \n"); - ret = src_am((struct pico_802154_hdr *)pkt); - DBG("ret = %d\n", ret); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_EXT == ret, - "Should've returned the AM of an extended address\n"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_802154_hdr_len) -{ - int test = 1; - int ret = 0; - - STARTING(); - - // TEST 1 - TRYING("Trying to determine length of the header from buffer\n"); - ret = frame_802154_hdr_len((struct pico_802154_hdr *)pkt); - DBG("ret = %d\n", ret); - CHECKING(test); - FAIL_UNLESS(21 == ret, - "Should've returned the correct length of the header\n"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_802154_src) -{ - int test = 1; - struct pico_802154_hdr *hdr; - struct pico_802154 addr; - 
uint8_t src[] = {0x00, 0x1C, 0xDA, 0xFF, 0xFF, 0x00, 0x18, 0x88}; - STARTING(); - - hdr = (struct pico_802154_hdr *)pkt; - - // TEST 1 - TRYING("To receive the source address from a mapped buffer\n"); - addr = frame_802154_src(hdr); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_EXT == addr.mode, - "Should've returned an extended address\n"); - CHECKING(test); - FAIL_UNLESS(0 == memcmp(src, addr.addr._ext.addr, SIZE_6LOWPAN_EXT), - "Should've copied the extended source address\n"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_802154_dst) -{ - int test = 1; - struct pico_802154_hdr *hdr; - struct pico_802154 addr; - uint8_t dst[] = {0x00, 0x1C, 0xDA, 0xFF, 0xFF, 0x00, 0x18, 0x8a}; - - STARTING(); - hdr = (struct pico_802154_hdr *)pkt; - - // TEST 1 - TRYING("To receive the source address from a mapped buffer\n"); - addr = frame_802154_dst(hdr); - CHECKING(test); - FAIL_UNLESS(AM_6LOWPAN_EXT == addr.mode, - "Should've returned an extended address\n"); - CHECKING(test); - FAIL_UNLESS(0 == memcmp(dst, addr.addr._ext.addr, SIZE_6LOWPAN_EXT), - "Should've copied the extended source address\n"); - - ENDING(test); -} -END_TEST - -START_TEST(tc_802154_format) -{ - int test = 1; - struct pico_802154 src = { - .addr.data = {0x00, 0x1C, 0xDA, 0xFF, 0xFF, 0x00, 0x18, 0x88}, - .mode = AM_6LOWPAN_EXT - }; - struct pico_802154 dst = { - .addr.data = {0x00, 0x1C, 0xDA, 0xFF, 0xFF, 0x00, 0x18, 0x8a}, - .mode = AM_6LOWPAN_EXT - }; - struct pico_6lowpan_short pan = { .addr = short_be(0xffff) }; - uint8_t buf[127] = {0}; - int i = 0; - - STARTING(); - - // TEST 1 - TRYING("To format a frame like sample capture\n"); - frame_802154_format(buf, 166, FCF_INTRA_PAN, FCF_NO_ACK_REQ, - FCF_NO_SEC, pan, src, dst); - printf("Buffer:"); - for (i = 0; i < 21; i++) { - if (i % 8 != 0) - printf("%02x ", buf[i]); - else { - printf("\n%02x ", buf[i]); - } - } - printf("\n"); - CHECKING(test); - FAIL_UNLESS(21 == frame_802154_hdr_len((struct pico_802154_hdr *)buf), - "Failed to correctly set the frame 
header, the length isn't right\n"); - CHECKING(test); - FAIL_UNLESS(0 == memcmp(pkt, buf, 21), - "Failed to correctly format IEEE802.15.4 frame\n"); - - ENDING(test); -} -END_TEST -START_TEST(tc_802154_process_out) -{ - int i = 0; - int ret = 0; - int test = 1; - struct pico_802154 src = { - .addr.data = {3,2,3,4,5,6,7,8}, - .mode = AM_6LOWPAN_EXT - }; - struct pico_802154 dst = { - .addr.data = {0x00, 0x1C, 0xDA, 0xFF, 0xFF, 0x00, 0x18, 0x8a}, - .mode = AM_6LOWPAN_EXT - }; - struct pico_frame *f = pico_frame_alloc(0); - struct pico_6lowpan_info info = { - .addr_short.addr = short_be(0x1234), - .addr_ext.addr = {3,2,3,4,5,6,7,8}, - .pan_id.addr = short_be(0x1234) - }; - struct pico_device dev; - uint8_t buf[] = {0x41,0xcc,0x00,0x34,0x12,0x8a,0x18,0x00, - 0xff,0xff,0xda,0x1c,0x00,0x08,0x07,0x06, - 0x05,0x04,0x03,0x02,0x03}; - dev.eth = (struct pico_ethdev *)&info; - dev.q_out = PICO_ZALLOC(sizeof(struct pico_queue)); - f->dev = &dev; - dev.hostvars.lowpan_flags = PICO_6LP_FLAG_LOWPAN; - - STARTING(); - pico_stack_init(); - - // TEST 1 - TRYING("Trying with bare frame\n"); - f->src.pan = src; - f->dst.pan = dst; - ret = pico_802154_process_out(f); - printf("Buffer:"); - for (i = 0; i < 21; i++) { - if (i % 8 != 0) - printf("%02x ", f->datalink_hdr[i]); - else { - printf("\n%02x ", f->datalink_hdr[i]); - } - } - printf("\n"); - CHECKING(test); - FAIL_UNLESS(0 < ret, "Shouldn't have returned an error\n"); - CHECKING(test); - FAIL_UNLESS(0 == memcmp(buf, f->datalink_hdr, 21), - "Frame isn't correctly formatted\n"); - - pico_frame_discard(f); - - ENDING(test); -} -END_TEST -START_TEST(tc_802154_process_in) -{ - int ret = 0; - int test = 1; - struct pico_802154 src = { - .addr.data = {3,2,3,4,5,6,7,8}, - .mode = AM_6LOWPAN_EXT - }; - struct pico_802154 dst = { - .addr.data = {0x00, 0x1C, 0xDA, 0xFF, 0xFF, 0x00, 0x18, 0x8a}, - .mode = AM_6LOWPAN_EXT - }; - struct pico_frame *f = pico_frame_alloc(22); - uint8_t buf[] = {0x41,0xcc,0x00,0x34,0x12,0x8a,0x18,0x00, - 
0xff,0xff,0xda,0x1c,0x00,0x08,0x07,0x06, - 0x05,0x04,0x03,0x02,0x03,0x60}; - memcpy(f->buffer, buf, 22); - f->src.pan = src; - f->dst.pan = dst; - - STARTING(); - pico_stack_init(); - - TRYING("Apply processing function on predefined buffer\n"); - ret = pico_802154_process_in(f); - CHECKING(test); - FAIL_UNLESS(0 < ret, "Should not return failure\n"); -} -END_TEST -static Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_swap = tcase_create("Unit test for pico_swap"); - TCase *TCase_802154_to_ietf = tcase_create("Unit test for 802154_to_ietf"); - TCase *TCase_802154_ll_src = tcase_create("Unit test for 802154_ll_src"); - TCase *TCase_802154_ll_dst = tcase_create("Unit test for 802154_ll_dst"); - TCase *TCase_802154_hdr_len = tcase_create("Unit test for 802154_hdr_len"); - TCase *TCase_src_am = tcase_create("Unit test for src_am"); - TCase *TCase_dst_am = tcase_create("Unit test for dst_am"); - TCase *TCase_802154_src = tcase_create("Unit test for 802154_src"); - TCase *TCase_802154_dst = tcase_create("Unit test for 802154_dst"); - TCase *TCase_802154_format = tcase_create("Unit test for 802154_format"); - TCase *TCase_802154_process_out = tcase_create("Unit test for 802154_process_out"); - TCase *TCase_802154_process_in = tcase_create("Unit test for 802154_process_in"); - -/******************************************************************************* - * ADDRESSES - ******************************************************************************/ - tcase_add_test(TCase_swap, tc_swap); - suite_add_tcase(s, TCase_swap); - tcase_add_test(TCase_802154_to_ietf, tc_802154_to_ietf); - suite_add_tcase(s, TCase_802154_to_ietf); - tcase_add_test(TCase_802154_ll_src, tc_802154_ll_src); - suite_add_tcase(s, TCase_802154_ll_src); - tcase_add_test(TCase_802154_ll_dst, tc_802154_ll_dst); - suite_add_tcase(s, TCase_802154_ll_dst); - -/******************************************************************************* - * FRAME - 
******************************************************************************/ - tcase_add_test(TCase_802154_hdr_len, tc_802154_hdr_len); - suite_add_tcase(s, TCase_802154_hdr_len); - tcase_add_test(TCase_src_am, tc_src_am); - suite_add_tcase(s, TCase_src_am); - tcase_add_test(TCase_dst_am, tc_dst_am); - suite_add_tcase(s, TCase_dst_am); - tcase_add_test(TCase_802154_src, tc_802154_src); - suite_add_tcase(s, TCase_802154_src); - tcase_add_test(TCase_802154_dst, tc_802154_dst); - suite_add_tcase(s, TCase_802154_dst); - tcase_add_test(TCase_802154_format, tc_802154_format); - suite_add_tcase(s, TCase_802154_format); - tcase_add_test(TCase_802154_process_out, tc_802154_process_out); - suite_add_tcase(s, TCase_802154_process_out); - tcase_add_test(TCase_802154_process_in, tc_802154_process_in); - suite_add_tcase(s, TCase_802154_process_in); - - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_aodv.c b/kernel/picotcp/test/unit/modunit_pico_aodv.c deleted file mode 100644 index 03bda6f..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_aodv.c +++ /dev/null @@ -1,545 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "modules/pico_aodv.c" -#include "check.h" - - -Suite *pico_suite(void); - -START_TEST(tc_aodv_node_compare) -{ - struct pico_aodv_node a, b; - a.dest.ip4.addr = long_be(1); - b.dest.ip4.addr = long_be(2); - - fail_if(aodv_node_compare(&a, &b) >= 0); - a.dest.ip4.addr = long_be(3); - fail_if(aodv_node_compare(&a, &b) <= 0); - b.dest.ip4.addr = long_be(3); - fail_if(aodv_node_compare(&a, &b) != 0); -} -END_TEST - -START_TEST(tc_aodv_dev_cmp) -{ - struct pico_device a, b; - a.hash = 1; - b.hash = 2; - fail_if(aodv_dev_cmp(&a, &b) >= 0); - a.hash = 3; - fail_if(aodv_dev_cmp(&a, &b) <= 0); - b.hash = 3; - 
fail_if(aodv_dev_cmp(&a, &b) != 0); - -} -END_TEST - -START_TEST(tc_get_node_by_addr) -{ - struct pico_aodv_node a; - union pico_address test; - a.dest.ip4.addr = long_be(10); - test.ip4.addr = long_be(10); - - pico_tree_insert(&aodv_nodes, &a); - - fail_if(get_node_by_addr(&test) != &a); - pico_tree_delete(&aodv_nodes, &a); - fail_if(get_node_by_addr(&test) != NULL); - -} -END_TEST - -static int set_bcast_link_called = 0; -void pico_ipv4_route_set_bcast_link(struct pico_ipv4_link *link) -{ - IGNORE_PARAMETER(link); - set_bcast_link_called++; -} - -START_TEST(tc_pico_aodv_set_dev) -{ - struct pico_device *dev = NULL; - pico_aodv_set_dev(dev); - fail_if(set_bcast_link_called != 1); -} -END_TEST - -START_TEST(tc_aodv_peer_refresh) -{ - /* TODO: test this: static int aodv_peer_refresh(struct pico_aodv_node *node, uint32_t seq) */ - struct pico_aodv_node node; - memset(&node, 0, sizeof(node)); - node.dseq = 0xFFFF; - fail_if(aodv_peer_refresh(&node, 10) != 0); /* should succeed, because SYNC flag is not yet set... */ - fail_if((node.flags & PICO_AODV_NODE_SYNC) == 0); /* Flag should be set after last call... */ - fail_if(aodv_peer_refresh(&node, 5) == 0); /* should FAIL, because seq number is lower... */ - fail_if(aodv_peer_refresh(&node, 10) == 0); /* should FAIL, because seq number is still the same... */ - fail_if(aodv_peer_refresh(&node, 15) != 0); /* should succeed, because seq number is now bigger... 
*/ - fail_if(node.dseq != 15); -} -END_TEST - -static int called_route_add = 0; -static uint32_t route_add_gw = 0u; -static int route_add_metric = 0; -int pico_ipv4_route_add(struct pico_ip4 address, struct pico_ip4 netmask, struct pico_ip4 gateway, int metric, struct pico_ipv4_link *link) -{ - IGNORE_PARAMETER(link); - IGNORE_PARAMETER(netmask); - IGNORE_PARAMETER(address); - called_route_add++; - route_add_gw = gateway.addr; - route_add_metric = metric; - return 0; -} - -START_TEST(tc_aodv_elect_route) -{ - struct pico_aodv_node node; - union pico_address gateway; - memset(&node, 0, sizeof(node)); - gateway.ip4.addr = 0x55555555; - - called_route_add = 0; - aodv_elect_route(&node, NULL, 150, NULL); - fail_if(called_route_add != 1); /* Not active, should succeed */ - fail_if(route_add_gw != 0u); - fail_if(route_add_metric != 1); - - called_route_add = 0; - route_add_metric = 0; - route_add_gw = 0u; - node.flags = PICO_AODV_NODE_ROUTE_DOWN | PICO_AODV_NODE_ROUTE_UP; - aodv_elect_route(&node, &gateway, 150, NULL); - fail_if(called_route_add != 0); /* Already active, existing metric is lower */ - - called_route_add = 0; - route_add_metric = 0; - route_add_gw = 0u; - node.metric = 22; - aodv_elect_route(&node, &gateway, 15, NULL); - fail_if(called_route_add != 1); /* Already active, existing metric is higher */ - fail_if(route_add_metric != 16); - fail_if(route_add_gw != 0x55555555); - -} -END_TEST - -START_TEST(tc_aodv_peer_new) -{ - union pico_address addr; - struct pico_aodv_node *new; - addr.ip4.addr = 0x44444444; - new = aodv_peer_new(&addr); - fail_if(!new); - fail_if(!get_node_by_addr(&addr)); - pico_set_mm_failure(1); - new = aodv_peer_new(&addr); - fail_if(new); -} -END_TEST -START_TEST(tc_aodv_peer_eval) -{ - union pico_address addr; - struct pico_aodv_node *node = NULL; - /* Case 0: Creation */ - addr.ip4.addr = 0x11224433; - node = aodv_peer_eval(&addr, 0, 0); - fail_if(!node); - fail_if((node->flags & PICO_AODV_NODE_SYNC) != 0); /* Not synced! 
*/ - - /* Case 1: retrieve, unsynced */ - node->metric = 42; - node = aodv_peer_eval(&addr, 0, 0); /* Should get existing node! */ - fail_if(!node); - fail_if(node->metric != 42); - fail_if((node->flags & PICO_AODV_NODE_SYNC) != 0); /* Not synced! */ - - - /* Case 2: new node, invalid allocation */ - addr.ip4.addr = 0x11224455; - pico_set_mm_failure(1); - node = aodv_peer_eval(&addr, long_be(10), 1); - fail_if(node); - - /* Case 3: existing node, setting the new sequence */ - addr.ip4.addr = 0x11224433; - node = aodv_peer_eval(&addr, long_be(10), 1); /* Should get existing node! */ - fail_if(node->metric != 42); - fail_if((node->flags & PICO_AODV_NODE_SYNC) == 0); - fail_if(node->dseq != 10); -} -END_TEST - -START_TEST(tc_aodv_lifetime) -{ - struct pico_aodv_node node; - pico_time now = PICO_TIME_MS(); - memset(&node, 0, sizeof(node)); - fail_if(aodv_lifetime(&node) == 0); - fail_if(node.last_seen < now); - node.last_seen = now - AODV_ACTIVE_ROUTE_TIMEOUT; - fail_if(aodv_lifetime(&node) != 0); -} -END_TEST - -static uint8_t sent_pkt_type = 0xFF; -static uint32_t dest_addr = 0; -static int pico_socket_sendto_called = 0; -static int pico_socket_sendto_extended_called = 0; -uint32_t expected_dseq = 0; -int pico_socket_sendto(struct pico_socket *s, const void *buf, const int len, void *dst, uint16_t remote_port) -{ - uint8_t *pkt = (uint8_t *)(uintptr_t)buf; - printf("Sendto called!\n"); - pico_socket_sendto_called++; - fail_if(remote_port != short_be(PICO_AODV_PORT)); - fail_if (s != aodv_socket); - fail_if(pkt[0] > 4); - fail_if(pkt[0] < 1); - sent_pkt_type = pkt[0]; - dest_addr = ((union pico_address *)dst)->ip4.addr; - if (sent_pkt_type == AODV_TYPE_RREQ) { - /* struct pico_aodv_rreq *req = (struct pico_aodv_rreq *)(uintptr_t)buf; */ - fail_if(len != sizeof(struct pico_aodv_rreq)); - } - else if (sent_pkt_type == AODV_TYPE_RREP) { - struct pico_aodv_rrep *rep = (struct pico_aodv_rrep *)(uintptr_t)buf; - fail_if(len != sizeof(struct pico_aodv_rrep)); - 
fail_if(rep->dest != 0x11111111); - fail_if(rep->orig != 0x22222222); - printf("rep->dseq= %08x, exp: %08x\n", rep->dseq, expected_dseq); - fail_if(rep->dseq != expected_dseq); - } - - return len; -} - -int pico_socket_sendto_extended(struct pico_socket *s, const void *buf, const int len, - void *dst, uint16_t remote_port, struct pico_msginfo *msginfo) -{ - IGNORE_PARAMETER(msginfo); - pico_socket_sendto_extended_called++; - return pico_socket_sendto(s, buf, len, dst, remote_port); -} - -START_TEST(tc_aodv_send_reply) -{ - struct pico_aodv_node node; - struct pico_aodv_rreq req; - struct pico_msginfo info; - union pico_address addr; - addr.ip4.addr = 0x22222222; - memset(&node, 0, sizeof(node)); - memset(&req, 0, sizeof(req)); - memset(&info, 0, sizeof(info)); - - req.dest = 0x11111111; - req.orig = addr.ip4.addr; - req.dseq = 99; - - aodv_send_reply(&node, &req, 1, &info); - fail_if(pico_socket_sendto_called != 0); /* Call should have no effect, due to non-existing origin node */ - - /* Creating origin... 
*/ - fail_if(aodv_peer_new(&addr) == NULL); - aodv_send_reply(&node, &req, 0, &info); - fail_if(pico_socket_sendto_called != 0); /* Call should have no effect, node non-local, non sync'd */ - - expected_dseq = long_be(pico_aodv_local_id + 1); - aodv_send_reply(&node, &req, 1, &info); - fail_if(pico_socket_sendto_called != 1); /* Call should succeed */ - pico_socket_sendto_called = 0; - - node.flags = PICO_AODV_NODE_SYNC; - node.dseq = 42; - expected_dseq = long_be(42); - aodv_send_reply(&node, &req, 0, &info); - fail_if(pico_socket_sendto_called != 1); /* Call should succeed */ - pico_socket_sendto_called = 0; -} -END_TEST - -static struct pico_ipv4_link global_link; -struct pico_ipv4_link *pico_ipv4_link_by_dev(struct pico_device *dev) -{ - IGNORE_PARAMETER(dev); - if (!global_link.address.addr) - return NULL; - - printf("Setting link!\n"); - return &global_link; -} - -static struct pico_device global_dev; -static int link_find_success = 0; -struct pico_device *pico_ipv4_link_find(struct pico_ip4 *ip4) -{ - IGNORE_PARAMETER(ip4); - if (link_find_success) - return &global_dev; - - return NULL; -} - -static int timer_set = 0; -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - IGNORE_PARAMETER(arg); - IGNORE_PARAMETER(timer); - IGNORE_PARAMETER(expire); - printf("Timer set!\n"); - timer_set++; - return (uint32_t ) 0x99999999; - -} - -START_TEST(tc_aodv_send_req) -{ - struct pico_aodv_node node; - struct pico_device d; - aodv_socket = NULL; - - memset(&node, 0, sizeof(node)); - node.flags = PICO_AODV_NODE_ROUTE_DOWN | PICO_AODV_NODE_ROUTE_UP; - fail_if(aodv_send_req(&node) != 0); /* Should fail: node already active */ - fail_if(pico_socket_sendto_called != 0); - fail_if(pico_socket_sendto_extended_called != 0); - - node.flags = 0; - fail_if(aodv_send_req(&node) != 0); /* Should fail: no devices in tree */ - fail_if(pico_socket_sendto_called != 0); - fail_if(pico_socket_sendto_extended_called != 0); - - 
pico_tree_insert(&aodv_devices, &d); - fail_if(aodv_send_req(&node) != -1); /* Should fail: aodv_socket == NULL */ - fail_if(pico_err != PICO_ERR_EINVAL); - fail_if(pico_socket_sendto_called != 0); - fail_if(pico_socket_sendto_extended_called != 0); - - - /* No valid link, timer is set, call does not send packets */ - aodv_socket = (struct pico_socket*) 1; - global_link.address.addr = 0; - fail_if(aodv_send_req(&node) != 0); - fail_if(pico_socket_sendto_called != 0); - fail_if(pico_socket_sendto_extended_called != 0); - fail_if(timer_set != 1); - timer_set = 0; - - - /* One valid link, timer is set, one packet is sent */ - global_link.address.addr = 0xFEFEFEFE; - fail_if(aodv_send_req(&node) != 1); - fail_if(pico_socket_sendto_called != 1); - fail_if(pico_socket_sendto_extended_called != 1); - fail_if(timer_set != 1); - pico_socket_sendto_called = 0; - pico_socket_sendto_extended_called = 0; - timer_set = 0; - -} -END_TEST - -START_TEST(tc_aodv_reverse_path_discover) -{ - struct pico_aodv_node node; - memset(&node, 0, sizeof(node)); - aodv_reverse_path_discover(0, &node); -} -END_TEST - -START_TEST(tc_aodv_recv_valid_rreq) -{ - struct pico_aodv_node node; - struct pico_aodv_rreq req; - struct pico_msginfo info; - union pico_address addr; - memset(&node, 0, sizeof(node)); - memset(&req, 0, sizeof(req)); - memset(&info, 0, sizeof(info)); - - addr.ip4.addr = 0x22222222; - - link_find_success = 0; - aodv_recv_valid_rreq(&node, &req, &info); - fail_if(pico_socket_sendto_called > 0); - - /* link not local, but active node, set to send reply, no timer */ - link_find_success = 0; - fail_if(aodv_peer_new(&addr) == NULL); - global_link.address.addr = 0x44444444; - req.orig = addr.ip4.addr; - req.dest = 0x11111111; - node.flags = PICO_AODV_NODE_SYNC | PICO_AODV_NODE_ROUTE_UP | PICO_AODV_NODE_ROUTE_DOWN; - node.dseq = 42; - expected_dseq = long_be(42); - aodv_recv_valid_rreq(&node, &req, &info); - fail_if(pico_socket_sendto_called < 1); - fail_if(timer_set > 0); - 
pico_socket_sendto_called = 0; - - /* link local, active node. Full send + set timer. */ - link_find_success = 1; - expected_dseq = long_be(pico_aodv_local_id + 1); - aodv_peer_new(&addr); - aodv_recv_valid_rreq(&node, &req, &info); - fail_if(pico_socket_sendto_called < 1); - fail_if(timer_set < 1); - -} -END_TEST - -START_TEST(tc_aodv_parse_rreq) -{ - /* TODO: test this: static void aodv_parse_rreq(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) */ -} -END_TEST - -START_TEST(tc_aodv_parse_rrep) -{ - /* TODO: test this: static void aodv_parse_rrep(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) */ -} -END_TEST - -START_TEST(tc_aodv_parse_rerr) -{ - /* TODO: test this: static void aodv_parse_rerr(union pico_address *from, uint8_t *buf, int len, struct pico_msginfo *msginfo) */ -} -END_TEST - -START_TEST(tc_aodv_parse_rack) -{ - aodv_parse_rack(NULL, NULL, 0, NULL); -} -END_TEST - -START_TEST(tc_pico_aodv_parse) -{ -} -END_TEST - -START_TEST(tc_pico_aodv_socket_callback) -{ - /* TODO: test this: static void pico_aodv_socket_callback(uint16_t ev, struct pico_socket *s) */ -} -END_TEST - -START_TEST(tc_aodv_make_rreq) -{ - /* TODO: test this: static void aodv_make_rreq(struct pico_aodv_node *node, struct pico_aodv_rreq *req) */ -} -END_TEST - -START_TEST(tc_aodv_retrans_rreq) -{ - /* TODO: test this: static void aodv_retrans_rreq(pico_time now, void *arg) */ -} -END_TEST - -START_TEST(tc_pico_aodv_expired) -{ - /* TODO: test this: static void pico_aodv_expired(struct pico_aodv_node *node) */ -} -END_TEST - -START_TEST(tc_pico_aodv_collector) -{ - /* TODO: test this: static void pico_aodv_collector(pico_time now, void *arg) */ -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_aodv_node_compare = tcase_create("Unit test for aodv_node_compare"); - TCase *TCase_aodv_dev_cmp = tcase_create("Unit test for aodv_dev_cmp"); - TCase *TCase_get_node_by_addr = 
tcase_create("Unit test for get_node_by_addr"); - TCase *TCase_pico_aodv_set_dev = tcase_create("Unit test for pico_aodv_set_dev"); - TCase *TCase_aodv_peer_refresh = tcase_create("Unit test for aodv_peer_refresh"); - TCase *TCase_aodv_elect_route = tcase_create("Unit test for aodv_elect_route"); - TCase *TCase_aodv_peer_new = tcase_create("Unit test for aodv_peer_new"); - TCase *TCase_aodv_peer_eval = tcase_create("Unit test for aodv_peer_eval"); - TCase *TCase_aodv_lifetime = tcase_create("Unit test for aodv_lifetime"); - TCase *TCase_aodv_send_reply = tcase_create("Unit test for aodv_send_reply"); - TCase *TCase_aodv_send_req = tcase_create("Unit test for aodv_send_req"); - TCase *TCase_aodv_reverse_path_discover = tcase_create("Unit test for aodv_reverse_path_discover"); - TCase *TCase_aodv_recv_valid_rreq = tcase_create("Unit test for aodv_recv_valid_rreq"); - TCase *TCase_aodv_parse_rreq = tcase_create("Unit test for aodv_parse_rreq"); - TCase *TCase_aodv_parse_rrep = tcase_create("Unit test for aodv_parse_rrep"); - TCase *TCase_aodv_parse_rerr = tcase_create("Unit test for aodv_parse_rerr"); - TCase *TCase_aodv_parse_rack = tcase_create("Unit test for aodv_parse_rack"); - TCase *TCase_pico_aodv_parse = tcase_create("Unit test for pico_aodv_parse"); - TCase *TCase_pico_aodv_socket_callback = tcase_create("Unit test for pico_aodv_socket_callback"); - TCase *TCase_aodv_make_rreq = tcase_create("Unit test for aodv_make_rreq"); - TCase *TCase_aodv_retrans_rreq = tcase_create("Unit test for aodv_retrans_rreq"); - TCase *TCase_pico_aodv_expired = tcase_create("Unit test for pico_aodv_expired"); - TCase *TCase_pico_aodv_collector = tcase_create("Unit test for pico_aodv_collector"); - - - tcase_add_test(TCase_aodv_node_compare, tc_aodv_node_compare); - suite_add_tcase(s, TCase_aodv_node_compare); - tcase_add_test(TCase_aodv_dev_cmp, tc_aodv_dev_cmp); - suite_add_tcase(s, TCase_aodv_dev_cmp); - tcase_add_test(TCase_get_node_by_addr, tc_get_node_by_addr); - 
suite_add_tcase(s, TCase_get_node_by_addr); - tcase_add_test(TCase_pico_aodv_set_dev, tc_pico_aodv_set_dev); - suite_add_tcase(s, TCase_pico_aodv_set_dev); - tcase_add_test(TCase_aodv_peer_refresh, tc_aodv_peer_refresh); - suite_add_tcase(s, TCase_aodv_peer_refresh); - tcase_add_test(TCase_aodv_elect_route, tc_aodv_elect_route); - suite_add_tcase(s, TCase_aodv_elect_route); - tcase_add_test(TCase_aodv_peer_new, tc_aodv_peer_new); - suite_add_tcase(s, TCase_aodv_peer_new); - tcase_add_test(TCase_aodv_peer_eval, tc_aodv_peer_eval); - suite_add_tcase(s, TCase_aodv_peer_eval); - tcase_add_test(TCase_aodv_lifetime, tc_aodv_lifetime); - suite_add_tcase(s, TCase_aodv_lifetime); - tcase_add_test(TCase_aodv_send_reply, tc_aodv_send_reply); - suite_add_tcase(s, TCase_aodv_send_reply); - tcase_add_test(TCase_aodv_send_req, tc_aodv_send_req); - suite_add_tcase(s, TCase_aodv_send_req); - tcase_add_test(TCase_aodv_reverse_path_discover, tc_aodv_reverse_path_discover); - suite_add_tcase(s, TCase_aodv_reverse_path_discover); - tcase_add_test(TCase_aodv_recv_valid_rreq, tc_aodv_recv_valid_rreq); - suite_add_tcase(s, TCase_aodv_recv_valid_rreq); - tcase_add_test(TCase_aodv_parse_rreq, tc_aodv_parse_rreq); - suite_add_tcase(s, TCase_aodv_parse_rreq); - tcase_add_test(TCase_aodv_parse_rrep, tc_aodv_parse_rrep); - suite_add_tcase(s, TCase_aodv_parse_rrep); - tcase_add_test(TCase_aodv_parse_rerr, tc_aodv_parse_rerr); - suite_add_tcase(s, TCase_aodv_parse_rerr); - tcase_add_test(TCase_aodv_parse_rack, tc_aodv_parse_rack); - suite_add_tcase(s, TCase_aodv_parse_rack); - tcase_add_test(TCase_pico_aodv_parse, tc_pico_aodv_parse); - suite_add_tcase(s, TCase_pico_aodv_parse); - tcase_add_test(TCase_pico_aodv_socket_callback, tc_pico_aodv_socket_callback); - suite_add_tcase(s, TCase_pico_aodv_socket_callback); - tcase_add_test(TCase_aodv_make_rreq, tc_aodv_make_rreq); - suite_add_tcase(s, TCase_aodv_make_rreq); - tcase_add_test(TCase_aodv_retrans_rreq, tc_aodv_retrans_rreq); - 
suite_add_tcase(s, TCase_aodv_retrans_rreq); - tcase_add_test(TCase_pico_aodv_expired, tc_pico_aodv_expired); - suite_add_tcase(s, TCase_pico_aodv_expired); - tcase_add_test(TCase_pico_aodv_collector, tc_pico_aodv_collector); - suite_add_tcase(s, TCase_pico_aodv_collector); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_dev_loop.c b/kernel/picotcp/test/unit/modunit_pico_dev_loop.c deleted file mode 100644 index 5bec073..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_dev_loop.c +++ /dev/null @@ -1,97 +0,0 @@ -#include "modules/pico_dev_loop.c" -#include "check.h" -static int called = 0; -static int fail = 0; - -Suite *pico_suite(void); - -int pico_device_init(struct pico_device __attribute__((unused)) *dev, const char __attribute__((unused)) *name, const uint8_t __attribute__((unused)) *mac) -{ - if (fail) - return -1; - - return 0; -} - -void pico_device_destroy(struct pico_device *dev) -{ - dev = dev; -} - -int32_t pico_stack_recv(struct pico_device __attribute__((unused)) *dev, uint8_t __attribute__((unused)) *buffer, uint32_t __attribute__((unused)) len) -{ - called = 1; - return 1; -} - -START_TEST(tc_pico_loop_send) -{ - uint8_t buf[LOOP_MTU + 1] = {}; - fail_if(pico_loop_send(NULL, buf, LOOP_MTU + 1) != 0); - - /* First send: OK */ - fail_if(pico_loop_send(NULL, buf, LOOP_MTU) != LOOP_MTU); - - /* Second: buffer busy */ - fail_if(pico_loop_send(NULL, buf, LOOP_MTU) != 0); - -} -END_TEST - -START_TEST(tc_pico_loop_poll) -{ - uint8_t buf[LOOP_MTU + 1] = {}; - fail_if(pico_loop_poll(NULL, 0) != 0); - called = 0; - /* First send: OK */ - fail_if(pico_loop_send(NULL, buf, LOOP_MTU) != LOOP_MTU); - fail_if(pico_loop_poll(NULL, 1) != 0); - fail_if(called == 0); -} -END_TEST - -START_TEST(tc_pico_loop_create) -{ - -#ifdef PICO_FAULTY - 
printf("Testing with faulty memory in pico_loop_create (1)\n"); - pico_set_mm_failure(1); - fail_if(pico_loop_create() != NULL); -#endif - fail = 1; - fail_if(pico_loop_create() != NULL); - fail = 0; - fail_if(pico_loop_create() == NULL); - -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_loop_send = tcase_create("Unit test for pico_loop_send"); - TCase *TCase_pico_loop_poll = tcase_create("Unit test for pico_loop_poll"); - TCase *TCase_pico_loop_create = tcase_create("Unit test for pico_loop_create"); - - - tcase_add_test(TCase_pico_loop_send, tc_pico_loop_send); - suite_add_tcase(s, TCase_pico_loop_send); - tcase_add_test(TCase_pico_loop_poll, tc_pico_loop_poll); - suite_add_tcase(s, TCase_pico_loop_poll); - tcase_add_test(TCase_pico_loop_create, tc_pico_loop_create); - suite_add_tcase(s, TCase_pico_loop_create); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_dev_ppp.c b/kernel/picotcp/test/unit/modunit_pico_dev_ppp.c deleted file mode 100644 index 499ed8c..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_dev_ppp.c +++ /dev/null @@ -1,1387 +0,0 @@ -#include -#include -#include -#include "pico_device.h" -#include "pico_dev_ppp.h" -#include "pico_stack.h" -#include "pico_ipv4.h" -#include "pico_md5.h" -#include "pico_dns_client.h" -#include "modules/pico_dev_ppp.c" -#include "check.h" - -struct pico_device_ppp _ppp = {}; -static enum ppp_modem_event ppp_modem_ev; -static enum ppp_lcp_event ppp_lcp_ev; -static enum ppp_auth_event ppp_auth_ev; -static enum ppp_ipcp_event ppp_ipcp_ev; - -static uint32_t called_picotimer = 0; - -Suite *pico_suite(void); - -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - IGNORE_PARAMETER(arg); - 
IGNORE_PARAMETER(timer); - IGNORE_PARAMETER(expire); - - called_picotimer++; - - return called_picotimer; -} - - -static void modem_state(struct pico_device_ppp *ppp, enum ppp_modem_event event) -{ - IGNORE_PARAMETER(ppp); - printf("Called MODEM FSM mock\n"); - ppp_modem_ev = event; -} -static void lcp_state(struct pico_device_ppp *ppp, enum ppp_lcp_event event) -{ - IGNORE_PARAMETER(ppp); - printf("Called LCP FSM mock\n"); - ppp_lcp_ev = event; -} -static void auth_state(struct pico_device_ppp *ppp, enum ppp_auth_event event) -{ - IGNORE_PARAMETER(ppp); - printf("Called AUTH FSM mock\n"); - ppp_auth_ev = event; -} -static void ipcp_state(struct pico_device_ppp *ppp, enum ppp_ipcp_event event) -{ - IGNORE_PARAMETER(ppp); - printf("Called IPCP FSM mock\n"); - ppp_ipcp_ev = event; -} - -static int called_serial_send = 0; -static uint8_t serial_out_first_char = 0; -static uint32_t serial_out_len = 0; -static uint8_t serial_buffer[64]; - -static int unit_serial_send(struct pico_device *dev, const void *buf, int len) -{ - IGNORE_PARAMETER(dev); - printf("Called send function!\n"); - serial_out_len = (unsigned) len; - if (len < 64) { - memcpy(serial_buffer, buf, (unsigned) len); - } else { - memcpy(serial_buffer, buf, 64); - } - - serial_out_first_char = *(uint8_t *)((uintptr_t)(buf)); - called_serial_send++; - printf(" First char : %02x, len: %d\n", serial_out_first_char, serial_out_len); - printf(" ---- %02x %02x %02x %02x %02x %02x %02x %02x\n", - serial_buffer[0], serial_buffer[1], serial_buffer[2], - serial_buffer[3], serial_buffer[4], serial_buffer[5], - serial_buffer[6], serial_buffer[7]); - return len; -} - -uint8_t test_string[5][10] = { - { 0x7e, 'a', 'b', 'c', 'd', 0x7e }, - { 0x7e, 'a', 0x7e, 'c', 'd', 0x7e }, - { 0x7e, 'a', 'b', 0x7d, 'd', 0x7e }, - { 0x7e, 0x7d, 'b', 'c', 0x7e, 0x7e }, - { 0x7e, 0xed, 0x7d, 0xff, 0x7d, 0x3F, 'c', 0x20, 0x7e } -}; - -uint8_t escape_string[5][12] = { - { 0x7e, 'a', 'b', 'c', 'd', 0x7e }, - { 0x7e, 'a', 0x7d, 0x5e, 'c', 'd', 
0x7e }, - { 0x7e, 'a', 'b', 0x7d, 0x5d, 'd', 0x7e }, - { 0x7e, 0x7d, 0x5d, 'b', 'c', 0x7d, 0x5e, 0x7e }, - { 0x7e, 0xed, 0x7d, 0x5d, 0xff, 0x7d, 0x5d, 0x3F, 'c', 0x20, 0x7e } -}; - -int test_string_len[] = { - 6, 6, 6, 6, 9 -}; -int escape_string_len[] = { - 6, 7, 7, 8, 11 -}; - -START_TEST(tc_ppp_serial_send_escape) -{ - int i; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - - for (i = 0; i < 5; i++) { - called_serial_send = 0; - fail_if(ppp_serial_send_escape(&_ppp, test_string[i], test_string_len[i]) != test_string_len[i]); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != (unsigned) escape_string_len[i]); - printf(" test string ---- %02x %02x %02x %02x %02x %02x %02x %02x\n", - test_string[i][0], test_string[i][1], test_string[i][2], - test_string[i][3], test_string[i][4], test_string[i][5], - test_string[i][6], test_string[i][7]); - printf(" expected string ---- %02x %02x %02x %02x %02x %02x %02x %02x\n", - escape_string[i][0], escape_string[i][1], escape_string[i][2], - escape_string[i][3], escape_string[i][4], escape_string[i][5], - escape_string[i][6], escape_string[i][7]); - printf(" received string ---- %02x %02x %02x %02x %02x %02x %02x %02x\n", - serial_buffer[0], serial_buffer[1], serial_buffer[2], - serial_buffer[3], serial_buffer[4], serial_buffer[5], - serial_buffer[6], serial_buffer[7]); - - fail_if(memcmp(escape_string[i], serial_buffer, serial_out_len) != 0); - } -} -END_TEST - - -START_TEST(tc_lcp_timer_start) -{ - - /* Reset counter, LCP REQ */ - memset(&_ppp, 0, sizeof(_ppp)); - lcp_timer_start(&_ppp, 0); - fail_if(_ppp.timer_on != PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_count != 0); - fail_if(_ppp.timer_val != PICO_PPP_DEFAULT_TIMER); - - /* LCP CONFIG REQ, Normal case */ - memset(&_ppp, 0, sizeof(_ppp)); - lcp_timer_start(&_ppp, PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_on != PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_count != PICO_PPP_DEFAULT_MAX_CONFIGURE); - fail_if(_ppp.timer_val != 
PICO_PPP_DEFAULT_TIMER); - - /* LCP TERMINATE REQ, Normal case */ - memset(&_ppp, 0, sizeof(_ppp)); - lcp_timer_start(&_ppp, PPP_TIMER_ON_LCPTERM); - fail_if(_ppp.timer_on != PPP_TIMER_ON_LCPTERM); - fail_if(_ppp.timer_count != PICO_PPP_DEFAULT_MAX_TERMINATE); - fail_if(_ppp.timer_val != PICO_PPP_DEFAULT_TIMER); -} -END_TEST -START_TEST(tc_lcp_zero_restart_count) -{ - /* Reset counter, LCP REQ */ - memset(&_ppp, 0, sizeof(_ppp)); - lcp_zero_restart_count(&_ppp); - fail_if(_ppp.timer_on != PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_count != 0); - fail_if(_ppp.timer_val != PICO_PPP_DEFAULT_TIMER); -} -END_TEST -START_TEST(tc_lcp_timer_stop) -{ - /* LCP CONFIG REQ, Normal case */ - memset(&_ppp, 0, sizeof(_ppp)); - lcp_timer_start(&_ppp, PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_on != PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_count != PICO_PPP_DEFAULT_MAX_CONFIGURE); - fail_if(_ppp.timer_val != PICO_PPP_DEFAULT_TIMER); - /* Releasing timer */ - lcp_timer_stop(&_ppp, PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_on != 0); -} -END_TEST -START_TEST(tc_ppp_ctl_packet_size) -{ - uint32_t size = 10; - uint32_t prefix = ppp_ctl_packet_size(&_ppp, 0, &size); - fail_if(prefix != (PPP_HDR_SIZE + PPP_PROTO_SLOT_SIZE)); - fail_if(size != (10 + prefix + PPP_FCS_SIZE + 1)); -} -END_TEST -START_TEST(tc_ppp_fcs_char) -{ - char a = '*'; - uint16_t fcs; - fcs = ppp_fcs_char(0u, (uint8_t)a); - fail_if(fcs != 36440); -} -END_TEST -START_TEST(tc_ppp_fcs_continue) -{ - char a = '*'; - uint16_t fcs; - fcs = ppp_fcs_continue(0, (uint8_t *) &a, 1); - fail_if(fcs != 36440); -} -END_TEST -START_TEST(tc_ppp_fcs_finish) -{ - uint16_t fcs = 36440; - fcs = ppp_fcs_finish(fcs); - fail_if (fcs != 29095); -} -END_TEST -START_TEST(tc_ppp_fcs_start) -{ - uint16_t fcs; - char s = '*'; - fcs = ppp_fcs_start((uint8_t*)&s, 1); - fail_if(fcs != 33247); -} -END_TEST -START_TEST(tc_ppp_fcs_verify) -{ - char hello[8] = "hello"; - uint16_t fcs = ppp_fcs_start((uint8_t *)hello, 5); - fcs = ppp_fcs_finish(fcs); - 
memcpy(hello + 5, &fcs, 2); - fail_if(0 != ppp_fcs_verify((uint8_t *)hello, 7)); - hello[0] = 'B'; - hello[1] = 'y'; - hello[2] = 'e'; - hello[3] = 'z'; - hello[4] = 'z'; - fail_if(-1 != ppp_fcs_verify((uint8_t *)hello, 7)); - -} -END_TEST -START_TEST(tc_pico_ppp_ctl_send) -{ - uint8_t pkt[32] = { }; - memset(&_ppp, 0, sizeof(_ppp)); - - /* No serial_send associated */ - fail_if(pico_ppp_ctl_send(&_ppp.dev, 1, pkt, 30) != 30); - fail_if(called_serial_send != 0); - /* normal case */ - _ppp.serial_send = unit_serial_send; - fail_if(pico_ppp_ctl_send(&_ppp.dev, 1, pkt, 30) != 30); - fail_if(called_serial_send != 1); - called_serial_send = 0; - fail_if(serial_out_first_char != 0x7e); - fail_if(serial_out_len != 30); -} -END_TEST -START_TEST(tc_pico_ppp_send) -{ - uint8_t pkt[32] = { }; - memset(&_ppp, 0, sizeof(_ppp)); - - - /* wrong ipcp_state */ - _ppp.serial_send = unit_serial_send; - fail_if(pico_ppp_send(&_ppp.dev, pkt, 30) != 30); - fail_if(called_serial_send != 0); - - /* No serial_send associated */ - _ppp.serial_send = NULL; - _ppp.ipcp_state = PPP_IPCP_STATE_OPENED; - fail_if(pico_ppp_send(&_ppp.dev, pkt, 30) != 30); - fail_if(called_serial_send != 0); - - /* normal case */ - _ppp.serial_send = unit_serial_send; - fail_if(pico_ppp_send(&_ppp.dev, pkt, 30) != 30); - fail_if(called_serial_send != 1); - called_serial_send = 0; - fail_if(serial_out_first_char != 0x7e); - fail_if(serial_out_len != 38); - - /* with LCPOPT_PROTO_COMP set */ - called_serial_send = 0; - LCPOPT_SET_PEER((&_ppp), LCPOPT_PROTO_COMP); - fail_if(pico_ppp_send(&_ppp.dev, pkt, 30) != 30); - fail_if(called_serial_send != 1); - called_serial_send = 0; - fail_if(serial_out_first_char != 0x7e); - fail_if(serial_out_len != 37); - LCPOPT_UNSET_PEER((&_ppp), LCPOPT_PROTO_COMP); - - /* with LCPOPT_ADDRCTL_COMP set */ - called_serial_send = 0; - LCPOPT_SET_PEER((&_ppp), LCPOPT_ADDRCTL_COMP); - fail_if(pico_ppp_send(&_ppp.dev, pkt, 30) != 30); - fail_if(called_serial_send != 1); - called_serial_send = 
0; - fail_if(serial_out_first_char != 0x7e); - fail_if(serial_out_len != 36); - LCPOPT_UNSET_PEER((&_ppp), LCPOPT_ADDRCTL_COMP); - -} -END_TEST -START_TEST(tc_ppp_modem_start_timer) -{ - memset(&_ppp, 0, sizeof(_ppp)); - ppp_modem_start_timer(&_ppp); - fail_if(_ppp.timer_on != PPP_TIMER_ON_MODEM); - fail_if(_ppp.timer_val != PICO_PPP_DEFAULT_TIMER); -} -END_TEST -START_TEST(tc_ppp_modem_send_reset) -{ - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - /* No serial send */ - ppp_modem_send_reset(&_ppp); - fail_if(called_serial_send > 0); - /* Normal way */ - _ppp.serial_send = unit_serial_send; - ppp_modem_send_reset(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 5); - -} -END_TEST -START_TEST(tc_ppp_modem_send_echo) -{ - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - /* No serial send */ - ppp_modem_send_echo(&_ppp); - fail_if(called_serial_send > 0); - /* Normal way */ - _ppp.serial_send = unit_serial_send; - ppp_modem_send_echo(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 6); -} -END_TEST -START_TEST(tc_ppp_modem_send_creg) -{ - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - /* No serial send */ - ppp_modem_send_creg(&_ppp); - fail_if(called_serial_send > 0); - /* Normal way */ - _ppp.serial_send = unit_serial_send; - ppp_modem_send_creg(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 11); -} -END_TEST -START_TEST(tc_ppp_modem_send_cgreg) -{ - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - /* No serial send */ - ppp_modem_send_cgreg(&_ppp); - fail_if(called_serial_send > 0); - /* Normal way */ - _ppp.serial_send = unit_serial_send; - ppp_modem_send_cgreg(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 12); -} -END_TEST -START_TEST(tc_ppp_modem_send_cgdcont) -{ - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - /* No serial send */ - ppp_modem_send_cgdcont(&_ppp); - fail_if(called_serial_send > 0); - /* 
Normal way */ - _ppp.serial_send = unit_serial_send; - ppp_modem_send_cgdcont(&_ppp); - fail_if(called_serial_send != 1); -} -END_TEST -START_TEST(tc_ppp_modem_send_cgatt) -{ - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - /* No serial send */ - ppp_modem_send_cgatt(&_ppp); - fail_if(called_serial_send > 0); - /* Normal way */ - _ppp.serial_send = unit_serial_send; - ppp_modem_send_cgatt(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 12); -} -END_TEST -START_TEST(tc_ppp_modem_send_dial) -{ - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - /* No serial send */ - ppp_modem_send_dial(&_ppp); - fail_if(called_serial_send > 0); - /* Normal way */ - _ppp.serial_send = unit_serial_send; - ppp_modem_send_dial(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 13); -} -END_TEST - -START_TEST(tc_ppp_modem_connected) -{ - memset(&_ppp, 0, sizeof(_ppp)); - ppp_lcp_ev = 0; - ppp_modem_connected(&_ppp); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_UP); -} -END_TEST -START_TEST(tc_ppp_modem_disconnected) -{ - memset(&_ppp, 0, sizeof(_ppp)); - ppp_lcp_ev = 0; - ppp_modem_disconnected(&_ppp); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_DOWN); -} -END_TEST -START_TEST(tc_ppp_modem_recv) -{ - char ok[] = "OK"; - char connect[] = "CONNECT HELLO HI THERE"; - char error[] = "ERROR"; - char blabla[] = "Blabla"; - ppp_modem_ev = 0; - ppp_modem_recv(&_ppp, ok, (uint32_t)strlen(ok)); - fail_if(ppp_modem_ev != PPP_MODEM_EVENT_OK); - - ppp_modem_ev = 0; - ppp_modem_recv(&_ppp, connect, (uint32_t)strlen(connect)); - fail_if(ppp_modem_ev != PPP_MODEM_EVENT_CONNECT); - - ppp_modem_ev = 0; - ppp_modem_recv(&_ppp, error, (uint32_t)strlen(error)); - fail_if(ppp_modem_ev != PPP_MODEM_EVENT_STOP); - - ppp_modem_ev = PPP_MODEM_EVENT_MAX; /* Which is basically illegal, just to check */ - ppp_modem_recv(&_ppp, blabla, (uint32_t)8); - fail_if(ppp_modem_ev != PPP_MODEM_EVENT_MAX); - -} -END_TEST -START_TEST(tc_lcp_send_configure_request) -{ - 
memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - - /* With no options... */ - called_serial_send = 0; - lcp_send_configure_request(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 12); - - /* With all the options... */ - called_serial_send = 0; - LCPOPT_SET_LOCAL((&_ppp), LCPOPT_PROTO_COMP); - LCPOPT_SET_LOCAL((&_ppp), LCPOPT_MRU); - LCPOPT_SET_LOCAL((&_ppp), LCPOPT_ADDRCTL_COMP); - lcp_send_configure_request(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 20); - - /* with a failing malloc... */ - pico_set_mm_failure(1); - called_serial_send = 0; - lcp_send_configure_request(&_ppp); - fail_if(called_serial_send != 0); - -} -END_TEST -START_TEST(tc_lcp_optflags) -{ - uint8_t pkt[4 + sizeof(struct pico_lcp_hdr)]; - uint8_t *p = pkt + sizeof(struct pico_lcp_hdr); - p[0] = 0x03; - p[1] = 0x42; - p[2] = 0x56; - p[3] = 0x99; - memset(&_ppp, 0, sizeof(_ppp)); - fail_if(lcp_optflags(&_ppp, pkt, 4 + sizeof(struct pico_lcp_hdr), 1u) != 0x08); - fail_if(_ppp.auth != 0x5699); -} -END_TEST - -START_TEST(tc_lcp_send_configure_ack) -{ - uint8_t pkt[20] = ""; - struct pico_lcp_hdr *lcpreq; - called_serial_send = 0; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - _ppp.pkt = pkt; - _ppp.len = 4; - lcpreq = (struct pico_lcp_hdr *)_ppp.pkt; - lcpreq->len = short_be(4); - lcp_send_configure_ack(&_ppp); - fail_if(called_serial_send != 1); -} -END_TEST -START_TEST(tc_lcp_send_terminate_request) -{ - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - - called_serial_send = 0; - lcp_send_terminate_request(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 12); - -} -END_TEST -START_TEST(tc_lcp_send_terminate_ack) -{ - uint8_t pkt[20] = ""; - struct pico_lcp_hdr *lcpreq; - called_serial_send = 0; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - _ppp.pkt = pkt; - _ppp.len = 4; - lcpreq = (struct pico_lcp_hdr *)_ppp.pkt; - 
lcpreq->len = short_be(4); - lcp_send_terminate_ack(&_ppp); - fail_if(called_serial_send != 1); -} -END_TEST -START_TEST(tc_lcp_send_configure_nack) -{ - uint8_t pkt[20] = ""; - struct pico_lcp_hdr *lcpreq; - called_serial_send = 0; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - _ppp.pkt = pkt; - _ppp.len = 4; - lcpreq = (struct pico_lcp_hdr *)_ppp.pkt; - lcpreq->len = short_be(4); - lcp_send_configure_nack(&_ppp); - fail_if(called_serial_send != 1); -} -END_TEST -START_TEST(tc_lcp_process_in) -{ - uint8_t pkt[64]; - called_serial_send = 0; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - - /* Receive ACK (RCA) */ - ppp_lcp_ev = 0; - pkt[0] = PICO_CONF_ACK; - lcp_process_in(&_ppp, pkt, 64); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_RCA); - - /* Receive NACK (RCN) */ - ppp_lcp_ev = 0; - pkt[0] = PICO_CONF_NAK; - lcp_process_in(&_ppp, pkt, 64); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_RCN); - - /* Receive REJ (RCN) */ - ppp_lcp_ev = 0; - pkt[0] = PICO_CONF_REJ; - lcp_process_in(&_ppp, pkt, 64); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_RCN); - - /* Receive REQ, with unwanted option field (RCR-) */ - ppp_lcp_ev = 0; - pkt[0] = PICO_CONF_REQ; - pkt[sizeof(struct pico_lcp_hdr)] = 0x04; - pkt[sizeof(struct pico_lcp_hdr) + 1] = 0x02; - lcp_process_in(&_ppp, pkt, sizeof(struct pico_lcp_hdr) + 2); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_RCR_NEG); - - /* Receive REQ, with valid option field (RCR+) */ - ppp_lcp_ev = 0; - pkt[0] = PICO_CONF_REQ; - pkt[sizeof(struct pico_lcp_hdr)] = 0x04; - pkt[sizeof(struct pico_lcp_hdr) + 1] = 0x02; - _ppp.lcpopt_local = (1 << 4); - lcp_process_in(&_ppp, pkt, sizeof(struct pico_lcp_hdr) + 2); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_RCR_POS); -} -END_TEST -START_TEST(tc_pap_process_in) -{ - struct pico_pap_hdr hdr; - memset(&_ppp, 0, sizeof(_ppp)); - - /* Receive SUCCESS (RAA) */ - _ppp.auth = 0xc023; - ppp_auth_ev = 0; - hdr.code = PAP_AUTH_ACK; - pap_process_in(&_ppp, (uint8_t *)&hdr, sizeof(hdr)); - 
fail_if (ppp_auth_ev != PPP_AUTH_EVENT_RAA); - - /* Receive FAILURE (RAN) */ - ppp_auth_ev = 0; - hdr.code = PAP_AUTH_NAK; - pap_process_in(&_ppp, (uint8_t *)&hdr, sizeof(hdr)); - fail_if (ppp_auth_ev != PPP_AUTH_EVENT_RAN); -} -END_TEST -START_TEST(tc_chap_process_in) -{ - struct pico_chap_hdr hdr; - memset(&_ppp, 0, sizeof(_ppp)); - - /* Receive challenge (RAC) */ - ppp_auth_ev = 0; - _ppp.auth = 0xc223; - hdr.code = CHAP_CHALLENGE; - chap_process_in(&_ppp, (uint8_t *)&hdr, sizeof(hdr)); - fail_if (ppp_auth_ev != PPP_AUTH_EVENT_RAC); - - /* Receive SUCCESS (RAA) */ - ppp_auth_ev = 0; - hdr.code = CHAP_SUCCESS; - chap_process_in(&_ppp, (uint8_t *)&hdr, sizeof(hdr)); - fail_if (ppp_auth_ev != PPP_AUTH_EVENT_RAA); - - /* Receive FAILURE (RAN) */ - ppp_auth_ev = 0; - hdr.code = CHAP_FAILURE; - chap_process_in(&_ppp, (uint8_t *)&hdr, sizeof(hdr)); - fail_if (ppp_auth_ev != PPP_AUTH_EVENT_RAN); - -} -END_TEST - -START_TEST(tc_ipcp_ack) -{ - /* TODO: test this: static void ipcp_ack(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) */ - uint8_t pkt[20] = ""; - struct pico_ipcp_hdr *ipcp; - called_serial_send = 0; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - _ppp.pkt = pkt; - _ppp.len = 4; - ipcp = (struct pico_ipcp_hdr *)_ppp.pkt; - ipcp->len = short_be(4); - ipcp_send_ack(&_ppp); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 12); - -} -END_TEST -START_TEST(tc_uint32_t) -{ - memset(&_ppp, 0, sizeof(_ppp)); - fail_if(ipcp_request_options_size(&_ppp) != 3 * IPCP_ADDR_LEN); - - _ppp.ipcp_nbns1 = 1; - fail_if(ipcp_request_options_size(&_ppp) != 4 * IPCP_ADDR_LEN); - - _ppp.ipcp_nbns2 = 1; - fail_if(ipcp_request_options_size(&_ppp) != 5 * IPCP_ADDR_LEN); - -} -END_TEST -START_TEST(tc_ipcp_request_add_address) -{ - uint8_t dst[6]; - ipcp_request_add_address(dst, 0x42, 0xDDCCBBAA); - fail_if(dst[0] != 0x42); - fail_if(dst[1] != IPCP_ADDR_LEN); - fail_if(dst[2] != 0xAA); - fail_if(dst[3] != 0xBB); - fail_if(dst[4] != 0xCC); 
- fail_if(dst[5] != 0xDD); -} -END_TEST - -START_TEST(tc_ipcp_request_fill) -{ - - uint8_t opts[5 * IPCP_ADDR_LEN]; - memset(&_ppp, 0, sizeof(_ppp)); - - _ppp.ipcp_allowed_fields = 0xffff; - ipcp_request_fill(&_ppp, opts); - fail_if(opts[0] != IPCP_OPT_IP); - fail_if(opts[6] != IPCP_OPT_DNS1); - fail_if(opts[12] != IPCP_OPT_DNS2); - - _ppp.ipcp_nbns1 = 1; - ipcp_request_fill(&_ppp, opts); - fail_if(opts[0] != IPCP_OPT_IP); - fail_if(opts[6] != IPCP_OPT_DNS1); - fail_if(opts[12] != IPCP_OPT_DNS2); - fail_if(opts[18] != IPCP_OPT_NBNS1); - - _ppp.ipcp_nbns2 = 1; - ipcp_request_fill(&_ppp, opts); - fail_if(opts[0] != IPCP_OPT_IP); - fail_if(opts[6] != IPCP_OPT_DNS1); - fail_if(opts[12] != IPCP_OPT_DNS2); - fail_if(opts[18] != IPCP_OPT_NBNS1); - fail_if(opts[24] != IPCP_OPT_NBNS2); -} -END_TEST -START_TEST(tc_ipcp_send_req) -{ - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - - /* With no options... */ - called_serial_send = 0; - ipcp_send_req(&_ppp); - fail_if(called_serial_send != 1); -} -END_TEST - -START_TEST(tc_ipcp_reject_vj) -{ - /* TODO: test this: static void ipcp_reject_vj(struct pico_device_ppp *ppp, uint8_t *comp_req) */ - uint8_t buf[IPCP_OPT_VJ + sizeof(struct pico_ipcp_hdr)] = { }; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - called_serial_send = 0; - ipcp_reject_vj(&_ppp, buf); - fail_if(called_serial_send != 1); - fail_if(serial_out_len != 18); -} -END_TEST -START_TEST(tc_ppp_ipv4_conf) -{ - /* TODO: test this: static void ppp_ipv4_conf(struct pico_device_ppp *ppp) */ - /* This test needs an actual device */ -} -END_TEST - -START_TEST(tc_ipcp_process_in) -{ - /* TODO: test this: static void ipcp_process_in(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) */ - uint8_t req[sizeof(struct pico_ipcp_hdr) + 5 * IPCP_ADDR_LEN]; - uint8_t *p = req + sizeof(struct pico_ipcp_hdr); - ; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - - /* * * ACK * * */ - req[0] = 
PICO_CONF_ACK; - - /* Fill addresses */ - *(p++) = IPCP_OPT_IP; - *(p++) = IPCP_ADDR_LEN; - *(((uint32_t*)p)) = 0x11223344; - p += sizeof(uint32_t); - - *(p++) = IPCP_OPT_DNS1; - *(p++) = IPCP_ADDR_LEN; - *(((uint32_t*)p)) = 0x55667788; - p += sizeof(uint32_t); - - *(p++) = IPCP_OPT_NBNS1; - *(p++) = IPCP_ADDR_LEN; - *(((uint32_t*)p)) = 0x99AABBCC; - p += sizeof(uint32_t); - - *(p++) = IPCP_OPT_DNS2; - *(p++) = IPCP_ADDR_LEN; - *(((uint32_t*)p)) = 0xDDEEFF00; - p += sizeof(uint32_t); - - *(p++) = IPCP_OPT_NBNS2; - *(p++) = IPCP_ADDR_LEN; - *(((uint32_t*)p)) = 0x11223344; - p += sizeof(uint32_t); - - ppp_ipcp_ev = 0; - ipcp_process_in(&_ppp, req, sizeof(req)); - fail_if(ppp_ipcp_ev != PPP_IPCP_EVENT_RCA); - fail_if(_ppp.ipcp_ip != 0x11223344); - fail_if(_ppp.ipcp_dns1 != 0x55667788); - fail_if(_ppp.ipcp_nbns1 != 0x99aabbcc); - fail_if(_ppp.ipcp_dns2 != 0xddeeff00); - fail_if(_ppp.ipcp_nbns2 != 0x11223344); - - /* Get a VJ reject ! */ - ppp_ipcp_ev = 0; - called_serial_send = 0; - p = req + sizeof(struct pico_ipcp_hdr); - *(p++) = IPCP_OPT_VJ; - *(p++) = IPCP_VJ_LEN; - *(((uint32_t*)p)) = 0x1; - ipcp_process_in(&_ppp, req, sizeof(struct pico_ipcp_hdr) + IPCP_VJ_LEN); - fail_if(called_serial_send != 1); - fail_if(ppp_ipcp_ev != 0); - - /* * * REQ * * */ - ppp_ipcp_ev = 0; - req[0] = PICO_CONF_REQ; - ipcp_process_in(&_ppp, req, sizeof(struct pico_ipcp_hdr)); - fail_if(ppp_ipcp_ev != PPP_IPCP_EVENT_RCR_POS); - - /* * * NAK * * */ - ppp_ipcp_ev = 0; - req[0] = PICO_CONF_NAK; - ipcp_process_in(&_ppp, req, sizeof(struct pico_ipcp_hdr)); - fail_if(ppp_ipcp_ev != PPP_IPCP_EVENT_RCN); - - /* * * REJ * * */ - ppp_ipcp_ev = 0; - req[0] = PICO_CONF_REJ; - ipcp_process_in(&_ppp, req, sizeof(struct pico_ipcp_hdr)); - fail_if(ppp_ipcp_ev != PPP_IPCP_EVENT_RCN); - -} -END_TEST - -START_TEST(tc_ipcp6_process_in) -{ - /* TODO: test this: static void ipcp6_process_in(struct pico_device_ppp *ppp, uint8_t *pkt, uint32_t len) */ - /* When implemented, do... 
*/ - uint8_t req[sizeof(struct pico_ipcp_hdr)]; - ppp_ipcp_ev = 0; - req[0] = PICO_CONF_REJ; - ipcp6_process_in(&_ppp, req, sizeof(struct pico_ipcp_hdr)); - fail_if(ppp_ipcp_ev != 0); -} -END_TEST - -START_TEST(tc_ppp_process_packet_payload) -{ - /* Empty, tested with ppp_process_packet, below. */ -} -END_TEST -START_TEST(tc_ppp_process_packet) -{ - /* Empty, tested with ppp_recv_data, below. */ -} -END_TEST -START_TEST(tc_ppp_recv_data) -{ - uint8_t pkt[20] = ""; - struct pico_lcp_hdr *lcpreq; - - /* This creates an LCP ack */ - printf("Unit test: Packet forgery. Creating LCP ACK... \n"); - called_serial_send = 0; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - _ppp.pkt = pkt; - _ppp.len = 4; - lcpreq = (struct pico_lcp_hdr *)_ppp.pkt; - lcpreq->len = short_be(4); - lcp_send_configure_ack(&_ppp); - fail_if(called_serial_send != 1); - /* LCP ack is now in the buffer, and can be processed */ - printf("Unit test: Packet forgery. Injecting LCP ACK... \n"); - ppp_lcp_ev = 0; - ppp_recv_data(&_ppp, serial_buffer + 1, serial_out_len - 2); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_RCA); - printf("OK!\n"); - /* TODO: Increase coverage. 
*/ -} -END_TEST - -START_TEST(tc_lcp_this_layer_up) -{ - /* TODO: test this: static void lcp_this_layer_up(struct pico_device_ppp *ppp) */ - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - - ppp_auth_ev = 0; - lcp_this_layer_up(&_ppp); - fail_if(ppp_auth_ev != PPP_AUTH_EVENT_UP_NONE); - - ppp_auth_ev = 0; - _ppp.auth = 0xc023; - lcp_this_layer_up(&_ppp); - fail_if(ppp_auth_ev != PPP_AUTH_EVENT_UP_PAP); - - ppp_auth_ev = 0; - _ppp.auth = 0xc223; - lcp_this_layer_up(&_ppp); - fail_if(ppp_auth_ev != PPP_AUTH_EVENT_UP_CHAP); - - ppp_auth_ev = 0; - _ppp.auth = 0xfefe; - lcp_this_layer_up(&_ppp); - fail_if(ppp_auth_ev != 0); -} -END_TEST -START_TEST(tc_lcp_this_layer_down) -{ - ppp_auth_ev = 0; - lcp_this_layer_down(&_ppp); - fail_if(ppp_auth_ev != PPP_AUTH_EVENT_DOWN); -} -END_TEST -START_TEST(tc_lcp_this_layer_started) -{ - ppp_modem_ev = 0; - lcp_this_layer_started(&_ppp); - fail_if(ppp_modem_ev != PPP_MODEM_EVENT_START); -} -END_TEST -START_TEST(tc_lcp_this_layer_finished) -{ - ppp_modem_ev = 0; - lcp_this_layer_finished(&_ppp); - fail_if(ppp_modem_ev != PPP_MODEM_EVENT_STOP); -} -END_TEST -START_TEST(tc_lcp_initialize_restart_count) -{ - /* TODO: test this: static void lcp_initialize_restart_count(struct pico_device_ppp *ppp) */ - memset(&_ppp, 0, sizeof(_ppp)); - lcp_initialize_restart_count(&_ppp); - fail_if(_ppp.timer_on != PPP_TIMER_ON_LCPREQ); - fail_if(_ppp.timer_count != PICO_PPP_DEFAULT_MAX_CONFIGURE); - fail_if(_ppp.timer_val != PICO_PPP_DEFAULT_TIMER); -} -END_TEST -START_TEST(tc_lcp_send_code_reject) -{ - /* TODO: test this: static void lcp_send_code_reject(struct pico_device_ppp *ppp) */ - lcp_send_code_reject(&_ppp); -} -END_TEST -START_TEST(tc_lcp_send_echo_reply) -{ - uint8_t pkt[20] = ""; - struct pico_lcp_hdr *lcpreq; - called_serial_send = 0; - memset(&_ppp, 0, sizeof(_ppp)); - _ppp.serial_send = unit_serial_send; - _ppp.pkt = pkt; - _ppp.len = 4; - lcpreq = (struct pico_lcp_hdr *)_ppp.pkt; - lcpreq->len = short_be(4); - 
lcp_send_echo_reply(&_ppp); - fail_if(called_serial_send != 1); -} -END_TEST -START_TEST(tc_auth) -{ - ppp_ipcp_ev = 0; - auth(&_ppp); - fail_if(ppp_ipcp_ev != PPP_IPCP_EVENT_UP); -} -END_TEST -START_TEST(tc_deauth) -{ - ppp_ipcp_ev = 0; - deauth(&_ppp); - fail_if(ppp_ipcp_ev != PPP_IPCP_EVENT_DOWN); -} -END_TEST -START_TEST(tc_auth_req) -{ - auth_req(&_ppp); -} -END_TEST -START_TEST(tc_auth_rsp) -{ - uint8_t req[sizeof(struct pico_chap_hdr) + 1 + CHAP_MD5_SIZE] = { - 0 - }; /* 21 bytes */ - struct pico_chap_hdr *hdr = (struct pico_chap_hdr *)req; - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - _ppp.serial_send = unit_serial_send; - - _ppp.auth = 0xc223; /* hardcode CHAP */ - hdr->code = CHAP_CHALLENGE; - hdr->len = short_be((uint16_t)(sizeof (struct pico_chap_hdr) + CHAP_MD5_SIZE)); - req[sizeof(struct pico_chap_hdr)] = CHAP_MD5_SIZE; /* CHAP value size field */ - _ppp.pkt = req; - _ppp.len = sizeof(struct pico_chap_hdr) + CHAP_MD5_SIZE; - auth_rsp(&_ppp); - fail_if(called_serial_send != 1); - printf("OK!\n"); - -} -END_TEST -START_TEST(tc_auth_start_timer) -{ - memset(&_ppp, 0, sizeof(_ppp)); - auth_start_timer(&_ppp); - fail_if(_ppp.timer_on != PPP_TIMER_ON_AUTH); - -} -END_TEST -START_TEST(tc_ipcp_send_ack) -{ - uint8_t req[sizeof(struct pico_chap_hdr) + 4 ]; - struct pico_ipcp_hdr *hdr = (struct pico_ipcp_hdr *)req; - memset(&_ppp, 0, sizeof(_ppp)); - called_serial_send = 0; - _ppp.serial_send = unit_serial_send; - hdr->code = PICO_CONF_REQ; - hdr->len = short_be((uint16_t)(sizeof (struct pico_ipcp_hdr) + 4)); - _ppp.pkt = req; - _ppp.len = sizeof(struct pico_chap_hdr) + 4; - ipcp_send_ack(&_ppp); - fail_if(called_serial_send != 1); - printf("OK!\n"); -} -END_TEST -START_TEST(tc_ipcp_send_nack) -{ - ipcp_send_nack(&_ppp); -} -END_TEST -START_TEST(tc_ipcp_bring_up) -{ - memset(&_ppp, 0, sizeof(_ppp)); - /* without address */ - ipcp_bring_up(&_ppp); - - /* with address */ - _ppp.ipcp_ip = 0xAABBCCDD; - ipcp_bring_up(&_ppp); -} -END_TEST 
-START_TEST(tc_ipcp_bring_down) -{ - /* TODO: test this: static void ipcp_bring_down(struct pico_device_ppp *ppp) */ - ipcp_bring_down(&_ppp); -} -END_TEST -START_TEST(tc_ipcp_start_timer) -{ - memset(&_ppp, 0, sizeof(_ppp)); - ipcp_start_timer(&_ppp); - fail_if (_ppp.timer_on != PPP_TIMER_ON_IPCP); - fail_if (_ppp.timer_val != PICO_PPP_DEFAULT_TIMER * PICO_PPP_DEFAULT_MAX_FAILURE); -} -END_TEST -START_TEST(tc_pico_ppp_poll) -{ - /* TODO: test this: static int pico_ppp_poll(struct pico_device *dev, int loop_score) */ -} -END_TEST -START_TEST(tc_pico_ppp_link_state) -{ - memset(&_ppp, 0, sizeof(_ppp)); - fail_if(pico_ppp_link_state(&_ppp.dev) != 0); - _ppp.ipcp_state = PPP_IPCP_STATE_OPENED; - fail_if(pico_ppp_link_state(&_ppp.dev) == 0); -} -END_TEST -START_TEST(tc_check_to_modem) -{ - ppp_modem_ev = 0; - memset(&_ppp, 0, sizeof(_ppp)); - /* No timer on ... */ - check_to_modem(&_ppp); - fail_if(ppp_modem_ev != 0); - - /* Timer set to 1 */ - _ppp.timer_on = PPP_TIMER_ON_MODEM; - _ppp.timer_val = 1; - check_to_modem(&_ppp); - fail_if(ppp_modem_ev != 0); - _ppp.timer_val--; - /* Timer expired */ - check_to_modem(&_ppp); - printf("Modem event: %02x\n", ppp_modem_ev); - fail_if(ppp_modem_ev != PPP_MODEM_EVENT_TIMEOUT); - -} -END_TEST -START_TEST(tc_check_to_lcp) -{ - ppp_lcp_ev = 0; - memset(&_ppp, 0, sizeof(_ppp)); - /* No timer on ... 
*/ - check_to_lcp(&_ppp); - fail_if(ppp_lcp_ev != 0); - - /* Count set to 1 */ - _ppp.timer_count = 1; - - /* Timer set to 1 */ - _ppp.timer_on = PPP_TIMER_ON_LCPTERM; - _ppp.timer_val = 1; - check_to_lcp(&_ppp); - fail_if(ppp_lcp_ev != 0); - _ppp.timer_val--; - /* Timer expired */ - check_to_lcp(&_ppp); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_TO_POS); - - /* Timer set to 1 */ - ppp_lcp_ev = 0; - _ppp.timer_on = PPP_TIMER_ON_LCPREQ; - _ppp.timer_val = 1; - check_to_lcp(&_ppp); - fail_if(ppp_lcp_ev != 0); - _ppp.timer_val--; - /* Timer expired */ - check_to_lcp(&_ppp); - fail_if(ppp_lcp_ev != PPP_LCP_EVENT_TO_NEG); -} -END_TEST -START_TEST(tc_check_to_auth) -{ - /* TODO: test this: static void check_to_auth(struct pico_device_ppp *ppp) */ - ppp_auth_ev = 0; - memset(&_ppp, 0, sizeof(_ppp)); - /* No timer on ... */ - check_to_auth(&_ppp); - fail_if(ppp_auth_ev != 0); - - /* Timer set to 1 */ - _ppp.timer_on = PPP_TIMER_ON_AUTH; - _ppp.timer_val = 1; - check_to_auth(&_ppp); - fail_if(ppp_auth_ev != 0); - /* Timer expired */ - _ppp.timer_val--; - check_to_auth(&_ppp); - fail_if(ppp_auth_ev != PPP_AUTH_EVENT_TO); -} -END_TEST -START_TEST(tc_check_to_ipcp) -{ - ppp_ipcp_ev = 0; - memset(&_ppp, 0, sizeof(_ppp)); - /* No timer on ... 
*/ - check_to_ipcp(&_ppp); - fail_if(ppp_ipcp_ev != 0); - - /* Timer set to 1 */ - _ppp.timer_on = PPP_TIMER_ON_IPCP; - _ppp.timer_val = 1; - check_to_ipcp(&_ppp); - fail_if(ppp_ipcp_ev != 0); - /* Timer expired */ - _ppp.timer_val--; - check_to_ipcp(&_ppp); - fail_if(ppp_ipcp_ev != PPP_IPCP_EVENT_TO); -} -END_TEST - -START_TEST(tc_pico_ppp_tick) -{ - called_picotimer = 0; - memset(&_ppp, 0, sizeof(_ppp)); - pico_ppp_tick(0, &_ppp); - fail_if(called_picotimer != 1); -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_ppp_serial_send_escape = tcase_create("Unit test for ppp_serial_send_escape"); - TCase *TCase_lcp_timer_start = tcase_create("Unit test for lcp_timer_start"); - TCase *TCase_lcp_zero_restart_count = tcase_create("Unit test for lcp_zero_restart_count"); - TCase *TCase_lcp_timer_stop = tcase_create("Unit test for lcp_timer_stop"); - TCase *TCase_ppp_ctl_packet_size = tcase_create("Unit test for ppp_ctl_packet_size"); - TCase *TCase_ppp_fcs_char = tcase_create("Unit test for ppp_fcs_char"); - TCase *TCase_ppp_fcs_continue = tcase_create("Unit test for ppp_fcs_continue"); - TCase *TCase_ppp_fcs_finish = tcase_create("Unit test for ppp_fcs_finish"); - TCase *TCase_ppp_fcs_start = tcase_create("Unit test for ppp_fcs_start"); - TCase *TCase_ppp_fcs_verify = tcase_create("Unit test for ppp_fcs_verify"); - TCase *TCase_pico_ppp_ctl_send = tcase_create("Unit test for pico_ppp_ctl_send"); - TCase *TCase_pico_ppp_send = tcase_create("Unit test for pico_ppp_send"); - TCase *TCase_ppp_modem_start_timer = tcase_create("Unit test for ppp_modem_start_timer"); - TCase *TCase_ppp_modem_send_reset = tcase_create("Unit test for ppp_modem_send_reset"); - TCase *TCase_ppp_modem_send_echo = tcase_create("Unit test for ppp_modem_send_echo"); - TCase *TCase_ppp_modem_send_creg = tcase_create("Unit test for ppp_modem_send_creg"); - TCase *TCase_ppp_modem_send_cgreg = tcase_create("Unit test for ppp_modem_send_cgreg"); - TCase 
*TCase_ppp_modem_send_cgdcont = tcase_create("Unit test for ppp_modem_send_cgdcont"); - TCase *TCase_ppp_modem_send_cgatt = tcase_create("Unit test for ppp_modem_send_cgatt"); - TCase *TCase_ppp_modem_send_dial = tcase_create("Unit test for ppp_modem_send_dial"); - TCase *TCase_ppp_modem_connected = tcase_create("Unit test for ppp_modem_connected"); - TCase *TCase_ppp_modem_disconnected = tcase_create("Unit test for ppp_modem_disconnected"); - TCase *TCase_ppp_modem_recv = tcase_create("Unit test for ppp_modem_recv"); - TCase *TCase_lcp_send_configure_request = tcase_create("Unit test for lcp_send_configure_request"); - TCase *TCase_lcp_optflags = tcase_create("Unit test for lcp_optflags"); - TCase *TCase_lcp_send_configure_ack = tcase_create("Unit test for lcp_send_configure_ack"); - TCase *TCase_lcp_send_terminate_request = tcase_create("Unit test for lcp_send_terminate_request"); - TCase *TCase_lcp_send_terminate_ack = tcase_create("Unit test for lcp_send_terminate_ack"); - TCase *TCase_lcp_send_configure_nack = tcase_create("Unit test for lcp_send_configure_nack"); - TCase *TCase_lcp_process_in = tcase_create("Unit test for lcp_process_in"); - TCase *TCase_pap_process_in = tcase_create("Unit test for pap_process_in"); - TCase *TCase_chap_process_in = tcase_create("Unit test for chap_process_in"); - TCase *TCase_ipcp_ack = tcase_create("Unit test for ipcp_ack"); - TCase *TCase_uint32_t = tcase_create("Unit test for uint32_t"); - TCase *TCase_ipcp_request_add_address = tcase_create("Unit test for ipcp_request_add_address"); - TCase *TCase_ipcp_request_fill = tcase_create("Unit test for ipcp_request_fill"); - TCase *TCase_ipcp_send_req = tcase_create("Unit test for ipcp_send_req"); - TCase *TCase_ipcp_reject_vj = tcase_create("Unit test for ipcp_reject_vj"); - TCase *TCase_ppp_ipv4_conf = tcase_create("Unit test for ppp_ipv4_conf"); - TCase *TCase_ipcp_process_in = tcase_create("Unit test for ipcp_process_in"); - TCase *TCase_ipcp6_process_in = tcase_create("Unit 
test for ipcp6_process_in"); - TCase *TCase_ppp_process_packet_payload = tcase_create("Unit test for ppp_process_packet_payload"); - TCase *TCase_ppp_process_packet = tcase_create("Unit test for ppp_process_packet"); - TCase *TCase_ppp_recv_data = tcase_create("Unit test for ppp_recv_data"); - TCase *TCase_lcp_this_layer_up = tcase_create("Unit test for lcp_this_layer_up"); - TCase *TCase_lcp_this_layer_down = tcase_create("Unit test for lcp_this_layer_down"); - TCase *TCase_lcp_this_layer_started = tcase_create("Unit test for lcp_this_layer_started"); - TCase *TCase_lcp_this_layer_finished = tcase_create("Unit test for lcp_this_layer_finished"); - TCase *TCase_lcp_initialize_restart_count = tcase_create("Unit test for lcp_initialize_restart_count"); - TCase *TCase_lcp_send_code_reject = tcase_create("Unit test for lcp_send_code_reject"); - TCase *TCase_lcp_send_echo_reply = tcase_create("Unit test for lcp_send_echo_reply"); - TCase *TCase_auth = tcase_create("Unit test for auth"); - TCase *TCase_deauth = tcase_create("Unit test for deauth"); - TCase *TCase_auth_req = tcase_create("Unit test for auth_req"); - TCase *TCase_auth_rsp = tcase_create("Unit test for auth_rsp"); - TCase *TCase_auth_start_timer = tcase_create("Unit test for auth_start_timer"); - TCase *TCase_ipcp_send_ack = tcase_create("Unit test for ipcp_send_ack"); - TCase *TCase_ipcp_send_nack = tcase_create("Unit test for ipcp_send_nack"); - TCase *TCase_ipcp_bring_up = tcase_create("Unit test for ipcp_bring_up"); - TCase *TCase_ipcp_bring_down = tcase_create("Unit test for ipcp_bring_down"); - TCase *TCase_ipcp_start_timer = tcase_create("Unit test for ipcp_start_timer"); - TCase *TCase_pico_ppp_poll = tcase_create("Unit test for pico_ppp_poll"); - TCase *TCase_pico_ppp_link_state = tcase_create("Unit test for pico_ppp_link_state"); - TCase *TCase_check_to_modem = tcase_create("Unit test for check_to_modem"); - TCase *TCase_check_to_lcp = tcase_create("Unit test for check_to_lcp"); - TCase 
*TCase_check_to_auth = tcase_create("Unit test for check_to_auth"); - TCase *TCase_check_to_ipcp = tcase_create("Unit test for check_to_ipcp"); - TCase *TCase_pico_ppp_tick = tcase_create("Unit test for pico_ppp_tick"); - - - tcase_add_test(TCase_ppp_serial_send_escape, tc_ppp_serial_send_escape); - suite_add_tcase(s, TCase_ppp_serial_send_escape); - tcase_add_test(TCase_lcp_timer_start, tc_lcp_timer_start); - suite_add_tcase(s, TCase_lcp_timer_start); - tcase_add_test(TCase_lcp_zero_restart_count, tc_lcp_zero_restart_count); - suite_add_tcase(s, TCase_lcp_zero_restart_count); - tcase_add_test(TCase_lcp_timer_stop, tc_lcp_timer_stop); - suite_add_tcase(s, TCase_lcp_timer_stop); - tcase_add_test(TCase_ppp_ctl_packet_size, tc_ppp_ctl_packet_size); - suite_add_tcase(s, TCase_ppp_ctl_packet_size); - tcase_add_test(TCase_ppp_fcs_char, tc_ppp_fcs_char); - suite_add_tcase(s, TCase_ppp_fcs_char); - tcase_add_test(TCase_ppp_fcs_continue, tc_ppp_fcs_continue); - suite_add_tcase(s, TCase_ppp_fcs_continue); - tcase_add_test(TCase_ppp_fcs_finish, tc_ppp_fcs_finish); - suite_add_tcase(s, TCase_ppp_fcs_finish); - tcase_add_test(TCase_ppp_fcs_start, tc_ppp_fcs_start); - suite_add_tcase(s, TCase_ppp_fcs_start); - tcase_add_test(TCase_ppp_fcs_verify, tc_ppp_fcs_verify); - suite_add_tcase(s, TCase_ppp_fcs_verify); - tcase_add_test(TCase_pico_ppp_ctl_send, tc_pico_ppp_ctl_send); - suite_add_tcase(s, TCase_pico_ppp_ctl_send); - tcase_add_test(TCase_pico_ppp_send, tc_pico_ppp_send); - suite_add_tcase(s, TCase_pico_ppp_send); - tcase_add_test(TCase_ppp_modem_start_timer, tc_ppp_modem_start_timer); - suite_add_tcase(s, TCase_ppp_modem_start_timer); - tcase_add_test(TCase_ppp_modem_send_reset, tc_ppp_modem_send_reset); - suite_add_tcase(s, TCase_ppp_modem_send_reset); - tcase_add_test(TCase_ppp_modem_send_echo, tc_ppp_modem_send_echo); - suite_add_tcase(s, TCase_ppp_modem_send_echo); - tcase_add_test(TCase_ppp_modem_send_creg, tc_ppp_modem_send_creg); - suite_add_tcase(s, 
TCase_ppp_modem_send_creg); - tcase_add_test(TCase_ppp_modem_send_cgreg, tc_ppp_modem_send_cgreg); - suite_add_tcase(s, TCase_ppp_modem_send_cgreg); - tcase_add_test(TCase_ppp_modem_send_cgdcont, tc_ppp_modem_send_cgdcont); - suite_add_tcase(s, TCase_ppp_modem_send_cgdcont); - tcase_add_test(TCase_ppp_modem_send_cgatt, tc_ppp_modem_send_cgatt); - suite_add_tcase(s, TCase_ppp_modem_send_cgatt); - tcase_add_test(TCase_ppp_modem_send_dial, tc_ppp_modem_send_dial); - suite_add_tcase(s, TCase_ppp_modem_send_dial); - tcase_add_test(TCase_ppp_modem_connected, tc_ppp_modem_connected); - suite_add_tcase(s, TCase_ppp_modem_connected); - tcase_add_test(TCase_ppp_modem_disconnected, tc_ppp_modem_disconnected); - suite_add_tcase(s, TCase_ppp_modem_disconnected); - tcase_add_test(TCase_ppp_modem_recv, tc_ppp_modem_recv); - suite_add_tcase(s, TCase_ppp_modem_recv); - tcase_add_test(TCase_lcp_send_configure_request, tc_lcp_send_configure_request); - suite_add_tcase(s, TCase_lcp_send_configure_request); - tcase_add_test(TCase_lcp_optflags, tc_lcp_optflags); - suite_add_tcase(s, TCase_lcp_optflags); - tcase_add_test(TCase_lcp_send_configure_ack, tc_lcp_send_configure_ack); - suite_add_tcase(s, TCase_lcp_send_configure_ack); - tcase_add_test(TCase_lcp_send_terminate_request, tc_lcp_send_terminate_request); - suite_add_tcase(s, TCase_lcp_send_terminate_request); - tcase_add_test(TCase_lcp_send_terminate_ack, tc_lcp_send_terminate_ack); - suite_add_tcase(s, TCase_lcp_send_terminate_ack); - tcase_add_test(TCase_lcp_send_configure_nack, tc_lcp_send_configure_nack); - suite_add_tcase(s, TCase_lcp_send_configure_nack); - tcase_add_test(TCase_lcp_process_in, tc_lcp_process_in); - suite_add_tcase(s, TCase_lcp_process_in); - tcase_add_test(TCase_pap_process_in, tc_pap_process_in); - suite_add_tcase(s, TCase_pap_process_in); - tcase_add_test(TCase_chap_process_in, tc_chap_process_in); - suite_add_tcase(s, TCase_chap_process_in); - tcase_add_test(TCase_ipcp_ack, tc_ipcp_ack); - 
suite_add_tcase(s, TCase_ipcp_ack); - tcase_add_test(TCase_uint32_t, tc_uint32_t); - suite_add_tcase(s, TCase_uint32_t); - tcase_add_test(TCase_ipcp_request_add_address, tc_ipcp_request_add_address); - suite_add_tcase(s, TCase_ipcp_request_add_address); - tcase_add_test(TCase_ipcp_request_fill, tc_ipcp_request_fill); - suite_add_tcase(s, TCase_ipcp_request_fill); - tcase_add_test(TCase_ipcp_send_req, tc_ipcp_send_req); - suite_add_tcase(s, TCase_ipcp_send_req); - tcase_add_test(TCase_ipcp_reject_vj, tc_ipcp_reject_vj); - suite_add_tcase(s, TCase_ipcp_reject_vj); - tcase_add_test(TCase_ppp_ipv4_conf, tc_ppp_ipv4_conf); - suite_add_tcase(s, TCase_ppp_ipv4_conf); - tcase_add_test(TCase_ipcp_process_in, tc_ipcp_process_in); - suite_add_tcase(s, TCase_ipcp_process_in); - tcase_add_test(TCase_ipcp6_process_in, tc_ipcp6_process_in); - suite_add_tcase(s, TCase_ipcp6_process_in); - tcase_add_test(TCase_ppp_process_packet_payload, tc_ppp_process_packet_payload); - suite_add_tcase(s, TCase_ppp_process_packet_payload); - tcase_add_test(TCase_ppp_process_packet, tc_ppp_process_packet); - suite_add_tcase(s, TCase_ppp_process_packet); - tcase_add_test(TCase_ppp_recv_data, tc_ppp_recv_data); - suite_add_tcase(s, TCase_ppp_recv_data); - tcase_add_test(TCase_lcp_this_layer_up, tc_lcp_this_layer_up); - suite_add_tcase(s, TCase_lcp_this_layer_up); - tcase_add_test(TCase_lcp_this_layer_down, tc_lcp_this_layer_down); - suite_add_tcase(s, TCase_lcp_this_layer_down); - tcase_add_test(TCase_lcp_this_layer_started, tc_lcp_this_layer_started); - suite_add_tcase(s, TCase_lcp_this_layer_started); - tcase_add_test(TCase_lcp_this_layer_finished, tc_lcp_this_layer_finished); - suite_add_tcase(s, TCase_lcp_this_layer_finished); - tcase_add_test(TCase_lcp_initialize_restart_count, tc_lcp_initialize_restart_count); - suite_add_tcase(s, TCase_lcp_initialize_restart_count); - tcase_add_test(TCase_lcp_send_code_reject, tc_lcp_send_code_reject); - suite_add_tcase(s, TCase_lcp_send_code_reject); - 
tcase_add_test(TCase_lcp_send_echo_reply, tc_lcp_send_echo_reply); - suite_add_tcase(s, TCase_lcp_send_echo_reply); - tcase_add_test(TCase_auth, tc_auth); - suite_add_tcase(s, TCase_auth); - tcase_add_test(TCase_deauth, tc_deauth); - suite_add_tcase(s, TCase_deauth); - tcase_add_test(TCase_auth_req, tc_auth_req); - suite_add_tcase(s, TCase_auth_req); - tcase_add_test(TCase_auth_rsp, tc_auth_rsp); - suite_add_tcase(s, TCase_auth_rsp); - tcase_add_test(TCase_auth_start_timer, tc_auth_start_timer); - suite_add_tcase(s, TCase_auth_start_timer); - tcase_add_test(TCase_ipcp_send_ack, tc_ipcp_send_ack); - suite_add_tcase(s, TCase_ipcp_send_ack); - tcase_add_test(TCase_ipcp_send_nack, tc_ipcp_send_nack); - suite_add_tcase(s, TCase_ipcp_send_nack); - tcase_add_test(TCase_ipcp_bring_up, tc_ipcp_bring_up); - suite_add_tcase(s, TCase_ipcp_bring_up); - tcase_add_test(TCase_ipcp_bring_down, tc_ipcp_bring_down); - suite_add_tcase(s, TCase_ipcp_bring_down); - tcase_add_test(TCase_ipcp_start_timer, tc_ipcp_start_timer); - suite_add_tcase(s, TCase_ipcp_start_timer); - tcase_add_test(TCase_pico_ppp_poll, tc_pico_ppp_poll); - suite_add_tcase(s, TCase_pico_ppp_poll); - tcase_add_test(TCase_pico_ppp_link_state, tc_pico_ppp_link_state); - suite_add_tcase(s, TCase_pico_ppp_link_state); - tcase_add_test(TCase_check_to_modem, tc_check_to_modem); - suite_add_tcase(s, TCase_check_to_modem); - tcase_add_test(TCase_check_to_lcp, tc_check_to_lcp); - suite_add_tcase(s, TCase_check_to_lcp); - tcase_add_test(TCase_check_to_auth, tc_check_to_auth); - suite_add_tcase(s, TCase_check_to_auth); - tcase_add_test(TCase_check_to_ipcp, tc_check_to_ipcp); - suite_add_tcase(s, TCase_check_to_ipcp); - tcase_add_test(TCase_pico_ppp_tick, tc_pico_ppp_tick); - suite_add_tcase(s, TCase_pico_ppp_tick); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - mock_modem_state = modem_state; - mock_lcp_state = lcp_state; - mock_auth_state = auth_state; - 
mock_ipcp_state = ipcp_state; - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_dns_client.c b/kernel/picotcp/test/unit/modunit_pico_dns_client.c deleted file mode 100644 index 7e3831e..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_dns_client.c +++ /dev/null @@ -1,229 +0,0 @@ -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_dns_client.h" -#include "pico_tree.h" -#include "pico_udp.h" -#include "modules/pico_dns_client.c" -#include "check.h" - -Suite *pico_suite(void); - -START_TEST(tc_pico_dns_client_callback) -{ - struct pico_socket *s = pico_udp_open(); - s->proto = &pico_proto_udp; - - fail_if(!s); - - /* Test with ERR */ - pico_dns_client_callback(PICO_SOCK_EV_ERR, s); - - /* Test with failing RD */ - pico_dns_client_callback(PICO_SOCK_EV_RD, s); - -} -END_TEST -START_TEST(tc_pico_dns_client_retransmission) -{ - /* TODO: test this: static void pico_dns_client_retransmission(pico_time now, void *arg); */ -} -END_TEST -START_TEST(tc_dns_ns_cmp) -{ - /* TODO: test this: static int dns_ns_cmp(void *ka, void *kb) */ -} -END_TEST -START_TEST(tc_dns_query_cmp) -{ - /* TODO: test this: static int dns_query_cmp(void *ka, void *kb) */ -} -END_TEST -START_TEST(tc_pico_dns_client_del_ns) -{ - /* TODO: test this: static int pico_dns_client_del_ns(struct pico_ip4 *ns_addr) */ -} -END_TEST -START_TEST(tc_pico_dns_ns) -{ - /* TODO: test this: static struct pico_dns_ns *pico_dns_client_add_ns(struct pico_ip4 *ns_addr) */ -} -END_TEST -START_TEST(tc_pico_dns_client_del_query) -{ - /* TODO: test this: static int pico_dns_client_del_query(uint16_t id) */ -} -END_TEST -START_TEST(tc_pico_dns_query) -{ - /* TODO: test this: static struct pico_dns_query *pico_dns_client_find_query(uint16_t id) */ -} -END_TEST -START_TEST(tc_pico_dns_client_strlen) 
-{ - /* TODO: test this: static uint16_t pico_dns_client_strlen(const char *url) */ -} -END_TEST -START_TEST(tc_pico_dns_client_seek) -{ - /* TODO: test this: static char *pico_dns_client_seek(char *ptr) */ -} -END_TEST -START_TEST(tc_pico_dns_client_mirror) -{ - /* TODO: test this: static int8_t pico_dns_client_mirror(char *ptr) */ -} -END_TEST -START_TEST(tc_pico_dns_client_query_prefix) -{ - /* TODO: test this: static int pico_dns_client_query_prefix(struct pico_dns_prefix *pre) */ -} -END_TEST -START_TEST(tc_pico_dns_client_query_suffix) -{ - /* TODO: test this: static int pico_dns_client_query_suffix(struct pico_dns_query_suffix *suf, uint16_t type, uint16_t class) */ -} -END_TEST -START_TEST(tc_pico_dns_client_query_domain) -{ - /* TODO: test this: static int pico_dns_client_query_domain(char *ptr) */ -} -END_TEST -START_TEST(tc_pico_dns_client_answer_domain) -{ - /* TODO: test this: static int pico_dns_client_answer_domain(char *ptr) */ -} -END_TEST -START_TEST(tc_pico_dns_client_check_prefix) -{ - /* TODO: test this: static int pico_dns_client_check_prefix(struct pico_dns_prefix *pre) */ -} -END_TEST -START_TEST(tc_pico_dns_client_check_qsuffix) -{ - /* TODO: test this: static int pico_dns_client_check_qsuffix(struct pico_dns_query_suffix *suf, struct pico_dns_query *q) */ -} -END_TEST -START_TEST(tc_pico_dns_client_check_asuffix) -{ - /* TODO: test this: static int pico_dns_client_check_asuffix(struct pico_dns_answer_suffix *suf, struct pico_dns_query *q) */ -} -END_TEST -START_TEST(tc_pico_dns_client_seek_suffix) -{ - /* TODO: test this: static char *pico_dns_client_seek_suffix(char *suf, struct pico_dns_prefix *pre, struct pico_dns_query *q) */ -} -END_TEST -START_TEST(tc_pico_dns_client_send) -{ - /* TODO: test this: static int pico_dns_client_send(struct pico_dns_query *q) */ -} -END_TEST -START_TEST(tc_pico_dns_client_user_callback) -{ - /* TODO: test this: static int pico_dns_client_user_callback(struct pico_dns_answer_suffix *asuffix, struct 
pico_dns_query *q) */ -} -END_TEST -START_TEST(tc_pico_dns_client_getaddr_init) -{ - /* TODO: test this: static int pico_dns_client_getaddr_init(const char *url, uint16_t proto, void (*callback)(char *, void *), void *arg) */ -} -END_TEST -START_TEST(tc_pico_dns_ipv6_set_ptr) -{ - /* TODO: test this: static void pico_dns_ipv6_set_ptr(const char *ip, char *dst) */ -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_dns_client_callback = tcase_create("Unit test for pico_dns_client_callback"); - TCase *TCase_pico_dns_client_retransmission = tcase_create("Unit test for pico_dns_client_retransmission"); - TCase *TCase_dns_ns_cmp = tcase_create("Unit test for dns_ns_cmp"); - TCase *TCase_dns_query_cmp = tcase_create("Unit test for dns_query_cmp"); - TCase *TCase_pico_dns_client_del_ns = tcase_create("Unit test for pico_dns_client_del_ns"); - TCase *TCase_pico_dns_ns = tcase_create("Unit test for pico_dns_ns"); - TCase *TCase_pico_dns_client_del_query = tcase_create("Unit test for pico_dns_client_del_query"); - TCase *TCase_pico_dns_query = tcase_create("Unit test for pico_dns_query"); - TCase *TCase_pico_dns_client_strlen = tcase_create("Unit test for pico_dns_client_strlen"); - TCase *TCase_pico_dns_client_seek = tcase_create("Unit test for pico_dns_client_seek"); - TCase *TCase_pico_dns_client_mirror = tcase_create("Unit test for pico_dns_client_mirror"); - TCase *TCase_pico_dns_client_query_prefix = tcase_create("Unit test for pico_dns_client_query_prefix"); - TCase *TCase_pico_dns_client_query_suffix = tcase_create("Unit test for pico_dns_client_query_suffix"); - TCase *TCase_pico_dns_client_query_domain = tcase_create("Unit test for pico_dns_client_query_domain"); - TCase *TCase_pico_dns_client_answer_domain = tcase_create("Unit test for pico_dns_client_answer_domain"); - TCase *TCase_pico_dns_client_check_prefix = tcase_create("Unit test for pico_dns_client_check_prefix"); - TCase 
*TCase_pico_dns_client_check_qsuffix = tcase_create("Unit test for pico_dns_client_check_qsuffix"); - TCase *TCase_pico_dns_client_check_asuffix = tcase_create("Unit test for pico_dns_client_check_asuffix"); - TCase *TCase_pico_dns_client_seek_suffix = tcase_create("Unit test for pico_dns_client_seek_suffix"); - TCase *TCase_pico_dns_client_send = tcase_create("Unit test for pico_dns_client_send"); - TCase *TCase_pico_dns_client_user_callback = tcase_create("Unit test for pico_dns_client_user_callback"); - TCase *TCase_pico_dns_client_getaddr_init = tcase_create("Unit test for pico_dns_client_getaddr_init"); - TCase *TCase_pico_dns_ipv6_set_ptr = tcase_create("Unit test for pico_dns_ipv6_set_ptr"); - - - tcase_add_test(TCase_pico_dns_client_callback, tc_pico_dns_client_callback); - suite_add_tcase(s, TCase_pico_dns_client_callback); - tcase_add_test(TCase_pico_dns_client_retransmission, tc_pico_dns_client_retransmission); - suite_add_tcase(s, TCase_pico_dns_client_retransmission); - tcase_add_test(TCase_dns_ns_cmp, tc_dns_ns_cmp); - suite_add_tcase(s, TCase_dns_ns_cmp); - tcase_add_test(TCase_dns_query_cmp, tc_dns_query_cmp); - suite_add_tcase(s, TCase_dns_query_cmp); - tcase_add_test(TCase_pico_dns_client_del_ns, tc_pico_dns_client_del_ns); - suite_add_tcase(s, TCase_pico_dns_client_del_ns); - tcase_add_test(TCase_pico_dns_ns, tc_pico_dns_ns); - suite_add_tcase(s, TCase_pico_dns_ns); - tcase_add_test(TCase_pico_dns_client_del_query, tc_pico_dns_client_del_query); - suite_add_tcase(s, TCase_pico_dns_client_del_query); - tcase_add_test(TCase_pico_dns_query, tc_pico_dns_query); - suite_add_tcase(s, TCase_pico_dns_query); - tcase_add_test(TCase_pico_dns_client_strlen, tc_pico_dns_client_strlen); - suite_add_tcase(s, TCase_pico_dns_client_strlen); - tcase_add_test(TCase_pico_dns_client_seek, tc_pico_dns_client_seek); - suite_add_tcase(s, TCase_pico_dns_client_seek); - tcase_add_test(TCase_pico_dns_client_mirror, tc_pico_dns_client_mirror); - suite_add_tcase(s, 
TCase_pico_dns_client_mirror); - tcase_add_test(TCase_pico_dns_client_query_prefix, tc_pico_dns_client_query_prefix); - suite_add_tcase(s, TCase_pico_dns_client_query_prefix); - tcase_add_test(TCase_pico_dns_client_query_suffix, tc_pico_dns_client_query_suffix); - suite_add_tcase(s, TCase_pico_dns_client_query_suffix); - tcase_add_test(TCase_pico_dns_client_query_domain, tc_pico_dns_client_query_domain); - suite_add_tcase(s, TCase_pico_dns_client_query_domain); - tcase_add_test(TCase_pico_dns_client_answer_domain, tc_pico_dns_client_answer_domain); - suite_add_tcase(s, TCase_pico_dns_client_answer_domain); - tcase_add_test(TCase_pico_dns_client_check_prefix, tc_pico_dns_client_check_prefix); - suite_add_tcase(s, TCase_pico_dns_client_check_prefix); - tcase_add_test(TCase_pico_dns_client_check_qsuffix, tc_pico_dns_client_check_qsuffix); - suite_add_tcase(s, TCase_pico_dns_client_check_qsuffix); - tcase_add_test(TCase_pico_dns_client_check_asuffix, tc_pico_dns_client_check_asuffix); - suite_add_tcase(s, TCase_pico_dns_client_check_asuffix); - tcase_add_test(TCase_pico_dns_client_seek_suffix, tc_pico_dns_client_seek_suffix); - suite_add_tcase(s, TCase_pico_dns_client_seek_suffix); - tcase_add_test(TCase_pico_dns_client_send, tc_pico_dns_client_send); - suite_add_tcase(s, TCase_pico_dns_client_send); - tcase_add_test(TCase_pico_dns_client_user_callback, tc_pico_dns_client_user_callback); - suite_add_tcase(s, TCase_pico_dns_client_user_callback); - tcase_add_test(TCase_pico_dns_client_getaddr_init, tc_pico_dns_client_getaddr_init); - suite_add_tcase(s, TCase_pico_dns_client_getaddr_init); - tcase_add_test(TCase_pico_dns_ipv6_set_ptr, tc_pico_dns_ipv6_set_ptr); - suite_add_tcase(s, TCase_pico_dns_ipv6_set_ptr); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git 
a/kernel/picotcp/test/unit/modunit_pico_dns_common.c b/kernel/picotcp/test/unit/modunit_pico_dns_common.c deleted file mode 100644 index b172796..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_dns_common.c +++ /dev/null @@ -1,1446 +0,0 @@ -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_dns_common.h" -#include "pico_tree.h" -#include "modules/pico_dns_common.c" -#include "check.h" - -Suite *pico_suite(void); - -START_TEST(tc_dns_rdata_cmp) /* MARK: dns_rdata_cmp */ -{ - uint8_t rdata1[10] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 - }; - uint8_t rdata2[10] = { - 1, 2, 3, 3, 5, 6, 7, 8, 9, 10 - }; - uint8_t rdata3[1] = { - 2 - }; - uint8_t rdata4[1] = { - 1 - }; - uint8_t rdata5[11] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9 - }; - uint8_t rdata6[12] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - }; - - uint8_t rdata7[5] = { - 72, 69, 76, 76, 79 - }; - - uint8_t rdata8[5] = { - 104, 101, 108, 108, 111 - }; - - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Check equal data and size */ - ret = pico_dns_rdata_cmp(rdata1, rdata1, 10, 10, 0); - fail_unless(!ret, "dns_rdata_cmp failed with equal data and size, case-sensitive!\n"); - - /* Check smaller data and equal size */ - ret = pico_dns_rdata_cmp(rdata1, rdata2, 10, 10, 0); - fail_unless(ret > 0, "dns_rdata_cmp failed with smaller data and equal size, case-sensitive!\n"); - - /* Check larger data and smaller size */ - ret = pico_dns_rdata_cmp(rdata1, rdata3, 10, 1, 0); - fail_unless(ret < 0, "dns_rdata_cmp failed with larger data and smaller size, case-sensitive!\n"); - - /* Check equal data and smaller size */ - ret = pico_dns_rdata_cmp(rdata1, rdata4, 10, 1, 0); - fail_unless(ret > 0, "dns_rdata_cmp failed with equal data and smaller size, case-sensitive!\n"); - - /* Check smaller data and larger size */ - ret = pico_dns_rdata_cmp(rdata1, rdata5, 10, 11, 0); - 
fail_unless(ret < 0, "dns_rdata_cmp failed with equal data and larger size, case-sensitive!\n"); - - /* Check larger data and larger size */ - ret = pico_dns_rdata_cmp(rdata1, rdata6, 10, 12, 0); - fail_unless(ret < 0, "dns_rdata_cmp failed with larger data and larger size, case-sensitive!\n"); - - /* Check for tolower effect */ - ret = pico_dns_rdata_cmp(rdata7, rdata8, 5, 5, 0); - fail_unless(ret < 0, "dns_rdata_cmp failed with check for tolower effect, case-sensitive!\n"); - - /* now check with case-insensitive */ - - /* Check equal data and size */ - ret = pico_dns_rdata_cmp(rdata1, rdata1, 10, 10, 1); - fail_unless(!ret, "dns_rdata_cmp failed with equal data and size, case-insensitive!\n"); - - /* Check smaller data and equal size */ - ret = pico_dns_rdata_cmp(rdata1, rdata2, 10, 10, 1); - fail_unless(ret > 0, "dns_rdata_cmp failed with smaller data and equal size, case-insensitive!\n"); - - /* Check larger data and smaller size */ - ret = pico_dns_rdata_cmp(rdata1, rdata3, 10, 1, 1); - fail_unless(ret < 0, "dns_rdata_cmp failed with larger data and smaller size, case-insensitive!\n"); - - /* Check equal data and smaller size */ - ret = pico_dns_rdata_cmp(rdata1, rdata4, 10, 1, 1); - fail_unless(ret > 0, "dns_rdata_cmp failed with equal data and smaller size, case-insensitive!\n"); - - /* Check smaller data and larger size */ - ret = pico_dns_rdata_cmp(rdata1, rdata5, 10, 11, 1); - fail_unless(ret < 0, "dns_rdata_cmp failed with equal data and larger size, case-insensitive!\n"); - - /* Check larger data and larger size */ - ret = pico_dns_rdata_cmp(rdata1, rdata6, 10, 12, 1); - fail_unless(ret < 0, "dns_rdata_cmp failed with larger data and larger size, case-insensitive!\n"); - - /* Check for tolower effect */ - ret = pico_dns_rdata_cmp(rdata7, rdata8, 5, 5, 1); - fail_unless(ret == 0, "dns_rdata_cmp failed with check for tolower effect, case-insensitive!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST 
-START_TEST(tc_dns_question_cmp) /* MARK: dns_question_cmp */ -{ - struct pico_dns_question *a = NULL, *b = NULL; - const char *url1 = "host (2).local"; - const char *url3 = "host.local"; - const char *url2 = "192.168.2.1"; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - a = pico_dns_question_create(url1, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "Question A could not be created!\n"); - b = pico_dns_question_create(url3, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "Question B could not be created!\n"); - - ret = pico_dns_question_cmp((void *)a, (void *)b); - fail_unless(ret > 0, "Question is lexicographically smaller"); - pico_dns_question_delete((void **)&a); - pico_dns_question_delete((void **)&b); - - a = pico_dns_question_create(url2, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_PTR, - PICO_DNS_CLASS_IN, 1); - fail_if(!a, "Question A could not be created!\n"); - b = pico_dns_question_create(url2, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_PTR, - PICO_DNS_CLASS_IN, 1); - fail_if(!b, "Question B could not be created!\n"); - - ret = pico_dns_question_cmp((void *)a, (void *)b); - fail_unless(!ret, "Question A and B should be equal!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_dns_qtree_insert) /* MARK: dns_qtree_insert*/ -{ - const char *url = "host.local"; - const char *url2 = "host (2).local"; - const char *url3 = "host (3).local"; - struct pico_dns_question *a = NULL, *b = NULL, *c = NULL; - uint16_t qlen = 0; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_DNS_QTREE_DECLARE(qtree2); - - printf("*********************** starting %s * \n", __func__); - - a = pico_dns_question_create(url, &qlen, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a || !(a->qname) || !(a->qsuffix), "Could not create question A!\n"); - b = pico_dns_question_create(url2, &qlen, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, 
- PICO_DNS_CLASS_IN, 0); - fail_if(!b || !(b->qname) || !(b->qsuffix), "Coud not create question B!\n"); - - pico_tree_insert(&qtree, a); - fail_unless(pico_tree_count(&qtree) == 1, - "pico_tree_insert failed with tree 1 question A!\n"); - - pico_tree_insert(&qtree, b); - fail_unless(2 == pico_tree_count(&qtree), - "pico_tree_insert failed with tree 1 question B!\n"); - - PICO_DNS_QTREE_DESTROY(&qtree); - fail_unless(0 == pico_tree_count(&qtree), - "Question tree not properly destroyed!\n"); - c = pico_dns_question_create(url3, &qlen, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!c || !(c->qname) || !(c->qsuffix), "Coud not create question B!\n"); - pico_tree_insert(&qtree2, c); - fail_unless(1 == pico_tree_count(&qtree2), - "pico_tree_insert failed with tree 2 question B!\n"); - PICO_DNS_QTREE_DESTROY(&qtree2); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_dns_record_cmp) /* MARK: dns_record_cmp */ -{ - struct pico_dns_record *a = NULL; - struct pico_dns_record *b = NULL; - const char *url1 = "foo.local"; - const char *url3 = "a.local"; - struct pico_ip4 rdata = { - 0 - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create test records */ - a = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "Record A could not be created!\n"); - b = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "Record B could not be created!\n"); - - /* Try to compare equal records */ - ret = pico_dns_record_cmp((void *) a, (void *) b); - fail_unless(!ret, "dns_record_cmp failed with equal records - %d!\n", ret); - pico_dns_record_delete((void **)&a); - pico_dns_record_delete((void **)&b); - - /* Create different test records */ - a = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "Record A could 
not be created!\n"); - b = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "Record B could not be created!\n"); - - /* Try to compare records with equal rname but different type */ - ret = pico_dns_record_cmp((void *) a, (void *) b); - fail_unless(ret > 0, "dns_record_cmp failed with same name, different types!\n"); - pico_dns_record_delete((void **)&a); - pico_dns_record_delete((void **)&b); - - /* Create different test records */ - a = pico_dns_record_create(url3, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "Record A could not be created!\n"); - b = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "Record B could not be created!\n"); - - /* Try to compare records with different rname but equal type */ - ret = pico_dns_record_cmp((void *) a, (void *) b); - fail_unless(ret < 0, "mdns_cmp failed with different name, same types!\n"); - pico_dns_record_delete((void **)&a); - pico_dns_record_delete((void **)&b); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_dns_rtree_insert) /* MARK: dns_rtree_insert*/ -{ - PICO_DNS_RTREE_DECLARE(rtree); - PICO_DNS_RTREE_DECLARE(rtree2); - struct pico_dns_record *a = NULL; - struct pico_dns_record *b = NULL, *c = NULL; - const char *url1 = "foo.local"; - struct pico_ip4 rdata = { - 0 - }; - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create test records */ - a = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "Record A could not be created!\n"); - b = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "Record B could not be created!\n"); - - pico_tree_insert(&rtree, a); - pico_tree_insert(&rtree, b); - - PICO_DNS_RTREE_DESTROY(&rtree); - fail_unless(pico_tree_count(&rtree) == 0, - "Record tree not 
properly destroyed!\n"); - - c = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!c, "Record C could not be created!\n"); - pico_tree_insert(&rtree2, c); - - PICO_DNS_RTREE_DESTROY(&rtree2); - fail_unless(pico_tree_count(&rtree2) == 0, - "Record tree not properly destroyed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_dns_record_cmp_name_type) /* MARK: dns_record_cmp_name_type */ -{ - struct pico_dns_record *a = NULL; - struct pico_dns_record *b = NULL; - const char *url1 = "foo.local"; - const char *url3 = "a.local"; - struct pico_ip4 rdata = { - 0 - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create different test records */ - a = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "Record A could not be created!\n"); - b = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "Record B could not be created!\n"); - - /* Try to compare records with equal rname but different type */ - ret = pico_dns_record_cmp_name_type((void *) a, (void *) b); - fail_unless(ret > 0, "dns_record_cmp failed with same name, different types!\n"); - pico_dns_record_delete((void **)&a); - pico_dns_record_delete((void **)&b); - - /* Create exactly the same test records */ - a = pico_dns_record_create(url3, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "Record A could not be created!\n"); - b = pico_dns_record_create(url3, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "Record B could not be created!\n"); - - /* Try to compare records with different rname but equal type */ - ret = pico_dns_record_cmp_name_type((void *) a, (void *) b); - fail_unless(!ret, "dns_record_cmp_name_type failed with same names, same types!\n"); - pico_dns_record_delete((void **)&a); - 
pico_dns_record_delete((void **)&b); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_fill_packet_header) /* MARK: dns_fill_packet_header */ -{ - struct pico_dns_header *header = NULL; - uint8_t answer_buf[12] = { - 0x00, 0x00, - 0x85, 0x00, - 0x00, 0x00, - 0x00, 0x01, - 0x00, 0x01, - 0x00, 0x01 - }; - uint8_t query_buf[12] = { - 0x00, 0x00, - 0x01, 0x00, - 0x00, 0x01, - 0x00, 0x01, - 0x00, 0x01, - 0x00, 0x01 - }; - int i = 0; - - printf("*********************** starting %s * \n", __func__); - - header = (struct pico_dns_header *) - PICO_ZALLOC(sizeof(struct pico_dns_header)); - - fail_if(NULL == header, "Not enough space!\n"); - - /* Create a query header */ - pico_dns_fill_packet_header(header, 1, 1, 1, 1); - - for (i = 0; i < 12; i++) - printf("### %02x :: %02x\n", ((uint8_t*)header)[i], query_buf[i]); - fail_unless(0 == memcmp((void *)header, (void *)query_buf, 12), - "Comparing query header failed!\n"); - - /* Create a answer header */ - pico_dns_fill_packet_header(header, 0, 1, 1, 1); - - fail_unless(0 == memcmp((void *)header, (void *)answer_buf, 12), - "Comparing answer header failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_fill_packet_rr_section) /* MARK: dns_fill_packet_rr_section */ -{ - printf("*********************** starting %s * \n", __func__); - - /* TODO: Insert test here */ - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_fill_packet_rr_sections) /* MARK: dns_fill_packet_rr_sections */ -{ - pico_dns_packet *packet = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_DNS_RTREE_DECLARE(antree); - PICO_DNS_RTREE_DECLARE(nstree); - PICO_DNS_RTREE_DECLARE(artree); - struct pico_dns_record *record = NULL; - const char *rname = "picotcp.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint8_t cmp_buf[39] = { - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 
0x00u, 0x00u, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x00u, 0x00u, 0x00u, 0x78u, - 0x00u, 0x04u, - 10u, 10u, 0u, 1u - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create a new A record */ - record = pico_dns_record_create(rname, rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 120); - fail_if(!record, "dns_record_create failed!\n"); - - /* Add the record to a tree */ - pico_tree_insert(&antree, record); - - /* Try to fill the rr sections with packet as a NULL-pointer */ - ret = pico_dns_fill_packet_rr_sections(packet, &qtree, &antree, - &nstree, &artree); - fail_unless(ret, "Checking of params failed!\n"); - - len = (uint16_t)sizeof(struct pico_dns_header); - pico_tree_size(&qtree, &len, &pico_dns_question_size); - pico_tree_size(&antree, &len, &pico_dns_record_size); - pico_tree_size(&nstree, &len, &pico_dns_record_size); - pico_tree_size(&artree, &len, &pico_dns_record_size); - printf("Packet len: %d\n", len); - - /* Allocate the packet with the right size */ - packet = (pico_dns_packet *)PICO_ZALLOC((size_t)len); - fail_if(NULL == packet, "Allocating packet failed!\n"); - fail_if(pico_dns_fill_packet_rr_sections(packet, &qtree, &antree, &nstree, - &artree), - "Filling of rr sections failed!\n"); - - fail_unless(memcmp((void *)packet, (void *)cmp_buf, 39) == 0, - "Filling of rr sections went wrong!\n"); - PICO_FREE(packet); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_fill_packet_question_section) /* MARK: dns_fill_packet_question_section */ -{ - pico_dns_packet *packet = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - struct pico_dns_question *a = NULL, *b = NULL; - const char *qurl = "picotcp.com"; - uint8_t cmp_buf[45] = { - 0x00u, 0x00u, /* 2 */ - 0x00u, 0x00u, /* 2 */ - 0x00u, 0x00u, /* 2 */ - 0x00u, 0x00u, /* 2 */ - 0x00u, 0x00u, /* 2 */ - 0x00u, 0x00u, /* 2 //12 */ - 
0x06u, 'g', 'o', 'o', 'g', 'l', 'e', /* 7 */ - 0x03u, 'c', 'o', 'm', /* 4 */ - 0x00u, /* 1 //12 */ - 0x00u, 0x01u, - 0x00u, 0x01u, /* 4 */ - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', /* 8 */ - 0x03u, 'c', 'o', 'm', /* 4 */ - 0x00u, /* 1 //13 */ - 0x00u, 0x01u, - 0x00u, 0x01u - }; /* 4 */ - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create DNS questions and a vector of them */ - a = pico_dns_question_create(qurl, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(NULL == a, "dns_question_create failed!\n"); - b = pico_dns_question_create("google.com", &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(NULL == b, "dns_question_create failed!\n"); - - pico_tree_insert(&qtree, a); - pico_tree_insert(&qtree, b); - - /* Determine the length of the packet and provide space */ - len = (uint16_t)sizeof(struct pico_dns_header); - pico_tree_size(&qtree, &len, &pico_dns_question_size); - printf("Packet len: %d - 45\n", len); - packet = (pico_dns_packet *)PICO_ZALLOC((size_t)len); - - fail_if(NULL == packet, "Allocating packet failed!\n"); - fail_if(pico_dns_fill_packet_question_section(packet, &qtree), - "Filling of rr sections failed!\n"); - - fail_unless(memcmp((void *)packet, (void *)cmp_buf, 45) == 0, - "Filling of question sesction went wrong!\n"); - PICO_FREE(packet); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_packet_compress_find_ptr) /* MARK: dns_packet_compress_find_ptr */ -{ - uint8_t data[] = "abcdef\5local\0abcdef\4test\5local"; - uint8_t *name = (uint8_t *)(data + 24); - uint16_t len = 31; - uint8_t *ptr = NULL; - - printf("*********************** starting %s * \n", __func__); - - ptr = pico_dns_packet_compress_find_ptr(name, data, len); - fail_unless(ptr == (data + 6), "Finding compression ptr failed %p - %p!\n", ptr, - data + 6); - printf("*********************** ending %s * \n", __func__); -} -END_TEST 
-START_TEST(tc_pico_dns_packet_compress_name) /* MARK: dns_packet_compress_name */ -{ - uint8_t buf[46] = { - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u - }; - - uint8_t *name = buf + 29u; - uint16_t len = 46; - int ret = 0; - printf("*********************** starting %s * \n", __func__); - - ret = pico_dns_packet_compress_name(name, buf, &len); - fail_unless(ret == 0, "dns_packet_compress_name returned error!\n"); - fail_unless(len == (46 - 11), "packet_compress_name return wrong length!\n"); - fail_unless(memcmp(name, "\xc0\x0c", 2) == 0, "packet_compress_name failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_packet_compress) /* MARK: dns_packet_compress */ -{ - uint8_t buf[83] = { - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x01u, - 0x00u, 0x00u, - 0x00u, 0x02u, - 0x00u, 0x00u, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x00u, 0x00u, 0x00, 0x0A, - 0x00u, 0x04u, - 0x0Au, 0x0Au, 0x0A, 0x0A, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x00u, 0x00u, 0x00, 0x0A, - 0x00u, 0x04u, - 0x0Au, 0x0Au, 0x0A, 0x0A - }; - uint8_t cmp_buf[61] = { - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x01u, - 0x00u, 0x00u, - 0x00u, 0x02u, - 0x00u, 0x00u, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0xC0u, 0x0Cu, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x00u, 0x00u, 0x00, 0x0A, - 0x00u, 0x04u, - 0x0Au, 0x0Au, 0x0A, 0x0A, - 0xC0u, 0x0Cu, - 0x00u, 0x01u, - 0x00u, 0x01u, - 
0x00u, 0x00u, 0x00, 0x0A, - 0x00u, 0x04u, - 0x0Au, 0x0Au, 0x0A, 0x0A - }; - pico_dns_packet *packet = (pico_dns_packet *)buf; - uint16_t len = 83; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - ret = pico_dns_packet_compress(packet, &len); - - fail_unless(ret == 0, "dns_packet_compress returned error!\n"); - fail_unless(len == (83 - 22), "packet_compress returned length %u!\n", len); - fail_unless(memcmp(packet, cmp_buf, 61) == 0, "packet_compress_name failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_question_fill_qsuffix) /* MARK: dns_question_fill_suffix */ -{ - struct pico_dns_question_suffix suffix; - printf("*********************** starting %s * \n", __func__); - - pico_dns_question_fill_suffix(&suffix, PICO_DNS_TYPE_A, PICO_DNS_CLASS_IN); - - fail_unless((suffix.qtype == short_be(PICO_DNS_TYPE_A)) && - (suffix.qclass == short_be(PICO_DNS_CLASS_IN)), - "Filling qsuffix failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_question_delete) /* MARK: dns_question_delete */ -{ - const char *qurl = "picotcp.com"; - uint16_t len = 0; - int ret = 0; - struct pico_dns_question *a = pico_dns_question_create(qurl, &len, - PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, - 0); - printf("*********************** starting %s * \n", __func__); - - ret = pico_dns_question_delete((void **)&a); - - fail_unless(ret == 0, "dns_question_delete returned error!\n"); - fail_unless(a == NULL, "dns_question_delete failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_question_create) /* MARK: dns_quesiton_create */ -{ - const char *qurl = "picotcp.com"; - const char *qurl2 = "1.2.3.4"; - const char *qurl3 = "2001:0db8:0000:0000:0000:0000:0000:0000"; - char buf[13] = { - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03u, 'c', 'o', 'm', - 0x00u - }; - char buf2[22] = 
{ - 0x01u, '4', - 0x01u, '3', - 0x01u, '2', - 0x01u, '1', - 0x07u, 'i', 'n', '-', 'a', 'd', 'd', 'r', - 0x04u, 'a', 'r', 'p', 'a', - 0x00u - }; - char buf3[74] = { - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '8', 0x01u, 'b', 0x01u, 'd', 0x01u, '0', - 0x01u, '1', 0x01u, '0', 0x01u, '0', 0x01u, '2', - 0x03u, 'I', 'P', '6', - 0x04u, 'A', 'R', 'P', 'A', - 0x00u - }; - uint16_t len = 0; - struct pico_dns_question *a = NULL; - - printf("*********************** starting %s * \n", __func__); - - /* First, plain A record */ - a = pico_dns_question_create(qurl, &len, - PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, - 0); - fail_if(a == NULL, "dns_question_created returned NULL!\n"); - fail_unless(strcmp(a->qname, buf) == 0, "url not converted correctly!\n"); - fail_unless(short_be(a->qsuffix->qtype) == PICO_DNS_TYPE_A, - "qtype not properly set!\n"); - fail_unless(short_be(a->qsuffix->qclass) == PICO_DNS_CLASS_IN, - "qclass not properly set!\n"); - pico_dns_question_delete((void **)&a); - - /* Reverse PTR record for IPv4 address */ - a = pico_dns_question_create(qurl2, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_PTR, PICO_DNS_CLASS_IN, 1); - fail_unless(strcmp(a->qname, buf2) == 0, "url2 not converted correctly! 
%s\n", a->qname); - fail_unless(short_be(a->qsuffix->qtype) == PICO_DNS_TYPE_PTR, - "qtype2 not properly set!\n"); - fail_unless(short_be(a->qsuffix->qclass) == PICO_DNS_CLASS_IN, - "qclass2 not properly set!\n"); - pico_dns_question_delete((void **)&a); - - /* Reverse PTR record for IPv6 address */ - a = pico_dns_question_create(qurl3, &len, PICO_PROTO_IPV6, - PICO_DNS_TYPE_PTR, PICO_DNS_CLASS_IN, 1); - fail_unless(strcmp(a->qname, buf3) == 0, "url3 not converted correctly!\n"); - fail_unless(short_be(a->qsuffix->qtype) == PICO_DNS_TYPE_PTR, - "qtype3 not properly set!\n"); - fail_unless(short_be(a->qsuffix->qclass) == PICO_DNS_CLASS_IN, - "qclass3 not properly set!\n"); - pico_dns_question_delete((void **)&a); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_query_create) /* MARK: dns_query_create */ -{ - pico_dns_packet *packet = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - const char *qurl = "picotcp.com"; - const char *qurl2 = "google.com"; - uint8_t buf[42] = { - 0x00u, 0x00u, - 0x01u, 0x00u, - 0x00u, 0x02u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x06u, 'g', 'o', 'o', 'g', 'l', 'e', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x07u, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0xc0u, 0x13u, - 0x00u, 0x01u, - 0x00u, 0x01u - }; - uint16_t len = 0; - int ret = 0; - struct pico_dns_question *a = NULL, *b = NULL; - - printf("*********************** starting %s * \n", __func__); - - - a = pico_dns_question_create(qurl, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a, "dns_question_create failed!\n"); - fail_unless(ret == 0, "dns_question_vector_add returned error!\n"); - b = pico_dns_question_create(qurl2, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b, "dns_question_create failed!\n"); - fail_unless(ret == 0, "dns_question_vector_add returned error!\n"); - - pico_tree_insert(&qtree, a); - pico_tree_insert(&qtree, b); - - packet = 
pico_dns_query_create(&qtree, NULL, NULL, NULL, &len); - fail_if(packet == NULL, "dns_query_create returned NULL!\n"); - fail_unless(0 == memcmp(buf, (void *)packet, 42), - "dns_query_created failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_record_fill_suffix) /* MARK: dns_record_fill_suffix */ -{ - struct pico_dns_record_suffix *suffix = NULL; - - printf("*********************** starting %s * \n", __func__); - - pico_dns_record_fill_suffix(&suffix, PICO_DNS_TYPE_A, PICO_DNS_CLASS_IN, - 120, 4); - - fail_unless((suffix->rtype == short_be(PICO_DNS_TYPE_A) && - suffix->rclass == short_be(PICO_DNS_CLASS_IN) && - suffix->rttl == long_be(120) && - suffix->rdlength == short_be(4)), - "Filling rsuffix failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_record_copy_flat) /* MARK: dns_record_copy_flat */ -{ - struct pico_dns_record *record = NULL; - const char *url = "picotcp.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint8_t buf[128] = { - 0 - }; - uint8_t *ptr = NULL; - uint8_t cmp_buf[27] = { - 0x07, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03, 'c', 'o', 'm', - 0x00, - 0x00, 0x01, - 0x00, 0x01, - 0x00, 0x00, 0x00, 0x78, - 0x00, 0x04, - 0x0A, 0x0A, 0x00, 0x01 - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - record = pico_dns_record_create(url, (void *)rdata, 4, - &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 120); - fail_if(!record, "dns_record_create failed!\n"); - - ptr = buf + 20; - - /* Try to copy the record to a flat buffer */ - ret = pico_dns_record_copy_flat(record, &ptr); - - fail_unless(ret == 0, "dns_record_copy_flat returned error!\n"); - fail_unless(memcmp(buf + 20, cmp_buf, 27) == 0, - "dns_record_copy_flat failed!\n"); - - /* FREE memory */ - pico_dns_record_delete((void **)&record); - printf("*********************** ending %s * \n", __func__); -} -END_TEST 
-START_TEST(tc_pico_dns_record_copy) /* MARK: dns_record_copy */ -{ - struct pico_dns_record *a = NULL, *b = NULL; - const char *url = "picotcp.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint16_t len = 0; - - a = pico_dns_record_create(url, (void *)rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 120); - fail_if(!a, "dns_record_create failed!\n"); - - printf("*********************** starting %s * \n", __func__); - - /* Try to copy the first DNS record */ - b = pico_dns_record_copy(a); - fail_unless(b != NULL, "dns_record_copy returned NULL!\n"); - fail_unless(a != b, "pointers point to same struct!\n"); - fail_unless(strcmp(a->rname, b->rname) == 0, - "dns_record_copy failed copying names!\n"); - fail_unless(a->rsuffix->rtype == b->rsuffix->rtype, - "dns_record_copy failed copying rtype!\n"); - fail_unless(a->rsuffix->rclass == b->rsuffix->rclass, - "dns_record_copy failed copying rclass!\n"); - fail_unless(a->rsuffix->rttl == b->rsuffix->rttl, - "dns_record_copy failed copying rttl!\n"); - fail_unless(a->rsuffix->rdlength == b->rsuffix->rdlength, - "dns_record_copy failed copying rdlenth!\n"); - fail_unless(memcmp(a->rdata, b->rdata, short_be(b->rsuffix->rdlength)) == 0, - "dns_record_copy failed copying rdata!\n"); - - /* FREE memory */ - pico_dns_record_delete((void **)&a); - pico_dns_record_delete((void **)&b); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_record_delete) /* MARK: dns_record_delete */ -{ - struct pico_dns_record *a = NULL; - const char *url = "picotcp.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - - a = pico_dns_record_create(url, (void *)rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 120); - fail_if(!a, "dns_record_create failed!\n"); - - /* Try to delete the created record */ - ret = pico_dns_record_delete((void **)&a); - fail_unless(ret == 0, "pico_dns_record_delete 
returned NULL!\n"); - fail_unless(a == NULL, "pico_dns_record_delete failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_record_create) /* MARK: dns_record_create */ -{ - struct pico_dns_record *a = NULL; - const char *url = "picotcp.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - a = pico_dns_record_create(url, (void *)rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 120); - fail_if(!a, "dns_record_create returned NULL!\n"); - fail_unless(strcmp(a->rname, "\x7picotcp\x3com"), - "dns_record_create didn't convert url %s properly!\n", - a->rname); - fail_unless(a->rsuffix->rtype == short_be(PICO_DNS_TYPE_A), - "dns_record_create failed setting rtype!\n"); - fail_unless(a->rsuffix->rclass == short_be(PICO_DNS_CLASS_IN), - "dns_record_create failed setting rclass!\n"); - fail_unless(a->rsuffix->rttl == long_be(120), - "dns_record_create failed setting rttl!\n"); - fail_unless(a->rsuffix->rdlength == short_be(4), - "dns_record_create failed setting rdlenth!\n"); - fail_unless(memcmp(a->rdata, rdata, 4) == 0, - "dns_record_create failed setting rdata!\n"); - - /* TODO: Test PTR records */ - - pico_dns_record_delete((void **)&a); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_answer_create) /* MARK: dns_answer_create */ -{ - pico_dns_packet *packet = NULL; - PICO_DNS_RTREE_DECLARE(rtree); - struct pico_dns_record *a = NULL, *b = NULL; - const char *url = "picotcp.com"; - const char *url2 = "google.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint16_t len = 0; - uint8_t buf[62] = { - 0x00u, 0x00u, - 0x85u, 0x00u, - 0x00u, 0x00u, - 0x00u, 0x02u, - 0x00u, 0x00u, - 0x00u, 0x00u, - 0x06u, 'g', 'o', 'o', 'g', 'l', 'e', - 0x03u, 'c', 'o', 'm', - 0x00u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x00u, 0x00u, 0x00u, 0x78u, - 0x00u, 0x04u, - 0x0Au, 0x0Au, 0x00u, 0x01u, - 0x07u, 
'p', 'i', 'c', 'o', 't', 'c', 'p', - 0xc0u, 0x13u, - 0x00u, 0x01u, - 0x00u, 0x01u, - 0x00u, 0x00u, 0x00u, 0x78u, - 0x00u, 0x04u, - 0x0Au, 0x0Au, 0x00u, 0x01u - }; - - printf("*********************** starting %s * \n", __func__); - - a = pico_dns_record_create(url, (void *)rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 120); - fail_if(!a, "dns_record_create returned NULL!\n"); - b = pico_dns_record_create(url2, (void *)rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 120); - fail_if(!a, "dns_record_create returned NULL!\n"); - - pico_tree_insert(&rtree, a); - pico_tree_insert(&rtree, b); - - /* Try to create an answer packet */ - packet = pico_dns_answer_create(&rtree, NULL, NULL, &len); - fail_if (packet == NULL, "dns_answer_create returned NULL!\n"); - fail_unless(0 == memcmp((void *)packet, (void *)buf, len), - "dns_answer_create failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_namelen_comp) /* MARK: dns_namelen_comp */ -{ - char name[] = "\3www\4tass\2be\0"; - char name_comp[] = "\3www\4tass\2be\xc0\x02"; /* two bytes ofset from start of buf */ - unsigned int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* name without compression */ - ret = pico_dns_namelen_comp(name); - fail_unless(ret == 12, "Namelength is wrong!\n"); - - /* name with compression */ - ret = pico_dns_namelen_comp(name_comp); - fail_unless(ret == 13, "Namelength is wrong!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_decompress_name) /* MARK: dns_decompress_name */ -{ - char name[] = "\4mail\xc0\x02"; - char name2[] = "\xc0\x02"; - char buf[] = "00\6google\3com"; - char *ret; - - printf("*********************** starting %s * \n", __func__); - - /* Test normal DNS name compression */ - ret = pico_dns_decompress_name(name, (pico_dns_packet *)buf); - - /* Fail conditions */ - fail_unless(ret != NULL, "Name ptr returned is NULL"); - 
fail_unless(strcmp(ret, "\4mail\6google\3com") == 0, "Not correctly decompressed: '%s'!\n", ret); - - /* Free memory */ - PICO_FREE(ret); - ret = NULL; - - /* Test when there is only a pointer */ - ret = pico_dns_decompress_name(name2, (pico_dns_packet *)buf); - - /* Fail conditions */ - fail_unless(ret != NULL, "Name ptr returned is NULL"); - fail_unless(strcmp(ret, "\6google\3com") == 0, "Not correctly decompressed: '%s'!\n", ret); - - /* Free memory */ - PICO_FREE(ret); - ret = NULL; - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_url_get_reverse_len) /* MARK: dns_url_get_reverse_len */ -{ - const char *url_ipv4 = "10.10.0.1"; - const char *url_ipv6 = "2001:0db8:0000:0000:0000:0000:0000:0000"; - uint16_t arpalen = 0; - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Try to determine the reverse length of the IPv4 URL */ - len = pico_dns_url_get_reverse_len(url_ipv4, &arpalen, PICO_PROTO_IPV4); - fail_unless(len == (9 + 2) && arpalen == 13, - "dns_url_get_reverse_len failed with IPv4 URL!\n"); - - /* Try to determine the reverse length of the IPv6 URL */ - len = pico_dns_url_get_reverse_len(url_ipv6, &arpalen, PICO_PROTO_IPV6); - fail_unless(len == (63 + 2) && arpalen == 9, - "dns_url_get_reverse_len failed with IPv4 URL!\n"); - - len = pico_dns_url_get_reverse_len(NULL, NULL, PICO_PROTO_IPV4); - fail_unless(len == 0, "dns_url_get_reverse_len with NULL-ptrs failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_url_to_reverse_qname) /* MARK: dns_url_to_reverse_qname */ -{ - const char *url_ipv4 = "10.10.0.1"; - const char *url_ipv6 = "2001:0db8:0000:0000:0000:0000:0000:0000"; - char *qname = NULL; - char cmp_buf1[24] = { - 0x01, '1', - 0x01, '0', - 0x02, '1', '0', - 0x02, '1', '0', - 0x07, 'i', 'n', '-', 'a', 'd', 'd', 'r', - 0x04, 'a', 'r', 'p', 'a', - 0x00 - }; - char cmp_buf[74] = { - 0x01u, '0', 0x01u, '0', 
0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '0', 0x01u, '0', 0x01u, '0', 0x01u, '0', - 0x01u, '8', 0x01u, 'b', 0x01u, 'd', 0x01u, '0', - 0x01u, '1', 0x01u, '0', 0x01u, '0', 0x01u, '2', - 0x03u, 'I', 'P', '6', - 0x04u, 'A', 'R', 'P', 'A', - 0x00u - }; - - printf("*********************** starting %s * \n", __func__); - - /* Try to reverse IPv4 URL */ - qname = pico_dns_url_to_reverse_qname(url_ipv4, PICO_PROTO_IPV4); - fail_unless(qname != NULL, "dns_url_to_reverse_qname returned NULL!\n"); - fail_unless(strcmp(qname, cmp_buf1) == 0, - "dns_url_to_reverse_qname failed with IPv4 %s!\n", qname); - PICO_FREE(qname); - - /* Try to reverse IPv6 URL */ - qname = pico_dns_url_to_reverse_qname(url_ipv6, PICO_PROTO_IPV6); - fail_unless(qname != NULL, "dns_url_to_reverse_qname returned NULL!\n"); - fail_unless(strcmp(qname, cmp_buf) == 0, - "dns_url_to_reverse_qname failed with IPv6!\n"); - PICO_FREE(qname); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_qname_to_url) /* MARK: dns_qname_to_url */ -{ - char qname[24] = { - 0x01, '1', - 0x01, '0', - 0x02, '1', '0', - 0x02, '1', '0', - 0x07, 'i', 'n', '-', 'a', 'd', 'd', 'r', - 0x04, 'a', 'r', 'p', 'a', - 0x00 - }; - char qname2[13] = { - 0x07, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03, 'c', 'o', 'm', - 0x00 - }; - char qname3[14] = { - 0x08, 'p', 'i', 'c', 'o', '.', 't', 'c', 'p', - 0x03, 'c', 'o', 'm', - 0x00 - }; - char *url = NULL; - - printf("*********************** starting %s * \n", __func__); - - /* Try to convert qname to url */ - url = pico_dns_qname_to_url(qname); - fail_unless(url != NULL, "dns_qname_to_url returned NULL!\n"); - fail_unless(strcmp(url, "1.0.10.10.in-addr.arpa") == 0, - "dns_qname_to_url failed %s!\n", url); - PICO_FREE(url); - - /* Try to convert qname2 to url */ - 
url = pico_dns_qname_to_url(qname2); - fail_unless(url != NULL, "dns_qname_to_url returned NULL!\n"); - fail_unless(strcmp(url, "picotcp.com") == 0, - "dns_qname_to_url failed %s!\n", url); - PICO_FREE(url); - - /* Try to convert qname2 to url */ - url = pico_dns_qname_to_url(qname3); - fail_unless(url != NULL, "dns_qname_to_url returned NULL!\n"); - fail_unless(strcmp(url, "pico.tcp.com") == 0, - "dns_qname_to_url failed %s!\n", url); - PICO_FREE(url); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_url_to_qname) /* MARK: dns_url_to_qname */ -{ - char qname1[24] = { - 0x01, '1', - 0x01, '0', - 0x02, '1', '0', - 0x02, '1', '0', - 0x07, 'i', 'n', '-', 'a', 'd', 'd', 'r', - 0x04, 'a', 'r', 'p', 'a', - 0x00 - }; - char qname2[13] = { - 0x07, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03, 'c', 'o', 'm', - 0x00 - }; - char *qname = NULL; - - printf("*********************** starting %s * \n", __func__); - - /* Try to convert url to qname1 */ - qname = pico_dns_url_to_qname("1.0.10.10.in-addr.arpa"); - fail_unless(qname != NULL, "dns_url_to_qname returned NULL!\n"); - fail_unless(strcmp(qname, qname1) == 0, - "dns_url_to_qname failed %s!\n", qname); - PICO_FREE(qname); - - /* Try to convert url to qname2 */ - qname = pico_dns_url_to_qname("picotcp.com"); - fail_unless(qname != NULL, "dns_url_to_qname returned NULL!\n"); - fail_unless(strcmp(qname, qname2) == 0, - "dns_url_to_qname failed %s!\n", qname); - PICO_FREE(qname); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_name_to_dns_notation) /* MARK: dns_name_to_dns_notation */ -{ - char qname1[13] = { - 0x07, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03, 'c', 'o', 'm', - 0x00 - }; - char url1[13] = { - 0, 'p', 'i', 'c', 'o', 't', 'c', 'p', '.', 'c', 'o', 'm', 0x00 - }; - char url2[13] = { - 'a', 'p', 'i', 'c', 'o', 't', 'c', 'p', '.', 'c', 'o', 'm', 0x00 - }; - int ret = 0; - - printf("*********************** starting %s * \n", 
__func__); - - ret = pico_dns_name_to_dns_notation(url1, (uint16_t)strlen(url1)); - fail_unless(ret == -1, "dns_name_to_dns_notation didn't check correct!\n"); - - ret = pico_dns_name_to_dns_notation(url2, (uint16_t)strlen(url2)); - fail_unless(ret == 0, "dns_name_to_dns_notation returned error!\n"); - fail_unless(strcmp(url2, qname1) == 0, - "dns_name_to_dns_notation failed! %s\n", url2); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_notation_to_name) /* MARK: dns_notation_to_name */ -{ - char qname1[13] = { - 0x07, 'p', 'i', 'c', 'o', 't', 'c', 'p', - 0x03, 'c', 'o', 'm', - 0x00 - }; - char url1[13] = { - '.', 'p', 'i', 'c', 'o', 't', 'c', 'p', '.', 'c', 'o', 'm', 0x00 - }; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - ret = pico_dns_notation_to_name(qname1, (uint16_t)strlen(qname1)); - fail_unless(ret == 0, "dns_notation_to_name returned error!\n"); - fail_unless(strcmp(url1, qname1) == 0, - "dns_notation_to_name failed! 
%s\n", qname1); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_mirror_addr) /* MARK: dns_mirror_addr */ -{ - char url[12] = "192.168.0.1"; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - ret = pico_dns_mirror_addr(url); - fail_unless(ret == 0, "dns_mirror_addr returned error!\n"); - fail_unless(strcmp(url, "1.0.168.192") == 0, - "dns_mirror_addr failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_dns_ptr_ip6_nibble_lo) /* MARK: dns_ptr_ip6_nibble_lo */ -{ - uint8_t byte = 0x34; - char nibble_lo = 0; - - printf("*********************** starting %s * \n", __func__); - - nibble_lo = dns_ptr_ip6_nibble_lo(byte); - fail_unless(nibble_lo == '4', "dns_ptr_ip6_nibble_lo failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_dns_ptr_ip6_nibble_hi) /* MARK: dns_ptr_ip6_nibble_hi */ -{ - uint8_t byte = 0x34; - char nibble_hi = 0; - - printf("*********************** starting %s * \n", __func__); - - nibble_hi = dns_ptr_ip6_nibble_hi(byte); - fail_unless(nibble_hi == '3', "dns_ptr_ip6_nibble_hi failed! 
'%c'\n", - nibble_hi); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_pico_dns_ipv6_set_ptr) /* MARK: dns_ipv6_set_ptr */ -{ - const char *url_ipv6 = "2001:0db8:0000:0000:0000:0000:0000:0000"; - - char cmpbuf[65] = { - '0', '.', '0', '.', '0', '.', '0', '.', - '0', '.', '0', '.', '0', '.', '0', '.', - '0', '.', '0', '.', '0', '.', '0', '.', - '0', '.', '0', '.', '0', '.', '0', '.', - '0', '.', '0', '.', '0', '.', '0', '.', - '0', '.', '0', '.', '0', '.', '0', '.', - '8', '.', 'b', '.', 'd', '.', '0', '.', - '1', '.', '0', '.', '0', '.', '2', '.', 0x00 - }; - char buf[65] = {}; - - printf("*********************** starting %s * \n", __func__); - - pico_dns_ipv6_set_ptr(url_ipv6, buf); - fail_unless(strcmp(buf, cmpbuf) == 0, - "dns_ipv6_set_ptr failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_dns_rdata_cmp = tcase_create("Unit test for dns_rdata_cmp"); - TCase *TCase_dns_question_cmp = tcase_create("Unit test for dns_question_cmp"); - TCase *TCase_dns_qtree_insert = tcase_create("Unit test for dns_qtree_insert"); - TCase *TCase_dns_record_cmp = tcase_create("Unit test for dns_record_cmp"); - TCase *TCase_dns_rtree_insert = tcase_create("Unit test for dns_rtree_insert"); - TCase *TCase_dns_record_cmp_name_type = tcase_create("Unit test for dns_record_cmp_name_type"); - - /* DNS packet section filling */ - TCase *TCase_pico_dns_fill_packet_header = tcase_create("Unit test for 'pico_dns_fill_packet_header'"); - TCase *TCase_pico_dns_fill_packet_rr_sections = tcase_create("Unit test for 'pico_dns_fill_packet_rr_sections'"); - TCase *TCase_pico_dns_fill_packet_rr_section = tcase_create("Unit test for 'pico_dns_fill_packet_rr_section'"); - TCase *TCase_pico_dns_fill_packet_question_section = tcase_create("Unit test for 'pico_dns_fill_packet_question_sections'"); - - /* DNS packet compression */ - TCase 
*TCase_pico_dns_packet_compress_find_ptr = tcase_create("Unit test for 'pico_dns_packet_compress_find_ptr'"); - TCase *TCase_pico_dns_packet_compress_name = tcase_create("Unit test for 'pico_dns_packet_compress_name'"); - TCase *TCase_pico_dns_packet_compress = tcase_create("Unit test for 'pico_dns_packet_compress'"); - - /* DNS question functions */ - TCase *TCase_pico_dns_question_fill_qsuffix = tcase_create("Unit test for 'pico_dns_question_fill_qsuffix'"); - TCase *TCase_pico_dns_question_delete = tcase_create("Unit test for 'pico_dns_question_delete'"); - TCase *TCase_pico_dns_question_create = tcase_create("Unit test for 'pico_dns_question_create'"); - - /* DNS query packet creation */ - TCase *TCase_pico_dns_query_create = tcase_create("Unit test for 'pico_dns_query_create'"); - - /* DNS resource record functions */ - TCase *TCase_pico_dns_record_fill_suffix = tcase_create("Unit test for 'pico_dns_record_fill_suffix'"); - TCase *TCase_pico_dns_record_copy_flat = tcase_create("Unit test for 'pico_dns_record_copy_flat'"); - TCase *TCase_pico_dns_record_copy = tcase_create("Unit test for 'pico_dns_record_copy'"); - TCase *TCase_pico_dns_record_delete = tcase_create("Unit test for 'pico_dns_record_delete'"); - TCase *TCAse_pico_dns_record_create = tcase_create("Unit test for 'pico_dns_record_create'"); - - /* DNS answer packet creation */ - TCase *TCase_pico_dns_answer_create = tcase_create("Unit test for 'pico_dns_answer_create'"); - - /* Name conversion and compression function */ - TCase *TCase_pico_dns_namelen_comp = tcase_create("Unit test for 'pico_dns_namelen_comp'"); - TCase *TCase_pico_dns_decompress_name = tcase_create("Unit test for 'pico_dns_decompress_name'"); - TCase *TCase_pico_dns_url_get_reverse_len = tcase_create("Unit test for 'pico_dns_url_get_reverse_len'"); - TCase *TCase_pico_dns_url_to_reverse_qname = tcase_create("Unit test for 'pico_dns_url_to_reverse_qname'"); - TCase *TCase_pico_dns_qname_to_url = tcase_create("Unit test for 
'pico_dns_qname_to_url'"); - TCase *TCase_pico_dns_url_to_qname = tcase_create("Unit test for 'pico_dns_url_to_qname'"); - TCase *TCase_pico_dns_name_to_dns_notation = tcase_create("Unit test for 'pico_dns_name_to_dns_notation'"); - TCase *TCase_pico_dns_notation_to_name = tcase_create("Unit test for 'pico_dns_notation_to_name'"); - TCase *TCase_pico_dns_mirror_addr = tcase_create("Unit test for 'pico_dns_mirror_addr'"); - TCase *TCase_dns_ptr_ip6_nibble_lo = tcase_create("Unit test for 'dns_ptr_ip6_nibble_lo'"); - TCase *TCase_dns_ptr_ip6_nibble_hi = tcase_create("Unit test for 'dns_ptr_ip6_nibble_hi'"); - TCase *TCase_pico_dns_ipv6_set_ptr = tcase_create("Unit test for 'pico_dns_ipv6_set_ptr'"); - - tcase_add_test(TCase_dns_rdata_cmp, tc_dns_rdata_cmp); - tcase_add_test(TCase_dns_question_cmp, tc_dns_question_cmp); - tcase_add_test(TCase_dns_qtree_insert, tc_dns_qtree_insert); - tcase_add_test(TCase_dns_record_cmp, tc_dns_record_cmp); - tcase_add_test(TCase_dns_rtree_insert, tc_dns_rtree_insert); - tcase_add_test(TCase_dns_record_cmp_name_type, tc_dns_record_cmp_name_type); - tcase_add_test(TCase_pico_dns_fill_packet_header, tc_pico_dns_fill_packet_header); - tcase_add_test(TCase_pico_dns_fill_packet_rr_section, tc_pico_dns_fill_packet_rr_section); - tcase_add_test(TCase_pico_dns_fill_packet_rr_sections, tc_pico_dns_fill_packet_rr_sections); - tcase_add_test(TCase_pico_dns_fill_packet_question_section, tc_pico_dns_fill_packet_question_section); - tcase_add_test(TCase_pico_dns_packet_compress_find_ptr, tc_pico_dns_packet_compress_find_ptr); - tcase_add_test(TCase_pico_dns_packet_compress_name, tc_pico_dns_packet_compress_name); - tcase_add_test(TCase_pico_dns_packet_compress, tc_pico_dns_packet_compress); - tcase_add_test(TCase_pico_dns_question_fill_qsuffix, tc_pico_dns_question_fill_qsuffix); - tcase_add_test(TCase_pico_dns_question_delete, tc_pico_dns_question_delete); - tcase_add_test(TCase_pico_dns_question_create, tc_pico_dns_question_create); - 
tcase_add_test(TCase_pico_dns_query_create, tc_pico_dns_query_create); - tcase_add_test(TCase_pico_dns_record_fill_suffix, tc_pico_dns_record_fill_suffix); - tcase_add_test(TCase_pico_dns_record_copy_flat, tc_pico_dns_record_copy_flat); - tcase_add_test(TCase_pico_dns_record_copy, tc_pico_dns_record_copy); - tcase_add_test(TCase_pico_dns_record_delete, tc_pico_dns_record_delete); - tcase_add_test(TCAse_pico_dns_record_create, tc_pico_dns_record_create); - tcase_add_test(TCase_pico_dns_answer_create, tc_pico_dns_answer_create); - tcase_add_test(TCase_pico_dns_namelen_comp, tc_pico_dns_namelen_comp); - tcase_add_test(TCase_pico_dns_decompress_name, tc_pico_dns_decompress_name); - tcase_add_test(TCase_pico_dns_url_get_reverse_len, tc_pico_dns_url_get_reverse_len); - tcase_add_test(TCase_pico_dns_url_to_reverse_qname, tc_pico_dns_url_to_reverse_qname); - tcase_add_test(TCase_pico_dns_qname_to_url, tc_pico_dns_qname_to_url); - tcase_add_test(TCase_pico_dns_url_to_qname, tc_pico_dns_url_to_qname); - tcase_add_test(TCase_pico_dns_name_to_dns_notation, tc_pico_dns_name_to_dns_notation); - tcase_add_test(TCase_pico_dns_notation_to_name, tc_pico_dns_notation_to_name); - tcase_add_test(TCase_pico_dns_mirror_addr, tc_pico_dns_mirror_addr); - tcase_add_test(TCase_dns_ptr_ip6_nibble_lo, tc_dns_ptr_ip6_nibble_lo); - tcase_add_test(TCase_dns_ptr_ip6_nibble_hi, tc_dns_ptr_ip6_nibble_hi); - tcase_add_test(TCase_pico_dns_ipv6_set_ptr, tc_pico_dns_ipv6_set_ptr); - - suite_add_tcase(s, TCase_dns_rdata_cmp); - suite_add_tcase(s, TCase_dns_question_cmp); - suite_add_tcase(s, TCase_dns_qtree_insert); - suite_add_tcase(s, TCase_dns_record_cmp); - suite_add_tcase(s, TCase_dns_rtree_insert); - suite_add_tcase(s, TCase_dns_record_cmp_name_type); - suite_add_tcase(s, TCase_pico_dns_fill_packet_header); - suite_add_tcase(s, TCase_pico_dns_fill_packet_rr_section); - suite_add_tcase(s, TCase_pico_dns_fill_packet_rr_sections); - suite_add_tcase(s, TCase_pico_dns_fill_packet_question_section); - 
suite_add_tcase(s, TCase_pico_dns_packet_compress_find_ptr); - suite_add_tcase(s, TCase_pico_dns_packet_compress_name); - suite_add_tcase(s, TCase_pico_dns_packet_compress); - suite_add_tcase(s, TCase_pico_dns_question_fill_qsuffix); - suite_add_tcase(s, TCase_pico_dns_question_delete); - suite_add_tcase(s, TCase_pico_dns_question_create); - suite_add_tcase(s, TCase_pico_dns_query_create); - suite_add_tcase(s, TCase_pico_dns_record_fill_suffix); - suite_add_tcase(s, TCase_pico_dns_record_copy); - suite_add_tcase(s, TCase_pico_dns_record_delete); - suite_add_tcase(s, TCAse_pico_dns_record_create); - suite_add_tcase(s, TCase_pico_dns_answer_create); - suite_add_tcase(s, TCase_pico_dns_namelen_comp); - suite_add_tcase(s, TCase_pico_dns_decompress_name); - suite_add_tcase(s, TCase_pico_dns_url_get_reverse_len); - suite_add_tcase(s, TCase_pico_dns_url_to_reverse_qname); - suite_add_tcase(s, TCase_pico_dns_qname_to_url); - suite_add_tcase(s, TCase_pico_dns_url_to_qname); - suite_add_tcase(s, TCase_pico_dns_name_to_dns_notation); - suite_add_tcase(s, TCase_pico_dns_notation_to_name); - suite_add_tcase(s, TCase_pico_dns_mirror_addr); - suite_add_tcase(s, TCase_dns_ptr_ip6_nibble_lo); - suite_add_tcase(s, TCase_dns_ptr_ip6_nibble_hi); - suite_add_tcase(s, TCase_pico_dns_ipv6_set_ptr); - - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} - diff --git a/kernel/picotcp/test/unit/modunit_pico_dns_sd.c b/kernel/picotcp/test/unit/modunit_pico_dns_sd.c deleted file mode 100644 index 75a6f13..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_dns_sd.c +++ /dev/null @@ -1,403 +0,0 @@ -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_dns_common.h" -#include "pico_tree.h" -#include 
"pico_dev_mock.c" -#include "modules/pico_dns_sd.c" -#include "check.h" - -Suite *pico_suite(void); -void callback( pico_mdns_rtree *tree, char *str, void *arg); -int dns_sd_init(void); -char text[] = "textvers"; -char text2[] = "pass"; -char text3[] = "color"; -char value[] = "1"; -char value3[] = ""; -void callback( pico_mdns_rtree *tree, - char *str, - void *arg ) -{ - kv_vector vector = { - 0 - }; - - /* This doesn't even gets called, tests exit before possible callback */ - IGNORE_PARAMETER(str); - IGNORE_PARAMETER(arg); - IGNORE_PARAMETER(tree); - fail_unless(pico_dns_sd_register_service("Hello World!", - "_kerberos._udp", - 88, &vector, 120, - callback, NULL) == 0, - "dns_sd_register_service failed!\n"); -} - -int dns_sd_init() -{ - struct mock_device *mock = NULL; - - struct pico_ip4 local = { - .addr = long_be(0x0a280064) - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - - mock = pico_mock_create(NULL); - if (!mock) - return -1; - - pico_ipv4_link_add(mock->dev, local, netmask); - - /* Try to initialise the mDNS module right */ - return pico_dns_sd_init("host.local", local, callback, NULL); -} - -START_TEST(tc_dns_sd_kv_vector_strlen) -{ - kv_vector pairs = { - 0 - }; - - pico_dns_sd_kv_vector_add(&pairs, text, value); - pico_dns_sd_kv_vector_add(&pairs, text2, NULL); - pico_dns_sd_kv_vector_add(&pairs, text3, value3); - - fail_unless(pico_dns_sd_kv_vector_strlen(&pairs) == 23, - "dns_sd_kv_vector_strlen returned wrong length!\n"); - - pico_dns_sd_kv_vector_erase(&pairs); -} -END_TEST -START_TEST(tc_dns_sd_srv_record_create) -{ - struct pico_mdns_record *record = NULL; - - uint8_t buf[19] = { - 0, 0, 0, 0, 0, 80, - 5, 'h', 'i', 't', 'e', 'x', - 5, 'l', 'o', 'c', 'a', 'l', - 0 - }; - - record = pico_dns_sd_srv_record_create("test.local", 0, 0, 80, - "hitex.local", 10, - PICO_MDNS_RECORD_UNIQUE); - - fail_unless(strcmp(record->record->rname, "\4test\5local") == 0, - "Name of SRV record not correct!\n"); - 
fail_unless(short_be(record->record->rsuffix->rtype) == 33, - "Type of SRV record not correctly set!\n"); - fail_unless(short_be(record->record->rsuffix->rclass) == 0x8001, - "Class of SRV record not correctly set!\n"); - fail_unless(short_be(record->record->rsuffix->rdlength) == 19, - "rdlength of SRV record not correctly set!\n"); - fail_unless(long_be(record->record->rsuffix->rttl) == 10, - "TTL of SRV record not correctly set!\n"); - fail_unless(memcmp(record->record->rdata, buf, 19) == 0, - "Rdata of TXT record not correctly set!\n"); - pico_mdns_record_delete((void **)&record); -} -END_TEST -START_TEST(tc_dns_sd_txt_record_create) -{ - struct pico_mdns_record *record = NULL; - kv_vector pairs = { - 0 - }; - - uint8_t buf[23] = { - 10, 't', 'e', 'x', 't', 'v', 'e', 'r', 's', '=', '1', - 4, 'p', 'a', 's', 's', - 6, 'c', 'o', 'l', 'o', 'r', '=' - }; - - pico_dns_sd_kv_vector_add(&pairs, text, value); - pico_dns_sd_kv_vector_add(&pairs, text2, NULL); - pico_dns_sd_kv_vector_add(&pairs, text3, value3); - - record = pico_dns_sd_txt_record_create("test.local", pairs, 10, - PICO_MDNS_RECORD_UNIQUE); - - fail_unless(strcmp(record->record->rname, "\4test\5local") == 0, - "Name of TXT record not correct!\n"); - fail_unless(short_be(record->record->rsuffix->rtype) == 16, - "Type of TXT record not correctly set!\n"); - fail_unless(short_be(record->record->rsuffix->rclass) == 0x8001, - "Class of TXT record not correctly set!\n"); - fail_unless(short_be(record->record->rsuffix->rdlength) == 23, - "rdlength of TXT record not correctly set!\n"); - fail_unless(long_be(record->record->rsuffix->rttl) == 10, - "TTL of TXT record not correctly set!\n"); - fail_unless(memcmp(record->record->rdata, buf, 23) == 0, - "Rdata of TXT record not correctly set!\n"); - pico_mdns_record_delete((void **)&record); -} -END_TEST -START_TEST(tc_dns_sd_kv_create) -{ - key_value_pair_t *pair = NULL; - - pair = pico_dns_sd_kv_create("textvers", "1"); - fail_unless(strcmp(pair->key, "textvers") == 0, 
- "dns_sd_kv_create failed!\n"); - fail_unless(strcmp(pair->value, "1") == 0, - "dns_sd_kv_create failed!\n"); - PICO_FREE(pair->key); - PICO_FREE(pair->value); - PICO_FREE(pair); - - pair = pico_dns_sd_kv_create("textvers", NULL); - fail_unless(strcmp(pair->key, "textvers") == 0, - "dns_sd_kv_create failed!\n"); - fail_unless(pair->value == NULL, - "dns_sd_kv_create failed!\n"); - PICO_FREE(pair->key); - PICO_FREE(pair); - - pair = pico_dns_sd_kv_create("textvers", ""); - fail_unless(strcmp(pair->key, "textvers") == 0, - "dns_sd_kv_create failed!\n"); - fail_unless(strcmp(pair->value, "") == 0, - "dns_sd_kv_create failed!\n"); - PICO_FREE(pair->key); - PICO_FREE(pair->value); - PICO_FREE(pair); -} -END_TEST -START_TEST(tc_dns_sd_kv_delete) -{ - key_value_pair_t *pair = NULL; - - pair = pico_dns_sd_kv_create("textvers", "1"); - fail_unless(strcmp(pair->key, "textvers") == 0, - "dns_sd_kv_create failed!\n"); - fail_unless(strcmp(pair->value, "1") == 0, - "dns_sd_kv_create failed!\n"); - pico_dns_sd_kv_delete(&pair); - fail_unless(pair == NULL, - "dns_sd_kv_delete failed!\n"); - - pair = pico_dns_sd_kv_create("textvers", NULL); - fail_unless(strcmp(pair->key, "textvers") == 0, - "dns_sd_kv_create failed!\n"); - fail_unless(pair->value == NULL, - "dns_sd_kv_create failed!\n"); - pico_dns_sd_kv_delete(&pair); - fail_unless(pair == NULL, - "dns_sd_kv_delete failed!\n"); - - pair = pico_dns_sd_kv_create("textvers", ""); - fail_unless(strcmp(pair->key, "textvers") == 0, - "dns_sd_kv_create failed!\n"); - fail_unless(strcmp(pair->value, "") == 0, - "dns_sd_kv_create failed!\n"); - pico_dns_sd_kv_delete(&pair); - fail_unless(pair == NULL, - "dns_sd_kv_delete failed!\n"); -} -END_TEST -START_TEST(tc_dns_sd_check_type_format) -{ - fail_unless(pico_dns_sd_check_type_format("_http._tcp") == 0, - "dns_sd_check_type_format failed with correct format!\n"); - fail_unless(pico_dns_sd_check_type_format("_printer._sub._http._tcp") - == 0, - "dns_sd_check_type_format failed with 
subtype!\n"); - - /* Test too long subtype */ - fail_unless(pico_dns_sd_check_type_format( - "1234567891123456789212345678931234567894123456789512345678961234._sub._http._tcp"), "dns_sd_check_type_format failed with too big subtype!\n"); - - /* Test too long service type with subtype */ - fail_unless(pico_dns_sd_check_type_format( - "printer._sub.0123456789112345678._tcp"), - "dns_sd_check_type_format failed with too big sn w/ sub!\n"); - - /* Test too long service type with subtype */ - fail_unless(pico_dns_sd_check_type_format("0123456789112345678._tcp"), - "dns_sd_check_type_format failed with too big sn!\n"); - -} -END_TEST -START_TEST(tc_dns_sd_check_instance_name_format) -{ - /* Test too long name */ - fail_unless(pico_dns_sd_check_instance_name_format( - "1234567891123456789212345678931234567894123456789512345678961234"), - "dns_sd_check_instance_name_format failed with too big name!\n"); - - fail_unless(pico_dns_sd_check_instance_name_format("Hello World!") == 0, - "dns_sd_check_instance_name_format failed!\n"); -} -END_TEST -START_TEST(tc_dns_sd_create_service_url) -{ - char *service_url = NULL; - - service_url = pico_dns_sd_create_service_url("Hello World!", "_http._tcp"); - - fail_unless(strcmp(service_url, "Hello World!._http._tcp.local") == 0, - "dns_sd_create_service_url failed!\n"); -} -END_TEST -START_TEST(tc_dns_sd_init) -{ - pico_stack_init(); - fail_unless(dns_sd_init() == 0, - "dns_sd_init failed!\n"); -} -END_TEST -START_TEST(tc_dns_sd_register_service) -{ - pico_stack_init(); - dns_sd_init(); -} -END_TEST -START_TEST(tc_dns_sd_browse_service) -{ - /* Not implemented in code */ -} -END_TEST - -START_TEST(tc_dns_sd_kv_vector_add) -{ - kv_vector pairs = { - 0 - }; - char *key = NULL; - - pico_dns_sd_kv_vector_add(&pairs, text, value); - pico_dns_sd_kv_vector_add(&pairs, text2, NULL); - pico_dns_sd_kv_vector_add(&pairs, text3, value3); - - key = pico_dns_sd_kv_vector_get(&pairs, 2)->key; - fail_unless(strcmp("color", key) == 0, - 
"dns_sd_kv_vector_add failed!\n"); -} -END_TEST -START_TEST(tc_dns_sd_kv_vector_get) -{ - kv_vector pairs = { - 0 - }; - char *key = NULL; - - pico_dns_sd_kv_vector_add(&pairs, text, value); - pico_dns_sd_kv_vector_add(&pairs, text2, NULL); - pico_dns_sd_kv_vector_add(&pairs, text3, value3); - - key = pico_dns_sd_kv_vector_get(&pairs, 2)->key; - fail_unless(strcmp("color", key) == 0, - "dns_sd_kv_vector_get failed!\n"); - - fail_unless(pico_dns_sd_kv_vector_get(&pairs, 3) == NULL, - "dns_sd_kv_vector_get failed @ OOB!\n"); -} -END_TEST -START_TEST(tc_dns_sd_kv_vector_erase) -{ - kv_vector pairs = { - 0 - }; - - pico_dns_sd_kv_vector_add(&pairs, text, value); - pico_dns_sd_kv_vector_add(&pairs, text2, NULL); - pico_dns_sd_kv_vector_add(&pairs, text3, value3); - - pico_dns_sd_kv_vector_erase(&pairs); - - fail_unless(pairs.pairs == NULL, - "dns_sd_kv_vector_erase failed!\n"); - fail_unless(pairs.count == 0, - "dns_sd_kv_vector_erase failed!\n"); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - /* Key-Value pair vector plain creation function */ - TCase *TCase_dns_sd_kv_vector_strlen = tcase_create("Unit test for dns_sd_kv_vector_strlen"); - - /* DNS utility functions */ - TCase *TCase_dns_sd_srv_record_create = tcase_create("Unit test for dns_sd_srv_record_create"); - TCase *TCase_dns_sd_txt_record_create = tcase_create("Unit test for dns_sd_txt_record_create"); - - /* Key-Value pair creation */ - TCase *TCase_dns_sd_kv_create = tcase_create("Unit test for dns_sd_kv_create"); - TCase *TCase_dns_sd_kv_delete = tcase_create("Unit test for dns_sd_kv_delete"); - - /* Utility functions */ - TCase *TCase_dns_sd_check_type_format = tcase_create("Unit test for dns_sd_check_type_format"); - TCase *TCase_dns_sd_check_instance_name_format = tcase_create("Unit test for dns_sd_check_instance_name_format"); - TCase *TCase_dns_sd_create_service_url = tcase_create("Unit test for dns_sd_create_service_url"); - /* DNS SD API functions */ - TCase 
*TCase_dns_sd_init = tcase_create("Unit test for dns_sd_init"); - TCase *TCase_dns_sd_register_service = tcase_create("Unit test for dns_sd_register_service"); - TCase *TCase_dns_sd_browse_service = tcase_create("Unit test for dns_sd_browse_service"); - - /* Key-Value vector functions */ - TCase *TCase_dns_sd_kv_vector_add = tcase_create("Unit test for dns_sd_kv_vector_add"); - TCase *TCase_dns_sd_kv_vector_get = tcase_create("Unit test for dns_sd_kv_vector_get"); - TCase *TCase_dns_sd_kv_vector_erase = tcase_create("Unit test for dns_sd_kv_vector_erase"); - - /* Key-Value pair vector plain creation function */ - tcase_add_test(TCase_dns_sd_kv_vector_strlen, tc_dns_sd_kv_vector_strlen); - suite_add_tcase(s, TCase_dns_sd_kv_vector_strlen); - - /* DNS utility functions */ - tcase_add_test(TCase_dns_sd_srv_record_create, tc_dns_sd_srv_record_create); - suite_add_tcase(s, TCase_dns_sd_srv_record_create); - tcase_add_test(TCase_dns_sd_txt_record_create, tc_dns_sd_txt_record_create); - suite_add_tcase(s, TCase_dns_sd_txt_record_create); - - /* Key-Value pair creation */ - tcase_add_test(TCase_dns_sd_kv_create, tc_dns_sd_kv_create); - suite_add_tcase(s, TCase_dns_sd_kv_create); - tcase_add_test(TCase_dns_sd_kv_delete, tc_dns_sd_kv_delete); - suite_add_tcase(s, TCase_dns_sd_kv_delete); - - /* Utility functions */ - tcase_add_test(TCase_dns_sd_check_type_format, tc_dns_sd_check_type_format); - suite_add_tcase(s, TCase_dns_sd_check_type_format); - tcase_add_test(TCase_dns_sd_check_instance_name_format, tc_dns_sd_check_instance_name_format); - suite_add_tcase(s, TCase_dns_sd_check_instance_name_format); - tcase_add_test(TCase_dns_sd_create_service_url, tc_dns_sd_create_service_url); - suite_add_tcase(s, TCase_dns_sd_create_service_url); - - /* DNS SD API functions */ - tcase_add_test(TCase_dns_sd_init, tc_dns_sd_init); - suite_add_tcase(s, TCase_dns_sd_init); - tcase_add_test(TCase_dns_sd_register_service, tc_dns_sd_register_service); - suite_add_tcase(s, 
TCase_dns_sd_register_service); - tcase_add_test(TCase_dns_sd_browse_service, tc_dns_sd_browse_service); - suite_add_tcase(s, TCase_dns_sd_browse_service); - - /* Key-Value vector functions */ - tcase_add_test(TCase_dns_sd_kv_vector_add, tc_dns_sd_kv_vector_add); - suite_add_tcase(s, TCase_dns_sd_kv_vector_add); - tcase_add_test(TCase_dns_sd_kv_vector_get, tc_dns_sd_kv_vector_get); - suite_add_tcase(s, TCase_dns_sd_kv_vector_get); - tcase_add_test(TCase_dns_sd_kv_vector_erase, tc_dns_sd_kv_vector_erase); - suite_add_tcase(s, TCase_dns_sd_kv_vector_erase); - - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_ethernet.c b/kernel/picotcp/test/unit/modunit_pico_ethernet.c deleted file mode 100644 index 6f67fa9..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_ethernet.c +++ /dev/null @@ -1,315 +0,0 @@ -//#include "pico_config.h" -#include "pico_stack.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_icmp4.h" -#include "pico_icmp6.h" -#include "pico_arp.h" -#include "pico_ethernet.h" -#include "modules/pico_ethernet.c" -#include "check.h" - -#define STARTING() \ - printf("*********************** STARTING %s ***\n", __func__); \ - fflush(stdout) -#define TRYING(s, ...) \ - printf("Trying %s: " s, __func__, ##__VA_ARGS__); \ - fflush(stdout) -#define CHECKING(i) \ - printf("Checking the results of test %2d in %s...", (i)++, \ - __func__); \ - fflush(stdout) -#define SUCCESS() \ - printf(" SUCCES\n"); \ - fflush(stdout) -#define BREAKING(s, ...) \ - printf("Breaking %s: " s, __func__, ##__VA_ARGS__); \ - fflush(stdout) -#define ENDING(i) \ - printf("*********************** ENDING %s *** N TESTS: %d\n", \ - __func__, ((i)-1)); \ - fflush(stdout) -#define DBG(s, ...) 
\ - printf(s, ##__VA_ARGS__); \ - fflush(stdout) - -Suite *pico_suite(void); - -START_TEST(tc_destination_is_bcast) -{ - /* test this: static int destination_is_bcast(struct pico_frame *f) */ - struct pico_ip6 addr = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }}; - struct pico_frame *f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - struct pico_ipv6_hdr *h = (struct pico_ipv6_hdr *)f->buffer; - struct pico_ipv4_hdr *h4 = NULL; - - /* Test parameters */ - int ret = 0, count = 0; - - f->net_hdr = (uint8_t*) h; - f->buffer[0] = 0x60; /* Ipv6 */ - - STARTING(); - - TRYING("With wrong protocol -> IPv6\n"); - memcpy(h->dst.addr, addr.addr, PICO_SIZE_IP6); - ret = destination_is_bcast(f); - CHECKING(count); - fail_unless(0 == ret, "Should've returned 0 since IPv6 frame\n"); - SUCCESS(); - pico_frame_discard(f); - - f = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - h4 = (struct pico_ipv4_hdr *)f->buffer; - f->net_hdr = (uint8_t *)h4; - f->buffer[0] = 0x40; /* IPv4 */ - TRYING("With right protocol -> IPv4\n"); - ret = destination_is_bcast(f); - CHECKING(count); - fail_unless(0 == ret, "Should've returned 0 since not a mcast address\n"); - SUCCESS(); - - BREAKING(); - ret = destination_is_bcast(NULL); - CHECKING(count); - fail_unless(0 == ret, "Should've returned 0 since NULL-pointer\n"); - SUCCESS(); - - ENDING(count); -} -END_TEST -START_TEST(tc_destination_is_mcast) -{ - struct pico_ip6 addr = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }}; - struct pico_ip6 mcast = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }}; - struct pico_ip4 addr4 = {0}; - struct pico_ip4 mcast4 = {0}; - struct pico_frame *f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - struct pico_ipv6_hdr *h = (struct pico_ipv6_hdr *)f->buffer; - struct pico_ipv4_hdr *h4 = (struct pico_ipv4_hdr *)f->buffer; - /* Test parameters */ - int ret = 0, count = 0; - - f->net_hdr = (uint8_t*) h; - f->buffer[0] = 0x60; /* Ipv6 */ - - STARTING(); - - pico_string_to_ipv4("232.1.1.0", 
&(mcast4.addr)); /* 0 */ - pico_string_to_ipv4("10.20.0.1", &(addr4.addr)); - - pico_string_to_ipv6("ff00:0:0:0:0:0:e801:100", (mcast.addr)); /* 0 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:100", (addr.addr)); /* 0 */ - - TRYING("With IPv6 unicast addr\n"); - memcpy(h->dst.addr, addr.addr, PICO_SIZE_IP6); - ret = destination_is_mcast(f); - CHECKING(count); - fail_unless(0 == ret, "Should've returned 0 since not an IPv6 multicast\n"); - SUCCESS(); - - TRYING("With IPv6 multicast addr\n"); - memcpy(h->dst.addr, mcast.addr, PICO_SIZE_IP6); - ret = destination_is_mcast(f); - CHECKING(count); - fail_unless(1 == ret, "Should've returned 1 since an IPv6 multicast\n"); - SUCCESS(); - - pico_frame_discard(f); - f = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - h4 = (struct pico_ipv4_hdr *)f->buffer; - f->net_hdr = (uint8_t *)h4; - f->buffer[0] = 0x40; /* IPv4 */ - - TRYING("With IPv4 unicast addr\n"); - h4->dst = addr4; - ret = destination_is_bcast(f); - CHECKING(count); - fail_unless(0 == ret, "Should've returned 0 since not an IPv4 mcast address\n"); - SUCCESS(); - - TRYING("With IPv4 multicast addr\n"); - h4->dst = mcast4; - ret = destination_is_mcast(f); - CHECKING(count); - fail_unless(1 == ret, "Should've returned 1 since an IPv4 multicast\n"); - SUCCESS(); - - BREAKING(); - ret = destination_is_bcast(NULL); - CHECKING(count); - fail_unless(0 == ret, "Should've returned 0 since NULL-pointer\n"); - SUCCESS(); - - ENDING(count); -} -END_TEST -START_TEST(tc_pico_ipv4_ethernet_receive) -{ - /* test this: static int32_t pico_ipv4_ethernet_receive(struct pico_frame *f) */ - struct pico_frame *f = NULL; - struct pico_ipv4_hdr *h4 = NULL; - int ret = 0, count = 0; - - STARTING(); - - f = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - h4 = (struct pico_ipv4_hdr *)f->buffer; - f->net_hdr = (uint8_t *)h4; - f->buffer[0] = 0x40; /* IPv4 */ - - TRYING("With IPv4 frame\n"); - ret = pico_ipv4_ethernet_receive(f); - CHECKING(count); - fail_unless(ret > 0, "Was correct frame 
should've returned size of frame\n"); - SUCCESS(); - CHECKING(count); - fail_unless(pico_proto_ipv4.q_in->size == f->buffer_len, "Frame not enqueued\n"); - SUCCESS(); - - ENDING(count); -} -END_TEST -START_TEST(tc_pico_ipv6_ethernet_receive) -{ - /* test this: static int32_t pico_ipv6_ethernet_receive(struct pico_frame *f) */ - struct pico_frame *f = NULL; - struct pico_ipv6_hdr *h = NULL; - - int ret = 0, count = 0; - - STARTING(); - f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - h = (struct pico_ipv6_hdr *)f->buffer; - f->net_hdr = (uint8_t*) h; - f->buffer[0] = 0x40; /* Ipv6 */ - - TRYING("With wrong network type\n"); - ret = pico_ipv6_ethernet_receive(f); - CHECKING(count); - fail_unless(ret == -1, "Wrong type should've returned an error\n"); - SUCCESS(); - - f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - h = (struct pico_ipv6_hdr *)f->buffer; - f->net_hdr = (uint8_t*) h; - f->buffer[0] = 0x60; - TRYING("With correct network type\n"); - ret = pico_ipv6_ethernet_receive(f); - CHECKING(count); - fail_unless(ret == (int32_t)f->buffer_len, "Was correct frame, should've returned success\n"); - SUCCESS(); - CHECKING(count); - fail_unless(pico_proto_ipv6.q_in->size == f->buffer_len, "Frame not enqueued\n"); - SUCCESS(); - - ENDING(count); -} -END_TEST -START_TEST(tc_pico_eth_receive) -{ - struct pico_frame *f = NULL; - struct pico_eth_hdr *eth = NULL; - int ret = 0, count = 0; - - STARTING(); - - f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr) + sizeof(struct pico_eth_hdr)); - f->datalink_hdr = f->buffer; - f->net_hdr = f->datalink_hdr + sizeof(struct pico_eth_hdr); - eth = (struct pico_eth_hdr *)f->datalink_hdr; - ((uint8_t *)(f->net_hdr))[0] = 0x40; /* Ipv4 */ - - /* ETHERNET PROTOCOL : IPV6 */ - eth->proto = PICO_IDETH_IPV6; - - TRYING("With wrong network type\n"); - ret = pico_eth_receive(f); - CHECKING(count); - fail_unless(ret == -1, "Wrong type should've returned an error\n"); - SUCCESS(); - - f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr) + 
sizeof(struct pico_eth_hdr)); - f->datalink_hdr = f->buffer; - f->net_hdr = f->datalink_hdr + sizeof(struct pico_eth_hdr); - eth = (struct pico_eth_hdr *)f->datalink_hdr; - ((uint8_t *)(f->net_hdr))[0] = 0x60; /* Ipv6 */ - - /* ETHERNET PROTOCOL : IPV6 */ - eth->proto = PICO_IDETH_IPV6; - TRYING("With correct network type\n"); - ret = pico_eth_receive(f); - CHECKING(count); - fail_unless(ret == (int32_t)f->buffer_len, "Was correct frame, should've returned success\n"); - SUCCESS(); - CHECKING(count); - fail_unless(pico_proto_ipv6.q_in->size == f->buffer_len, "Frame not enqueued\n"); - SUCCESS(); - - pico_frame_discard(f); - - f = pico_frame_alloc(sizeof(struct pico_ipv4_hdr) + sizeof(struct pico_eth_hdr)); - f->datalink_hdr = f->buffer; - f->net_hdr = f->datalink_hdr + sizeof(struct pico_eth_hdr); - eth = (struct pico_eth_hdr *)f->datalink_hdr; - ((uint8_t *)(f->net_hdr))[0] = 0x40; /* Ipv4 */ - - TRYING("With wrong frame type\n"); - ret = pico_eth_receive(f); - CHECKING(count); - fail_unless(ret == -1, "should've returned -1 wrong ethernet protocol\n"); - SUCCESS(); - - f = pico_frame_alloc(sizeof(struct pico_ipv4_hdr) + sizeof(struct pico_eth_hdr)); - f->datalink_hdr = f->buffer; - f->net_hdr = f->datalink_hdr + sizeof(struct pico_eth_hdr); - eth = (struct pico_eth_hdr *)f->datalink_hdr; - ((uint8_t *)(f->net_hdr))[0] = 0x40; /* Ipv4 */ - eth->proto = PICO_IDETH_IPV4; - - TRYING("With IPv4 frame\n"); - ret = pico_eth_receive(f); - CHECKING(count); - fail_unless(ret > 0, "Was correct frame should've returned size of frame\n"); - SUCCESS(); - CHECKING(count); - fail_unless(pico_proto_ipv4.q_in->size == f->buffer_len, "Frame not enqueued\n"); - SUCCESS(); - - ENDING(count); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_destination_is_bcast = tcase_create("Unit test for destination_is_bcast"); - TCase *TCase_destination_is_mcast = tcase_create("Unit test for destination_is_mcast"); - TCase 
*TCase_pico_ipv4_ethernet_receive = tcase_create("Unit test for pico_ipv4_ethernet_receive"); - TCase *TCase_pico_ipv6_ethernet_receive = tcase_create("Unit test for pico_ipv6_ethernet_receive"); - TCase *TCase_pico_eth_receive = tcase_create("Unit test for pico_eth_receive"); - - tcase_add_test(TCase_destination_is_bcast, tc_destination_is_bcast); - suite_add_tcase(s, TCase_destination_is_bcast); - tcase_add_test(TCase_destination_is_mcast, tc_destination_is_mcast); - suite_add_tcase(s, TCase_destination_is_mcast); - tcase_add_test(TCase_pico_ipv4_ethernet_receive, tc_pico_ipv4_ethernet_receive); - suite_add_tcase(s, TCase_pico_ipv4_ethernet_receive); - tcase_add_test(TCase_pico_ipv6_ethernet_receive, tc_pico_ipv6_ethernet_receive); - suite_add_tcase(s, TCase_pico_ipv6_ethernet_receive); - tcase_add_test(TCase_pico_eth_receive, tc_pico_eth_receive); - suite_add_tcase(s, TCase_pico_eth_receive); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_fragments.c b/kernel/picotcp/test/unit/modunit_pico_fragments.c deleted file mode 100644 index 2df932b..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_fragments.c +++ /dev/null @@ -1,1234 +0,0 @@ -#include "pico_config.h" -#include "pico_ipv6.h" -#include "pico_icmp6.h" -#include "pico_ipv4.h" -#include "pico_icmp4.h" -#include "pico_stack.h" -#include "pico_eth.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_socket.h" -#include "pico_device.h" -#include "pico_tree.h" -#include "pico_constants.h" -#include "pico_fragments.h" -#include "./modules/pico_fragments.c" -#include "check.h" - -Suite *pico_suite(void); -/* Mock! 
*/ -static int transport_recv_called = 0; -static uint32_t buffer_len_transport_receive = 0; -#define TESTPROTO 0x99 -#define TESTID 0x11 -int32_t pico_transport_receive(struct pico_frame *f, uint8_t proto) -{ - fail_if(proto != TESTPROTO); - transport_recv_called++; - buffer_len_transport_receive = f->buffer_len; - pico_frame_discard(f); - return 0; -} - -static int timer_add_called = 0; -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - IGNORE_PARAMETER(expire); - IGNORE_PARAMETER(arg); - fail_if(timer != pico_frag_expire); - timer_add_called++; - return 0; -} - -static int timer_cancel_called = 0; -void pico_timer_cancel(uint32_t id) -{ - IGNORE_PARAMETER(id); - timer_cancel_called++; -} - -static int icmp4_frag_expired_called = 0; -int pico_icmp4_frag_expired(struct pico_frame *f) -{ - fail_unless(IS_IPV4(f)); - icmp4_frag_expired_called++; - return 0; -} - -static int icmp6_frag_expired_called = 0; -int pico_icmp6_frag_expired(struct pico_frame *f) -{ - fail_unless(IS_IPV6(f)); - icmp6_frag_expired_called++; - return 0; -} - - -START_TEST(tc_pico_ipv6_frag_compare) -{ - struct pico_frame *a, *b; - a = pico_frame_alloc(10); - fail_if(!a); - b = pico_frame_alloc(10); - fail_if(!b); - a->frag = 0xaa00; - b->frag = 0xbb00; - fail_unless(pico_ipv6_frag_compare(a, b) < 0); - fail_unless(pico_ipv6_frag_compare(b, a) > 0); - b->frag = 0xaa00; - fail_unless(pico_ipv6_frag_compare(a, b) == 0); - pico_frame_discard(a); - pico_frame_discard(b); -} -END_TEST - -START_TEST(tc_pico_ipv4_frag_compare) -{ - struct pico_frame *a, *b; - a = pico_frame_alloc(10); - fail_if(!a); - b = pico_frame_alloc(10); - fail_if(!b); - a->frag = 0xaa00; - b->frag = 0xbb00; - fail_unless(pico_ipv4_frag_compare(a, b) < 0); - fail_unless(pico_ipv4_frag_compare(b, a) > 0); - b->frag = 0xaa00; - fail_unless(pico_ipv4_frag_compare(a, b) == 0); - pico_frame_discard(a); - pico_frame_discard(b); -} -END_TEST - -START_TEST(tc_pico_ipv6_fragments_complete) -{ - 
struct pico_frame *a, *b; - transport_recv_called = 0; - timer_cancel_called = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = 1; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20; /* off = 32 */ - - pico_tree_insert(&ipv6_fragments, a); - pico_tree_insert(&ipv6_fragments, b); - - pico_set_mm_failure(1); - pico_fragments_complete(64, TESTPROTO, PICO_PROTO_IPV6); - fail_if(transport_recv_called != 0); - fail_if(timer_cancel_called != 0); - - pico_fragments_complete(64, TESTPROTO, PICO_PROTO_IPV6); - fail_if(transport_recv_called != 1); - fail_if(timer_cancel_called != 1); -} -END_TEST - -START_TEST(tc_pico_ipv4_fragments_complete) -{ - struct pico_frame *a, *b; - transport_recv_called = 0; - timer_cancel_called = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 >> 3u; /* off = 32 */ - - pico_tree_insert(&ipv4_fragments, a); - pico_tree_insert(&ipv4_fragments, b); - - pico_set_mm_failure(1); - pico_fragments_complete(64, TESTPROTO, PICO_PROTO_IPV4); - fail_if(transport_recv_called != 0); - fail_if(timer_cancel_called != 0); - - pico_fragments_complete(64, TESTPROTO, PICO_PROTO_IPV4); - fail_if(transport_recv_called != 1); - fail_if(timer_cancel_called != 1); -} -END_TEST - -START_TEST(tc_pico_fragments_complete) -{ - /* Done in 
the two tests above */ -} -END_TEST - -START_TEST(tc_pico_fragments_empty_tree) -{ - PICO_TREE_DECLARE(tree, pico_ipv4_frag_compare); - struct pico_frame *a = NULL, *b = NULL; - - pico_fragments_empty_tree(NULL); - - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - /* Make sure we have different frames a and b (because of compare functions in PICO_TREE_DECLARE) */ - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 >> 3u; /* off = 32 */ - - /* Insert them in the tree */ - pico_tree_insert(&tree, a); - pico_tree_insert(&tree, b); - - pico_fragments_empty_tree(&tree); - - /* Is tree empty? */ - fail_if(!pico_tree_empty(&tree)); -} -END_TEST - -START_TEST(tc_pico_fragments_check_complete) -{ - struct pico_frame *a, *b; - fail_if(pico_fragments_check_complete(&ipv4_fragments, TESTPROTO, PICO_PROTO_IPV4) != 1); - fail_if(pico_fragments_check_complete(&ipv6_fragments, TESTPROTO, PICO_PROTO_IPV6) != 1); - - /* Case 1: IPV4 all packets received */ - transport_recv_called = 0; - timer_cancel_called = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 >> 3u; /* off = 32 */ - - pico_tree_insert(&ipv4_fragments, a); - pico_tree_insert(&ipv4_fragments, b); - - fail_if(pico_fragments_check_complete(&ipv4_fragments, TESTPROTO, 
PICO_PROTO_IPV4) != 0); - fail_if(transport_recv_called != 1); - fail_if(timer_cancel_called != 1); - - /* Case 2: IPV6 all packets received */ - transport_recv_called = 0; - timer_cancel_called = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = 1; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20; /* off = 32 */ - - pico_tree_insert(&ipv6_fragments, a); - pico_tree_insert(&ipv6_fragments, b); - - fail_if(pico_fragments_check_complete(&ipv6_fragments, TESTPROTO, PICO_PROTO_IPV6) != 0); - fail_if(transport_recv_called != 1); - fail_if(timer_cancel_called != 1); - - - /* Case 3: IPV4 NOT all packets received */ - transport_recv_called = 0; - timer_cancel_called = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 >> 3u | PICO_IPV4_MOREFRAG; /* off = 32 + more frags */ - /* b->frag = PICO_IPV4_MOREFRAG; /\* more frags *\/ */ - - pico_tree_insert(&ipv4_fragments, a); - pico_tree_insert(&ipv4_fragments, b); - - fail_if(pico_fragments_check_complete(&ipv4_fragments, TESTPROTO, PICO_PROTO_IPV4) == 0); - fail_if(transport_recv_called != 0); - fail_if(timer_cancel_called != 0); - - /* Case 4: IPV6 NOT all packets received */ - transport_recv_called = 0; - timer_cancel_called = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - 
printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = 1; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 1; /* more frags */ - - pico_tree_insert(&ipv6_fragments, a); - pico_tree_insert(&ipv6_fragments, b); - - fail_if(pico_fragments_check_complete(&ipv6_fragments, TESTPROTO, PICO_PROTO_IPV6) == 0); - fail_if(transport_recv_called != 0); - fail_if(timer_cancel_called != 0); -} -END_TEST - -START_TEST(tc_pico_fragments_send_notify) -{ - struct pico_frame *a = NULL; - char ipv4_multicast_address[] = { - "224.0.0.1" - }; - - icmp4_frag_expired_called = 0; - - /* Case 1: NULL fragment */ - - pico_fragments_send_notify(NULL); - - /* Notify should not be send when supplied a NULL argument */ - fail_if(icmp4_frag_expired_called); - - /* Case 2: fragment with offset > 0 */ - a = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!a); - printf("Allocated frame, %p\n", a); - - a->net_hdr = a->buffer; - a->net_len = sizeof(struct pico_ipv4_hdr); - a->buffer[0] = 0x40; /* IPV4 */ - a->frag = 0x20 >> 3u; /* off = 32 */ - - pico_fragments_send_notify(a); - - /* fragment has offset > 0, no notify should be sent */ - fail_if(icmp4_frag_expired_called); - - /* Case 3: fragment with offset > 0 & multicast address */ - pico_string_to_ipv4(ipv4_multicast_address, &((struct pico_ipv4_hdr*)(a->net_hdr))->dst.addr); - pico_fragments_send_notify(a); - - /* fragment has offset > 0 AND multicast address, no notify should be sent */ - fail_if(icmp4_frag_expired_called); - - /* Case 4: fragment with offset == 0 */ - a->net_hdr = a->buffer; - a->net_len = sizeof(struct pico_ipv4_hdr); - a->buffer[0] = 0x40; /* IPV4 */ - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - pico_string_to_ipv4("127.0.0.1", &((struct 
pico_ipv4_hdr*)(a->net_hdr))->dst.addr); /* Set a non-nulticast address */ - - pico_fragments_send_notify(a); - - /* fragment has offset == 0, notify should be sent */ - fail_if(!icmp4_frag_expired_called); - - /* Case 5: fragment with offset == 0 & multicast address */ - icmp4_frag_expired_called = 0; /* reset flag */ - pico_string_to_ipv4(ipv4_multicast_address, &((struct pico_ipv4_hdr*)(a->net_hdr))->dst.addr); - - pico_fragments_send_notify(a); - - /* fragment has offset == 0 but multicast address, notify should NOT sent */ - fail_if(icmp4_frag_expired_called); - - /* Cleanup */ - pico_frame_discard(a); -} -END_TEST - - -START_TEST(tc_pico_frag_expire) -{ - struct pico_frame *a, *b; - /* Addr setup, choose a unicast addr */ - struct pico_ip6 addr_1 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }}; - char ipv4_multicast_address[] = { - "224.0.0.1" - }; - struct pico_ip6 ipv6_multicast_addr = {{ 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }}; - - /* Clear env vars */ - icmp4_frag_expired_called = 0; - icmp6_frag_expired_called = 0; - - /* Common tests */ - /* Case 1: tree is NULL */ - pico_frag_expire(0, NULL); - fail_if(icmp4_frag_expired_called); - fail_if(icmp6_frag_expired_called); - - /* IPV4 TESTS */ - /* Initial setup */ - a = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - /* Case 1: first fragment was not received, do not send notify + empty tree */ - a->net_hdr = a->buffer; - a->net_len = sizeof(struct pico_ipv4_hdr); - a->buffer[0] = 0x40; /* IPV4 */ - a->frag = 0x20 >> 3u; /* off = 32 */ - - pico_tree_insert(&ipv4_fragments, a); - - pico_frag_expire(0, (void*)(&ipv4_fragments)); - fail_if(icmp4_frag_expired_called); - fail_if(!pico_tree_empty(&ipv4_fragments)); - - /* Case 2: first fragment was received, send notify + empty tree */ - - b->net_hdr = b->buffer; - b->net_len = 
sizeof(struct pico_ipv4_hdr); - b->buffer[0] = 0x40; /* IPV4 */ - b->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - pico_tree_insert(&ipv4_fragments, b); - - pico_frag_expire(0, (void*)(&ipv4_fragments)); - fail_if(!icmp4_frag_expired_called); - fail_if(!pico_tree_empty(&ipv4_fragments)); - - /* Case 3: first fragment was received but it is multicast, do not send notify + empty tree */ - /* Reallocate frame, it was discarded in the last pico_frag_expire() */ - b = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - /* Reset env vars */ - icmp4_frag_expired_called = 0; - - b->net_hdr = b->buffer; - b->net_len = sizeof(struct pico_ipv4_hdr); - b->buffer[0] = 0x40; /* IPV4 */ - b->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - pico_string_to_ipv4(ipv4_multicast_address, &((struct pico_ipv4_hdr*)(b->net_hdr))->dst.addr); - - pico_tree_insert(&ipv4_fragments, b); - - pico_frag_expire(0, (void*)(&ipv4_fragments)); - fail_if(icmp4_frag_expired_called); - fail_if(!pico_tree_empty(&ipv4_fragments)); - - - /* IPV6 TESTS */ - /* re-allocate frames, they were discarded in pico_frag_expire */ - a = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - /* Case 4: first fragment was not received, do not send notify + empty tree */ - a->net_hdr = a->buffer; - a->net_len = sizeof(struct pico_ipv6_hdr); - a->buffer[0] = 0x60; /* IPV6 */ - a->frag = 0x20; /* off = 32 */ - memcpy(((struct pico_ipv6_hdr*)(a->net_hdr))->dst.addr, addr_1.addr, PICO_SIZE_IP6); - - pico_tree_insert(&ipv6_fragments, a); - - pico_frag_expire(0, (void*)(&ipv6_fragments)); - fail_if(icmp6_frag_expired_called); - fail_if(!pico_tree_empty(&ipv6_fragments)); - - /* Case 5: first fragment was received, send notify + empty tree */ - - b->net_hdr = b->buffer; - b->net_len = sizeof(struct pico_ipv6_hdr); - 
b->buffer[0] = 0x60; /* IPV6 */ - b->frag = 1; - memcpy(((struct pico_ipv6_hdr*)(b->net_hdr))->dst.addr, addr_1.addr, PICO_SIZE_IP6); - - pico_tree_insert(&ipv6_fragments, b); - - pico_frag_expire(0, (void*)(&ipv6_fragments)); - fail_if(!icmp6_frag_expired_called); - fail_if(!pico_tree_empty(&ipv6_fragments)); - - /* Case 6: first fragment was received but it is multicast, do not send notify + empty tree */ - /* Reallocate frame, it was discarded in the last pico_frag_expire() */ - b = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - /* Reset env vars */ - icmp6_frag_expired_called = 0; - - b->net_hdr = b->buffer; - b->net_len = sizeof(struct pico_ipv4_hdr); - b->buffer[0] = 0x60; /* IPV4 */ - b->frag = 1; - - memcpy(((struct pico_ipv6_hdr*)(b->net_hdr))->dst.addr, ipv6_multicast_addr.addr, PICO_SIZE_IP6); - - pico_tree_insert(&ipv6_fragments, b); - - pico_frag_expire(0, (void*)(&ipv6_fragments)); - fail_if(icmp6_frag_expired_called); - fail_if(!pico_tree_empty(&ipv6_fragments)); - -} -END_TEST -START_TEST(tc_pico_ipv6_frag_timer_on) -{ - /* Reset env variable */ - timer_add_called = 0; - - pico_ipv6_frag_timer_on(); - - /* Was timer added? */ - fail_if(!timer_add_called); -} -END_TEST -START_TEST(tc_pico_ipv4_frag_timer_on) -{ - /* Reset env variable */ - timer_add_called = 0; - - pico_ipv4_frag_timer_on(); - - /* Was timer added? 
*/ - fail_if(!timer_add_called); -} -END_TEST -START_TEST(tc_pico_ipv6_frag_match) -{ - struct pico_frame *a, *b; - struct pico_ipv6_hdr *ha, *hb; - - /* Addr setup */ - struct pico_ip6 addr_1 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }}; - struct pico_ip6 addr_2 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }}; - - /* Inital setup */ - a = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - /* Case 1: net hdr(s) are NULL */ - a->net_hdr = NULL; - b->net_hdr = NULL; - - fail_if(pico_ipv6_frag_match(a, b) != -2); - - /* Init a frame */ - a->net_hdr = a->buffer; - a->net_len = sizeof(struct pico_ipv6_hdr); - - fail_if(pico_ipv6_frag_match(a, b) != -2); - - /* Init b frame */ - b->net_hdr = b->buffer; - b->net_len = sizeof(struct pico_ipv6_hdr); - - /* Init hdrs for rest of tests*/ - ha = (struct pico_ipv6_hdr *)a->net_hdr; - hb = (struct pico_ipv6_hdr *)b->net_hdr; - - /* Case 2: src addr are different*/ - /* Init a and b net hdr adresses */ - memcpy(ha->src.addr, addr_1.addr, PICO_SIZE_IP6); - memcpy(ha->dst.addr, addr_2.addr, PICO_SIZE_IP6); - memcpy(hb->src.addr, addr_2.addr, PICO_SIZE_IP6); - memcpy(hb->dst.addr, addr_2.addr, PICO_SIZE_IP6); - - fail_if(pico_ipv6_frag_match(a, b) != 1); - - /* Case 3: dst addr are different*/ - /* Init a and b net hdr adresses */ - memcpy(ha->src.addr, addr_1.addr, PICO_SIZE_IP6); - memcpy(ha->dst.addr, addr_2.addr, PICO_SIZE_IP6); - memcpy(hb->src.addr, addr_1.addr, PICO_SIZE_IP6); - memcpy(hb->dst.addr, addr_1.addr, PICO_SIZE_IP6); - - fail_if(pico_ipv6_frag_match(a, b) != 2); - - /* Case 4: fragments are the same (src and dst are the same)*/ - /* Init a and b net hdr adresses */ - memcpy(ha->src.addr, addr_1.addr, PICO_SIZE_IP6); - memcpy(ha->dst.addr, addr_2.addr, PICO_SIZE_IP6); - memcpy(hb->src.addr, addr_1.addr, PICO_SIZE_IP6); - memcpy(hb->dst.addr, 
addr_2.addr, PICO_SIZE_IP6); - - fail_if(pico_ipv6_frag_match(a, b) != 0); - - /* Cleanup */ - pico_frame_discard(a); - pico_frame_discard(b); -} -END_TEST -START_TEST(tc_pico_ipv4_frag_match) -{ - struct pico_frame *a, *b; - struct pico_ipv4_hdr *ha, *hb; - - /* Addr setup */ - struct pico_ip4 addr_1 = { - .addr = long_be(0x0a280064) - }; - - struct pico_ip4 addr_2 = { - .addr = long_be(0x0a280312) - }; - - /* Case 1: frames are NULL */ - a = NULL; - b = NULL; - - fail_if(pico_ipv4_frag_match(a, b) != -1); - - /* setup */ - a = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - - /* Case 2: net hdr(s) are NULL */ - a->net_hdr = NULL; - b->net_hdr = NULL; - - fail_if(pico_ipv4_frag_match(a, b) != -2); - - /* Init a frame */ - a->net_hdr = a->buffer; - a->net_len = sizeof(struct pico_ipv4_hdr); - - fail_if(pico_ipv4_frag_match(a, b) != -2); - - /* Init b frame */ - b->net_hdr = b->buffer; - b->net_len = sizeof(struct pico_ipv4_hdr); - - /* Init hdrs for rest of tests*/ - ha = (struct pico_ipv4_hdr *)a->net_hdr; - hb = (struct pico_ipv4_hdr *)b->net_hdr; - - /* Case 3: src addr are different*/ - /* Init a and b net hdr adresses */ - ha->src = addr_1; - ha->dst = addr_2; - hb->src = addr_2; - hb->dst = addr_2; - - fail_if(pico_ipv4_frag_match(a, b) != 1); - - /* Case 4: dst addr are different*/ - /* Init a and b net hdr adresses */ - ha->src = addr_1; - ha->dst = addr_2; - hb->src = addr_1; - hb->dst = addr_1; - - fail_if(pico_ipv4_frag_match(a, b) != 2); - - /* Case 5: fragments are the same (src and dst are the same)*/ - /* Init a and b net hdr adresses */ - ha->src = addr_1; - ha->dst = addr_2; - hb->src = addr_1; - hb->dst = addr_2; - - fail_if(pico_ipv4_frag_match(a, b) != 0); - - /* Cleanup */ - pico_frame_discard(a); - pico_frame_discard(b); -} -END_TEST - 
-START_TEST(tc_pico_fragments_get_header_length) -{ - fail_unless(pico_fragments_get_header_length(PICO_PROTO_IPV4) == PICO_SIZE_IP4HDR); - - fail_unless(pico_fragments_get_header_length(PICO_PROTO_IPV6) == PICO_SIZE_IP6HDR); - - fail_unless(pico_fragments_get_header_length(1) == 0); -} -END_TEST - -START_TEST(tc_pico_fragments_get_more_flag) -{ - struct pico_frame *a = NULL, *b = NULL; - - a = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!a); - printf("Allocated frame, %p\n", a); - - a->net_hdr = a->buffer; - a->net_len = sizeof(struct pico_ipv4_hdr); - a->buffer[0] = 0x40; /* IPV4 */ - a->frag = PICO_IPV4_MOREFRAG; /* Set more flag */ - - b = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - b->net_hdr = a->buffer; - b->net_len = sizeof(struct pico_ipv6_hdr); - b->buffer[0] = 0x60; /* IPV6 */ - b->frag = 0x1; /* set more flag */ - - fail_unless(pico_fragments_get_more_flag(NULL, PICO_PROTO_IPV4) == 0); - fail_unless(pico_fragments_get_more_flag(NULL, PICO_PROTO_IPV6) == 0); - - /* More flag set in IPV4 */ - fail_unless(pico_fragments_get_more_flag(a, PICO_PROTO_IPV4) == 1); - - /* More flag set in IPV6 */ - fail_unless(pico_fragments_get_more_flag(b, PICO_PROTO_IPV6) == 1); - - /* More flag NOT set in IPV4 */ - a->frag = 0; - fail_unless(pico_fragments_get_more_flag(a, PICO_PROTO_IPV4) == 0); - - /* More flag NOT set in IPV6 */ - b->frag = 0; - fail_unless(pico_fragments_get_more_flag(b, PICO_PROTO_IPV6) == 0); - - /* Invalid net argument */ - fail_unless(pico_fragments_get_more_flag(a, 1) == 0); - fail_unless(pico_fragments_get_more_flag(b, 1) == 0); - - /* Cleanup */ - pico_frame_discard(a); - pico_frame_discard(b); -} -END_TEST - -START_TEST(tc_pico_fragments_get_offset) -{ - struct pico_frame *a=NULL, *b = NULL; - - b = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - /* IPV4 with fragment offset > 0 */ - b->frag = 0x20 >> 3u; /* off = 
32 */ - fail_unless(pico_fragments_get_offset(b, PICO_PROTO_IPV4) == 32); - - /* IPV4 with fragment offset == 0 */ - b->frag = 0; /* off = 0 */ - fail_unless(pico_fragments_get_offset(b, PICO_PROTO_IPV4) == 0); - - /* Invalid net argument */ - fail_unless(pico_fragments_get_offset(b, 1) == 0); - - a = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - fail_if(!a); - printf("Allocated frame, %p\n", a); - - /* IPV6 with fragment offset > 0 */ - a->frag = 0x20; /* off = 32 */ - fail_unless(pico_fragments_get_offset(a, PICO_PROTO_IPV6) == 32); - - /* IPV6 with fragment offset == 0 */ - a->frag = 1; /* off = 0 */ - fail_unless(pico_fragments_get_offset(a, PICO_PROTO_IPV6) == 0); - - /* Invalid net argument */ - fail_unless(pico_fragments_get_offset(a, 1) == 0); - - /* Invalid frame argument */ - fail_unless(pico_fragments_get_offset(NULL, PICO_PROTO_IPV4) == 0); - fail_unless(pico_fragments_get_offset(NULL, PICO_PROTO_IPV6) == 0); - fail_unless(pico_fragments_get_offset(NULL, 1) == 0); - - pico_frame_discard(a); - pico_frame_discard(b); -} -END_TEST - -START_TEST(tc_pico_fragments_reassemble) -{ - struct pico_frame *a, *b; - - /* NULL tree */ - transport_recv_called = 0; - buffer_len_transport_receive = 0; - fail_if(pico_fragments_reassemble(NULL, 0, TESTPROTO, PICO_PROTO_IPV4) != -1); - fail_if(transport_recv_called); - - /* Empty tree */ - transport_recv_called = 0; - buffer_len_transport_receive = 0; - fail_if(pico_fragments_reassemble(&ipv4_fragments, 0, TESTPROTO, PICO_PROTO_IPV4) != -2); - fail_if(transport_recv_called); - - /* Empty tree */ - transport_recv_called = 0; - buffer_len_transport_receive = 0; - fail_if(pico_fragments_reassemble(&ipv6_fragments, 0, TESTPROTO, PICO_PROTO_IPV6) != -2); - fail_if(transport_recv_called); - - /* Case 1: IPV4 , everything good */ - transport_recv_called = 0; - buffer_len_transport_receive = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); 
- printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 >> 3u; /* off = 32 */ - - pico_tree_insert(&ipv4_fragments, a); - pico_tree_insert(&ipv4_fragments, b); - - fail_if(pico_fragments_reassemble(&ipv4_fragments, 64, TESTPROTO, PICO_PROTO_IPV4) != 0); - fail_if(transport_recv_called != 1); - fail_if(buffer_len_transport_receive != 64 + PICO_SIZE_IP4HDR); - fail_if(!pico_tree_empty(&ipv4_fragments)); - - /* Case 2: IPV6 , everything good */ - transport_recv_called = 0; - buffer_len_transport_receive = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = 1; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20; /* off = 32 */ - - pico_tree_insert(&ipv6_fragments, a); - pico_tree_insert(&ipv6_fragments, b); - - fail_if(pico_fragments_reassemble(&ipv6_fragments, 64, TESTPROTO, PICO_PROTO_IPV6) != 0); - fail_if(transport_recv_called != 1); - fail_if(buffer_len_transport_receive != 64 + PICO_SIZE_IP6HDR); - fail_if(!pico_tree_empty(&ipv4_fragments)); - - /* Case 3: IPV4 with mm failure*/ - transport_recv_called = 0; - buffer_len_transport_receive = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - 
b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 >> 3u; /* off = 32 */ - - pico_tree_insert(&ipv4_fragments, a); - pico_tree_insert(&ipv4_fragments, b); - - pico_set_mm_failure(1); - fail_if(pico_fragments_reassemble(&ipv4_fragments, 64, TESTPROTO, PICO_PROTO_IPV4) != 1); - fail_if(transport_recv_called == 1); - fail_if(buffer_len_transport_receive != 0); - fail_if(pico_tree_empty(&ipv4_fragments)); - - /* Case 4: IPV6 with mm failure */ - transport_recv_called = 0; - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = 1; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20; /* off = 32 */ - - pico_tree_insert(&ipv6_fragments, a); - pico_tree_insert(&ipv6_fragments, b); - - pico_set_mm_failure(1); - fail_if(pico_fragments_reassemble(&ipv6_fragments, 64, TESTPROTO, PICO_PROTO_IPV6) != 1); - fail_if(transport_recv_called == 1); - fail_if(buffer_len_transport_receive != 0); - fail_if(pico_tree_empty(&ipv6_fragments)); -} -END_TEST - -START_TEST(tc_pico_ipv6_process_frag) -{ - struct pico_ipv6_exthdr *hdr = NULL; - struct pico_frame *a = NULL, *b = NULL, *c = NULL; - - /* NULL args provided */ - ipv6_cur_frag_id = 0; - timer_add_called = 0; - pico_ipv6_process_frag(hdr, a, TESTPROTO); - fail_if(ipv6_cur_frag_id != 0); - fail_if(timer_add_called != 0); - - /* init hdr */ - hdr = PICO_ZALLOC(sizeof(struct pico_ipv6_exthdr)); - hdr->ext.frag.id[0]= 0xF; - - /* NULL frame provided */ - ipv6_cur_frag_id = 0; - timer_add_called = 0; - pico_ipv6_process_frag(hdr, a, TESTPROTO); - fail_if(ipv6_cur_frag_id != 0); - fail_if(timer_add_called != 0); - - /* init frame */ - a = 
pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - c = pico_frame_alloc(64 + 20); - fail_if(!c); - printf("Allocated frame, %p\n", c); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = 1; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 | 0x1; /* off = 32 */ - - c->net_hdr = c->buffer; - c->net_len = 20; - c->transport_len = 32; - c->transport_hdr = c->buffer + 20; - c->frag = 0x40; /* off = 64 */ - - /* Case 1: Empty fragments tree */ - ipv6_cur_frag_id = 0; - timer_add_called = 0; - /* make sure tree is empty */ - fail_if(!pico_tree_empty(&ipv6_fragments)); - - pico_ipv6_process_frag(hdr, a, TESTPROTO); - fail_if(ipv6_cur_frag_id != IP6_FRAG_ID(hdr)); - fail_if(timer_add_called != 1); - fail_if(pico_tree_empty(&ipv6_fragments)); - /* make sure we added the fragment to the tree */ - fail_if(((struct pico_frame *)pico_tree_first(&ipv6_fragments))->buffer != a->buffer); - - /* Case 2: Adding second fragment */ - timer_add_called = 0; - pico_ipv6_process_frag(hdr, b, TESTPROTO); - fail_if(ipv6_cur_frag_id != IP6_FRAG_ID(hdr)); - fail_if(timer_add_called != 0); - fail_if(pico_tree_empty(&ipv6_fragments)); - /* make sure we added the fragment to the tree */ - fail_if(((struct pico_frame *)pico_tree_last(&ipv6_fragments))->buffer != b->buffer); - - /* Case 3: Adding final fragment */ - timer_cancel_called = 0; - transport_recv_called = 0; - buffer_len_transport_receive = 0; - pico_ipv6_process_frag(hdr, c, TESTPROTO); - fail_if(ipv6_cur_frag_id != IP6_FRAG_ID(hdr)); - fail_if(timer_cancel_called != 1); - fail_if(transport_recv_called != 1); - fail_if(buffer_len_transport_receive != 96 + PICO_SIZE_IP6HDR); - /* Everything was received, tree should be empty */ - 
fail_if(!pico_tree_empty(&ipv6_fragments)); - - /* Cleanup */ - pico_fragments_empty_tree(&ipv6_fragments); - pico_frame_discard(a); - pico_frame_discard(b); - pico_frame_discard(c); -} -END_TEST - -START_TEST(tc_pico_ipv4_process_frag) -{ - struct pico_ipv4_hdr *hdr = NULL; - struct pico_frame *a = NULL, *b = NULL, *c = NULL; - - /* NULL args provided */ - ipv4_cur_frag_id = 0; - timer_add_called = 0; - pico_ipv4_process_frag(hdr, a, TESTPROTO); - fail_if(ipv4_cur_frag_id != 0); - fail_if(timer_add_called != 0); - - /* init hdr */ - hdr = PICO_ZALLOC(sizeof(struct pico_ipv4_hdr)); - hdr->id = TESTID; - - /* NULL frame provided */ - ipv4_cur_frag_id = 0; - timer_add_called = 0; - pico_ipv4_process_frag(hdr, a, TESTPROTO); - fail_if(ipv4_cur_frag_id != 0); - fail_if(timer_add_called != 0); - - /* init frame */ - a = pico_frame_alloc(32 + 20); - fail_if(!a); - printf("Allocated frame, %p\n", a); - b = pico_frame_alloc(32 + 20); - fail_if(!b); - printf("Allocated frame, %p\n", b); - c = pico_frame_alloc(64 + 20); - fail_if(!c); - printf("Allocated frame, %p\n", c); - - a->net_hdr = a->buffer; - a->net_len = 20; - a->transport_len = 32; - a->transport_hdr = a->buffer + 20; - a->frag = PICO_IPV4_MOREFRAG; /* more frags */ - - b->net_hdr = b->buffer; - b->net_len = 20; - b->transport_len = 32; - b->transport_hdr = b->buffer + 20; - b->frag = 0x20 >> 3u | PICO_IPV4_MOREFRAG; /* off = 32 + more frags*/ - - c->net_hdr = c->buffer; - c->net_len = 20; - c->transport_len = 32; - c->transport_hdr = c->buffer + 20; - c->frag = 0x40 >> 3u; /* off = 64 */ - - /* Case 1: Empty fragments tree */ - ipv4_cur_frag_id = 0; - timer_add_called = 0; - /* make sure tree is empty */ - fail_if(!pico_tree_empty(&ipv4_fragments)); - - pico_ipv4_process_frag(hdr, a, TESTPROTO); - fail_if(ipv4_cur_frag_id != TESTID); - fail_if(timer_add_called != 1); - fail_if(pico_tree_empty(&ipv4_fragments)); - /* make sure we added the fragment to the tree */ - fail_if(((struct pico_frame 
*)pico_tree_first(&ipv4_fragments))->buffer != a->buffer); - - /* Case 2: Adding second fragment */ - timer_add_called = 0; - pico_ipv4_process_frag(hdr, b, TESTPROTO); - fail_if(ipv4_cur_frag_id != TESTID); - fail_if(timer_add_called != 0); - fail_if(pico_tree_empty(&ipv4_fragments)); - /* make sure we added the fragment to the tree */ - fail_if(((struct pico_frame *)pico_tree_last(&ipv4_fragments))->buffer != b->buffer); - - /* Case 3: Adding final fragment */ - timer_cancel_called = 0; - transport_recv_called = 0; - buffer_len_transport_receive = 0; - pico_ipv4_process_frag(hdr, c, TESTPROTO); - fail_if(ipv4_cur_frag_id != TESTID); - fail_if(timer_cancel_called != 1); - fail_if(transport_recv_called != 1); - fail_if(buffer_len_transport_receive != 96 + PICO_SIZE_IP4HDR); - /* Everything was received, tree should be empty */ - fail_if(!pico_tree_empty(&ipv4_fragments)); - - /* Cleanup */ - pico_fragments_empty_tree(&ipv4_fragments); - pico_frame_discard(a); - pico_frame_discard(b); - pico_frame_discard(c); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_ipv6_process_frag = tcase_create("Unit test for pico_ipv6_process_frag"); - TCase *TCase_pico_ipv4_process_frag = tcase_create("Unit test for pico_ipv4_process_frag"); - - TCase *TCase_pico_fragments_reassemble = tcase_create("Unit test for pico_fragments_reassemble"); - TCase *TCase_pico_fragments_get_offset = tcase_create("Unit test for pico_fragments_get_offset"); - TCase *TCase_pico_fragments_get_more_flag = tcase_create("Unit test for pico_fragments_get_more_flag"); - TCase *TCase_pico_fragments_get_header_length = tcase_create("Unit test for pico_fragments_get_header_length"); - - TCase *TCase_pico_fragments_empty_tree = tcase_create("Unit test for pico_fragments_empty_tree"); - TCase *TCase_pico_fragments_send_notify = tcase_create("Unit test for pico_fragments_send_notify"); - TCase *TCase_pico_ipv6_frag_compare = tcase_create("Unit test for 
pico_ipv6_frag_compare"); - TCase *TCase_pico_ipv4_frag_compare = tcase_create("Unit test for pico_ipv4_frag_compare"); - TCase *TCase_pico_ipv6_fragments_complete = tcase_create("Unit test for pico_ipv6_fragments_complete"); - TCase *TCase_pico_ipv4_fragments_complete = tcase_create("Unit test for pico_ipv4_fragments_complete"); - TCase *TCase_pico_fragments_complete = tcase_create("Unit test for pico_fragments_complete"); - TCase *TCase_pico_fragments_check_complete = tcase_create("Unit test for pico_fragments_check_complete"); - TCase *TCase_pico_frag_expire = tcase_create("Unit test for pico_frag_expire"); - TCase *TCase_pico_ipv6_frag_timer_on = tcase_create("Unit test for pico_ipv6_frag_timer_on"); - TCase *TCase_pico_ipv4_frag_timer_on = tcase_create("Unit test for pico_ipv4_frag_timer_on"); - TCase *TCase_pico_ipv6_frag_match = tcase_create("Unit test for pico_ipv6_frag_match"); - TCase *TCase_pico_ipv4_frag_match = tcase_create("Unit test for pico_ipv4_frag_match"); - - tcase_add_test(TCase_pico_ipv4_process_frag, tc_pico_ipv4_process_frag); - suite_add_tcase(s, TCase_pico_ipv4_process_frag); - tcase_add_test(TCase_pico_ipv6_process_frag, tc_pico_ipv6_process_frag); - suite_add_tcase(s, TCase_pico_ipv6_process_frag); - tcase_add_test(TCase_pico_fragments_reassemble, tc_pico_fragments_reassemble); - suite_add_tcase(s, TCase_pico_fragments_reassemble); - tcase_add_test(TCase_pico_fragments_get_offset, tc_pico_fragments_get_offset); - suite_add_tcase(s, TCase_pico_fragments_get_offset); - tcase_add_test(TCase_pico_fragments_get_more_flag, tc_pico_fragments_get_more_flag); - suite_add_tcase(s, TCase_pico_fragments_get_more_flag); - tcase_add_test(TCase_pico_fragments_get_header_length, tc_pico_fragments_get_header_length); - suite_add_tcase(s, TCase_pico_fragments_get_header_length); - tcase_add_test(TCase_pico_fragments_send_notify, tc_pico_fragments_send_notify); - suite_add_tcase(s, TCase_pico_fragments_send_notify); - 
tcase_add_test(TCase_pico_fragments_empty_tree, tc_pico_fragments_empty_tree); - suite_add_tcase(s, TCase_pico_fragments_empty_tree); - tcase_add_test(TCase_pico_ipv6_frag_compare, tc_pico_ipv6_frag_compare); - suite_add_tcase(s, TCase_pico_ipv6_frag_compare); - tcase_add_test(TCase_pico_ipv4_frag_compare, tc_pico_ipv4_frag_compare); - suite_add_tcase(s, TCase_pico_ipv4_frag_compare); - tcase_add_test(TCase_pico_ipv6_fragments_complete, tc_pico_ipv6_fragments_complete); - suite_add_tcase(s, TCase_pico_ipv6_fragments_complete); - tcase_add_test(TCase_pico_ipv4_fragments_complete, tc_pico_ipv4_fragments_complete); - suite_add_tcase(s, TCase_pico_ipv4_fragments_complete); - tcase_add_test(TCase_pico_fragments_complete, tc_pico_fragments_complete); - suite_add_tcase(s, TCase_pico_fragments_complete); - tcase_add_test(TCase_pico_fragments_check_complete, tc_pico_fragments_check_complete); - suite_add_tcase(s, TCase_pico_fragments_check_complete); - tcase_add_test(TCase_pico_frag_expire, tc_pico_frag_expire); - suite_add_tcase(s, TCase_pico_frag_expire); - tcase_add_test(TCase_pico_ipv6_frag_timer_on, tc_pico_ipv6_frag_timer_on); - suite_add_tcase(s, TCase_pico_ipv6_frag_timer_on); - tcase_add_test(TCase_pico_ipv4_frag_timer_on, tc_pico_ipv4_frag_timer_on); - suite_add_tcase(s, TCase_pico_ipv4_frag_timer_on); - tcase_add_test(TCase_pico_ipv6_frag_match, tc_pico_ipv6_frag_match); - suite_add_tcase(s, TCase_pico_ipv6_frag_match); - tcase_add_test(TCase_pico_ipv4_frag_match, tc_pico_ipv4_frag_match); - suite_add_tcase(s, TCase_pico_ipv4_frag_match); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_frame.c b/kernel/picotcp/test/unit/modunit_pico_frame.c deleted file mode 100644 index 557d6c7..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_frame.c +++ 
/dev/null @@ -1,274 +0,0 @@ -#include "pico_config.h" -#include "pico_protocol.h" -#include "pico_frame.h" -#include "stack/pico_frame.c" -#include "check.h" - -volatile pico_err_t pico_err; - -#define FRAME_SIZE 1000 - -Suite *pico_suite(void); - -START_TEST(tc_pico_frame_alloc_discard) -{ - struct pico_frame *f = pico_frame_alloc(FRAME_SIZE); - - /* Test consistency */ - fail_if(!f); - fail_if(!f->buffer); - fail_if(!f->usage_count); - fail_if(*f->usage_count != 1); - fail_if(f->start != f->buffer); - fail_if(f->len != f->buffer_len); - fail_if(f->len != FRAME_SIZE); - pico_frame_discard(f); - - /* Test empty discard */ - pico_frame_discard(NULL); - -#ifdef PICO_FAULTY - printf("Testing with faulty memory in frame_alloc (1)\n"); - pico_set_mm_failure(1); - f = pico_frame_alloc(FRAME_SIZE); - fail_if(f); - - printf("Testing with faulty memory in frame_alloc (2)\n"); - pico_set_mm_failure(2); - f = pico_frame_alloc(FRAME_SIZE); - fail_if(f); - - printf("Testing with faulty memory in frame_do_alloc, with external buffer, failing to allocate usage_count \n"); - pico_set_mm_failure(2); - f = pico_frame_do_alloc(FRAME_SIZE, 1, 1); - fail_if(f); -#endif - printf("Testing frame_do_alloc, with invalid flags combination\n"); - f = pico_frame_do_alloc(FRAME_SIZE, 0, 1); - fail_if(f); - -} -END_TEST - -START_TEST(tc_pico_frame_grow_head) -{ - struct pico_frame *f = pico_frame_alloc(3); - struct pico_frame *f2 = pico_frame_alloc(0); - int ret = 0; - uint8_t buf[6] = { 0, 0, 0, 'a', 'b', 'c'}; - - /* I don't care about usage_count, it's tested 'pico_frame_grow' */ - fail_if(pico_frame_grow_head(f, 2) == 0); - - /* Check for dereferencing OOB */ - fail_if(pico_frame_grow_head(f2, 2) == -1); - f2->net_hdr[0] = 1; - - f->net_hdr = f->buffer; - f->net_len = 3; - f->net_hdr[0] = 'a'; - f->net_hdr[1] = 'b'; - f->net_hdr[2] = 'c'; - - /* Try to grow head */ - ret = pico_frame_grow_head(f, 6); - fail_if(ret != 0); - fail_unless(0 == memcmp(f->buffer, buf, f->buffer_len)); - 
fail_unless(3 == f->net_hdr - f->buffer); - - f->datalink_hdr = f->net_hdr - 3; - f->datalink_hdr[0] = 1; -} -END_TEST - -START_TEST(tc_pico_frame_grow) -{ - struct pico_frame *f = pico_frame_alloc(3); - struct pico_frame *f2 = pico_frame_alloc(0); - fail_if(f->buffer_len != 3); - /* Ensure that the usage_count starts at byte 4, for good alignment */ - fail_if(((void*)f->usage_count - (void *)f->buffer) != 4); - - ((uint8_t *)f->buffer)[0] = 'a'; - ((uint8_t *)f->buffer)[1] = 'b'; - ((uint8_t *)f->buffer)[2] = 'c'; - *f->usage_count = 12; - - - /* First, the failing cases. */ - fail_if(pico_frame_grow(NULL, 30) == 0); - fail_if(pico_frame_grow(f, 2) == 0); - f->flags = 0; - - /* Check for dereferencing OOB */ - fail_if(pico_frame_grow(f2, 3) != 0); - f2->net_hdr[0] = 1; - f2->net_hdr[1] = 2; - - pico_set_mm_failure(1); - fail_if(pico_frame_grow(f, 21) == 0); - - /* Now, the good one. */ - fail_if(pico_frame_grow(f, 21) != 0); - fail_if(f->buffer_len != 21); - fail_if(((void *)f->usage_count - (void *)f->buffer) != 24); - - - fail_if(((uint8_t *)f->buffer)[0] != 'a'); - fail_if(((uint8_t *)f->buffer)[1] != 'b'); - fail_if(((uint8_t *)f->buffer)[2] != 'c'); - fail_if(*f->usage_count != 12); - - *f->usage_count = 1; - pico_frame_discard(f); - - f = pico_frame_alloc_skeleton(10, 1); - fail_if(!f); - fail_if(f->buffer); - fail_if(!f->flags); - f->buffer = PICO_ZALLOC(10); - - fail_if(pico_frame_grow(f, 22) != 0); - fail_if (f->flags); - pico_frame_discard(f); - -} -END_TEST - -START_TEST(tc_pico_frame_copy) -{ - struct pico_frame *f = pico_frame_alloc(FRAME_SIZE); - struct pico_frame *c1, *c2, *c3; - (void)c3; - fail_if(!f); - fail_if(!f->buffer); - fail_if(*f->usage_count != 1); - - /* First copy */ - c1 = pico_frame_copy(f); - fail_if(!c1); - fail_if(!c1->buffer); - fail_if(!c1->usage_count); - - fail_if (c1->buffer != f->buffer); - fail_if(c1->usage_count != f->usage_count); - fail_if(*c1->usage_count != 2); - fail_if(*f->usage_count != 2); - fail_if(c1->start != 
c1->buffer); - fail_if(c1->len != c1->buffer_len); - fail_if(c1->len != FRAME_SIZE); - - /* Second copy */ - c2 = pico_frame_copy(f); - fail_if (c2->buffer != f->buffer); - fail_if(c2->usage_count != f->usage_count); - fail_if(*c2->usage_count != 3); - fail_if(*f->usage_count != 3); - fail_if(c2->start != c2->buffer); - fail_if(c2->len != c2->buffer_len); - fail_if(c2->len != FRAME_SIZE); - - -#ifdef PICO_FAULTY - printf("Testing with faulty memory in frame_copy (1)\n"); - pico_set_mm_failure(1); - c3 = pico_frame_copy(f); - fail_if(c3); - fail_if(!f); -#endif - - /* Discard 1 */ - pico_frame_discard(c1); - fail_if(*f->usage_count != 2); - - /* Discard 2 */ - pico_frame_discard(c2); - fail_if(*f->usage_count != 1); - - pico_frame_discard(f); - -} -END_TEST - -START_TEST(tc_pico_frame_deepcopy) -{ - struct pico_frame *f = pico_frame_alloc(FRAME_SIZE); - struct pico_frame *dc = pico_frame_deepcopy(f); - fail_if(*f->usage_count != 1); - fail_if(*dc->usage_count != 1); - fail_if(dc->buffer == f->buffer); -#ifdef PICO_FAULTY - printf("Testing with faulty memory in frame_deepcopy (1)\n"); - pico_set_mm_failure(1); - dc = pico_frame_deepcopy(f); - fail_if(dc); - fail_if(!f); -#endif -} -END_TEST - -START_TEST(tc_pico_is_digit) -{ - fail_if(pico_is_digit('a')); - fail_if(pico_is_digit('Z')); - fail_if(pico_is_digit('\0')); - fail_if(pico_is_digit('\n')); - fail_if(pico_is_digit('0' - 1)); - fail_if(pico_is_digit('9' + 1)); - fail_unless(pico_is_digit('0')); - fail_unless(pico_is_digit('9')); -} -END_TEST - - -START_TEST(tc_pico_is_hex) -{ - fail_if(pico_is_hex('g')); - fail_if(pico_is_hex('Z')); - fail_if(pico_is_hex('\0')); - fail_if(pico_is_hex('\n')); - fail_if(pico_is_hex('0' - 1)); - fail_if(pico_is_hex('f' + 1)); - fail_if(pico_is_hex('F' + 1)); - fail_unless(pico_is_hex('0')); - fail_unless(pico_is_hex('f')); - fail_unless(pico_is_hex('A')); - fail_unless(pico_is_hex('F')); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("pico_frame.c"); - TCase 
*TCase_pico_frame_alloc_discard = tcase_create("Unit test for pico_frame_alloc_discard"); - TCase *TCase_pico_frame_copy = tcase_create("Unit test for pico_frame_copy"); - TCase *TCase_pico_frame_grow = tcase_create("Unit test for pico_frame_grow"); - TCase *TCase_pico_frame_grow_head = tcase_create("Unit test for pico_frame_grow_head"); - TCase *TCase_pico_frame_deepcopy = tcase_create("Unit test for pico_frame_deepcopy"); - TCase *TCase_pico_is_digit = tcase_create("Unit test for pico_is_digit"); - TCase *TCase_pico_is_hex = tcase_create("Unit test for pico_is_hex"); - tcase_add_test(TCase_pico_frame_alloc_discard, tc_pico_frame_alloc_discard); - tcase_add_test(TCase_pico_frame_copy, tc_pico_frame_copy); - tcase_add_test(TCase_pico_frame_grow, tc_pico_frame_grow); - tcase_add_test(TCase_pico_frame_grow_head, tc_pico_frame_grow_head); - tcase_add_test(TCase_pico_frame_deepcopy, tc_pico_frame_deepcopy); - tcase_add_test(TCase_pico_is_digit, tc_pico_is_digit); - tcase_add_test(TCase_pico_is_hex, tc_pico_is_hex); - suite_add_tcase(s, TCase_pico_frame_alloc_discard); - suite_add_tcase(s, TCase_pico_frame_copy); - suite_add_tcase(s, TCase_pico_frame_grow); - suite_add_tcase(s, TCase_pico_frame_grow_head); - suite_add_tcase(s, TCase_pico_frame_deepcopy); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_hotplug_detection.c b/kernel/picotcp/test/unit/modunit_pico_hotplug_detection.c deleted file mode 100644 index cbcc32b..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_hotplug_detection.c +++ /dev/null @@ -1,192 +0,0 @@ -#include "pico_hotplug_detection.h" -#include "pico_tree.h" -#include "pico_device.h" -#include "modules/pico_hotplug_detection.c" -#include "check.h" -#include "pico_dev_null.c" - -Suite *pico_suite(void); -void cb_one(struct 
pico_device *dev, int event); -void cb_two(struct pico_device *dev, int event); -int link_state_a(struct pico_device *self); -int link_state_b(struct pico_device *self); - -/* stubs for timer */ -static int8_t timer_active = 0; -void (*timer_cb_function)(pico_time, void *); -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - IGNORE_PARAMETER(expire); - IGNORE_PARAMETER(arg); - - timer_active++; - timer_cb_function = timer; - - return 123; -} - -void pico_timer_cancel(uint32_t id) -{ - timer_active--; - fail_if(id != 123); -} - -/* callbacks for testing */ -uint32_t cb_one_cntr = 0; -int cb_one_last_event = 0; -void cb_one(struct pico_device *dev, int event) -{ - IGNORE_PARAMETER(dev); - - cb_one_cntr++; - cb_one_last_event = event; -} -uint32_t cb_two_cntr = 0; -int cb_two_last_event = 0; -void cb_two(struct pico_device *dev, int event) -{ - IGNORE_PARAMETER(dev); - - cb_two_cntr++; - cb_two_last_event = event; -} - -/* link state functions for the testing devices */ -int state_a = 0; -int link_state_a(struct pico_device *self) -{ - IGNORE_PARAMETER(self); - return state_a; -} - -int state_b = 0; -int link_state_b(struct pico_device *self) -{ - IGNORE_PARAMETER(self); - return state_b; -} - - -START_TEST(tc_pico_hotplug_reg_dereg) -{ - /* create some devices */ - struct pico_device *dev_a, *dev_b; - dev_a = pico_null_create("dummy1"); - dev_b = pico_null_create("dummy2"); - - dev_a->link_state = &link_state_a; - dev_b->link_state = &link_state_b; - - /* add some function pointers to be called */ - pico_hotplug_register(dev_a, &cb_one); - fail_unless(timer_active == 1); - pico_hotplug_register(dev_a, &cb_two); - pico_hotplug_register(dev_b, &cb_two); - fail_unless(timer_active == 1); - - /* remove function pointers */ - pico_hotplug_deregister(dev_a, &cb_one); - pico_hotplug_deregister(dev_a, &cb_two); - pico_hotplug_deregister(dev_b, &cb_two); - - /* check that our tree is empty at the end */ - 
fail_unless(pico_tree_empty(&Hotplug_device_tree)); - - /* register functions multiple times */ - pico_hotplug_register(dev_a, &cb_one); - pico_hotplug_register(dev_a, &cb_one); - pico_hotplug_register(dev_a, &cb_two); - pico_hotplug_register(dev_a, &cb_two); - pico_hotplug_register(dev_b, &cb_two); - pico_hotplug_register(dev_b, &cb_two); - - /* remove function pointers once */ - pico_hotplug_deregister(dev_a, &cb_one); - pico_hotplug_deregister(dev_a, &cb_two); - fail_unless(timer_active == 1); - pico_hotplug_deregister(dev_b, &cb_two); - fail_unless(timer_active == 0); - - /* check that our tree is empty at the end */ - fail_unless(pico_tree_empty(&Hotplug_device_tree)); -} -END_TEST - -START_TEST(tc_pico_hotplug_callbacks) -{ - /* create some devices */ - struct pico_device *dev_a, *dev_b; - - dev_a = pico_null_create("dummy1"); - dev_b = pico_null_create("dummy2"); - - dev_a->link_state = &link_state_a; - dev_b->link_state = &link_state_b; - - /* add some function pointers to be called */ - pico_hotplug_register(dev_a, &cb_one); - pico_hotplug_register(dev_a, &cb_two); - pico_hotplug_register(dev_b, &cb_two); - - fail_unless(timer_active == 1); - - timer_active = 0; - timer_cb_function(0, NULL); - fail_unless(timer_active == 1); - fail_unless(cb_one_cntr == 1); - fail_unless(cb_two_cntr == 2); - - state_a = 1; - timer_active = 0; - timer_cb_function(0, NULL); - fail_unless(timer_active == 1); - fail_unless(cb_one_cntr == 2); - fail_unless(cb_one_last_event == PICO_HOTPLUG_EVENT_UP ); - fail_unless(cb_two_cntr == 3); - fail_unless(cb_two_last_event == PICO_HOTPLUG_EVENT_UP ); - - state_b = 1; - timer_active = 0; - timer_cb_function(0, NULL); - fail_unless(timer_active == 1); - fail_unless(cb_one_cntr == 2); - fail_unless(cb_one_last_event == PICO_HOTPLUG_EVENT_UP ); - fail_unless(cb_two_cntr == 4); - fail_unless(cb_two_last_event == PICO_HOTPLUG_EVENT_UP ); - - state_a = 0; - state_b = 0; - timer_active = 0; - timer_cb_function(0, NULL); - 
fail_unless(timer_active == 1); - fail_unless(cb_one_cntr == 3); - fail_unless(cb_one_last_event == PICO_HOTPLUG_EVENT_DOWN ); - fail_unless(cb_two_cntr == 6); - fail_unless(cb_two_last_event == PICO_HOTPLUG_EVENT_DOWN ); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_hotplug_reg_dereg = tcase_create("Unit test for pico_hotplug_reg_dereg"); - TCase *TCase_pico_hotplug_callbacks = tcase_create("Unit test for pico_hotplug_callbacks"); - - tcase_add_test(TCase_pico_hotplug_reg_dereg, tc_pico_hotplug_reg_dereg); - suite_add_tcase(s, TCase_pico_hotplug_reg_dereg); - tcase_add_test(TCase_pico_hotplug_callbacks, tc_pico_hotplug_callbacks); - suite_add_tcase(s, TCase_pico_hotplug_callbacks); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_igmp.c b/kernel/picotcp/test/unit/modunit_pico_igmp.c deleted file mode 100644 index bfc7c22..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_igmp.c +++ /dev/null @@ -1,359 +0,0 @@ -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_socket.h" -#include "pico_stack.h" -#include "pico_socket.h" -#include "pico_queue.h" -#include "pico_tree.h" -#include "modules/pico_igmp.c" -#include "check.h" -#include "pico_dev_null.c" - -Suite *pico_suite(void); -void mock_callback(struct igmp_timer *t); - -static uint32_t timers_added = 0; -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - IGNORE_PARAMETER(expire); - IGNORE_PARAMETER(timer); - IGNORE_PARAMETER(arg); - return ++timers_added; -} - -void mock_callback(struct igmp_timer *t) -{ - IGNORE_PARAMETER(t); -} -static int mcast_filter_cmp(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - if (a->ip4.addr < b->ip4.addr) - return -1; - - if (a->ip4.addr > 
b->ip4.addr) - return 1; - - return 0; -} -static int mcast_sources_cmp(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - if (a->ip4.addr < b->ip4.addr) - return -1; - - if (a->ip4.addr > b->ip4.addr) - return 1; - - return 0; -} -static PICO_TREE_DECLARE(_MCASTFilter, mcast_filter_cmp); -START_TEST(tc_pico_igmp_report_expired) -{ - struct igmp_timer *t = PICO_ZALLOC(sizeof(struct igmp_timer)); - struct pico_ip4 zero = {0}; - t->mcast_link = zero; - t->mcast_group = zero; - /* void function, just check for side effects */ - pico_igmp_report_expired(t); -} -END_TEST -START_TEST(tc_igmpt_type_compare) -{ - struct igmp_timer a; - struct igmp_timer b; - a.type = 1; - b.type = 2; - fail_if(igmpt_type_compare(&a, &b) != -1); - fail_if(igmpt_type_compare(&b, &a) != 1); - fail_if(igmp_timer_cmp(&b, &a) != 1); -} -END_TEST -START_TEST(tc_pico_igmp_state_change) -{ - struct pico_ip4 mcast_link, mcast_group; - pico_string_to_ipv4("192.168.1.1", &mcast_link.addr); - pico_string_to_ipv4("224.7.7.7", &mcast_group.addr); - fail_if(pico_igmp_state_change(&mcast_link, &mcast_group, 0, NULL, 99) != -1); - fail_if(pico_igmp_state_change(&mcast_link, &mcast_group, 0, NULL, PICO_IGMP_STATE_CREATE) != 0); -} -END_TEST -START_TEST(tc_pico_igmp_timer_expired) -{ - struct igmp_timer *t, *s; - t = PICO_ZALLOC(sizeof(struct igmp_timer)); - t->stopped = IGMP_TIMER_STOPPED; - t->type = 0; - pico_string_to_ipv4("192.168.1.1", &t->mcast_link.addr); - pico_string_to_ipv4("244.7.7.7", &t->mcast_group.addr); - /* void function, just check for side effects */ - pico_igmp_timer_expired(0, (void *)t); - pico_tree_insert(&IGMPTimers, t); - s = PICO_ZALLOC(sizeof(struct igmp_timer)); - memcpy(s,t,sizeof(struct igmp_timer)); // t will be freed next test - pico_igmp_timer_expired(0, (void *)t); /* t is freed here */ - s->stopped++; - s->start = PICO_TIME_MS()*2; - s->type++; - pico_tree_insert(&IGMPTimers, s); - t = PICO_ZALLOC(sizeof(struct igmp_timer)); - memcpy(t,s,sizeof(struct 
igmp_timer)); // s will be freed next test - pico_igmp_timer_expired(0, (void *)s); /* s is freed here */ - t->callback = mock_callback; - pico_igmp_timer_expired(0, (void *)t); -} -END_TEST -START_TEST(tc_pico_igmp_v2querier_expired) -{ - struct igmp_timer *t = PICO_ZALLOC(sizeof(struct igmp_timer)); - struct pico_ip4 addr = {0}; - struct pico_device *dev = pico_null_create("dummy2"); - struct pico_frame *f = pico_frame_alloc(sizeof(struct pico_frame)); - t->f = f; - pico_string_to_ipv4("192.168.1.1", &(addr.addr)); - /* void function, just check for side effects */ - /* No link */ - pico_igmp_v2querier_expired(t); - f->dev = dev; - pico_ipv4_link_add(dev, addr, addr); - pico_igmp_v2querier_expired(t); -} -END_TEST -START_TEST(tc_pico_igmp_delete_parameter) -{ - struct mcast_parameters p; - fail_if(pico_igmp_delete_parameter(&p) != -1); -} -END_TEST -START_TEST(tc_pico_igmp_process_in) -{ - struct mcast_parameters *p; - struct pico_device *dev = pico_null_create("dummy3"); - struct pico_ipv4_link *link; - uint8_t i, j, _i, _j; - int result; - struct pico_mcast_group g; - /* Building example frame */ - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - pico_string_to_ipv4("192.168.1.1", &p->mcast_link.ip4.addr); - pico_string_to_ipv4("244.7.7.7", &p->mcast_group.ip4.addr); - /* no link */ - fail_if(pico_igmp_generate_report(p) != -1); - - pico_ipv4_link_add(dev, p->mcast_link.ip4, p->mcast_link.ip4); - link = pico_ipv4_link_get(&p->mcast_link.ip4); - link->mcast_compatibility = PICO_IGMPV2; - g.mcast_addr.ip4 = p->mcast_group.ip4; - g.MCASTSources.root = &LEAF; - g.MCASTSources.compare = mcast_sources_cmp; - /* No mcastsources tree */ - link->mcast_compatibility = PICO_IGMPV3; - fail_if(pico_igmp_generate_report(p) != -1); - pico_tree_insert(link->MCASTGroups, &g); - pico_tree_insert(&IGMPParameters, p); - - link->mcast_compatibility = 99; - fail_if(pico_igmp_generate_report(p) != -1); - link->mcast_compatibility = PICO_IGMPV2; - 
fail_if(pico_igmp_generate_report(p) != 0); - link->mcast_compatibility = PICO_IGMPV3; - for(_j = 0; _j < 3; _j++) { /* FILTER */ - (_j == 2) ? (result = -1) : (result = 0); - for(_i = 0; _i < 3; _i++) { /* FILTER */ - if(_i == 2) result = -1; - - for(i = 0; i < 3; i++) { /* STATES */ - for(j = 0; j < 6; j++) { /* EVENTS */ - p->MCASTFilter = &_MCASTFilter; - p->filter_mode = _i; - g.filter_mode = _j; - if(p->event == IGMP_EVENT_DELETE_GROUP || p->event == IGMP_EVENT_QUERY_RECV) - p->event++; - - fail_if(pico_igmp_generate_report(p) != result); - p->state = i; - p->event = j; - if(result != -1 && p->f) /* in some combinations, no frame is created */ - fail_if(pico_igmp_process_in(NULL, p->f) != 0); - } - } - } - } -} -END_TEST -START_TEST(tc_pico_igmp_find_parameter) -{ - struct pico_ip4 mcast_link, mcast_group; - struct mcast_parameters test = { - 0 - }; - fail_if(pico_igmp_find_parameter(NULL, NULL) != NULL); - pico_string_to_ipv4("192.168.1.1", &mcast_link.addr); - fail_if(pico_igmp_find_parameter(&mcast_link, NULL) != NULL); - pico_string_to_ipv4("192.168.1.2", &mcast_group.addr); - fail_if(pico_igmp_find_parameter(&mcast_link, &mcast_group) != NULL); - test.mcast_link.ip4 = mcast_link; - test.mcast_group.ip4 = mcast_group; - pico_tree_insert(&IGMPParameters, &test); - - fail_if(pico_igmp_find_parameter(&mcast_link, &mcast_group) == NULL); -} -END_TEST -START_TEST(tc_pico_igmp_compatibility_mode) -{ - struct pico_frame *f; - struct pico_device *dev = pico_null_create("dummy1"); - struct pico_ip4 addr; - struct pico_ipv4_hdr *hdr; - struct igmp_message *query; - uint8_t ihl = 24; - f = pico_proto_ipv4.alloc(&pico_proto_ipv4, dev, sizeof(struct igmpv3_report) + sizeof(struct igmpv3_group_record) + (0 * sizeof(struct pico_ip4))); - pico_string_to_ipv4("192.168.1.2", &addr.addr); - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - ihl = (uint8_t)((hdr->vhl & 0x0F) * 4); /* IHL is in 32bit words */ - query = (struct igmp_message *) f->transport_hdr; - /* No link */ - 
fail_if(pico_igmp_compatibility_mode(f) != -1); - pico_ipv4_link_add(dev, addr, addr); - f->dev = dev; - /* Igmpv3 query */ - hdr->len = short_be((uint16_t)(12 + ihl)); - fail_if(pico_igmp_compatibility_mode(f) != 0); - /* Igmpv2 query */ - hdr->len = short_be((uint16_t)(8 + ihl)); - query->max_resp_time = 0; - fail_if(pico_igmp_compatibility_mode(f) == 0); - query->max_resp_time = 1; - fail_if(pico_igmp_compatibility_mode(f) != 0); - /* Invalid Query */ - hdr->len = short_be((uint16_t)(9 + ihl)); - fail_if(pico_igmp_compatibility_mode(f) == 0); -} -END_TEST -START_TEST(tc_pico_igmp_analyse_packet) -{ - struct pico_frame *f; - struct pico_device *dev = pico_null_create("dummy0"); - struct pico_ip4 addr; - struct igmp_message *igmp; - f = pico_proto_ipv4.alloc(&pico_proto_ipv4, dev, sizeof(struct igmp_message)); - pico_string_to_ipv4("192.168.1.1", &addr.addr); - /* No link */ - fail_if(pico_igmp_analyse_packet(f) != NULL); - pico_ipv4_link_add(dev, addr, addr); - f->dev = dev; - - igmp = (struct igmp_message *) (f->transport_hdr); - igmp->type = 0; - /* wrong type */ - fail_if(pico_igmp_analyse_packet(f) != NULL); - - /* all correct */ - igmp->type = IGMP_TYPE_MEM_QUERY; - fail_if(pico_igmp_analyse_packet(f) == NULL); - igmp->type = IGMP_TYPE_MEM_REPORT_V1; - fail_if(pico_igmp_analyse_packet(f) == NULL); - igmp->type = IGMP_TYPE_MEM_REPORT_V2; - fail_if(pico_igmp_analyse_packet(f) == NULL); - igmp->type = IGMP_TYPE_MEM_REPORT_V3; - fail_if(pico_igmp_analyse_packet(f) == NULL); -} -END_TEST -START_TEST(tc_pico_igmp_discard) -{ - /* TODO */ -} -END_TEST -START_TEST(tc_srst) -{ - struct mcast_parameters p; - struct pico_device *dev = pico_null_create("dummy0"); - struct pico_ipv4_link *link; - - pico_string_to_ipv4("192.168.1.1", &p.mcast_link.ip4.addr); - /* no link */ - fail_if(srst(&p) != -1); - pico_ipv4_link_add(dev, p.mcast_link.ip4, p.mcast_link.ip4); - link = pico_ipv4_link_get(&p.mcast_link.ip4); - /* Not supported protocol for this call */ - 
link->mcast_compatibility = PICO_IGMPV2; - fail_if(srst(&p) != -1); - link->mcast_compatibility = PICO_IGMPV3; - fail_if(srst(&p) != -1); -} -END_TEST -START_TEST(tc_stcl) -{ - struct igmp_timer *t = PICO_ZALLOC(sizeof(struct igmp_timer)); - struct mcast_parameters p; - - pico_string_to_ipv4("192.168.1.10", &t->mcast_link.addr); - pico_string_to_ipv4("244.7.7.7", &t->mcast_group.addr); - p.mcast_link.ip4 = t->mcast_link; - p.mcast_group.ip4 = t->mcast_group; - t->type = IGMP_TIMER_GROUP_REPORT; - /* not in tree */ - fail_if(stcl(&p) != -1); - pico_igmp_timer_start(t); - fail_if(stcl(&p) != 0); -} -END_TEST - -Suite *pico_suite(void) -{ - - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_igmp_report_expired = tcase_create("Unit test for pico_igmp_report_expired"); - TCase *TCase_igmpt_type_compare = tcase_create("Unit test for igmpt_type_compare"); - TCase *TCase_pico_igmp_analyse_packet = tcase_create("Unit test for pico_igmp_analyse_packet"); - TCase *TCase_pico_igmp_discard = tcase_create("Unit test for pico_igmp_discard"); - TCase *TCase_pico_igmp_compatibility_mode = tcase_create("Unit test for pico_igmp_compatibility"); - TCase *TCase_pico_igmp_state_change = tcase_create("Unit test for pico_igmp_state_change"); - TCase *TCase_pico_igmp_process_in = tcase_create("Unit test for pico_igmp_process_in"); - TCase *TCase_pico_igmp_timer_expired = tcase_create("Unit test for pico_igmp_timer_expired"); - TCase *TCase_pico_igmp_delete_parameter = tcase_create("Unit test for pico_igmp_delete_parameter"); - TCase *TCase_pico_igmp_find_parameter = tcase_create("Unit test for pico_igmp_find_parameter"); - TCase *TCase_stcl = tcase_create("Unit test for stcl"); - TCase *TCase_srst = tcase_create("Unit test for srst"); - TCase *TCase_pico_igmp_v2querier_expired = tcase_create("Unit test for pico_igmp_v2_querier_expired"); - - tcase_add_test(TCase_pico_igmp_report_expired, tc_pico_igmp_report_expired); - suite_add_tcase(s, TCase_pico_igmp_report_expired); - 
tcase_add_test(TCase_igmpt_type_compare, tc_igmpt_type_compare); - suite_add_tcase(s, TCase_igmpt_type_compare); - tcase_add_test(TCase_pico_igmp_analyse_packet, tc_pico_igmp_analyse_packet); - suite_add_tcase(s, TCase_pico_igmp_analyse_packet); - tcase_add_test(TCase_pico_igmp_discard, tc_pico_igmp_discard); - suite_add_tcase(s, TCase_pico_igmp_discard); - tcase_add_test(TCase_pico_igmp_compatibility_mode, tc_pico_igmp_compatibility_mode); - suite_add_tcase(s, TCase_pico_igmp_compatibility_mode); - suite_add_tcase(s, TCase_pico_igmp_state_change); - tcase_add_test(TCase_pico_igmp_state_change, tc_pico_igmp_state_change); - suite_add_tcase(s, TCase_pico_igmp_process_in); - tcase_add_test(TCase_pico_igmp_process_in, tc_pico_igmp_process_in); - suite_add_tcase(s, TCase_pico_igmp_timer_expired); - tcase_add_test(TCase_pico_igmp_timer_expired, tc_pico_igmp_timer_expired); - suite_add_tcase(s, TCase_pico_igmp_delete_parameter); - tcase_add_test(TCase_pico_igmp_delete_parameter, tc_pico_igmp_delete_parameter); - suite_add_tcase(s, TCase_pico_igmp_find_parameter); - tcase_add_test(TCase_pico_igmp_find_parameter, tc_pico_igmp_find_parameter); - suite_add_tcase(s, TCase_stcl); - tcase_add_test(TCase_stcl, tc_stcl); - suite_add_tcase(s, TCase_srst); - tcase_add_test(TCase_srst, tc_srst); - suite_add_tcase(s, TCase_pico_igmp_v2querier_expired); - tcase_add_test(TCase_pico_igmp_v2querier_expired, tc_pico_igmp_v2querier_expired); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_ipfilter.c b/kernel/picotcp/test/unit/modunit_pico_ipfilter.c deleted file mode 100644 index bad6c68..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_ipfilter.c +++ /dev/null @@ -1,321 +0,0 @@ -#include "pico_ipv4.h" -#include "pico_config.h" -#include "pico_icmp4.h" -#include 
"pico_stack.h" -#include "pico_eth.h" -#include "pico_socket.h" -#include "pico_device.h" -#include "pico_ipfilter.h" -#include "pico_tcp.h" -#include "pico_udp.h" -#include "pico_tree.h" -#include "modules/pico_ipfilter.c" -#include "check.h" - -Suite *pico_suite(void); -int pico_icmp4_packet_filtered(struct pico_frame *f) -{ - (void)f; - return 0; -} - -void pico_frame_discard(struct pico_frame *f) -{ - (void)f; -} - -volatile pico_err_t pico_err; - - - -START_TEST(tc_ipfilter) -{ - uint32_t r; - struct filter_node a = { - 0 - }, b = { - 0 - }; - fail_if(filter_compare(&a, &b) != 0); - - - /* a is rule, matching packet b */ - a.filter_id = 1; - - /* check that rule is matched properly */ - fail_if(filter_match_packet_find_rule(&a, &b) != &a); - - /* a has a out port that does not match packet */ - b.out_port = 8; - a.out_port = 7; - fail_if(filter_compare(&a, &b) == 0); - - /* a matches all ports */ - a.out_port = 0; - fail_if(filter_compare(&a, &b) != 0); - - /*** NEXT TEST ***/ - - - /* a has a in port that does not match packet */ - b.in_port = 8; - a.in_port = 7; - fail_if(filter_compare(&a, &b) == 0); - - /* a matches all ports */ - a.in_port = 0; - fail_if(filter_compare(&a, &b) != 0); - - /* a matches port exactly */ - a.in_port = 0; - fail_if(filter_compare(&a, &b) != 0); - - /*** NEXT TEST ***/ - - /* a matches all out addresses */ - b.out_addr = 0x010000a0; - fail_if(filter_compare(&a, &b) != 0); - - /* a does not match b via 8-bit netmask */ - a.out_addr = 0x000000c0; - a.out_addr_netmask = 0x000000ff; - fail_if(filter_compare(&a, &b) == 0); - - /* a does not match b at all*/ - a.out_addr = 0x020000b0; - a.out_addr_netmask = 0xffffffff; - fail_if(filter_compare(&a, &b) == 0); - - /* a matches b via 8-bit netmask */ - a.out_addr = 0x000000a0; - a.out_addr_netmask = 0x000000ff; - fail_if(filter_compare(&a, &b) != 0); - - /* a matches b exactly */ - a.out_addr = 0x010000a0; - a.out_addr_netmask = 0xffffffff; - fail_if(filter_compare(&a, &b) != 0); - - 
/*** NEXT TEST ***/ - - /* a matches all in addresses */ - b.in_addr = 0x010000a0; - fail_if(filter_compare(&a, &b) != 0); - - /* a does not match b via 8-bit netmask */ - a.in_addr = 0x000000c0; - a.in_addr_netmask = 0x000000ff; - fail_if(filter_compare(&a, &b) == 0); - - /* a does not match b at all*/ - a.in_addr = 0x020000b0; - a.in_addr_netmask = 0xffffffff; - fail_if(filter_compare(&a, &b) == 0); - - /* a matches b via 8-bit netmask */ - a.in_addr = 0x000000a0; - a.in_addr_netmask = 0x000000ff; - fail_if(filter_compare(&a, &b) != 0); - - /* a matches b exactly */ - a.in_addr = 0x010000a0; - a.in_addr_netmask = 0xffffffff; - fail_if(filter_compare(&a, &b) != 0); - - /*** NEXT TEST ***/ - - /* a matches all protocols */ - b.proto = 4u; - fail_if(filter_compare(&a, &b) != 0); - - /* a does not match protocol */ - a.proto = 5u; - fail_if(filter_compare(&a, &b) == 0); - - /* a matches b's protocol */ - a.proto = b.proto; - fail_if(filter_compare(&a, &b) != 0); - - /*** NEXT TEST ***/ - - /* a matches all devices */ - b.fdev = (struct pico_device *) &b; - fail_if(filter_compare(&a, &b) != 0); - - /* a does not match device */ - a.fdev = (struct pico_device *)&a; - fail_if(filter_compare(&a, &b) == 0); - - /* a matches b's device */ - a.fdev = b.fdev; - fail_if(filter_compare(&a, &b) != 0); - - - /*** SAME TEST DUPLICATED WITH INVERTED ORDER OF PARAMETERS ***/ - - memset(&a, 0, sizeof(struct filter_node)); - memset(&b, 0, sizeof(struct filter_node)); - - a.filter_id = 2; - - - /* check that rule is matched properly */ - fail_if(filter_match_packet_find_rule(&b, &a) != &a); - - /* a has a out port that does not match packet */ - b.out_port = 8; - a.out_port = 7; - fail_if(filter_compare(&b, &a) == 0); - - /* a matches all ports */ - a.out_port = 0; - fail_if(filter_compare(&b, &a) != 0); - - /*** NEXT TEST ***/ - - - /* a has a in port that does not match packet */ - b.in_port = 8; - a.in_port = 7; - fail_if(filter_compare(&b, &a) == 0); - - /* a matches all ports */ 
- a.in_port = 0; - fail_if(filter_compare(&b, &a) != 0); - - /* a matches port exactly */ - a.in_port = 0; - fail_if(filter_compare(&b, &a) != 0); - - /*** NEXT TEST ***/ - - /* a matches all out addresses */ - b.out_addr = 0x010000a0; - fail_if(filter_compare(&b, &a) != 0); - - /* a does not match b via 8-bit netmask */ - a.out_addr = 0x000000c0; - a.out_addr_netmask = 0x000000ff; - fail_if(filter_compare(&b, &a) == 0); - - /* a does not match b at all*/ - a.out_addr = 0x020000b0; - a.out_addr_netmask = 0xffffffff; - fail_if(filter_compare(&b, &a) == 0); - - /* a matches b via 8-bit netmask */ - a.out_addr = 0x000000a0; - a.out_addr_netmask = 0x000000ff; - fail_if(filter_compare(&b, &a) != 0); - - /* a matches b exactly */ - a.out_addr = 0x010000a0; - a.out_addr_netmask = 0xffffffff; - fail_if(filter_compare(&b, &a) != 0); - - /*** NEXT TEST ***/ - - /* a matches all in addresses */ - b.in_addr = 0x010000a0; - fail_if(filter_compare(&b, &a) != 0); - - /* a does not match b via 8-bit netmask */ - a.in_addr = 0x000000c0; - a.in_addr_netmask = 0x000000ff; - fail_if(filter_compare(&b, &a) == 0); - - /* a does not match b at all*/ - a.in_addr = 0x020000b0; - a.in_addr_netmask = 0xffffffff; - fail_if(filter_compare(&b, &a) == 0); - - /* a matches b via 8-bit netmask */ - a.in_addr = 0x000000a0; - a.in_addr_netmask = 0x000000ff; - fail_if(filter_compare(&b, &a) != 0); - - /* a matches b exactly */ - a.in_addr = 0x010000a0; - a.in_addr_netmask = 0xffffffff; - fail_if(filter_compare(&b, &a) != 0); - - /*** NEXT TEST ***/ - - /* a matches all protocols */ - b.proto = 4u; - fail_if(filter_compare(&b, &a) != 0); - - /* a does not match protocol */ - a.proto = 5u; - fail_if(filter_compare(&b, &a) == 0); - - /* a matches b's protocol */ - a.proto = b.proto; - fail_if(filter_compare(&b, &a) != 0); - - /*** NEXT TEST ***/ - - /* a matches all devices */ - b.fdev = (struct pico_device *)&b; - fail_if(filter_compare(&b, &a) != 0); - - /* a does not match device */ - a.fdev = 
(struct pico_device *)&a; - fail_if(filter_compare(&b, &a) == 0); - - /* a matches b's device */ - a.fdev = b.fdev; - fail_if(filter_compare(&b, &a) != 0); - - - - /*********** TEST ADD FILTER **************/ - - /* - uint32_t pico_ipv4_filter_add(struct pico_device *dev, uint8_t proto, - struct pico_ip4 *out_addr, struct pico_ip4 *out_addr_netmask, - struct pico_ip4 *in_addr, struct pico_ip4 *in_addr_netmask, - uint16_t out_port, uint16_t in_port, int8_t priority, - uint8_t tos, enum filter_action action) - */ - - - r = pico_ipv4_filter_add(NULL, 0, NULL, NULL, NULL, NULL, 0, 0, MAX_PRIORITY + 1, 0, FILTER_DROP); - fail_if(r > 0); - - r = pico_ipv4_filter_add(NULL, 0, NULL, NULL, NULL, NULL, 0, 0, MIN_PRIORITY - 1, 0, FILTER_PRIORITY); - fail_if(r > 0); - - r = pico_ipv4_filter_add(NULL, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, FILTER_COUNT); - fail_if(r > 0); - -#ifdef FAULTY - pico_set_mm_failure(1); - r = pico_ipv4_filter_add(NULL, 0, NULL, NULL, NULL, NULL, 0, 0, 0, 0, FILTER_DROP); - fail_if(r > 0); - fail_if(pico_err != PICO_ERR_ENOMEM); -#endif -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("IPfilter module"); - - TCase *TCase_ipfilter = tcase_create("Unit test for ipfilter"); - tcase_add_test(TCase_ipfilter, tc_ipfilter); - suite_add_tcase(s, TCase_ipfilter); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_ipv6_nd.c b/kernel/picotcp/test/unit/modunit_pico_ipv6_nd.c deleted file mode 100644 index 435b5de..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_ipv6_nd.c +++ /dev/null @@ -1,289 +0,0 @@ -#include "pico_config.h" -#include "pico_tree.h" -#include "pico_ipv6_nd.h" -#include "pico_icmp6.h" -#include "pico_ipv6.h" -#include "pico_stack.h" -#include "pico_device.h" -#include "pico_eth.h" -#include 
"pico_addressing.h" -#include "modules/pico_ipv6_nd.c" -#include "check.h" -#ifdef PICO_SUPPORT_IPV6 - -#undef PICO_TIME -#undef PICO_TIME_MS - -#define PICO_TIME_MS (0) -#define PICO_TIME (0) - -Suite *pico_suite(void); -START_TEST(tc_pico_nd_new_expire_time) -{ - struct pico_ipv6_neighbor n = { - 0 - }; - struct pico_device d = { {0} }; - - /* TODO: how to test these time values */ - - n.dev = &d; - - d.hostvars.retranstime = 666; - - n.state = PICO_ND_STATE_INCOMPLETE; - pico_nd_new_expire_time(&n); - - n.state = PICO_ND_STATE_REACHABLE; - pico_nd_new_expire_time(&n); - - - n.state = PICO_ND_STATE_STALE; - pico_nd_new_expire_time(&n); - - - n.state = PICO_ND_STATE_PROBE; - pico_nd_new_expire_time(&n); - -} -END_TEST -START_TEST(tc_pico_nd_queue) -{ - struct pico_ip6 addr = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9 }}; - int i; - struct pico_frame *f = pico_frame_alloc(sizeof(struct pico_ipv6_hdr)); - struct pico_ipv6_hdr *h = (struct pico_ipv6_hdr *) f->buffer; - f->net_hdr = (uint8_t*) h; - f->buffer[0] = 0x60; /* Ipv6 */ - memcpy(h->dst.addr, addr.addr, PICO_SIZE_IP6); - - fail_if(!f); - - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) { - fail_if(frames_queued_v6[i] != NULL); - } - pico_ipv6_nd_unreachable(&addr); - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) { - fail_if(frames_queued_v6[i] != NULL); - } - pico_ipv6_nd_postpone(f); - fail_if(frames_queued_v6[0]->buffer != f->buffer); - - pico_ipv6_nd_unreachable(&addr); - - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) { - fail_if(frames_queued_v6[i] != NULL); - } -} -END_TEST - -START_TEST(tc_pico_nd_discover) -{ - /* TODO: test this: static void pico_nd_discover(struct pico_ipv6_neighbor *n) */ -} -END_TEST -START_TEST(tc_neigh_options) -{ - /* TODO: test this: static int neigh_options(struct pico_frame *f, struct pico_icmp6_opt_lladdr *opt, uint8_t expected_opt) */ -} -END_TEST -START_TEST(tc_neigh_adv_complete) -{ - /* TODO: test this: static int neigh_adv_complete(struct pico_ipv6_neighbor 
*n, struct pico_icmp6_opt_lladdr *opt) */ -} -END_TEST -START_TEST(tc_neigh_adv_reconfirm_router_option) -{ - /* TODO: test this: static void neigh_adv_reconfirm_router_option(struct pico_ipv6_neighbor *n, unsigned int isRouter) */ -} -END_TEST -START_TEST(tc_neigh_adv_reconfirm) -{ - /* TODO: test this: static int neigh_adv_reconfirm(struct pico_ipv6_neighbor *n, struct pico_icmp6_opt_lladdr *opt, struct pico_icmp6_hdr *hdr) */ -} -END_TEST -START_TEST(tc_neigh_adv_check_solicited) -{ - /* TODO: test this: static void neigh_adv_check_solicited(struct pico_icmp6_hdr *ic6, struct pico_ipv6_neighbor *n) */ -} -END_TEST -START_TEST(tc_neigh_adv_process) -{ - /* TODO: test this: static int neigh_adv_process(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_ipv6_neighbor) -{ - /* TODO: test this: static struct pico_ipv6_neighbor *neighbor_from_sol_new(struct pico_ip6 *ip, struct pico_icmp6_opt_lladdr *opt, struct pico_device *dev) */ -} -END_TEST -START_TEST(tc_neighbor_from_sol) -{ - /* TODO: test this: static void neighbor_from_sol(struct pico_ip6 *ip, struct pico_icmp6_opt_lladdr *opt, struct pico_device *dev) */ -} -END_TEST -START_TEST(tc_neigh_sol_process) -{ - /* TODO: test this: static int neigh_sol_process(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_icmp6_initial_checks) -{ - /* TODO: test this: static int icmp6_initial_checks(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_neigh_adv_mcast_validity_checks) -{ - /* TODO: test this: static int neigh_adv_mcast_validity_check(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_neigh_sol_mcast_validity_checks) -{ - /* TODO: test this: static int neigh_sol_mcast_validity_check(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_neigh_adv_validity_checks) -{ - /* TODO: test this: static int neigh_adv_validity_checks(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_neigh_sol_validity_checks) -{ - /* TODO: test this: static int neigh_sol_validity_checks(struct pico_frame *f) */ -} -END_TEST 
-START_TEST(tc_neigh_adv_checks) -{ - /* TODO: test this: static int neigh_adv_checks(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_nd_router_sol_recv) -{ - /* TODO: test this: static int pico_nd_router_sol_recv(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_nd_router_adv_recv) -{ - /* TODO: test this: static int pico_nd_router_adv_recv(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_nd_neigh_sol_recv) -{ - /* TODO: test this: static int pico_nd_neigh_sol_recv(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_nd_neigh_adv_recv) -{ - /* TODO: test this: static int pico_nd_neigh_adv_recv(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_nd_redirect_recv) -{ - /* TODO: test this: static int pico_nd_redirect_recv(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_ipv6_nd_timer_callback) -{ - /* TODO: test this: static void pico_ipv6_nd_timer_callback(pico_time now, void *arg) */ -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_nd_new_expire_time = tcase_create("Unit test for pico_nd_new_expire_time"); - TCase *TCase_pico_nd_discover = tcase_create("Unit test for pico_nd_discover"); - TCase *TCase_neigh_options = tcase_create("Unit test for neigh_options"); - TCase *TCase_neigh_adv_complete = tcase_create("Unit test for neigh_adv_complete"); - TCase *TCase_neigh_adv_reconfirm_router_option = tcase_create("Unit test for neigh_adv_reconfirm_router_option"); - TCase *TCase_neigh_adv_reconfirm = tcase_create("Unit test for neigh_adv_reconfirm"); - TCase *TCase_neigh_adv_check_solicited = tcase_create("Unit test for neigh_adv_check_solicited"); - TCase *TCase_neigh_adv_process = tcase_create("Unit test for neigh_adv_process"); - TCase *TCase_pico_ipv6_neighbor = tcase_create("Unit test for pico_ipv6_neighbor"); - TCase *TCase_neighbor_from_sol = tcase_create("Unit test for neighbor_from_sol"); - TCase *TCase_neigh_sol_process = tcase_create("Unit test for 
neigh_sol_process"); - TCase *TCase_icmp6_initial_checks = tcase_create("Unit test for icmp6_initial_checks"); - TCase *TCase_neigh_sol_mcast_validity_checks = tcase_create("Unit test for neigh_sol_mcast_validity_checks"); - TCase *TCase_neigh_sol_validity_checks = tcase_create("Unit test for neigh_sol_validity_checks"); - TCase *TCase_neigh_adv_checks = tcase_create("Unit test for neigh_adv_checks"); - TCase *TCase_neigh_adv_validity_checks = tcase_create("Unit test for neigh_adv_validity_checks"); - TCase *TCase_neigh_adv_mcast_validity_checks = tcase_create("Unit test for neigh_adv_mcast_validity_checks"); - TCase *TCase_pico_nd_router_sol_recv = tcase_create("Unit test for pico_nd_router_sol_recv"); - TCase *TCase_pico_nd_router_adv_recv = tcase_create("Unit test for pico_nd_router_adv_recv"); - TCase *TCase_pico_nd_neigh_sol_recv = tcase_create("Unit test for pico_nd_neigh_sol_recv"); - TCase *TCase_pico_nd_neigh_adv_recv = tcase_create("Unit test for pico_nd_neigh_adv_recv"); - TCase *TCase_pico_nd_redirect_recv = tcase_create("Unit test for pico_nd_redirect_recv"); - TCase *TCase_pico_ipv6_nd_timer_callback = tcase_create("Unit test for pico_ipv6_nd_timer_callback"); - TCase *TCase_pico_nd_queue = tcase_create("Unit test for pico_ipv6_nd: queue for pending frames"); - - - tcase_add_test(TCase_pico_nd_new_expire_time, tc_pico_nd_new_expire_time); - suite_add_tcase(s, TCase_pico_nd_new_expire_time); - tcase_add_test(TCase_pico_nd_discover, tc_pico_nd_discover); - suite_add_tcase(s, TCase_pico_nd_discover); - tcase_add_test(TCase_neigh_options, tc_neigh_options); - suite_add_tcase(s, TCase_neigh_options); - tcase_add_test(TCase_neigh_adv_complete, tc_neigh_adv_complete); - suite_add_tcase(s, TCase_neigh_adv_complete); - tcase_add_test(TCase_neigh_adv_reconfirm_router_option, tc_neigh_adv_reconfirm_router_option); - suite_add_tcase(s, TCase_neigh_adv_reconfirm_router_option); - tcase_add_test(TCase_neigh_adv_reconfirm, tc_neigh_adv_reconfirm); - 
suite_add_tcase(s, TCase_neigh_adv_reconfirm); - tcase_add_test(TCase_neigh_adv_check_solicited, tc_neigh_adv_check_solicited); - suite_add_tcase(s, TCase_neigh_adv_check_solicited); - tcase_add_test(TCase_neigh_adv_process, tc_neigh_adv_process); - suite_add_tcase(s, TCase_neigh_adv_process); - tcase_add_test(TCase_pico_ipv6_neighbor, tc_pico_ipv6_neighbor); - suite_add_tcase(s, TCase_pico_ipv6_neighbor); - tcase_add_test(TCase_neighbor_from_sol, tc_neighbor_from_sol); - suite_add_tcase(s, TCase_neighbor_from_sol); - tcase_add_test(TCase_neigh_sol_process, tc_neigh_sol_process); - suite_add_tcase(s, TCase_neigh_sol_process); - tcase_add_test(TCase_icmp6_initial_checks, tc_icmp6_initial_checks); - suite_add_tcase(s, TCase_icmp6_initial_checks); - tcase_add_test(TCase_neigh_adv_mcast_validity_checks, tc_neigh_adv_mcast_validity_checks); - suite_add_tcase(s, TCase_neigh_adv_mcast_validity_checks); - tcase_add_test(TCase_neigh_sol_mcast_validity_checks, tc_neigh_sol_mcast_validity_checks); - suite_add_tcase(s, TCase_neigh_sol_mcast_validity_checks); - tcase_add_test(TCase_neigh_adv_validity_checks, tc_neigh_adv_validity_checks); - suite_add_tcase(s, TCase_neigh_adv_validity_checks); - tcase_add_test(TCase_neigh_sol_validity_checks, tc_neigh_sol_validity_checks); - suite_add_tcase(s, TCase_neigh_sol_validity_checks); - tcase_add_test(TCase_neigh_adv_checks, tc_neigh_adv_checks); - suite_add_tcase(s, TCase_neigh_adv_checks); - tcase_add_test(TCase_pico_nd_router_sol_recv, tc_pico_nd_router_sol_recv); - suite_add_tcase(s, TCase_pico_nd_router_sol_recv); - tcase_add_test(TCase_pico_nd_router_adv_recv, tc_pico_nd_router_adv_recv); - suite_add_tcase(s, TCase_pico_nd_router_adv_recv); - tcase_add_test(TCase_pico_nd_neigh_sol_recv, tc_pico_nd_neigh_sol_recv); - suite_add_tcase(s, TCase_pico_nd_neigh_sol_recv); - tcase_add_test(TCase_pico_nd_neigh_adv_recv, tc_pico_nd_neigh_adv_recv); - suite_add_tcase(s, TCase_pico_nd_neigh_adv_recv); - 
tcase_add_test(TCase_pico_nd_redirect_recv, tc_pico_nd_redirect_recv); - suite_add_tcase(s, TCase_pico_nd_redirect_recv); - tcase_add_test(TCase_pico_ipv6_nd_timer_callback, tc_pico_ipv6_nd_timer_callback); - suite_add_tcase(s, TCase_pico_ipv6_nd_timer_callback); - tcase_add_test(TCase_pico_nd_queue, tc_pico_nd_queue); - suite_add_tcase(s, TCase_pico_nd_queue); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} -#else -int main(void) -{ - return 0; -} - -#endif diff --git a/kernel/picotcp/test/unit/modunit_pico_mdns.c b/kernel/picotcp/test/unit/modunit_pico_mdns.c deleted file mode 100644 index 45688b2..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_mdns.c +++ /dev/null @@ -1,2227 +0,0 @@ -#include "pico_config.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_socket.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_dns_client.h" -#include "pico_dns_common.h" -#include "pico_mdns.h" -#include "pico_tree.h" -#include "pico_dev_mock.c" -#include "modules/pico_mdns.c" -#include "check.h" - -Suite *pico_suite(void); -void add_records(void); /* MARK: helper to add records to MyRecords s*/ -int mdns_init(void); /* MARK: Initialise mDNS module */ - -static int amount_callback_executed = 0; -void callback( pico_mdns_rtree *tree, char *str, void *arg); -void callback( pico_mdns_rtree *tree, - char *str, - void *arg ) /* MARK: Generic callback */ -{ - IGNORE_PARAMETER(tree); - IGNORE_PARAMETER(str); - IGNORE_PARAMETER(arg); - /* Do nothing, because fail_unless and fail_if don't work here */ - amount_callback_executed++; -} - -int mdns_init() /* MARK: Initialise mDNS module */ -{ - struct mock_device *mock = NULL; - - struct pico_ip4 local = { - .addr = long_be(0x0a280064) - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - - mock = 
pico_mock_create(NULL); - if (!mock) { - return -1; - } - - pico_ipv4_link_add(mock->dev, local, netmask); - - /* Try to initialise the mDNS module right */ - return pico_mdns_init("host.local", local, callback, NULL); -} - -#define PICO_MDNS_COOKIE_DECLARE(name) \ - struct pico_mdns_cookie (name) = \ - { \ - {&LEAF, pico_dns_question_cmp}, \ - {&LEAF, pico_mdns_record_cmp}, \ - {&LEAF, pico_mdns_record_cmp}, \ - 0, 0, 0, 0, 0, NULL, NULL \ - } - -START_TEST(tc_mdns_init) /* MARK: mdns_init */ -{ - int ret = 0; - struct pico_ip4 local = { - 0 - }; - char hostname[] = "host.local"; - - printf("*********************** starting %s * \n", __func__); - - pico_stack_init(); - - /* Try to initialise the mDNS module wrong */ - ret = pico_mdns_init(NULL, local, callback, NULL); - fail_unless(ret, "mdns_init failed checking params!\n"); - - /* Try to initialise the mDNS module wrong */ - ret = pico_mdns_init(hostname, local, callback, NULL); - fail_unless(ret, "mdns_init failed checking params!\n"); - - /* Try to initialise the mDNS module wrong */ - ret = pico_mdns_init(hostname, local, NULL, NULL); - fail_unless(ret, "mdns_init failed checking params!\n"); - - ret = mdns_init(); - fail_unless(0 == ret, "mdns_init failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_cmp_name_type) /* MARK: mdns_record_cmp_name_type*/ -{ - struct pico_mdns_record a = { - 0 - }; - struct pico_mdns_record b = { - 0 - }; - char url1[] = "foo.local"; - char url3[] = "a.local"; - struct pico_ip4 rdata = { - 0 - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create different test records */ - a.record = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!a.record, "Record A could not be created!\n"); - b.record = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b.record, 
"Record B could not be created!\n"); - - /* Try to compare records with equal rname but different type */ - ret = pico_mdns_record_cmp_name_type((void *) &a, (void *) &b); - fail_unless(ret > 0, "mdns_record_cmp_name_type failed with different types!\n"); - pico_dns_record_delete((void**)(void **)&(a.record)); - pico_dns_record_delete((void**)(void **)&(b.record)); - - /* Create different test records */ - a.record = pico_dns_record_create(url3, (uint8_t *)url1, (uint16_t) strlen(url1), &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a.record, "Record A could not be created!\n"); - b.record = pico_dns_record_create(url3, (uint8_t *)url1, (uint16_t) strlen(url1), &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b.record, "Record B could not be created!\n"); - - /* Try to compare records with different rname but equal type */ - ret = pico_mdns_record_cmp_name_type((void *) &a, (void *) &b); - fail_unless(!ret, "mdns_record_cmp_name_type failed!\n"); - pico_dns_record_delete((void**)(void **)&(a.record)); - pico_dns_record_delete((void**)(void **)&(b.record)); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_cmp) /* MARK: mdns_record_cmp */ -{ - struct pico_mdns_record a = { - 0 - }; - struct pico_mdns_record b = { - 0 - }; - char url1[] = "foo.local"; - char url3[] = "a.local"; - struct pico_ip4 rdata = { - 0 - }; - uint16_t len = 0; - int ret = 0; - struct pico_ip4 firstIP = { - .addr = 0x7778797A - }; - struct pico_ip4 secondIP = { - .addr = 0x5758595A - }; - - printf("*********************** starting %s * \n", __func__); - - /* Create test records */ - a.record = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!(a.record), "Record A could not be created!\n"); - b.record = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!(b.record), "Record B could not be created!\n"); - - /* Try to 
compare equal records */ - ret = pico_mdns_record_cmp((void *) &a, (void *) &b); - fail_unless(!ret, "mdns_record_cmp failed with equal records!\n"); - pico_dns_record_delete((void**)(void **)&(a.record)); - pico_dns_record_delete((void**)(void **)&(b.record)); - - /* Create different test records */ - a.record = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!a.record, "Record A could not be created!\n"); - b.record = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b.record, "Record B could not be created!\n"); - - /* Try to compare records with equal rname but different type */ - ret = pico_mdns_record_cmp((void *) &a, (void *) &b); - fail_unless(ret > 0, "mdns_record_cmp failed with same name, different types!\n"); - pico_dns_record_delete((void**)(void **)&(a.record)); - pico_dns_record_delete((void**)(void **)&(b.record)); - - /* Create different test records */ - a.record = pico_dns_record_create(url3, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a.record, "Record A could not be created!\n"); - b.record = pico_dns_record_create(url1, &rdata, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!b.record, "Record B could not be created!\n"); - - /* Try to compare records with different rname but equal type */ - ret = pico_mdns_record_cmp((void *) &a, (void *) &b); - fail_unless(ret < 0, "mdns_record_cmp failed with different name, same types!\n"); - pico_dns_record_delete((void**)(void **)&(a.record)); - pico_dns_record_delete((void**)(void **)&(b.record)); - - - /* Create different test records */ - a.record = pico_dns_record_create(url1, &firstIP, 4, &len, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!a.record, "Record A could not be created!\n"); - b.record = pico_dns_record_create(url1, &secondIP, 4, &len, PICO_DNS_TYPE_A, PICO_DNS_CLASS_IN, 0); - fail_if(!b.record, "Record B could not be created!\n"); - - /* Try 
to compare records with equal rname but equal type different IP address (testing the effect of pico_tolower) */ - ret = pico_mdns_record_cmp((void *) &a, (void *) &b); - fail_unless(ret > 0, "mdns_record_cmp failed with same name, same types, tolower separated different rdata!\n"); - pico_dns_record_delete((void**)(void **)&(a.record)); - pico_dns_record_delete((void**)(void **)&(b.record)); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_cookie_cmp) /* MARK: mdns_cookie_cmp */ -{ - PICO_MDNS_COOKIE_DECLARE(a); - PICO_MDNS_COOKIE_DECLARE(b); - struct pico_dns_question *question1 = NULL; - struct pico_dns_question *question2 = NULL; - struct pico_dns_question *question3 = NULL; - struct pico_dns_question *question4 = NULL; - struct pico_dns_question *question5 = NULL; - struct pico_mdns_record record1 = { - 0 - }, record2 = { - 0 - }, record3 = { - 0 - }, - record4 = { - 0 - }; - char url1[] = "foo.local"; - char url2[] = "bar.local"; - char url3[] = "pi.local"; - char url4[] = "ab.local"; - struct pico_ip4 rdata = { - 0 - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create some questions */ - question1 = pico_dns_question_create(url1, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!question1, "Could not create question 1!\n"); - question2 = pico_dns_question_create(url1, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_PTR, - PICO_DNS_CLASS_IN, 0); - fail_if(!question2, "Could not create question 2!\n"); - question3 = pico_dns_question_create(url3, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!question3, "Could not create question 3!\n"); - question4 = pico_dns_question_create(url4, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!question4, "Could not create question 4!\n"); - question5 = pico_dns_question_create(url2, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - 
PICO_DNS_TYPE_AAAA, 0); - fail_if(!question5, "Could not create question 5!\n"); - - /* Create test records */ - record1.record = pico_dns_record_create(url1, &rdata, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record1.record, "Record 1 could not be created!\n"); - record2.record = pico_dns_record_create(url1, &rdata, 4, &len, - PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!record2.record, "Record 2 could not be created!\n"); - record3.record = pico_dns_record_create(url2, &rdata, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record3.record, "Record 3 could not be created!\n"); - record4.record = pico_dns_record_create(url4, &rdata, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record4.record, "Record 3 could not be created!\n"); - - /* Create 2 exactly the same cookies */ - pico_tree_insert(&(a.qtree), question1); - pico_tree_insert(&(a.qtree), question2); - pico_tree_insert(&(a.qtree), question3); - pico_tree_insert(&(a.qtree), question4); - pico_tree_insert(&(a.qtree), question5); - pico_tree_insert(&(a.antree), &record1); - pico_tree_insert(&(a.antree), &record2); - pico_tree_insert(&(a.antree), &record3); - pico_tree_insert(&(a.antree), &record4); - - pico_tree_insert(&(b.qtree), question1); - pico_tree_insert(&(b.qtree), question2); - pico_tree_insert(&(b.qtree), question3); - pico_tree_insert(&(b.qtree), question4); - pico_tree_insert(&(b.qtree), question5); - pico_tree_insert(&(b.antree), &record1); - pico_tree_insert(&(b.antree), &record2); - pico_tree_insert(&(b.antree), &record3); - pico_tree_insert(&(b.antree), &record4); - - /* Try to compare exactly the same cookies*/ - ret = pico_mdns_cookie_cmp((void *) &a, (void *) &b); - fail_unless(0 == ret, "mdns_cookie_cmp failed with equal cookies!\n"); - - /* Try to compare cookies but B a larger question than A*/ - pico_tree_delete(&(a.qtree), question2); - ret = pico_mdns_cookie_cmp((void *) &a, (void *) &b); - fail_unless(ret > 0, 
"mdns_cookie_cmp failed with larger question A!\n"); - - /* Insert more possibilities here.. */ - - PICO_DNS_QTREE_DESTROY(&(b.qtree)); - pico_dns_record_delete((void **)&(record1.record)); - pico_dns_record_delete((void **)&(record2.record)); - pico_dns_record_delete((void **)&(record3.record)); - pico_dns_record_delete((void **)&(record4.record)); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_suffix_to_uint16) /* MARK: mdns_suffix_to_uint16 */ -{ - char url[10] = "-2"; - char url2[10] = "-10"; - char url3[10] = "-500000"; - char *o = NULL, *c = NULL; - - printf("*********************** starting %s * \n", __func__); - - o = url; - c = url + 2; - fail_unless(2 == pico_mdns_suffix_to_uint16(o, c), - "mdns_suffix_to_uint16 failed with correct suffix 2!\n"); - - o = url2; - c = url2 + 3; - fail_unless(10 == pico_mdns_suffix_to_uint16(o, c), - "mdns_suffix_to_uint16 failed with correct suffix 10!\n"); - - o = url3; - c = url3 + 7; - fail_unless(0 == pico_mdns_suffix_to_uint16(o, c), - "mdns_suffix_to_uint16 failed with wrong suffix 500000\n"); - - fail_unless(0 == pico_mdns_suffix_to_uint16(NULL, NULL), - "mdns_suffix_to_uint16 failed with NULL-pointers given\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_cookie_delete) /* MARK: mdns_cookie_delete */ -{ - struct pico_mdns_cookie *a = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - - printf("*********************** starting %s * \n", __func__); - - fail_unless(pico_mdns_cookie_delete((void **)&a), - "mdns_cookie_delete failed checking params!\n"); - a = pico_mdns_cookie_create(qtree, antree, artree, 0, 0, NULL, NULL); - fail_unless(!pico_mdns_cookie_delete((void **)&a), - "mdns_cookie_delete failed!\n"); - - fail_unless(pico_mdns_cookie_delete(NULL), - "mdns_cookie_delete failed checking NULL-pointer"); - - printf("*********************** ending %s * 
\n", __func__); -} -END_TEST -START_TEST(tc_mdns_cookie_create) /* MARK: mdns_cookie_create */ -{ - struct pico_mdns_cookie *a = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - - printf("*********************** starting %s * \n", __func__); - - a = pico_mdns_cookie_create(qtree, antree, artree, 0, 0, NULL, NULL); - fail_if(!a, "mdns_cookie_create failed!\n"); - - pico_mdns_cookie_delete((void **)&a); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_cookie_tree_find_query_cookie) /* MARK: mdns_ctree_find_cookie */ -{ - struct pico_mdns_cookie *a = NULL, *b = NULL; - PICO_DNS_QTREE_DECLARE(qtree_a); - PICO_DNS_QTREE_DECLARE(qtree_b); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - struct pico_dns_question *question1 = NULL; - struct pico_dns_question *question2 = NULL; - struct pico_dns_question *question3 = NULL; - struct pico_dns_question *question4 = NULL; - struct pico_dns_question *question5 = NULL; - char url1[] = "foo.local"; - char url2[] = "bar.local"; - char url3[] = "pi.local"; - char url4[] = "ab.local"; - char url5[] = "t.local"; - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create some questions */ - question1 = pico_dns_question_create(url1, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!question1, "Could not create question 1!\n"); - question2 = pico_dns_question_create(url5, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_PTR, - PICO_DNS_CLASS_IN, 0); - fail_if(!question2, "Could not create question 2!\n"); - question3 = pico_dns_question_create(url3, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!question3, "Could not create question 3!\n"); - question4 = pico_dns_question_create(url4, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_AAAA, - PICO_DNS_CLASS_IN, 0); - fail_if(!question4, "Could not create question 4!\n"); - 
question5 = pico_dns_question_create(url2, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!question5, "Could not create question 5!\n"); - - pico_tree_insert(&qtree_a, question3); - pico_tree_insert(&qtree_a, question4); - - pico_tree_insert(&qtree_b, question1); - pico_tree_insert(&qtree_b, question2); - pico_tree_insert(&qtree_b, question5); - - a = pico_mdns_cookie_create(qtree_a, antree, artree, 1, - PICO_MDNS_PACKET_TYPE_QUERY, NULL, NULL); - fail_if(!a, "mdns_cookie_create failed!\n"); - b = pico_mdns_cookie_create(qtree_b, antree, artree, 1, - PICO_MDNS_PACKET_TYPE_QUERY, NULL, NULL); - fail_if(!b, "mdns_cookie_create failed!\n"); - - pico_tree_insert(&Cookies, a); - pico_tree_insert(&Cookies, b); - - fail_unless(b == pico_mdns_ctree_find_cookie("\3foo\5local", PICO_MDNS_PACKET_TYPE_QUERY), - "mdns_cookie_tree_find_query_cookie failed with foo.local\n"); - - fail_unless(a == pico_mdns_ctree_find_cookie("\2pi\5local", PICO_MDNS_PACKET_TYPE_QUERY), - "mdns_cookie_tree_find_query_cookie failed with pi.local\n"); - - fail_unless(NULL == pico_mdns_ctree_find_cookie("bla.local", PICO_MDNS_PACKET_TYPE_QUERY), - "mdns_cookie_tree_find_query_cookie failed with foo.local\n"); - - pico_tree_delete(&Cookies, a); - pico_tree_delete(&Cookies, b); - pico_mdns_cookie_delete((void **)&a); - pico_mdns_cookie_delete((void **)&b); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_cookie_apply_spt) /* MARK: mdns_cookie_apply_spt */ -{ - PICO_MDNS_COOKIE_DECLARE(a); - struct pico_mdns_record record1 = { - 0 - }, record2 = { - 0 - }, record3 = { - 0 - }, - record4 = { - 0 - }; - char url1[] = "foo.local"; - char url2[] = "bar.local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata2 = { - long_be(0xFFFFFFFF) - }; - int ret = 0; - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create test records */ - record1.record = 
pico_dns_record_create(url1, &rdata, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record1.record, "Record 1 could not be created!\n"); - record2.record = pico_dns_record_create(url2, &rdata2, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record2.record, "Record 2 could not be created!\n"); - record3.record = pico_dns_record_create(url1, &rdata2, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record3.record, "Record 3 could not be created!\n"); - record4.record = pico_dns_record_create(url2, &rdata, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record4.record, "Record 4 could not be created!\n"); - - /* Make it a probe cookie otherwise it will just return -1 */ - a.type = PICO_MDNS_PACKET_TYPE_PROBE; - - /* Need to initialise the stack to allow timer scheduling IMPORTANT! */ - pico_stack_init(); - - /* Create 2 exactly the same cookies */ - pico_tree_insert(&(a.antree), &record1); - pico_tree_insert(&(a.antree), &record2); - pico_tree_insert(&MyRecords, &record1); - pico_tree_insert(&MyRecords, &record2); - - ret = pico_mdns_cookie_apply_spt(&a, record3.record); - fail_unless(ret, "mdns_cookie_apply_spt failed checking parms!\n"); - - PICO_MDNS_SET_FLAG(record1.flags, PICO_MDNS_RECORD_CURRENTLY_PROBING); - PICO_MDNS_SET_FLAG(record2.flags, PICO_MDNS_RECORD_CURRENTLY_PROBING); - - /* Check with peer record which is lexicographically later */ - ret = pico_mdns_cookie_apply_spt(&a, record3.record); - fail_unless(0 == ret, "mdns_cookie_apply_spt failed!\n"); - - /* Check with peer record which is lexicographically earlier */ - ret = pico_mdns_cookie_apply_spt(&a, record4.record); - fail_unless(0 == ret, "mdns_cookie_apply_spt failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_is_suffix_present) /* MARK: mdns_is_suffix_present */ -{ - char name1[16] = { - 5, 'v', 'l', 'e', 'e', 's', 5, 'l', 'o', 'c', 'a', 'l', 0 - }; - char name2[17] = { - 8, 
'v', 'l', 'e', 'e', 's', ' ', '-', '2', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name6[20] = { - 10, 'v', 'l', 'e', 'e', 's', ' ', '-', 'a', '-', '2', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name7[18] = { - 9, 'v', 'l', 'e', 'e', 's', ' ', '-', '9', 'a', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name9[17] = { - 7, 'v', 'l', 'e', 'e', 's', ' ', '-', '0', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char *o_index = NULL; - char *c_index = NULL; - uint16_t present = 0; - - printf("*********************** starting %s * \n", __func__); - present = pico_mdns_is_suffix_present(name1, &o_index, &c_index); - fail_unless(!present, - "There is no suffix present!\n"); - fail_unless(NULL == o_index && NULL == c_index, - "There should be no indexes!\n"); - - present = pico_mdns_is_suffix_present(name2, &o_index, &c_index); - fail_unless(present, - "is_suffix_present failed with suffix!\n"); - fail_unless((name2 + 7) == o_index && (name2 + 9) == c_index, - "is_suffix_pressent failed!\n"); - fail_unless(present == 2, "Suffix should be 2!\n"); - - present = pico_mdns_is_suffix_present(name7, &o_index, &c_index); - fail_unless(!present, - "There is no suffix present!\n"); - fail_unless(NULL == o_index && NULL == c_index, - "There should be no indexes!\n"); - - present = pico_mdns_is_suffix_present(name6, &o_index, &c_index); - fail_unless(present, - "is_suffix_present failed with suffix!\n"); - fail_unless((name6 + 9) == o_index && (name6 + 11) == c_index, - "is_suffix_present failed!\n"); - fail_unless(present == 2, "Suffix should be 2!\n"); - - present = pico_mdns_is_suffix_present(name9, &o_index, &c_index); - fail_unless(!present, - "is_suffix_present failed with suffix 0!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -#pragma GCC diagnostic push /* require GCC 4.6 */ -#pragma GCC diagnostic ignored "-Woverflow" -START_TEST(tc_pico_itoa) /* MARK: itoa */ -{ - char num[10] = { - 0 - }; - - uint16_t t1 = 10; - 
printf("*********************** starting %s * \n", __func__); - - pico_itoa(t1, num); - fail_unless(0 == strcmp(num, "10"), "ITOA with %d failed: %s\n", t1, num); - memcpy(num, "\0\0\0\0\0\0\0\0\0\0", 10); - pico_itoa(65535, num); - fail_unless(0 == strcmp(num, "65535"), "ITOA with %d failed: %s\n", 65535, num); - memcpy(num, "\0\0\0\0\0\0\0\0\0\0", 10); - pico_itoa(0, num); - fail_unless(0 == strcmp(num, "0"), "ITOA with %d failed: %s\n", 0, num); - memcpy(num, "\0\0\0\0\0\0\0\0\0\0", 10); - pico_itoa(65536, num); - fail_unless(0 == strcmp(num, "0"), "ITOA with %d failed: %s\n", 65536, num); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -#pragma GCC diagnostic pop /* require GCC 4.6 */ -START_TEST(tc_mdns_resolve_name_conflict) /* MARK: mdns_resolve_name_conflict */ -{ - char name1[13] = { - 5, 'v', 'l', 'e', 'e', 's', 5, 'l', 'o', 'c', 'a', 'l', 0 - }; - char name2[17] = { - 7, 'v', 'l', 'e', 'e', 's', '-', '2', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name3[18] = { - 8, 'v', 'l', 'e', 'e', 's', '-', '1', '0', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name4[17] = { - 7, 'v', 'l', 'e', 'e', 's', '-', '9', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name5[16] = { - 6, 'v', 'l', 'e', 'e', 's', '-', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name6[17] = { - 7, 'v', 'l', 'e', 'e', 's', '-', 'a', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char name7[18] = { - 8, 'v', 'l', 'e', 'e', 's', '-', '9', 'a', 5, 'l', 'o', 'c', 'a', 'l', '\0' - }; - char *ret = NULL; - - printf("*********************** starting %s * \n", __func__); - - ret = pico_mdns_resolve_name_conflict(name1); - fail_unless(0 == strcmp(ret, "\x7vlees-2\5local"), - "mdns_conflict_resolve_name failed 'vlees.local' to %s!\n", - ret); - PICO_FREE(ret); - ret = pico_mdns_resolve_name_conflict(name2); - fail_unless(0 == strcmp(ret, "\x7vlees-3\5local"), - "mdns_conflict_resolve_name failed 'vlees-2.local' to %s!\n", - ret); - PICO_FREE(ret); - ret = 
pico_mdns_resolve_name_conflict(name3); - fail_unless(0 == strcmp(ret, "\x8vlees-11\5local"), - "mdns_conflict_resolve_name failed 'vlees-10.local' to %s!\n", - ret); - PICO_FREE(ret); - ret = pico_mdns_resolve_name_conflict(name4); - fail_unless(0 == strcmp(ret, "\x8vlees-10\5local"), - "mdns_conflict_resolve_name failed 'vlees-9.local' to %s!\n", - ret); - PICO_FREE(ret); - ret = pico_mdns_resolve_name_conflict(name5); - fail_unless(0 == strcmp(ret, "\x8vlees--2\5local"), - "mdns_conflict_resolve_name failed 'vlees--2.local' to %s!\n", - ret); - PICO_FREE(ret); - ret = pico_mdns_resolve_name_conflict(name6); - fail_unless(0 == strcmp(ret, "\x9vlees-a-2\5local"), - "mdns_conflict_resolve_name failed 'vlees-a-2.local' to %s!\n", - ret); - PICO_FREE(ret); - ret = pico_mdns_resolve_name_conflict(name7); - fail_unless(0 == strcmp(ret, "\xavlees-9a-2\5local"), - "mdns_conflict_resolve_name failed 'vlees-9a-2.local' to %s!\n", - ret); - PICO_FREE(ret); - - fail_unless(NULL == pico_mdns_resolve_name_conflict(NULL), - "mdns_conflict_resolvE_name failed checking params!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_generate_new_records) /* MARK: mdns_generate_new_records */ -{ - PICO_MDNS_RTREE_DECLARE(ctree); - PICO_MDNS_RTREE_DECLARE(ntree); - struct pico_mdns_record *record = NULL; - char url[] = "foo.local"; - char url2[] = "\3foo\5local"; - char url3[] = "\7foo (2)\5local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - - printf("*********************** starting %s * \n", __func__); - - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!(record->record), "Record could not be created!\n"); - pico_tree_insert(&ctree, record); - - ntree = pico_mdns_generate_new_records(&ctree, url2, - url3); - - fail_unless(1 == pico_tree_count(&ntree), "new_tree has wrong count!\n"); - record = pico_tree_firstNode(ntree.root)->keyValue; - 
fail_unless(strcmp(record->record->rname, url3) == 0, - "New name isn't correctly copied %s!\n", record->record->rname); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_cookie_resolve_conflict) /* MARK: mdns_cookie_resolve_conflict */ -{ - struct pico_mdns_cookie *a = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - struct pico_dns_question *question = NULL; - struct pico_mdns_record *record = NULL; - char url[] = "foo.local"; - char url2[] = "\3foo\5local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - int ret = 0; - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - question = pico_dns_question_create(url, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!question, "Question could not be created!\n"); - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!(record->record), "Record could not be created!\n"); - - /* Create 2 exactly the same cookies */ - pico_tree_insert(&antree, record); - pico_tree_insert(&qtree, question); - - /* Make it a probe cookie otherwise it will just return -1 */ - a = pico_mdns_cookie_create(qtree, antree, artree, 1, - PICO_MDNS_PACKET_TYPE_PROBE, - callback, NULL); - - /* Need to initialise the stack to allow timer scheduling IMPORTANT! 
*/ - pico_stack_init(); - ret = mdns_init(); - fail_unless(0 == ret, "mdns_init failed!\n"); - - /* Cookie needs to be removed from cookie tree so we need to add it first */ - pico_tree_insert(&Cookies, a); - - ret = pico_mdns_cookie_resolve_conflict(a, url2); - fail_unless(0 == ret, "mdns_cookie_resolve_conflict failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_question_create) /* MARK: mdns_question_create */ -{ - struct pico_dns_question *question = NULL; - char url[] = "1.2.3.4"; - char cmpbuf[22] = { - 0x01u, '4', - 0x01u, '3', - 0x01u, '2', - 0x01u, '1', - 0x07u, 'i', 'n', '-', 'a', 'd', 'd', 'r', - 0x04u, 'a', 'r', 'p', 'a', - 0x00u - }; - uint16_t len = 0; - - printf("*********************** starting %s * \n", __func__); - - question = pico_mdns_question_create("foo.local", - &len, - PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_MDNS_QUESTION_FLAG_UNICAST_RES, - 0); - fail_if(!question, "mdns_question_create returned NULL!\n"); - fail_unless(0 == strcmp(question->qname, "\3foo\5local"), - "mdns_question_create failed!\n"); - fail_unless(0x8001 == short_be(question->qsuffix->qclass), - "mdns_quesiton_create failed setting QU bit!\n"); - pico_dns_question_delete((void**)&question); - - question = pico_mdns_question_create("foo.local", - &len, - PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_MDNS_QUESTION_FLAG_PROBE, - 0); - fail_if(!question, "mdns_question_create returned NULL!\n"); - fail_unless(0 == strcmp(question->qname, "\3foo\5local"), - "mdns_question_create failed!\n"); - fail_unless(PICO_DNS_TYPE_ANY == short_be(question->qsuffix->qtype), - "mdns_quesiton_create failed setting type to ANY!\n"); - pico_dns_question_delete((void**)&question); - - question = pico_mdns_question_create(url, - &len, - PICO_PROTO_IPV4, - PICO_DNS_TYPE_PTR, - 0, 1); - fail_if(!question, "mdns_question_create returned NULL!\n"); - fail_unless(0 == strcmp(question->qname, cmpbuf), - "mdns_question_create failed!\n"); - 
pico_dns_question_delete((void**)&question); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_resolve_conflict) /* MARK: mdns_record_resolve_conflict */ -{ - struct pico_mdns_record *record = NULL; - char url[] = "foo.local"; - char url2[] = "\3foo\5local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!(record->record), "Record could not be created!\n"); - /* Need to initialise the stack to allow timer scheduling IMPORTANT! */ - pico_stack_init(); - - ret = mdns_init(); - fail_unless(0 == ret, "mdns_init failed!\n"); - - ret = pico_mdns_record_resolve_conflict(record, url2); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_am_i_lexi_later) /* MARK: mdns_record_am_i_lexi_later */ -{ - struct pico_mdns_record record1 = { - 0 - }, record2 = { - 0 - }, record3 = { - 0 - }, - record4 = { - 0 - }; - char url1[] = "foo.local"; - char url2[] = "bar.local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata2 = { - long_be(0xFFFFFFFF) - }; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create test records */ - record1.record = pico_dns_record_create(url1, &rdata, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record1.record, "Record 1 could not be created!\n"); - record2.record = pico_dns_record_create(url2, &rdata2, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record2.record, "Record 2 could not be created!\n"); - record3.record = pico_dns_record_create(url1, &rdata2, 4, &len, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record3.record, "Record 3 could not be created!\n"); - record4.record = pico_dns_record_create(url2, &rdata, 4, &len, 
- PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!record4.record, "Record 4 could not be created!\n"); - - ret = pico_mdns_record_am_i_lexi_later(&record1, &record3); - fail_unless(ret < 0, "mdns_record_am_i_lexi_later failed!\n"); - - ret = pico_mdns_record_am_i_lexi_later(&record2, &record4); - fail_unless(ret > 0, "mdns_record_am_i_lexi_later failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_copy_with_new_name) /* MARK: copy_with_new_name */ -{ - struct pico_mdns_record *record = NULL, *copy = NULL; - char url[] = "foo.local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - - printf("*********************** starting %s * \n", __func__); - /* Create a record */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!(record->record), "Record could not be created!\n"); - - /* Try to create a copy with a new name */ - copy = pico_mdns_record_copy_with_new_name(record, "\4test\5local"); - fail_if(!copy, "mdns_record_copy_with_new_name returned NULL!\n"); - fail_unless(0 == strcmp(copy->record->rname, "\4test\5local"), - "mdns_record_copy_with_new_name didn't copy name right!\n"); - fail_unless(strlen("\4test\5local") + 1 == copy->record->rname_length, - "mdns_record_copy_with_new_name didn't update namelength!\n"); - - pico_mdns_record_delete((void **)&record); - pico_mdns_record_delete((void **)©); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_copy) /* MARK: mdns_record_copy */ -{ - struct pico_mdns_record *record = NULL, *copy = NULL; - char url[] = "foo.local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - - printf("*********************** starting %s * \n", __func__); - /* Create a record */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!(record->record), "Record could not be created!\n"); - - /* Try 
to copy */ - copy = pico_mdns_record_copy(record); - fail_if(!copy, "mdns_record_copy returned NULL!\n"); - fail_if(record == copy, "Pointers point to same struct!\n"); - fail_unless(0 == strcmp(copy->record->rname, record->record->rname), - "mdns_record_copy didn't copy names right!\n"); - fail_unless(copy->claim_id == record->claim_id, - "mdns_record_copy didn't copy claim_id right!\n"); - fail_unless(copy->current_ttl == record->current_ttl, - "mdns_record_copy didn't copy current_ttl right!\n"); - fail_unless(copy->flags == record->flags, - "mdns_record_copy didn't copy flags right!\n"); - - pico_mdns_record_delete((void **)&record); - pico_mdns_record_delete((void **)©); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_create) /* MARK: mdns_record_create */ -{ - struct pico_mdns_record *record = NULL; - char url[] = "foo.local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - printf("*********************** starting %s * \n", __func__); - /* Create a record */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!(record->record), "Record could not be created!\n"); - fail_unless(0 == strcmp(record->record->rname, "\3foo\5local"), - "mdns_record_create didn't convert rname properly!\n"); - fail_unless(0x8001 == short_be(record->record->rsuffix->rclass), - "mdns_record_create didn't set QU flag correctly!\n"); - pico_mdns_record_delete((void **)&record); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_delete) /* MARK: mdns_record_delete */ -{ - struct pico_mdns_record *record = NULL; - char url[] = "foo.local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - /* Create a record */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - 
fail_if(!(record->record), "Record could not be created!\n"); - - /* Try to delete the record */ - ret = pico_mdns_record_delete((void **)&record); - fail_unless(0 == ret, "mdns_record_delete returned error!\n"); - fail_unless(!record, "mdns_record_delete didn't delete properly"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -void add_records( void ) /* MARK: helper to add records to MyRecords s*/ -{ - struct pico_mdns_record *record = NULL, *record1 = NULL, *record2 = NULL, - *record3 = NULL; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata1 = { - long_be(0xFFFFFFFF) - }; - char url[] = "foo.local"; - char url1[] = "bar.local"; - - /* Create an A record with URL */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - (PICO_MDNS_RECORD_UNIQUE | - PICO_MDNS_RECORD_PROBED | - PICO_MDNS_RECORD_HOSTNAME)); - fail_if(!record, "Record could not be created!\n"); - printf("Is hostname record: %d\n", IS_HOSTNAME_RECORD(record)); - - /* Create 2 PTR records to URL */ - record1 = pico_mdns_record_create(url, url, (uint16_t) strlen(url), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record1, "Record could not be created!\n"); - - /* Simulate that this record is probed */ - record1->flags |= PICO_MDNS_RECORD_PROBED; - - record2 = pico_mdns_record_create(url, url1, (uint16_t) strlen(url1), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Create a totally different record */ - record3 = pico_mdns_record_create(url1, &rdata1, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Add the records to the tree */ - pico_tree_insert(&MyRecords, record); - pico_tree_insert(&MyRecords, record1); - pico_tree_insert(&MyRecords, record2); - pico_tree_insert(&MyRecords, record3); -} -START_TEST(tc_mdns_record_tree_find_name) /* MARK: mdns_record_find_name */ -{ - 
PICO_MDNS_RTREE_DECLARE(hits); - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - int found = 1; - - printf("*********************** starting %s * \n", __func__); - - add_records(); - - hits = pico_mdns_rtree_find_name(&MyRecords, "\3foo\5local", 0); - fail_unless(2 == pico_tree_count(&hits), - "mdns_record_tree_find_name should find 2 records here!\n"); - pico_tree_foreach(node, &hits) { - if ((record = node->keyValue)) { - if (strcmp(record->record->rname, "\3foo\5local")) - found = 0; - } - } - fail_unless(1 == found, - "mdns_record_tree_find_name returned records with other name!\n"); - - hits = pico_mdns_rtree_find_name(&MyRecords, "\3bar\5local", 0); - fail_unless(1 == pico_tree_count(&hits), - "mdns_record_tree_find_name should find 1 record here!\n"); - record = pico_tree_firstNode(hits.root)->keyValue; - fail_unless(0 == strcmp(record->record->rname, - "\3bar\5local"), - "mdns_record_tree_find_name returned record with other name!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_tree_find_name_type) /* MARK: mdns_record_find_name_type */ -{ - PICO_MDNS_RTREE_DECLARE(hits); - struct pico_tree_node *node = NULL; - struct pico_mdns_record *record = NULL; - int found = 1; - char url[] = "\3foo\5local"; - char url2[] = "\3bar\5local"; - printf("*********************** starting %s * \n", __func__); - - add_records(); - - /* Try to find the first A record */ - hits = pico_mdns_rtree_find_name_type(&MyRecords, url, PICO_DNS_TYPE_A, 0); - fail_unless(1 == pico_tree_count(&hits), - "mdns_record_tree_find_name should find 1 record here!\n"); - record = pico_tree_firstNode(hits.root)->keyValue; - fail_unless(0 == strcmp(record->record->rname, url), - "mdns_record_tree_find_name returned record with other name!\n"); - - /* Try to find the 2 PTR records */ - hits = pico_mdns_rtree_find_name_type(&MyRecords, url, PICO_DNS_TYPE_PTR, 0); - pico_tree_foreach(node, &hits) { - if 
((record = node->keyValue)) { - if (strcmp(record->record->rname, url)) - found = 0; - } - } - fail_unless(1 == found, - "mdns_record_tree_find_name returned records with other name!\n"); - - /* Try to find the last A record */ - hits = pico_mdns_rtree_find_name_type(&MyRecords, url2, PICO_DNS_TYPE_A, 0); - fail_unless(1 == pico_tree_count(&hits), - "mdns_record_tree_find_name should find 1 record here!\n"); - record = pico_tree_firstNode(hits.root)->keyValue; - fail_unless(0 == strcmp(record->record->rname, url2), - "mdns_record_tree_find_name returned record with other name!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_tree_del_name) /* MARK: mdns_record_tree_del_name */ -{ - PICO_MDNS_RTREE_DECLARE(hits); - struct pico_mdns_record *record = NULL, *record1 = NULL, *record2 = NULL, - *record3 = NULL; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata1 = { - long_be(0xFFFFFFFF) - }; - char url[] = "foo.local"; - char url1[] = "bar.local"; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create an A record with URL */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record, "Record could not be created!\n"); - - /* Create 2 PTR records to URL */ - record1 = pico_mdns_record_create(url, url, (uint16_t) strlen(url), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record1, "Record could not be created!\n"); - record2 = pico_mdns_record_create(url, url1, (uint16_t) strlen(url1), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Create a totally different record */ - record3 = pico_mdns_record_create(url1, &rdata1, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Add the records to the tree */ - pico_tree_insert(&MyRecords, record); - 
pico_tree_insert(&MyRecords, record1); - pico_tree_insert(&MyRecords, record2); - pico_tree_insert(&MyRecords, record3); - - /* Try to del the first tree records */ - ret = pico_mdns_rtree_del_name(&MyRecords, "\3foo\5local"); - fail_unless(0 == ret, - "mdns_record_tree_del_name failed!\n"); - hits = pico_mdns_rtree_find_name(&MyRecords, "\3foo\5local", 0); - fail_unless(0 == pico_tree_count(&hits), - "mdns_record_tree_find_name should find 3 records here!\n"); - - hits = pico_mdns_rtree_find_name( &MyRecords, "\3bar\5local", 0); - fail_unless(1 == pico_tree_count(&hits), - "mdns_record_tree_find_name should find 1 record here!\n"); - record = pico_tree_first(&hits); - fail_unless(0 == strcmp(record->record->rname, "\3bar\5local"), - "mdns_record_tree_del_name failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_record_tree_del_name_type) /* MARK: mdns_record_tree_del_name_type */ -{ - PICO_MDNS_RTREE_DECLARE(hits); - int ret = 0; - char url[] = "\3foo\5local"; - printf("*********************** starting %s * \n", __func__); - - add_records(); - - /* Try to del the two PTR records */ - ret = pico_mdns_rtree_del_name_type(&MyRecords, url, - PICO_DNS_TYPE_PTR); - fail_unless(0 == ret, "mdns_record_tree_del_name_type returned error!\n"); - - /* Try to find the 2 PTR records */ - hits = pico_mdns_rtree_find_name_type(&MyRecords, url, - PICO_DNS_TYPE_PTR, 0); - fail_unless(0 == pico_tree_count(&hits), - "mdns_record_tree_find_name_type returned PTR records!\n"); - - - /* Try to find the first A record */ - hits = pico_mdns_rtree_find_name_type(&MyRecords, url, - PICO_DNS_TYPE_A, 0); - fail_unless(1 == pico_tree_count(&hits), - "mdns_record_tree_del_name_type failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_my_records_add) /* MARK: mdns_my_records_add */ -{ - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *record = NULL, *record1 = NULL, 
*record2 = NULL, - *record3 = NULL; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata1 = { - long_be(0xFFFFFFFF) - }; - char url[] = "foo.local"; - char url1[] = "bar.local"; - - printf("*********************** starting %s * \n", __func__); - /* Create an A record with URL */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record, "Record could not be created!\n"); - - /* Create 2 PTR records to URL */ - record1 = pico_mdns_record_create(url, url, (uint16_t) strlen(url), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record1, "Record could not be created!\n"); - - /* Simulate that this record is not added again */ - record2 = pico_mdns_record_create(url, url1, (uint16_t) strlen(url1), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Create a totally different record */ - record3 = pico_mdns_record_create(url1, &rdata1, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Add the records to the tree */ - pico_tree_insert(&rtree, record); - pico_tree_insert(&rtree, record1); - pico_tree_insert(&rtree, record2); - pico_tree_insert(&rtree, record3); - - pico_mdns_my_records_add(&rtree, 0); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_my_records_find_probed) /* MARK: mdns_my_records_find_probed */ -{ - PICO_MDNS_RTREE_DECLARE(hits); - - printf("*********************** starting %s * \n", __func__); - - add_records(); - - hits = pico_mdns_my_records_find_probed(); - fail_unless(2 == pico_tree_count(&hits), - "mdns_my_records_find_probed failed %d!\n", - pico_tree_count(&hits)); - - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_my_records_find_to_probe) /* MARK: mdns_my_records_find_to_probe */ -{ - PICO_MDNS_RTREE_DECLARE(hits); - - 
printf("*********************** starting %s * \n", __func__); - - add_records(); - - hits = pico_mdns_my_records_find_to_probe(); - fail_unless(1 == pico_tree_count(&hits), - "mdns_my_records_find_to_probe failed! %d\n", pico_tree_count(&hits)); - - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_my_records_claimed_id) /* MARK: mnds_my_records_claimed_id */ -{ - PICO_MDNS_RTREE_DECLARE(hits); - struct pico_mdns_record *record = NULL, *record1 = NULL, *record2 = NULL, - *record3 = NULL; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata1 = { - long_be(0xFFFFFFFF) - }; - char url[] = "foo.local"; - char url1[] = "bar.local"; - - printf("*********************** starting %s * \n", __func__); - /* Create an A record with URL */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - record->claim_id = 1; - record->flags |= PICO_MDNS_RECORD_PROBED; - fail_if(!record, "Record could not be created!\n"); - - /* Create 2 PTR records to URL */ - record1 = pico_mdns_record_create(url, url, (uint16_t) strlen(url), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - record1->claim_id = 1; - record1->flags |= PICO_MDNS_RECORD_PROBED; - fail_if(!record1, "Record could not be created!\n"); - - /* Simulate that this record is not added again */ - record2 = pico_mdns_record_create(url, url1, (uint16_t)strlen(url1), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Create a totally different record */ - record3 = pico_mdns_record_create(url1, &rdata1, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Add the records to the tree */ - pico_tree_insert(&MyRecords, record); - pico_tree_insert(&MyRecords, record1); - pico_tree_insert(&MyRecords, record2); - pico_tree_insert(&MyRecords, record3); - - fail_unless(1 == 
pico_mdns_my_records_claimed_id(1, &hits), - "mdns_my_records_claimed_id_failed!\n"); - fail_unless(2 == pico_tree_count(&hits), - "Vector count should be 2!\n"); - - fail_unless(0 == pico_mdns_my_records_claimed_id(0, &hits), - "Claim ID '0' isn't claimed yet.."); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_my_records_claimed) /* MARK: mdns_my_records_claimed */ -{ - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *record = NULL, *record1 = NULL, *record2 = NULL, - *record3 = NULL; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata1 = { - long_be(0xFFFFFFFF) - }; - char url[] = "foo.local"; - char url1[] = "bar.local"; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create an A record with URL */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record, "Record could not be created!\n"); - - /* Create 2 PTR records to URL */ - record1 = pico_mdns_record_create(url, url, (uint16_t) strlen(url), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record1, "Record could not be created!\n"); - record2 = pico_mdns_record_create(url, url1, (uint16_t) strlen(url1), - PICO_DNS_TYPE_PTR, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Create a totally different record */ - record3 = pico_mdns_record_create(url1, &rdata1, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "Record could not be created!\n"); - - /* Add the records to the tree */ - pico_tree_insert(&MyRecords, record); - pico_tree_insert(&MyRecords, record1); - pico_tree_insert(&MyRecords, record2); - pico_tree_insert(&MyRecords, record3); - - pico_tree_insert(&rtree, record); - pico_tree_insert(&rtree, record1); - pico_tree_insert(&rtree, record2); - pico_tree_insert(&rtree, record3); - - ret = pico_mdns_my_records_claimed(rtree, callback, NULL); 
- fail_unless(0 == ret, "mdns_my_records_claimed failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -#if PICO_MDNS_ALLOW_CACHING == 1 -START_TEST(tc_mdns_cache_add_record) /* MARK: mdns_cache_add_record */ -{ - struct pico_mdns_record *record = NULL, *found = NULL; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - char url[] = "foo.local"; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* Create an A record with URL */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 80, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record, "Record could not be created!\n"); - - ret = pico_mdns_cache_add_record(record); - fail_unless(0 == ret, - "mdns_cache_add_record returned error!\n"); - found = pico_tree_findKey(&Cache, record); - fail_if(found == NULL, "mdns_cache_add_record failed!\n"); - ret = pico_mdns_cache_add_record(record); - fail_unless(0 == ret, - "mdns_cache_add_record returned error!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -#endif -START_TEST(tc_pico_tree_merge) -{ - PICO_MDNS_RTREE_DECLARE(src); - PICO_MDNS_RTREE_DECLARE(dst); - fail_unless(pico_tree_merge(NULL, NULL) == -1); - fail_unless(pico_tree_merge(&dst, NULL) == -1); - fail_unless(pico_tree_merge(NULL, &src) == -1); - fail_unless(pico_tree_merge(&dst, &src) == 0); -} -END_TEST -START_TEST(tc_mdns_populate_answer_vector) /* MARK: mdns_popolate_antree */ -{ - PICO_MDNS_RTREE_DECLARE(rtree); - char value[] = "\3foo\5local"; - printf("*********************** starting %s * \n", __func__); - add_records(); - - rtree = pico_mdns_populate_antree(value, PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN); - - fail_unless(1 == pico_tree_count(&rtree), "mdns_populate_answer_vector failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_handle_data_as_questions) /* MARK: handle_data_as_questions */ -{ - pico_dns_packet *packet = NULL; - 
PICO_MDNS_RTREE_DECLARE(antree); - PICO_DNS_QTREE_DECLARE(qtree); - char qurl[] = "picotcp.com"; - char qurl2[] = "google.com"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - struct pico_ip4 rdata1 = { - long_be(0xFFFFFFFF) - }; - uint16_t len = 0; - uint8_t *ptr = NULL; - int ret = 0; - struct pico_dns_question *a = NULL, *b = NULL; - struct pico_mdns_record *record1 = NULL, *record2 = NULL; - - printf("*********************** starting %s * \n", __func__); - - /* Create a DNS query packet */ - a = pico_mdns_question_create(qurl, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - PICO_MDNS_QUESTION_FLAG_UNICAST_RES, 0); - fail_if(!a, "dns_question_create failed!\n"); - pico_tree_insert(&qtree, a); - fail_unless(ret == 0, "dns_question_vector_add returned error!\n"); - b = pico_mdns_question_create(qurl2, &len, PICO_PROTO_IPV4, PICO_DNS_TYPE_A, - 0, 0); - fail_if(!b, "dns_question_create failed!\n"); - pico_tree_insert(&qtree, b); - packet = pico_dns_query_create(&qtree, NULL, NULL, NULL, &len); - fail_if(packet == NULL, "mdns_query_create returned NULL!\n"); - - /* Create records for answers */ - record1 = pico_mdns_record_create(qurl, &rdata, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record1, "mdns_record_create returned NULL!\n"); - record1->flags |= 0xC0; - record2 = pico_mdns_record_create(qurl2, &rdata1, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record2, "mdns_record_created returned NULL!\n"); - record2->flags |= 0xC0; - - /* Add them to my records */ - pico_tree_insert(&MyRecords, record1); - pico_tree_insert(&MyRecords, record2); - - ptr = ((uint8_t *)packet + 12); - - antree = pico_mdns_handle_data_as_questions(&ptr, 2, packet); - fail_unless(2 == pico_tree_count(&antree), - "pico_mdns_handle_data_as_questions returned error!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_handle_data_as_answers) /* MARK: handle_data_as_answers */ -{ - pico_dns_packet *packet 
= NULL; - PICO_DNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *a = NULL, *b = NULL; - char url[] = "picotcp.com"; - char url2[] = "google.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint8_t *ptr = NULL; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - a = pico_mdns_record_create(url, (void *)rdata, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!a, "dns_record_create returned NULL!\n"); - b = pico_mdns_record_create(url2, (void *)rdata, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_SHARED); - fail_if(!a, "dns_record_create returned NULL!\n"); - pico_tree_insert(&rtree, a->record); - pico_tree_insert(&rtree, b->record); - - /* Try to create an answer packet */ - packet = pico_dns_answer_create(&rtree, NULL, NULL, &len); - fail_if (packet == NULL, "mdns_answer_create returned NULL!\n"); - - ptr = ((uint8_t *)packet + 12); - - ret = pico_mdns_handle_data_as_answers_generic(&ptr, 2, packet, 0); - fail_unless(0 == ret, "mdns_handle_data_as_answers failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_handle_data_as_authorities) /* MARK: handle_data_as_authorities */ -{ - pico_dns_packet *packet = NULL; - PICO_DNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *a = NULL, *b = NULL; - char url[] = "picotcp.com"; - char url2[] = "google.com"; - uint16_t len = 0; - uint8_t *ptr = NULL; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - a = pico_mdns_record_create(url, (void *)rdata, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!a, "dns_record_create returned NULL!\n"); - b = pico_mdns_record_create(url2, (void *)rdata, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_SHARED); - fail_if(!a, "dns_record_create returned NULL!\n"); - pico_tree_insert(&rtree, a->record); - pico_tree_insert(&rtree, b->record); - - /* Try to create an answer packet */ - 
packet = pico_dns_answer_create(&rtree, NULL, NULL, &len); - fail_if (packet == NULL, "mdns_answer_create returned NULL!\n"); - - ptr = ((uint8_t *)packet + 12); - - ret = pico_mdns_handle_data_as_answers_generic(&ptr, 2, packet, 1); - fail_unless(0 == ret, "mdns_handle_data_as_answers failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_handle_data_as_additionals) /* MARK: handle_data_as_additionals */ -{ - printf("*********************** starting %s * \n", __func__); - /* Insert code here... */ - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_sort_unicast_multicast) /* MARK: sort_unicast_multicast */ -{ - PICO_MDNS_RTREE_DECLARE(antree); - PICO_DNS_RTREE_DECLARE(antree_u); - PICO_DNS_RTREE_DECLARE(antree_m); - struct pico_mdns_record *a = NULL, *b = NULL; - char url[] = "picotcp.com"; - char url2[] = "google.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - a = pico_mdns_record_create(url, (void *)rdata, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!a, "mdns_record_create returned NULL!\n"); - b = pico_mdns_record_create(url2, (void *)rdata, 4, PICO_DNS_TYPE_A, 120, - (PICO_MDNS_RECORD_SHARED | PICO_MDNS_RECORD_SEND_UNICAST)); - fail_if(!a, "mdns_record_create returned NULL!\n"); - pico_tree_insert(&antree, a); - pico_tree_insert(&antree, b); - - ret = pico_mdns_sort_unicast_multicast(&antree, &antree_u, &antree_m); - fail_unless(0 == ret, "mdns_sort_unicast_multicast returned error!\n"); - fail_unless(1 == pico_tree_count(&antree_u), "mdns_sort_unicast_multicast failed!\n"); - fail_unless(1 == pico_tree_count(&antree_m), "mdns_sort_unicast_multicast failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_gather_additionals) /* MARK: gather_additionals */ -{ - PICO_MDNS_RTREE_DECLARE(antree); - 
PICO_MDNS_RTREE_DECLARE(artree); - struct pico_mdns_record *srv_record = NULL, *record = NULL; - struct pico_tree_node *node = NULL; - int ret = 0; - char value[] = "\0\0\0\0\0\x50\4host\5local"; - printf("*********************** starting %s * \n", __func__); - - add_records(); - - srv_record = pico_mdns_record_create("test._http._tcp.local", - value, 17, - PICO_DNS_TYPE_SRV, 120, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!srv_record, "Could not create SRV record!\n"); - pico_tree_insert(&antree, srv_record); - - ret = pico_mdns_gather_additionals(&antree, &artree); - fail_if(ret, "Gather Additionals returned error!\n"); - fail_unless(pico_tree_count(&antree) == 3, "ANtree should contain 3: %d", - pico_tree_count(&antree)); - - printf("Answers: \n"); - pico_tree_foreach(node, &antree) { - if ((record = node->keyValue)) { - printf("%d - %s\n", short_be(record->record->rsuffix->rtype), - record->record->rname); - } - } - - printf("Additionals: \n"); - pico_tree_foreach(node, &artree) { - if ((record = node->keyValue)) { - printf("%d - %s\n", short_be(record->record->rsuffix->rtype), - record->record->rname); - } - } - - fail_unless(pico_tree_count(&artree) == 3, "ARtree should contine 3: %d", - pico_tree_count(&artree)); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_apply_known_answer_suppression) /* MARK: apply_k_a_s */ -{ - pico_dns_packet *packet = NULL; - PICO_DNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *a = NULL, *b = NULL; - char url[] = "picotcp.com"; - char url2[] = "google.com"; - uint8_t rdata[4] = { - 10, 10, 0, 1 - }; - uint8_t *ptr = NULL; - uint16_t len = 0; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - a = pico_mdns_record_create(url, (void *)rdata, 4, PICO_DNS_TYPE_A, 120, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!a, "dns_record_create returned NULL!\n"); - b = pico_mdns_record_create(url2, (void *)rdata, 4, PICO_DNS_TYPE_A, 
120, - PICO_MDNS_RECORD_SHARED); - fail_if(!a, "dns_record_create returned NULL!\n"); - - pico_tree_insert(&antree, a->record); - pico_tree_insert(&antree, b->record); - pico_tree_insert(&rtree, a); - pico_tree_insert(&rtree, b); - - /* Try to create an answer packet */ - packet = pico_dns_answer_create(&antree, NULL, NULL, &len); - fail_if (packet == NULL, "mdns_answer_create returned NULL!\n"); - - ptr = ((uint8_t *)packet + 12); - - printf("Applying Known answer suppression...\n"); - - ret = pico_mdns_apply_k_a_s(&rtree, packet, 1, &ptr); - fail_unless(0 == ret, "mdns_apply_known_answer_suppression returned error!\n"); - - fail_unless(1 == pico_tree_count(&rtree), - "mdns_apply_known_answer_suppression failed %d!\n", pico_tree_count(&rtree)); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_send_query_packet) /* MARK: send_query_packet */ -{ - struct pico_mdns_cookie cookie; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_COOKIE_DECLARE(a); - - struct pico_dns_question *question1 = NULL; - struct pico_dns_question *question2 = NULL; - char url1[] = "foo.local"; - uint16_t len; - printf("*********************** starting %s * \n", __func__); - - /* Create some questions */ - question1 = pico_dns_question_create(url1, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_A, - PICO_DNS_CLASS_IN, 0); - fail_if(!question1, "Could not create question 1!\n"); - question2 = pico_dns_question_create(url1, &len, PICO_PROTO_IPV4, - PICO_DNS_TYPE_PTR, - PICO_DNS_CLASS_IN, 0); - pico_tree_insert(&(a.qtree), question1); - pico_tree_insert(&(a.qtree), question2); - cookie.count = 2; - - pico_stack_init(); - mdns_init(); - - pico_mdns_send_query_packet(0, &cookie); - cookie.type = PICO_MDNS_PACKET_TYPE_QUERY; - cookie.qtree = qtree; - pico_mdns_send_query_packet(0, &cookie); - cookie.type++; - cookie.status = PICO_MDNS_COOKIE_STATUS_CANCELLED; - pico_mdns_send_query_packet(0, &cookie); - - printf("*********************** ending %s * \n", __func__); 
-} -END_TEST -START_TEST(tc_mdns_getrecord) /* MARK: getrecord */ -{ -#if PICO_MDNS_ALLOW_CACHING == 1 - struct pico_mdns_record *record = NULL, *found = NULL; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - char url[] = "foo.local"; -#endif - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - - /* - If caching is enabled: - If the record is cached: - getrecord should get the record from the cache, execute the callback and return 0 when the callback is finished. - Else: - getrecord should send a query and return 0 when the query is sent. - Else: - getrecord should send a query and return 0 when the query is sent. - */ - - /* Init */ - pico_stack_init(); - mdns_init(); - -#if PICO_MDNS_ALLOW_CACHING == 1 - /* Create an A record with URL */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 80, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record, "Record could not be created!\n"); - - ret = pico_mdns_cache_add_record(record); - fail_unless(0 == ret, - "mdns_cache_add_record returned error!\n"); - found = pico_tree_findKey(&Cache, record); - fail_if(found == NULL, "mdns_cache_add_record failed!\n"); -#endif - -#if PICO_MDNS_ALLOW_CACHING == 1 - amount_callback_executed = 0; - ret = pico_mdns_getrecord("foo.local", PICO_DNS_TYPE_A, callback, NULL); - fail_unless(1 == amount_callback_executed, "mdns_getrecord failed with cache record, callback not executed (%d) !\n", amount_callback_executed); - fail_unless(0 == ret, "mdns_getrecord failed with cache record!\n"); -#endif - - ret = pico_mdns_getrecord("bar.local", PICO_DNS_TYPE_A, callback, NULL); - fail_unless(0 == ret, "mdns_getrecord failed!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_send_announcement_packet) /* MARK: send_announcement_packet */ -{ - struct pico_mdns_cookie *cookie = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - - 
printf("*********************** starting %s * \n", __func__); - - cookie = pico_mdns_cookie_create(qtree, antree, artree, 2, - PICO_MDNS_PACKET_TYPE_ANNOUNCEMENT, - callback, NULL); - - pico_stack_init(); - mdns_init(); - - pico_mdns_send_announcement_packet(0, cookie); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_announce) /* MARK: annonce */ -{ - printf("*********************** starting %s * \n", __func__); - add_records(); - pico_stack_init(); - mdns_init(); - - fail_unless(0 == pico_mdns_announce(callback, NULL), - "mdns_announce failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_send_probe_packet) /* MARK: send_probe_packet */ -{ - struct pico_mdns_cookie *cookie = NULL; - PICO_DNS_QTREE_DECLARE(qtree); - PICO_MDNS_RTREE_DECLARE(antree); - PICO_MDNS_RTREE_DECLARE(artree); - - printf("*********************** starting %s * \n", __func__); - - cookie = pico_mdns_cookie_create(qtree, antree, artree, 2, - PICO_MDNS_PACKET_TYPE_PROBE, - callback, NULL); - - pico_stack_init(); - mdns_init(); - - pico_mdns_send_probe_packet(0, cookie); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_add_probe_question) /* MARK: add_probe_question */ -{ - PICO_DNS_QTREE_DECLARE(qtree); - int ret = 0; - char arg[] = "\4host\5local"; - char arg1[] = "\4tree\5local"; - char arg2[] = "\x8host (2)\5local"; - printf("*********************** starting %s * \n", __func__); - ret = pico_mdns_add_probe_question(&qtree, arg); - fail_unless(0 == ret, "mdns_add_probe_question returned error!\n"); - fail_unless(1 == pico_tree_count(&qtree), - "New probe question didn't create!\n"); - ret = pico_mdns_add_probe_question(&qtree, arg); - fail_unless(0 == ret, "mdns_add_probe_question returned error!\n"); - fail_unless(1 == pico_tree_count(&qtree), - "Count should be 1, is: %d!\n", pico_tree_count(&qtree)); - ret = 
pico_mdns_add_probe_question(&qtree, arg1); - fail_unless(0 == ret, "mdns_add_probe_question returned error!\n"); - fail_unless(2 == pico_tree_count(&qtree), - "New probe question didn't create!\n"); - ret = pico_mdns_add_probe_question(&qtree, arg2); - fail_unless(0 == ret, "mdns_add_probe_question returned error!\n"); - fail_unless(3 == pico_tree_count(&qtree), - "New probe question didn't create!\n"); - PICO_DNS_QTREE_DESTROY(&qtree); - fail_unless(0 == pico_tree_count(&qtree), - "Tree isn't properly destroyed %d!\n", pico_tree_count(&qtree)); - ret = pico_mdns_add_probe_question(&qtree, arg2); - fail_unless(0 == ret, "mdns_add_probe_question returned error!\n"); - fail_unless(1 == pico_tree_count(&qtree), - "New probe question didn't create!\n"); - PICO_DNS_QTREE_DESTROY(&qtree); - fail_unless(0 == pico_tree_count(&qtree), - "Tree isn't properly destroyed the second time!\n"); - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_probe) /* MARK: probe */ -{ - printf("*********************** starting %s * \n", __func__); - add_records(); - pico_stack_init(); - mdns_init(); - - fail_unless(0 == pico_mdns_probe(callback, NULL), - "mdns_announce failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_claim) /* MARK: mdns_claim */ -{ - PICO_MDNS_RTREE_DECLARE(rtree); - struct pico_mdns_record *record = NULL, *record1 = NULL; - char url[] = "foo.local"; - char url2[] = "bar.local"; - struct pico_ip4 rdata = { - long_be(0x00FFFFFF) - }; - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - /* Create a record */ - record = pico_mdns_record_create(url, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record, "Record could not be created!\n"); - record1 = pico_mdns_record_create(url2, &rdata, 4, PICO_DNS_TYPE_A, 0, - PICO_MDNS_RECORD_UNIQUE); - fail_if(!record1, "Record could not be created!\n"); - - /* Some tests */ - 
pico_tree_insert(&rtree, record); - pico_tree_insert(&rtree, record1); - - pico_stack_init(); - mdns_init(); - - ret = pico_mdns_claim(rtree, callback, NULL); - fail_unless(0 == ret, "mdns_claimed failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_tryclaim_hostname) /* MARK: tryclaim_hostname */ -{ - int ret = 0; - - printf("*********************** starting %s * \n", __func__); - pico_stack_init(); - mdns_init(); - - ret = pico_mdns_tryclaim_hostname("test.local", NULL); - fail_unless(0 == ret, "mdns_tryclaim_hostname failed!\n"); - - printf("*********************** ending %s * \n", __func__); -} -END_TEST -START_TEST(tc_mdns_get_hostname) /* MARK: get_hostname */ -{ - const char *c_hostname; - printf("*********************** starting %s * \n", __func__); - pico_stack_init(); - mdns_init(); - - c_hostname = pico_mdns_get_hostname(); - printf(" hostname %s\n", c_hostname); - printf("*********************** ending %s * \n", __func__); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_mdns_init = tcase_create("Unit test for mdns_init"); - - /* Comparing functions */ - TCase *TCase_mdns_record_cmp = tcase_create("Unit test for mdns_record_cmp"); - TCase *TCase_mdns_record_cmp_name_type = tcase_create("Unit test for mdns_record_cmp_name_type"); - TCase *TCase_mdns_cookie_cmp = tcase_create("Unit test for mdns_cookie_cmp"); - - TCase *TCase_mdns_suffix_to_uint16 = tcase_create("Unit test for mdns_suffix_to_uint16"); - - /* Cookie functions */ - TCase *TCase_mdns_cookie_delete = tcase_create("Unit test for mdns_cookie_delete"); - TCase *TCase_mdns_cookie_create = tcase_create("Unit test for mdns_cookie_create"); - TCase *TCase_mdns_cookie_tree_find_query_cookie = tcase_create("Unit test for mdns_cookie_tree_find_query_cookie"); - TCase *TCase_mdns_cookie_apply_spt = tcase_create("Unit test for mdns_cookie_apply_spt"); - TCase *TCase_mdns_is_suffix_present = 
tcase_create("Unit test for mdns_is_suffix_present"); - TCase *TCase_pico_itoa = tcase_create("Unit test for pico_itoa"); - TCase *TCase_mdns_resolve_name_conflict = tcase_create("Unit test for mdns_resolve_name_conflict"); - TCase *TCase_mdns_generate_new_records = tcase_create("Unit test for mdns_generate_new_records"); - TCase *TCase_mdns_cookie_resolve_conflict = tcase_create("Unit test for mdns_cookie_resolve_conflict"); - - /* Question functions */ - TCase *TCase_mdns_question_create = tcase_create("Unit test for mdns_question_create"); - - /* Record functions */ - TCase *TCase_mdns_record_resolve_conflict = tcase_create("Unit test for mdns_record_resolve_conflict"); - TCase *TCase_mdns_record_am_i_lexi_later = tcase_create("Unit test for mdns_record_am_i_lexi_later"); - TCase *TCase_mdns_record_copy_with_new_name = tcase_create("Unit test for mdns_record_copy"); - TCase *TCase_mdns_record_copy = tcase_create("Unit test for mdns_record_copy"); - TCase *TCase_mdns_record_create = tcase_create("Unit test for mdns_record_create"); - TCase *TCase_mdns_record_delete = tcase_create("Unit test for mdns_record_delete"); - - - /* Record tree functions */ - TCase *TCase_mdns_record_tree_find_name = tcase_create("Unit test for mdns_record_tree_find_name"); - TCase *TCase_mdns_record_tree_find_name_type = tcase_create("Unit test for mdns_record_tree_find_name_type"); - TCase *TCase_mdns_record_tree_del_name = tcase_create("Unit test for mdns_record_tree_del_name"); - TCase *TCase_mdns_record_tree_del_name_type = tcase_create("Unit test for mdns_record_tree_del_name_type"); - - /* My record functions */ - TCase *TCase_mdns_my_records_add = tcase_create("Unit test for mdns_my_records_add"); - TCase *TCase_mdns_my_records_find_probed = tcase_create("Unit test for mdns_my_records_find_probed"); - TCase *TCase_mdns_my_records_find_to_probe = tcase_create("Unit test for mdns_my_records_find_to_probe"); - TCase *TCase_mdns_my_records_claimed_id = tcase_create("Unit test for 
mdns_my_records_claimed_id"); - TCase *TCase_mdns_my_records_claimed = tcase_create("Unit test for mdns_my_records_claimed"); - - /* Cache functions */ - TCase *TCase_mdns_cache_add_record = tcase_create("Unit test for mdns_cache_add_record"); - - /* Handling receptions */ - TCase *TCase_mdns_populate_answer_vector = tcase_create("Unit test for mdns_populate_answer_vector"); - TCase *TCase_mdns_handle_data_as_questions = tcase_create("Unit test for mdns_handle_data_as_questions"); - TCase *TCase_mdns_handle_data_as_answers = tcase_create("Unit test for mdns_handle_data_as_answers"); - TCase *TCase_mdns_handle_data_as_authorities = tcase_create("Unit test for mdns_handle_data_as_authorities"); - TCase *TCase_mdns_handle_data_as_additionals = tcase_create("Unit test for mdns_handle_data_as_additionals"); - - /* Handling query packets */ - TCase *TCase_mdns_sort_unicast_multicast = tcase_create("Unit test for mdns_sort_unicast_multicast"); - TCase *TCase_mdns_gather_additionals = tcase_create("Unit test for mdns_gather_additionals"); - TCase *TCase_mdns_apply_known_answer_suppression = tcase_create("Unit test for mdns_apply_known_answer_suppression"); - - /* Address resolving functions */ - TCase *TCase_mdns_send_query_packet = tcase_create("Unit test for mdns_send_query_packet"); - TCase *TCase_mdns_getrecord = tcase_create("Unit test for mdns_getrecord"); - - /* Probe & Announcement functions */ - TCase *TCase_mdns_send_announcement_packet = tcase_create("Unit test for mdns_send_announcement_packet"); - TCase *TCase_mdns_announce = tcase_create("Unit test for mdns_announce"); - TCase *TCase_mdns_send_probe_packet = tcase_create("Unit test for mdns_send_probe_packet"); - TCase *TCase_mdns_add_probe_question = tcase_create("Unit test for mdns_add_probe_question"); - TCase *TCase_mdns_probe = tcase_create("Unit test for mdns_probe"); - - /* Claiming functions */ - TCase *TCase_mdns_claim = tcase_create("Unit test for mnds_claim"); - - /* API functions */ - TCase 
*TCase_mdns_tryclaim_hostname = tcase_create("Unit test for mdns_tryclaim_hostname"); - TCase *TCase_mdns_get_hostname = tcase_create("Unit test for mdns_get_hostname"); - - TCase *TCase_pico_tree_merge = tcase_create("Unit test for pico_tree_merge"); - - tcase_add_test(TCase_mdns_init, tc_mdns_init); - suite_add_tcase(s, TCase_mdns_init); - - /* Comparing functions */ - tcase_add_test(TCase_mdns_record_cmp, tc_mdns_record_cmp); - suite_add_tcase(s, TCase_mdns_record_cmp); - tcase_add_test(TCase_mdns_record_cmp_name_type, tc_mdns_record_cmp_name_type); - suite_add_tcase(s, TCase_mdns_record_cmp_name_type); - tcase_add_test(TCase_mdns_cookie_cmp, tc_mdns_cookie_cmp); - suite_add_tcase(s, TCase_mdns_cookie_cmp); - - tcase_add_test(TCase_mdns_suffix_to_uint16, tc_mdns_suffix_to_uint16); - suite_add_tcase(s, TCase_mdns_suffix_to_uint16); - - /* Cookie functions */ - tcase_add_test(TCase_mdns_cookie_delete, tc_mdns_cookie_delete); - suite_add_tcase(s, TCase_mdns_cookie_delete); - tcase_add_test(TCase_mdns_cookie_create, tc_mdns_cookie_create); - suite_add_tcase(s, TCase_mdns_cookie_create); - tcase_add_test(TCase_mdns_cookie_tree_find_query_cookie, tc_mdns_cookie_tree_find_query_cookie); - suite_add_tcase(s, TCase_mdns_cookie_tree_find_query_cookie); - tcase_add_test(TCase_mdns_cookie_apply_spt, tc_mdns_cookie_apply_spt); - suite_add_tcase(s, TCase_mdns_cookie_apply_spt); - tcase_add_test(TCase_mdns_is_suffix_present, tc_mdns_is_suffix_present); - suite_add_tcase(s, TCase_mdns_is_suffix_present); - tcase_add_test(TCase_pico_itoa, tc_pico_itoa); - suite_add_tcase(s, TCase_pico_itoa); - tcase_add_test(TCase_mdns_resolve_name_conflict, tc_mdns_resolve_name_conflict); - suite_add_tcase(s, TCase_mdns_resolve_name_conflict); - tcase_add_test(TCase_mdns_generate_new_records, tc_mdns_generate_new_records); - suite_add_tcase(s, TCase_mdns_generate_new_records); - tcase_add_test(TCase_mdns_cookie_resolve_conflict, tc_mdns_cookie_resolve_conflict); - suite_add_tcase(s, 
TCase_mdns_cookie_resolve_conflict); - - /* Question functions */ - tcase_add_test(TCase_mdns_question_create, tc_mdns_question_create); - suite_add_tcase(s, TCase_mdns_question_create); - - /* Record functions */ - tcase_add_test(TCase_mdns_record_resolve_conflict, tc_mdns_record_resolve_conflict); - suite_add_tcase(s, TCase_mdns_record_resolve_conflict); - tcase_add_test(TCase_mdns_record_am_i_lexi_later, tc_mdns_record_am_i_lexi_later); - suite_add_tcase(s, TCase_mdns_record_am_i_lexi_later); - tcase_add_test(TCase_mdns_record_copy_with_new_name, tc_mdns_record_copy_with_new_name); - suite_add_tcase(s, TCase_mdns_record_copy_with_new_name); - tcase_add_test(TCase_mdns_record_copy, tc_mdns_record_copy); - suite_add_tcase(s, TCase_mdns_record_copy); - tcase_add_test(TCase_mdns_record_create, tc_mdns_record_create); - suite_add_tcase(s, TCase_mdns_record_create); - tcase_add_test(TCase_mdns_record_delete, tc_mdns_record_delete); - suite_add_tcase(s, TCase_mdns_record_delete); - - /* Record tree functions */ - tcase_add_test(TCase_mdns_record_tree_find_name, tc_mdns_record_tree_find_name); - suite_add_tcase(s, TCase_mdns_record_tree_find_name); - tcase_add_test(TCase_mdns_record_tree_find_name_type, tc_mdns_record_tree_find_name_type); - suite_add_tcase(s, TCase_mdns_record_tree_find_name_type); - tcase_add_test(TCase_mdns_record_tree_del_name, tc_mdns_record_tree_del_name); - suite_add_tcase(s, TCase_mdns_record_tree_del_name); - tcase_add_test(TCase_mdns_record_tree_del_name_type, tc_mdns_record_tree_del_name_type); - suite_add_tcase(s, TCase_mdns_record_tree_del_name_type); - - /* My records functions */ - tcase_add_test(TCase_mdns_my_records_add, tc_mdns_my_records_add); - suite_add_tcase(s, TCase_mdns_my_records_add); - tcase_add_test(TCase_mdns_my_records_find_probed, tc_mdns_my_records_find_probed); - suite_add_tcase(s, TCase_mdns_my_records_find_probed); - tcase_add_test(TCase_mdns_my_records_find_to_probe, tc_mdns_my_records_find_to_probe); - 
suite_add_tcase(s, TCase_mdns_my_records_find_to_probe); - tcase_add_test(TCase_mdns_my_records_claimed_id, tc_mdns_my_records_claimed_id); - suite_add_tcase(s, TCase_mdns_my_records_claimed_id); - tcase_add_test(TCase_mdns_my_records_claimed, tc_mdns_my_records_claimed); - suite_add_tcase(s, TCase_mdns_my_records_claimed); - - /* Cache functions */ - tcase_add_test(TCase_mdns_cache_add_record, tc_mdns_cache_add_record); - suite_add_tcase(s, TCase_mdns_cache_add_record); - tcase_add_test(TCase_mdns_populate_answer_vector, tc_mdns_populate_answer_vector); - suite_add_tcase(s, TCase_mdns_populate_answer_vector); - - /* Handling receptions */ - tcase_add_test(TCase_mdns_handle_data_as_questions, tc_mdns_handle_data_as_questions); - suite_add_tcase(s, TCase_mdns_handle_data_as_questions); - tcase_add_test(TCase_mdns_handle_data_as_answers, tc_mdns_handle_data_as_answers); - suite_add_tcase(s, TCase_mdns_handle_data_as_answers); - tcase_add_test(TCase_mdns_handle_data_as_authorities, tc_mdns_handle_data_as_authorities); - suite_add_tcase(s, TCase_mdns_handle_data_as_authorities); - tcase_add_test(TCase_mdns_handle_data_as_additionals, tc_mdns_handle_data_as_additionals); - suite_add_tcase(s, TCase_mdns_handle_data_as_additionals); - - /* Handling query packets */ - tcase_add_test(TCase_mdns_sort_unicast_multicast, tc_mdns_sort_unicast_multicast); - suite_add_tcase(s, TCase_mdns_sort_unicast_multicast); - tcase_add_test(TCase_mdns_gather_additionals, tc_mdns_gather_additionals); - suite_add_tcase(s, TCase_mdns_gather_additionals); - tcase_add_test(TCase_mdns_apply_known_answer_suppression, tc_mdns_apply_known_answer_suppression); - suite_add_tcase(s, TCase_mdns_apply_known_answer_suppression); - - /* Address resolving functions */ - tcase_add_test(TCase_mdns_send_query_packet, tc_mdns_send_query_packet); - suite_add_tcase(s, TCase_mdns_send_query_packet); - tcase_add_test(TCase_mdns_getrecord, tc_mdns_getrecord); - suite_add_tcase(s, TCase_mdns_getrecord); - - /* Probe & 
Announcement functions */ - tcase_add_test(TCase_mdns_send_announcement_packet, tc_mdns_send_announcement_packet); - suite_add_tcase(s, TCase_mdns_send_announcement_packet); - tcase_add_test(TCase_mdns_announce, tc_mdns_announce); - suite_add_tcase(s, TCase_mdns_announce); - tcase_add_test(TCase_mdns_send_probe_packet, tc_mdns_send_probe_packet); - suite_add_tcase(s, TCase_mdns_send_probe_packet); - tcase_add_test(TCase_mdns_add_probe_question, tc_mdns_add_probe_question); - suite_add_tcase(s, TCase_mdns_add_probe_question); - tcase_add_test(TCase_mdns_probe, tc_mdns_probe); - suite_add_tcase(s, TCase_mdns_probe); - - /* Claiming functions */ - tcase_add_test(TCase_mdns_claim, tc_mdns_claim); - suite_add_tcase(s, TCase_mdns_claim); - - /* API functions */ - tcase_add_test(TCase_mdns_tryclaim_hostname, tc_mdns_tryclaim_hostname); - suite_add_tcase(s, TCase_mdns_tryclaim_hostname); - tcase_add_test(TCase_mdns_get_hostname, tc_mdns_get_hostname); - suite_add_tcase(s, TCase_mdns_get_hostname); - - tcase_add_test(TCase_pico_tree_merge, tc_pico_tree_merge); - suite_add_tcase(s, TCase_pico_tree_merge); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} - diff --git a/kernel/picotcp/test/unit/modunit_pico_mld.c b/kernel/picotcp/test/unit/modunit_pico_mld.c deleted file mode 100644 index 5b17c1a..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_mld.c +++ /dev/null @@ -1,584 +0,0 @@ -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_socket.h" -#include "pico_stack.h" -#include "pico_socket.h" -#include "pico_queue.h" -#include "pico_tree.h" -#include "modules/pico_mld.c" -#include "check.h" -#include "pico_dev_null.c" - -Suite *pico_suite(void); -void mock_callback(struct mld_timer *t); - -static uint32_t timers_added = 0; -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, 
void *), void *arg) -{ - IGNORE_PARAMETER(expire); - IGNORE_PARAMETER(timer); - IGNORE_PARAMETER(arg); - return ++timers_added; -} -void mock_callback(struct mld_timer *t) -{ - IGNORE_PARAMETER(t); -} -static int mcast_filter_cmp_ipv6(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - return memcmp(&a->ip6, &b->ip6, sizeof(struct pico_ip6)); -} -static int mcast_sources_cmp_ipv6(void *ka, void *kb) -{ - union pico_address *a = ka, *b = kb; - return memcmp(&a->ip6, &b->ip6, sizeof(struct pico_ip6)); -} -static PICO_TREE_DECLARE(_MCASTFilter, mcast_filter_cmp_ipv6); - -START_TEST(tc_pico_mld_fill_hopbyhop) -{ - struct pico_ipv6_hbhoption *hbh = NULL; - /* Not-null case tested by unit_ipv6.c */ - fail_if(pico_mld_fill_hopbyhop(hbh) != NULL); -} -END_TEST -START_TEST(tc_pico_mld_check_hopbyhop) -{ - struct pico_ipv6_hbhoption *hbh = NULL; - struct pico_ipv6_hbhoption _hbh; - uint8_t *p; - uint8_t options[8] = { - PICO_PROTO_ICMP6, 0, PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT, \ - PICO_IPV6_EXTHDR_OPT_ROUTER_ALERT_DATALEN, 0, 0, 1, 0 - }; - int i = 0; - int test = 0; - fail_if(pico_mld_check_hopbyhop(hbh) != -1); - _hbh.type = 1; - _hbh.len = 0; - fail_if(pico_mld_check_hopbyhop(&_hbh) != -1); - _hbh.type = PICO_PROTO_ICMP6; - _hbh.len = 1; - fail_if(pico_mld_check_hopbyhop(&_hbh) != -1); - - hbh = (struct pico_ipv6_hbhoption *) PICO_ZALLOC(sizeof(struct pico_ipv6_hbhoption) + 7); - hbh->type = PICO_PROTO_ICMP6; - hbh->len = 0; - for(test = 0; test < 7; test++) { - p = (uint8_t *)hbh + sizeof(struct pico_ipv6_hbhoption); - for(i = 0; i < 6; i++ ) { - if(i != test) - *(p++) = options[i + 2]; - else - *(p++) = 9; - } - if(test != 6) - fail_if(pico_mld_check_hopbyhop(hbh) != -1); - else - fail_if(pico_mld_check_hopbyhop(hbh) != 0); - } -} -END_TEST -START_TEST(tc_pico_mld_v1querier_expired) -{ - struct mld_timer *t = PICO_ZALLOC(sizeof(struct mld_timer)); - struct pico_ip6 addr = {{0}}; - struct pico_device *dev = pico_null_create("dummy2"); - struct pico_frame *f = 
pico_frame_alloc(sizeof(struct pico_frame)); - t->f = f; - pico_string_to_ipv6("AAAA::109", addr.addr); - /* void function, just check for side effects */ - /* No link */ - pico_mld_v1querier_expired(t); - f->dev = dev; - pico_ipv6_link_add(dev, addr, addr); - pico_mld_v1querier_expired(t); -} -END_TEST -START_TEST(tc_pico_mld_send_report) -{ - struct pico_frame *f; - struct pico_device *dev = pico_null_create("dummy1"); - struct pico_ip6 addr; - struct pico_ipv6_link *link; - struct mcast_parameters p; - f = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, sizeof(struct mldv2_report) + MLD_ROUTER_ALERT_LEN + sizeof(struct mldv2_group_record) + (0 * sizeof(struct pico_ip6))); - pico_string_to_ipv6("AAAA::110", addr.addr); - p.mcast_link.ip6 = addr; - /* No link */ - fail_if(pico_mld_send_report(&p, f) != -1); - link = pico_ipv6_link_add(dev, addr, addr); - p.event = 0; - link->mcast_compatibility = PICO_MLDV1; - fail_if(pico_mld_send_report(&p, f) != 0); - link->mcast_compatibility = 99; - fail_if(pico_mld_send_report(&p, f) != -1); -} -END_TEST -START_TEST(tc_pico_mld_report_expired) -{ - struct mld_timer *t = PICO_ZALLOC(sizeof(struct mld_timer)); - struct pico_ip6 zero = {0}; - - t->mcast_link = zero; - t->mcast_group = zero; - /* void function, just check for side effects */ - pico_mld_report_expired(t); -} -END_TEST -START_TEST(tc_pico_mld_delete_parameter) -{ - struct mcast_parameters p; - fail_if(pico_mld_delete_parameter(&p) != -1); -} -END_TEST -START_TEST(tc_mldt_type_compare) -{ - struct mld_timer a; - struct mld_timer b; - a.type = 1; - b.type = 2; - fail_if(mldt_type_compare(&a, &b) != -1); - fail_if(mldt_type_compare(&b, &a) != 1); - fail_if(mld_timer_cmp(&b, &a) != 1); -} -END_TEST -START_TEST(tc_pico_mld_is_checksum_valid) -{ - struct pico_frame *f; - f = pico_proto_ipv6.alloc(&pico_proto_ipv6, NULL, sizeof(struct mldv2_report) + MLD_ROUTER_ALERT_LEN + sizeof(struct mldv2_group_record) + (0 * sizeof(struct pico_ip6))); - 
fail_if(pico_mld_is_checksum_valid(f) == 1); -} -END_TEST -START_TEST(tc_pico_mld_find_parameter) -{ - struct pico_ip6 mcast_link, mcast_group; - struct mcast_parameters test = { - 0 - }; - fail_if(pico_mld_find_parameter(NULL, NULL) != NULL); - pico_string_to_ipv6("AAAA::111", mcast_link.addr); - fail_if(pico_mld_find_parameter(&mcast_link, NULL) != NULL); - pico_string_to_ipv6("AAAA::211", mcast_group.addr); - fail_if(pico_mld_find_parameter(&mcast_link, &mcast_group) != NULL); - test.mcast_link.ip6 = mcast_link; - test.mcast_group.ip6 = mcast_group; - pico_tree_insert(&MLDParameters, &test); - - fail_if(pico_mld_find_parameter(&mcast_link, &mcast_group) == NULL); - pico_tree_delete(&MLDParameters, &test); -} -END_TEST -START_TEST(tc_pico_mld_timer_expired) -{ - struct mld_timer *t, *s; - t = PICO_ZALLOC(sizeof(struct mld_timer)); - t->stopped = MLD_TIMER_STOPPED; - t->type = 0; - pico_string_to_ipv6("AAAA::112", t->mcast_link.addr); - pico_string_to_ipv6("AAAA::112", t->mcast_group.addr); - /* void function, just check for side effects */ - pico_mld_timer_expired(0, (void *)t); - pico_tree_insert(&MLDTimers, t); - s = PICO_ZALLOC(sizeof(struct mld_timer)); - memcpy(s, t, sizeof(struct mld_timer)); /* t will be freed next test */ - pico_mld_timer_expired(0, (void *)t); /* will be freed */ - s->stopped++; - s->start = PICO_TIME_MS() * 2; - s->type++; - pico_tree_insert(&MLDTimers, s); - t = PICO_ZALLOC(sizeof(struct mld_timer)); - memcpy(t, s, sizeof(struct mld_timer)); /* s will be freed next test */ - pico_mld_timer_expired(0, (void *)s); /* s will be freed */ - t->mld_callback = mock_callback; - pico_mld_timer_expired(0, (void *)t); /* t will be freed */ -} -END_TEST -START_TEST(tc_pico_mld_send_done) -{ - struct mcast_parameters p; - fail_if(pico_mld_send_done(&p, NULL) != 0); -} -END_TEST -START_TEST(tc_mld_stsdifs) -{ - struct mcast_parameters *p; - struct pico_device *dev = pico_null_create("dummy3"); - struct pico_ipv6_link *link; - struct mld_timer *t = 
PICO_ZALLOC(sizeof(struct mld_timer)); - /* Building example frame */ - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - pico_string_to_ipv6("AAAA::113", p->mcast_link.ip6.addr); - pico_string_to_ipv6("FF00::e007:707", p->mcast_group.ip6.addr); - /* no link */ - fail_if(mld_stsdifs(p) != -1); - link = pico_ipv6_link_add(dev, p->mcast_link.ip6, p->mcast_link.ip6); - link->mcast_compatibility = PICO_MLDV1; - /* no timer */ - fail_if(mld_stsdifs(p) != -1); - t->type = MLD_TIMER_GROUP_REPORT; - t->mcast_link = p->mcast_link.ip6; - t->mcast_group = p->mcast_group.ip6; - pico_tree_insert(&MLDTimers, t); - fail_if(mld_stsdifs(p) != 0); - /* set flag */ - pico_mld_flag = 1; - fail_if(mld_stsdifs(p) != 0); -} -END_TEST -START_TEST(tc_mld_srsf) -{ - struct mcast_parameters *p; - /* Building example frame */ - - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - pico_string_to_ipv6("AAAA::114", p->mcast_link.ip6.addr); - pico_string_to_ipv6("FF00::e007:707", p->mcast_group.ip6.addr); - fail_if(mld_srsf(p) != -1); -} -END_TEST - -START_TEST(tc_mld_srst) -{ - struct mcast_parameters *p; - struct pico_device *dev = pico_null_create("dummy3"); - struct pico_ipv6_link *link; - struct pico_mcast_group g; - /* Building example frame */ - - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - pico_string_to_ipv6("AAAA::99", p->mcast_link.ip6.addr); - pico_string_to_ipv6("FF00::e007:707", p->mcast_group.ip6.addr); - p->MCASTFilter = &_MCASTFilter; - p->filter_mode = 0; - g.filter_mode = 0; - g.mcast_addr = p->mcast_group; - g.MCASTSources.root = &LEAF; - g.MCASTSources.compare = mcast_sources_cmp_ipv6; - - pico_tree_insert(&MLDParameters, p); - /* no link */ - fail_if(mld_srst(p) != -1); - link = pico_ipv6_link_add(dev, p->mcast_link.ip6, p->mcast_link.ip6); - link->mcast_compatibility = PICO_MLDV1; - /* invalid proto */ - fail_if(mld_srst(p) != -1); - link->mcast_compatibility = PICO_MLDV2; - pico_tree_insert(link->MCASTGroups, &g); - - fail_if(mld_srst(p) != 0); - 
pico_tree_delete(&MLDParameters, p); -} -END_TEST -START_TEST(tc_mld_mrsrrt) -{ - struct mcast_parameters *p; - struct pico_device *dev = pico_null_create("dummy3"); - struct pico_ipv6_link *link; - /* Building example frame */ - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - pico_string_to_ipv6("AAAA::115", p->mcast_link.ip6.addr); - pico_string_to_ipv6("FF00::e007:707", p->mcast_group.ip6.addr); - /* no link */ - fail_if(mld_mrsrrt(p) != -1); - link = pico_ipv6_link_add(dev, p->mcast_link.ip6, p->mcast_link.ip6); - link->mcast_compatibility = PICO_MLDV1; - /* wrong proto */ - fail_if(mld_mrsrrt(p) != -1); - link->mcast_compatibility = PICO_MLDV2; - p->f = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, sizeof(struct mldv2_report) + MLD_ROUTER_ALERT_LEN + sizeof(struct mldv2_group_record) + (0 * sizeof(struct pico_ip6))); - fail_if(mld_mrsrrt(p) != -1); - -} -END_TEST -START_TEST(tc_pico_mld_process_in) -{ - struct mcast_parameters *p; - struct pico_device *dev = pico_null_create("dummy3"); - struct pico_ipv6_link *link; - uint8_t i, j, _i, _j; - int result = 0; - struct pico_mcast_group g; - struct mldv2_report *report; - /* Building example frame */ - p = PICO_ZALLOC(sizeof(struct mcast_parameters)); - pico_string_to_ipv6("AAAA::101", p->mcast_link.ip6.addr); - pico_string_to_ipv6("FF00::e007:707", p->mcast_group.ip6.addr); - /* no link */ - fail_if(pico_mld_generate_report(p) != -1); - link = pico_ipv6_link_add(dev, p->mcast_link.ip6, p->mcast_link.ip6); - pico_string_to_ipv6("AAAA::100", p->mcast_group.ip6.addr); - fail_if(pico_mld_generate_report(p) != -1); - pico_string_to_ipv6("FF00::e007:707", p->mcast_group.ip6.addr); - link->mcast_compatibility = PICO_MLDV1; - g.mcast_addr = p->mcast_group; - g.MCASTSources.root = &LEAF; - g.MCASTSources.compare = mcast_sources_cmp_ipv6; - /* No mcastsources tree */ - link->mcast_compatibility = PICO_MLDV2; - fail_if(pico_mld_generate_report(p) != -1); - pico_tree_insert(link->MCASTGroups, &g); - 
pico_tree_insert(&MLDParameters, p); - - link->mcast_compatibility = 99; - fail_if(pico_mld_generate_report(p) != -1); - link->mcast_compatibility = PICO_MLDV1; - fail_if(pico_mld_generate_report(p) != 0); - link->mcast_compatibility = PICO_MLDV2; - for(_j = 0; _j < 3; _j++) { /* FILTER */ - (_j == 2) ? (result = -1) : (result = 0); - for(_i = 0; _i < 3; _i++) { /* FILTER */ - if(_i == 2) result = -1; - - for(i = 0; i < 3; i++) { /* STATES */ - for(j = 0; j < 6; j++) { /* EVENTS */ - p->MCASTFilter = &_MCASTFilter; - p->filter_mode = _i; - g.filter_mode = _j; - if(p->event == MLD_EVENT_DELETE_GROUP || p->event == MLD_EVENT_QUERY_RECV) - p->event++; - - fail_if(pico_mld_generate_report(p) != result); - p->state = i; - p->event = j; - if(result != -1 && p->f) { /* in some combinations, no frame is created */ - report = (struct mldv2_report *)(p->f->transport_hdr + MLD_ROUTER_ALERT_LEN); - report->crc = short_be(pico_icmp6_checksum(p->f)); - fail_if(pico_mld_process_in(p->f) != 0); - } - } - } - } - } - pico_tree_delete(&MLDParameters, p); - -} -END_TEST -START_TEST(tc_mld_rtimrtct) -{ - struct mld_timer *t = PICO_ZALLOC(sizeof(struct mld_timer)); - struct mcast_parameters p; - pico_string_to_ipv6("AAAA::102", t->mcast_link.addr); - pico_string_to_ipv6("AAAA::102", t->mcast_group.addr); - p.mcast_link.ip6 = t->mcast_link; - p.mcast_group.ip6 = t->mcast_group; - t->type = MLD_TIMER_GROUP_REPORT; - /* not in tree */ - fail_if(mld_rtimrtct(&p) != -1); - pico_mld_timer_start(t); - fail_if(mld_rtimrtct(&p) != 0); -} -END_TEST - -START_TEST(tc_mld_stcl) -{ - struct mld_timer *t = PICO_ZALLOC(sizeof(struct mld_timer)); - struct mcast_parameters p; - pico_string_to_ipv6("AAAA::103", t->mcast_link.addr); - pico_string_to_ipv6("AAAA::103", t->mcast_group.addr); - p.mcast_link.ip6 = t->mcast_link; - p.mcast_group.ip6 = t->mcast_group; - t->type = MLD_TIMER_GROUP_REPORT; - /* not in tree */ - fail_if(mld_stcl(&p) != -1); - pico_mld_timer_start(t); - fail_if(mld_stcl(&p) != 0); -} 
-END_TEST -START_TEST(tc_pico_mld_compatibility_mode) -{ - struct pico_frame *f; - struct pico_device *dev = pico_null_create("dummy1"); - struct pico_ip6 addr; - - f = pico_proto_ipv6.alloc(&pico_proto_ipv6, NULL, sizeof(struct mldv2_report) + MLD_ROUTER_ALERT_LEN + sizeof(struct mldv2_group_record) + (0 * sizeof(struct pico_ip6))); - pico_string_to_ipv6("AAAA::104", addr.addr); - /* No link */ - fail_if(pico_mld_compatibility_mode(f) != -1); - pico_ipv6_link_add(dev, addr, addr); - f->dev = dev; - /* MLDv2 query */ - f->buffer_len = 28 + PICO_SIZE_IP6HDR + MLD_ROUTER_ALERT_LEN; - fail_if(pico_mld_compatibility_mode(f) != 0); - /* MLDv1 query */ - f->buffer_len = 24 + PICO_SIZE_IP6HDR + MLD_ROUTER_ALERT_LEN; - fail_if(pico_mld_compatibility_mode(f) != 0); - /* Invalid Query */ - f->buffer_len = 25 + PICO_SIZE_IP6HDR + MLD_ROUTER_ALERT_LEN; - fail_if(pico_mld_compatibility_mode(f) == 0); - /* MLDv2 query + timer amready running */ - f->dev = dev; - f->buffer_len = 28 + PICO_SIZE_IP6HDR + MLD_ROUTER_ALERT_LEN + PICO_SIZE_ETHHDR; - fail_if(pico_mld_compatibility_mode(f) != -1); -} -END_TEST -START_TEST(tc_pico_mld_timer_reset) -{ - struct mld_timer *t = PICO_ZALLOC(sizeof(struct mld_timer)); - pico_string_to_ipv6("AAAA::105", t->mcast_link.addr); - pico_string_to_ipv6("AAAA::105", t->mcast_group.addr); - t->type = 0; - fail_if(pico_mld_timer_reset(t) != -1); -} -END_TEST -START_TEST(tc_pico_mld_state_change) -{ - struct pico_ip6 mcast_link, mcast_group; - struct mcast_parameters p; - pico_string_to_ipv6("AAAA::106", mcast_link.addr); - pico_string_to_ipv6("AAAA::106", mcast_group.addr); - p.mcast_link.ip6 = mcast_link; - p.mcast_group.ip6 = mcast_group; - - fail_if(pico_mld_state_change(NULL, &mcast_group, 0, NULL, PICO_MLD_STATE_CREATE) != -1); - /* All host group */ - pico_string_to_ipv6("FF01:0:0:0:0:0:0:1", mcast_group.addr); - fail_if(pico_mld_state_change(&mcast_link, &mcast_group, 0, NULL, PICO_MLD_STATE_CREATE) != 0); - pico_string_to_ipv6("AAAA::107", 
mcast_group.addr); - fail_if(pico_mld_state_change(&mcast_link, &mcast_group, 0, NULL, 99) != -1); - pico_tree_insert(&MLDParameters, &p); - fail_if(pico_mld_state_change(&mcast_link, &mcast_group, 0, NULL, 99) != -1); - pico_tree_delete(&MLDParameters, &p); -} -END_TEST -START_TEST(tc_pico_mld_analyse_packet) -{ - struct pico_frame *f; - struct pico_device *dev = pico_null_create("dummy0"); - struct pico_ip6 addr; - struct pico_ip6 local; - struct pico_ipv6_hdr *ip6; - struct pico_ipv6_hbhoption *hbh; - struct pico_icmp6_hdr *mld; - f = pico_proto_ipv6.alloc(&pico_proto_ipv6, dev, sizeof(struct mld_message) + MLD_ROUTER_ALERT_LEN); - pico_string_to_ipv6("AAAA::108", addr.addr); - pico_string_to_ipv6("FE80::1", local.addr); - /* No link */ - fail_if(pico_mld_analyse_packet(f) != NULL); - pico_ipv6_link_add(dev, addr, addr); - ip6 = (struct pico_ipv6_hdr *) f->net_hdr; - ip6->hop = 99; - /* Incorrect hop */ - fail_if(pico_mld_analyse_packet(f) != NULL); - ip6->hop = 1; - hbh = (struct pico_ipv6_hbhoption *) f->transport_hdr; - pico_mld_fill_hopbyhop(hbh); - hbh->type = 99; - /* incorrect hop by hop */ - fail_if(pico_mld_analyse_packet(f) != NULL); - pico_mld_fill_hopbyhop(hbh); - ip6->src = addr; - /* Not link local */ - fail_if(pico_mld_analyse_packet(f) != NULL); - memcpy(&ip6->src, PICO_IP6_ANY, sizeof(struct pico_ip6)); - fail_if(pico_mld_analyse_packet(f) != NULL); - ip6->src = local; - mld = (struct pico_icmp6_hdr *) (f->transport_hdr + MLD_ROUTER_ALERT_LEN); - mld->type = 0; - - /* wrong type */ - fail_if(pico_mld_analyse_packet(f) != NULL); - /* all correct */ - mld->type = PICO_MLD_QUERY; - fail_if(pico_mld_analyse_packet(f) == NULL); - mld->type = PICO_MLD_REPORT; - fail_if(pico_mld_analyse_packet(f) == NULL); - mld->type = PICO_MLD_DONE; - fail_if(pico_mld_analyse_packet(f) == NULL); - mld->type = PICO_MLD_REPORTV2; - fail_if(pico_mld_analyse_packet(f) == NULL); -} -END_TEST -START_TEST(tc_pico_mld_discard) -{ - mld_discard(NULL); -} -END_TEST -Suite 
*pico_suite(void) -{ - - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_mld_fill_hopbyhop = tcase_create("Unit test for pico_mld_fill_hopbyhop"); - TCase *TCase_pico_mld_check_hopbyhop = tcase_create("Unit test for pico_mld_check_hopbyhop"); - TCase *TCase_pico_mld_report_expired = tcase_create("Unit test for pico_mld_report_expired"); - TCase *TCase_mldt_type_compare = tcase_create("Unit test for mldt_type_compare"); - TCase *TCase_pico_mld_analyse_packet = tcase_create("Unit test for pico_mld_analyse_packet"); - TCase *TCase_pico_mld_discard = tcase_create("Unit test for pico_mld_discard"); - TCase *TCase_pico_mld_compatibility_mode = tcase_create("Unit test for pico_mld_compatibility"); - TCase *TCase_pico_mld_v1querier_expired = tcase_create("Unit test for pico_mld_v1querier_expired"); - TCase *TCase_pico_mld_delete_parameter = tcase_create("Unit test for pico_mld_delete_parameter"); - TCase *TCase_pico_mld_timer_expired = tcase_create("Unit test for pico_mld_timer_expired"); - TCase *TCase_pico_mld_timer_reset = tcase_create("Unit test for pico_mld_timer_reset"); - TCase *TCase_pico_mld_send_done = tcase_create("Unit test for pico_mld_send_done"); - TCase *TCase_pico_mld_is_checksum_valid = tcase_create("Unit test for pico_mld_is_checksum"); - TCase *TCase_pico_mld_find_parameter = tcase_create("Unit test for pico_mld_find_parameter"); - TCase *TCase_pico_mld_state_change = tcase_create("Unit test for pico_mld_state_change"); - TCase *TCase_mld_srst = tcase_create("Unit test for pico_mld_srst"); - TCase *TCase_mld_stcl = tcase_create("Unit test for pico_mld_stcl"); - TCase *TCase_pico_mld_process_in = tcase_create("Unit test for pico_mld_process_in"); - TCase *TCase_pico_mld_send_report = tcase_create("Unit test for pico_mld_send_report"); - TCase *TCase_mld_stsdifs = tcase_create("Unit test for mld_stsdifs"); - TCase *TCase_mld_srsf = tcase_create("Unit test for mld_srsf"); - TCase *TCase_mld_rtimrtct = tcase_create("Unit test for mld_rtimrtct"); - 
TCase *TCase_mld_mrsrrt = tcase_create("Unit test for mld_mrsrrt"); - - tcase_add_test(TCase_pico_mld_fill_hopbyhop, tc_pico_mld_fill_hopbyhop); - suite_add_tcase(s, TCase_pico_mld_fill_hopbyhop); - tcase_add_test(TCase_pico_mld_check_hopbyhop, tc_pico_mld_check_hopbyhop); - suite_add_tcase(s, TCase_pico_mld_check_hopbyhop); - tcase_add_test(TCase_pico_mld_report_expired, tc_pico_mld_report_expired); - suite_add_tcase(s, TCase_pico_mld_report_expired); - tcase_add_test(TCase_mldt_type_compare, tc_mldt_type_compare); - suite_add_tcase(s, TCase_mldt_type_compare); - tcase_add_test(TCase_pico_mld_analyse_packet, tc_pico_mld_analyse_packet); - suite_add_tcase(s, TCase_pico_mld_analyse_packet); - tcase_add_test(TCase_pico_mld_discard, tc_pico_mld_discard); - suite_add_tcase(s, TCase_pico_mld_discard); - tcase_add_test(TCase_pico_mld_compatibility_mode, tc_pico_mld_compatibility_mode); - suite_add_tcase(s, TCase_pico_mld_compatibility_mode); - tcase_add_test(TCase_pico_mld_v1querier_expired, tc_pico_mld_v1querier_expired); - suite_add_tcase(s, TCase_pico_mld_v1querier_expired); - tcase_add_test(TCase_pico_mld_delete_parameter, tc_pico_mld_delete_parameter); - suite_add_tcase(s, TCase_pico_mld_delete_parameter); - tcase_add_test(TCase_pico_mld_timer_expired, tc_pico_mld_timer_expired); - suite_add_tcase(s, TCase_pico_mld_timer_expired); - tcase_add_test(TCase_pico_mld_timer_reset, tc_pico_mld_timer_reset); - suite_add_tcase(s, TCase_pico_mld_timer_reset); - tcase_add_test(TCase_pico_mld_send_done, tc_pico_mld_send_done); - suite_add_tcase(s, TCase_pico_mld_send_done); - tcase_add_test(TCase_pico_mld_is_checksum_valid, tc_pico_mld_is_checksum_valid); - suite_add_tcase(s, TCase_pico_mld_is_checksum_valid); - tcase_add_test(TCase_pico_mld_find_parameter, tc_pico_mld_find_parameter); - suite_add_tcase(s, TCase_pico_mld_find_parameter); - tcase_add_test(TCase_pico_mld_state_change, tc_pico_mld_state_change); - suite_add_tcase(s, TCase_pico_mld_state_change); - 
tcase_add_test(TCase_mld_srst, tc_mld_srst); - suite_add_tcase(s, TCase_mld_srst); - tcase_add_test(TCase_mld_stcl, tc_mld_stcl); - suite_add_tcase(s, TCase_mld_stcl); - tcase_add_test(TCase_pico_mld_process_in, tc_pico_mld_process_in); - suite_add_tcase(s, TCase_pico_mld_process_in); - tcase_add_test(TCase_pico_mld_send_report, tc_pico_mld_send_report); - suite_add_tcase(s, TCase_pico_mld_send_report); - tcase_add_test(TCase_mld_stsdifs, tc_mld_stsdifs); - suite_add_tcase(s, TCase_mld_stsdifs); - tcase_add_test(TCase_mld_srsf, tc_mld_srsf); - suite_add_tcase(s, TCase_mld_srsf); - tcase_add_test(TCase_mld_rtimrtct, tc_mld_rtimrtct); - suite_add_tcase(s, TCase_mld_rtimrtct); - tcase_add_test(TCase_mld_mrsrrt, tc_mld_mrsrrt); - suite_add_tcase(s, TCase_mld_mrsrrt); - return s; -} -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_protocol.c b/kernel/picotcp/test/unit/modunit_pico_protocol.c deleted file mode 100644 index fee1861..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_protocol.c +++ /dev/null @@ -1,229 +0,0 @@ -#include "pico_protocol.h" -#include "pico_tree.h" -#include "stack/pico_protocol.c" -#include "check.h" - -Suite *pico_suite(void); - -volatile pico_err_t pico_err = 0; - -static int protocol_passby = 0; - -static struct pico_frame f = { - .next = NULL -}; - -static struct pico_queue q = { - 0 -}; - -static struct pico_tree_node NODE_IN = { - 0 -}; -static struct pico_tree_node NODE_OUT = { - 0 -}; - -#define KEY_IN 0x0D01 -#define KEY_OUT 0x0D00 - - -START_TEST(tc_pico_proto_cmp) -{ - struct pico_protocol a = { - .hash = 0 - }; - struct pico_protocol b = { - .hash = 1 - }; - fail_if(pico_proto_cmp(&a, &b) >= 0); - a.hash = 1; - fail_if(pico_proto_cmp(&a, &b) != 0); - a.hash = 2; - fail_if(pico_proto_cmp(&a, &b) <= 0); -} -END_TEST - -static int 
modunit_proto_loop_cb_in(struct pico_protocol *self, struct pico_frame *p) -{ - if (!p) - protocol_passby = -1; /* Error! */ - - if (!self) - protocol_passby = -1; /* Error! */ - - if (protocol_passby != 0) /* Ensure that we are called only once. */ - protocol_passby = -1; - - protocol_passby = KEY_IN; - - return 1; /* One frame processed! */ -} - -static int modunit_proto_loop_cb_out(struct pico_protocol *self, struct pico_frame *p) -{ - if (!p) - protocol_passby = -1; /* Error! */ - - if (!self) - protocol_passby = -1; /* Error! */ - - if (protocol_passby != 0) /* Ensure that we are called only once. */ - protocol_passby = -1; - - protocol_passby = KEY_OUT; - - return 1; /* One frame processed! */ -} - -START_TEST(tc_proto_loop_in) -{ - struct pico_protocol p = { - .process_in = modunit_proto_loop_cb_in, .q_in = &q - }; - protocol_passby = 0; - pico_enqueue(p.q_in, &f); - fail_if(proto_loop_in(&p, 1) != 0); - fail_if(protocol_passby != KEY_IN); - - /* Try to dequeue from empty queue, get same loop_score */ - protocol_passby = 0; - fail_if(proto_loop_in(&p, 1) != 1); - fail_if(protocol_passby != 0); -} -END_TEST - - -START_TEST(tc_proto_loop_out) -{ - struct pico_protocol p = { - .process_out = modunit_proto_loop_cb_out, .q_out = &q - }; - protocol_passby = 0; - pico_enqueue(p.q_out, &f); - fail_if(proto_loop_out(&p, 1) != 0); - fail_if(protocol_passby != KEY_OUT); - - /* Try to dequeue from empty queue, get same loop_score */ - protocol_passby = 0; - fail_if(proto_loop_out(&p, 1) != 1); - fail_if(protocol_passby != 0); -} -END_TEST - -START_TEST(tc_proto_loop) -{ - struct pico_protocol p = { - .process_in = modunit_proto_loop_cb_in, - .process_out = modunit_proto_loop_cb_out, - .q_in = &q, - .q_out = &q - }; - protocol_passby = 0; - pico_enqueue(p.q_in, &f); - fail_if(proto_loop(&p, 1, PICO_LOOP_DIR_IN) != 0); - fail_if(protocol_passby != KEY_IN); - - protocol_passby = 0; - pico_enqueue(p.q_out, &f); - fail_if(proto_loop(&p, 1, PICO_LOOP_DIR_OUT) != 0); - 
fail_if(protocol_passby != KEY_OUT); - -} -END_TEST - -START_TEST(tc_pico_tree_node) -{ - struct pico_proto_rr rr = { - 0 - }; - rr.node_in = &NODE_IN; - rr.node_out = &NODE_OUT; - fail_unless(roundrobin_init(&rr, PICO_LOOP_DIR_IN) == &NODE_IN); - fail_unless(roundrobin_init(&rr, PICO_LOOP_DIR_OUT) == &NODE_OUT); -} -END_TEST - -START_TEST(tc_roundrobin_end) -{ - struct pico_proto_rr rr; - roundrobin_end(&rr, PICO_LOOP_DIR_IN, &NODE_IN); - fail_if(rr.node_in != &NODE_IN); - roundrobin_end(&rr, PICO_LOOP_DIR_OUT, &NODE_OUT); - fail_if(rr.node_out != &NODE_OUT); -} -END_TEST - -START_TEST(tc_pico_protocol_generic_loop) -{ - struct pico_proto_rr rr = { - 0 - }; - int ret = 0; - - rr.node_in = &NODE_IN; - rr.node_out = &NODE_OUT; - ret = pico_protocol_generic_loop(&rr, 0, PICO_LOOP_DIR_IN); - - fail_if(ret != 0); - - pico_protocols_loop(0); -} -END_TEST - - -START_TEST(tc_proto_layer_rr_reset) -{ - struct pico_proto_rr rr; - rr.node_in = &NODE_IN; - rr.node_out = &NODE_OUT; - proto_layer_rr_reset(&rr); - fail_if(rr.node_in != NULL); - fail_if(rr.node_out != NULL); -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("pico_protocol.c"); - - TCase *TCase_pico_proto_cmp = tcase_create("Unit test for pico_proto_cmp"); - TCase *TCase_proto_loop_in = tcase_create("Unit test for proto_loop_in"); - TCase *TCase_proto_loop_out = tcase_create("Unit test for proto_loop_out"); - TCase *TCase_proto_loop = tcase_create("Unit test for proto_loop"); - TCase *TCase_pico_tree_node = tcase_create("Unit test for pico_tree_node"); - TCase *TCase_roundrobin_end = tcase_create("Unit test for roundrobin_end"); - TCase *TCase_pico_protocol_generic_loop = tcase_create("Unit test for pico_protocol_generic_loop"); - TCase *TCase_proto_layer_rr_reset = tcase_create("Unit test for proto_layer_rr_reset"); - - - tcase_add_test(TCase_pico_proto_cmp, tc_pico_proto_cmp); - suite_add_tcase(s, TCase_pico_proto_cmp); - tcase_add_test(TCase_proto_loop_in, tc_proto_loop_in); - 
suite_add_tcase(s, TCase_proto_loop_in); - tcase_add_test(TCase_proto_loop_out, tc_proto_loop_out); - suite_add_tcase(s, TCase_proto_loop_out); - tcase_add_test(TCase_proto_loop, tc_proto_loop); - suite_add_tcase(s, TCase_proto_loop); - tcase_add_test(TCase_pico_tree_node, tc_pico_tree_node); - suite_add_tcase(s, TCase_pico_tree_node); - tcase_add_test(TCase_roundrobin_end, tc_roundrobin_end); - suite_add_tcase(s, TCase_roundrobin_end); - tcase_add_test(TCase_pico_protocol_generic_loop, tc_pico_protocol_generic_loop); - suite_add_tcase(s, TCase_pico_protocol_generic_loop); - tcase_add_test(TCase_proto_layer_rr_reset, tc_proto_layer_rr_reset); - suite_add_tcase(s, TCase_proto_layer_rr_reset); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_sntp_client.c b/kernel/picotcp/test/unit/modunit_pico_sntp_client.c deleted file mode 100644 index b2de620..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_sntp_client.c +++ /dev/null @@ -1,428 +0,0 @@ -#include "pico_sntp_client.h" -#include "modules/pico_sntp_client.c" -#include "check.h" -#include "pico_socket.h" -/* Mocking functions, variables, ... 
*/ -volatile pico_time pico_tick = 0ull; -volatile pico_err_t pico_err = 0; - -Suite *pico_suite(void); -void cb_synced(pico_err_t status); - -/* Used in pico_sntp_sync_start */ -struct pico_socket *pico_socket_open(uint16_t net, uint16_t proto, void (*wakeup)(uint16_t ev, struct pico_socket *s)) -{ - struct pico_socket *sock = PICO_ZALLOC(sizeof(struct pico_socket)); - (void) net; - (void) proto; - (void) wakeup; - fail_unless (sock != NULL); - return sock; -} - -/* Used in pico_sntp_sync_start */ -int pico_socket_bind(struct pico_socket *s, void *local_addr, uint16_t *port) -{ - (void) s; - (void) local_addr; - (void) port; - return 0; -} - -/* Used in pico_sntp_sync_start */ -int pico_socket_close(struct pico_socket *s) -{ - (void) s; - return 0; -} - -/* Used in pico_sntp_send */ -int8_t pico_socket_del(struct pico_socket *s) -{ - (void) s; - return 0; -} - -/* Used in dnsCallback */ -int pico_string_to_ipv4(const char *ipstr, uint32_t *ip) -{ - (void) ipstr; - (void) ip; - return 0; -} - -/* Used in dnsCallback */ -int pico_string_to_ipv6(const char *ipstr, uint8_t *ip) -{ - (void) ipstr; - (void) ip; - return 0; -} - -/* Used in pico_sntp_sync_start_ipv4 */ -int pico_ipv4_to_string(char* ipbuf, const uint32_t ip) -{ - (void) ipbuf; - (void) ip; - return 0; -} - -/* Used in pico_sntp_sync_start_ipv6 */ -int pico_ipv6_to_string(char* ipbuf, const uint8_t ip[PICO_SIZE_IP6]) -{ - (void) ipbuf; - (void) ip; - return 0; -} - -/* Used in pico_sntp_client_wakeup */ -int pico_socket_recvfrom(struct pico_socket *s, void *buf, int len, void *orig, uint16_t *remote_port) -{ - (void) s; - (void) buf; - (void) len; - (void) orig; - (void) remote_port; - return 0; -} - -/* Used in pico_sntp_send */ -int pico_socket_sendto(struct pico_socket *s, const void *buf, int len, void *dst, uint16_t remote_port) -{ - (void) s; - (void) buf; - (void) len; - (void) dst; - (void) remote_port; - return 0; -} - -/* Used in pico_sntp_sync_start_dns_ipv4, not tested */ -int 
pico_dns_client_getaddr(const char *url, void (*callback)(char *ip, void *arg), void *arg) -{ - (void) url; - (void) callback; - (void) arg; - return 0; -} - -/* Used in pico_sntp_sync_start_dns_ipv6, not tested */ -int pico_dns_client_getaddr6(const char *url, void (*callback)(char *, void *), void *arg) -{ - (void) url; - (void) callback; - (void) arg; - return 0; -} - -/* Used in pico_sntp_parse */ -void cb_synced(pico_err_t status) -{ - (void) status; - -} - -/* Used in pico_sntp_send */ -static uint32_t timers_added = 0; -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - (void) expire; - (void) timer; - (void) arg; - return ++timers_added; -} - -/* Used in pico_sntp_cleanup */ -void pico_timer_cancel(uint32_t t) -{ - IGNORE_PARAMETER(t); -} - -START_TEST(tc_timestamp_convert) -{ - struct pico_sntp_ts ts; - struct pico_timeval tv; - pico_time delay = 0ull; - int ret = 0; - - /* Input is all zero */ - ts.sec = long_be(0ul); - ts.frac = long_be(0ul); - ret = timestamp_convert(&ts, &tv, delay); - ck_assert(ret == -1); - ck_assert(tv.tv_sec == 0); - ck_assert(tv.tv_msec == 0); - - /* Minimum input*/ - ts.sec = long_be(SNTP_UNIX_OFFSET + 1390000000ul); - ts.frac = long_be(4310344ul); /* MIN value: 1msec */ - ret = timestamp_convert(&ts, &tv, delay); - ck_assert(ret == 0); - fail_unless(tv.tv_sec == 1390000000); - fail_unless(tv.tv_msec == 1); - - /* Intermediate input */ - ts.sec = long_be(SNTP_UNIX_OFFSET + 1390000000ul); - ts.frac = long_be(3865470566ul); /* value: 899msec */ - ret = timestamp_convert(&ts, &tv, delay); - ck_assert(ret == 0); - fail_unless(tv.tv_sec == 1390000000); - fail_unless(tv.tv_msec == 900); - - /* Maximum input */ - ts.sec = long_be(SNTP_UNIX_OFFSET + 1390000000ul); - ts.frac = long_be(4294967295ul); /* MAX value: 999msec */ - ret = timestamp_convert(&ts, &tv, delay); - ck_assert(ret == 0); - fail_unless(tv.tv_sec == 1390000001); - fail_unless(tv.tv_msec == 0); - - /* Intermediate input with delay 
*/ - ts.sec = long_be(SNTP_UNIX_OFFSET + 1390000000ul); - ts.frac = long_be(3865470566ul); /* value: 899msec */ - delay = 200ull; - ret = timestamp_convert(&ts, &tv, delay); - ck_assert(ret == 0); - fail_unless(tv.tv_sec == 1390000001); - fail_unless(tv.tv_msec == 100); -} -END_TEST -START_TEST(tc_pico_sntp_cleanup) -{ - struct sntp_server_ns_cookie *ck; - struct pico_socket *sock; - ck = PICO_ZALLOC(sizeof(struct sntp_server_ns_cookie)); - fail_unless (ck != NULL); - ck->hostname = PICO_ZALLOC(sizeof(char) * 5); - fail_unless (ck->hostname != NULL); - ck->stamp = 0ull; - ck->cb_synced = cb_synced; - - sock = pico_socket_open(0, 0, &pico_sntp_client_wakeup); - ck->sock = sock; - sock->priv = ck; - - - pico_sntp_cleanup(ck, PICO_ERR_NOERR); -} -END_TEST -START_TEST(tc_pico_sntp_parse) -{ - /* TODO: test this: static void pico_sntp_parse(char *buf, struct sntp_server_ns_cookie *ck) */ - struct sntp_server_ns_cookie *ck; - struct pico_socket *sock; - struct pico_sntp_header header = { - 0 - }; - - ck = PICO_ZALLOC(sizeof(struct sntp_server_ns_cookie)); - fail_unless (ck != NULL); - ck->hostname = PICO_ZALLOC(sizeof(char) * 5); - fail_unless (ck->hostname != NULL); - ck->stamp = 0ull; - ck->cb_synced = cb_synced; - - sock = pico_socket_open(0, 0, &pico_sntp_client_wakeup); - ck->sock = sock; - sock->priv = ck; - - header.mode = 4; /* server mode */ - header.vn = 4; /* sntp version 4 */ - header.stratum = 1; /* primary reference */ - header.trs_ts.sec = long_be(SNTP_UNIX_OFFSET + 1390000000ul); - header.trs_ts.frac = long_be(3865470566ul); /* value: 899msec */ - - fail_if(pico_sntp_parse((char *) &header, NULL) == 0); - fail_if(pico_sntp_parse((char *) &header, ck) != 0); -} -END_TEST -START_TEST(tc_pico_sntp_client_wakeup) -{ - /* TODO: test this: static void pico_sntp_client_wakeup(uint16_t ev, struct pico_socket *s) */ - uint16_t event = PICO_SOCK_EV_ERR; - struct sntp_server_ns_cookie *ck; - struct pico_socket *sock; - ck = PICO_ZALLOC(sizeof(struct 
sntp_server_ns_cookie)); - fail_unless (ck != NULL); - ck->hostname = PICO_ZALLOC(sizeof(char) * 5); - fail_unless (ck->hostname != NULL); - ck->stamp = 0ull; - ck->cb_synced = cb_synced; - - sock = pico_socket_open(0, 0, &pico_sntp_client_wakeup); - ck->sock = sock; - sock->priv = ck; - - ck->cb_synced = cb_synced; - printf("Started wakeup unit test\n"); - - pico_sntp_client_wakeup(event, sock); -} -END_TEST -START_TEST(tc_sntp_receive_timeout) -{ - struct sntp_server_ns_cookie *ck; - struct pico_socket *sock; - ck = PICO_ZALLOC(sizeof(struct sntp_server_ns_cookie)); - fail_unless (ck != NULL); - ck->hostname = PICO_ZALLOC(sizeof(char) * 5); - fail_unless (ck->hostname != NULL); - ck->stamp = 0ull; - ck->cb_synced = cb_synced; - - sock = pico_socket_open(0, 0, &pico_sntp_client_wakeup); - ck->sock = sock; - sock->priv = ck; - sntp_receive_timeout(0ull, ck); - -} -END_TEST -START_TEST(tc_pico_sntp_send) -{ - /* TODO: test this: static void pico_sntp_send(struct pico_socket *sock, union pico_address *dst) */ - struct pico_socket sock = { - 0 - }; - union pico_address dst; - struct sntp_server_ns_cookie ck = { - 0 - }; - sock.priv = &ck; - - pico_sntp_send(&sock, &dst); -} -END_TEST -START_TEST(tc_dnsCallback) -{ - /* TODO: test this: static void dnsCallback(char *ip, void *arg) */ - char ip[] = "198.123.30.132"; - struct sntp_server_ns_cookie *ck; - ck = PICO_ZALLOC(sizeof(struct sntp_server_ns_cookie)); - - dnsCallback(ip, ck); -} -END_TEST -START_TEST(tc_pico_sntp_sync) -{ - const char *sntp_server= "ntp.nasa.gov"; - - fail_if(pico_sntp_sync(NULL, cb_synced) == 0); - fail_if(pico_err != PICO_ERR_EINVAL); - - fail_if(pico_sntp_sync(sntp_server, NULL) == 0); - fail_if(pico_err != PICO_ERR_EINVAL); - - fail_if(pico_sntp_sync(sntp_server, cb_synced) != 0); -} -END_TEST -START_TEST(tc_pico_sntp_sync_ip) -{ - union pico_address sntp_addr = { .ip4.addr = 0ul }; - - fail_if(pico_sntp_sync_ip(NULL, cb_synced) == 0); - fail_if(pico_err != PICO_ERR_EINVAL); - - 
fail_if(pico_sntp_sync_ip(&sntp_addr, NULL) == 0); - fail_if(pico_err != PICO_ERR_EINVAL); - - fail_if(pico_sntp_sync_ip(&sntp_addr, cb_synced) != 0); -} -END_TEST -START_TEST(tc_pico_sntp_sync_start) -{ - struct sntp_server_ns_cookie ck = { 0 }; - union pico_address sntp_addr = { .ip4.addr= 0ul }; - - fail_if(pico_sntp_sync_start(&ck, &sntp_addr) != 0); -} -END_TEST -START_TEST(tc_pico_sntp_sync_start_dns_ipv4) -{ - const char *sntp_server = "ntp.nasa.gov"; - - fail_if(pico_sntp_sync_start_dns_ipv4(sntp_server, cb_synced) != 0); -} -END_TEST -START_TEST(tc_pico_sntp_sync_start_dns_ipv6) -{ - const char *sntp_server = "ntp.nasa.gov"; - - fail_if(pico_sntp_sync_start_dns_ipv6(sntp_server, cb_synced) != 0); -} -END_TEST -START_TEST(tc_pico_sntp_sync_start_ipv4) -{ - union pico_address sntp_addr = { .ip4.addr = 0}; - - fail_if(pico_sntp_sync_start_ipv4(&sntp_addr, cb_synced) != 0); -} -END_TEST -START_TEST(tc_pico_sntp_sync_start_ipv6) -{ - union pico_address sntp_addr = { .ip6.addr = { 0 } }; - - fail_if(pico_sntp_sync_start_ipv6(&sntp_addr, cb_synced) != 0); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_timestamp_convert = tcase_create("Unit test for pico_timeval"); - TCase *TCase_pico_sntp_cleanup = tcase_create("Unit test for pico_sntp_cleanup"); - TCase *TCase_pico_sntp_send = tcase_create("Unit test for pico_sntp_send"); - TCase *TCase_pico_sntp_parse = tcase_create("Unit test for pico_sntp_parse"); - TCase *TCase_pico_sntp_client_wakeup = tcase_create("Unit test for pico_sntp_client_wakeup"); - TCase *TCase_sntp_receive_timeout = tcase_create("Unit test for sntp_receive_timeout"); - TCase *TCase_dnsCallback = tcase_create("Unit test for dnsCallback"); - TCase *TCase_pico_sntp_sync = tcase_create("Unit test for pico_sntp_sync"); - TCase *TCase_pico_sntp_sync_ip = tcase_create("Unit test for pico_sntp_sync_ip"); - TCase *TCase_pico_sntp_sync_start = tcase_create("Unit test for pico_sntp_sync_start"); - TCase 
*TCase_pico_sntp_sync_start_dns_ipv4 = tcase_create("Unit test for pico_sntp_sync_start_dns_ipv4"); - TCase *TCase_pico_sntp_sync_start_dns_ipv6 = tcase_create("Unit test for pico_sntp_sync_start_dns_ipv6"); - TCase *TCase_pico_sntp_sync_start_ipv4 = tcase_create("Unit test for pico_sntp_sync_start_ipv4"); - TCase *TCase_pico_sntp_sync_start_ipv6 = tcase_create("Unit test for pico_sntp_sync_start_ipv6"); - - - tcase_add_test(TCase_timestamp_convert, tc_timestamp_convert); - suite_add_tcase(s, TCase_timestamp_convert); - tcase_add_test(TCase_pico_sntp_cleanup, tc_pico_sntp_cleanup); - suite_add_tcase(s, TCase_pico_sntp_cleanup); - tcase_add_test(TCase_pico_sntp_parse, tc_pico_sntp_parse); - suite_add_tcase(s, TCase_pico_sntp_parse); - tcase_add_test(TCase_pico_sntp_client_wakeup, tc_pico_sntp_client_wakeup); - suite_add_tcase(s, TCase_pico_sntp_client_wakeup); - tcase_add_test(TCase_sntp_receive_timeout, tc_sntp_receive_timeout); - suite_add_tcase(s, TCase_sntp_receive_timeout); - tcase_add_test(TCase_pico_sntp_send, tc_pico_sntp_send); - suite_add_tcase(s, TCase_pico_sntp_send); - tcase_add_test(TCase_dnsCallback, tc_dnsCallback); - suite_add_tcase(s, TCase_dnsCallback); - tcase_add_test(TCase_pico_sntp_sync, tc_pico_sntp_sync); - suite_add_tcase(s, TCase_pico_sntp_sync); - tcase_add_test(TCase_pico_sntp_sync_ip, tc_pico_sntp_sync_ip); - suite_add_tcase(s, TCase_pico_sntp_sync_ip); - tcase_add_test(TCase_pico_sntp_sync_start, tc_pico_sntp_sync_start); - suite_add_tcase(s, TCase_pico_sntp_sync_start); - tcase_add_test(TCase_pico_sntp_sync_start_dns_ipv4, tc_pico_sntp_sync_start_dns_ipv4); - suite_add_tcase(s, TCase_pico_sntp_sync_start_dns_ipv4); - tcase_add_test(TCase_pico_sntp_sync_start_dns_ipv6, tc_pico_sntp_sync_start_dns_ipv6); - suite_add_tcase(s, TCase_pico_sntp_sync_start_dns_ipv6); - tcase_add_test(TCase_pico_sntp_sync_start_ipv4, tc_pico_sntp_sync_start_ipv4); - suite_add_tcase(s, TCase_pico_sntp_sync_start_ipv4); - 
tcase_add_test(TCase_pico_sntp_sync_start_ipv6, tc_pico_sntp_sync_start_ipv6); - suite_add_tcase(s, TCase_pico_sntp_sync_start_ipv6); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_stack.c b/kernel/picotcp/test/unit/modunit_pico_stack.c deleted file mode 100644 index bc51638..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_stack.c +++ /dev/null @@ -1,145 +0,0 @@ -#include "pico_config.h" -#include "pico_frame.h" -#include "pico_device.h" -#include "pico_protocol.h" -#include "pico_stack.h" -#include "pico_addressing.h" -#include "pico_dns_client.h" -#include "pico_eth.h" -#include "pico_arp.h" -#include "pico_ipv4.h" -#include "pico_ipv6.h" -#include "pico_icmp4.h" -#include "pico_igmp.h" -#include "pico_udp.h" -#include "pico_tcp.h" -#include "pico_socket.h" -#include "heap.h" -#include "stack/pico_stack.c" -#include "check.h" - - -Suite *pico_suite(void); -void fake_timer(pico_time __attribute__((unused)) now, void __attribute__((unused)) *n); - -START_TEST(tc_pico_ll_receive) -{ - /* TODO: test this: static int32_t pico_ll_receive(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_ll_check_bcast) -{ - /* TODO: test this: static void pico_ll_check_bcast(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_destination_is_bcast) -{ - /* TODO: test this: static int destination_is_bcast(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_destination_is_mcast) -{ - /* TODO: test this: static int destination_is_mcast(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_eth) -{ - /* TODO: test this: static struct pico_eth *pico_ethernet_mcast_translate(struct pico_frame *f, uint8_t *pico_mcast_mac) */ -} -END_TEST -START_TEST(tc_pico_ethsend_local) -{ - /* TODO: test this: static int32_t pico_ethsend_local(struct pico_frame *f, 
struct pico_eth_hdr *hdr, int *ret) */ -} -END_TEST -START_TEST(tc_pico_ethsend_bcast) -{ - /* TODO: test this: static int32_t pico_ethsend_bcast(struct pico_frame *f, int *ret) */ -} -END_TEST -START_TEST(tc_pico_ethsend_dispatch) -{ - /* TODO: test this: static int32_t pico_ethsend_dispatch(struct pico_frame *f, int *ret) */ -} -END_TEST -START_TEST(tc_calc_score) -{ - /* TODO: test this: static int calc_score(int *score, int *index, int avg[][PROTO_DEF_AVG_NR], int *ret) */ -} -END_TEST - -#ifdef PICO_FAULTY -void fake_timer(pico_time __attribute__((unused)) now, void __attribute__((unused)) *n) -{ - -} -#endif - -START_TEST(tc_stack_generic) -{ -#ifdef PICO_FAULTY - printf("Testing with faulty memory in pico_stack_init (11)\n"); - pico_set_mm_failure(13); - fail_if(pico_stack_init() != -1); -#endif - pico_stack_init(); -#ifdef PICO_FAULTY - printf("Testing with faulty memory in pico_timer_add (1)\n"); - pico_set_mm_failure(1); - fail_if(pico_timer_add(0, fake_timer, NULL) != 0); -#endif - - -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_pico_ll_receive = tcase_create("Unit test for pico_ll_receive"); - TCase *TCase_pico_ll_check_bcast = tcase_create("Unit test for pico_ll_check_bcast"); - TCase *TCase_destination_is_bcast = tcase_create("Unit test for destination_is_bcast"); - TCase *TCase_destination_is_mcast = tcase_create("Unit test for destination_is_mcast"); - TCase *TCase_pico_eth = tcase_create("Unit test for pico_eth"); - TCase *TCase_pico_ethsend_local = tcase_create("Unit test for pico_ethsend_local"); - TCase *TCase_pico_ethsend_bcast = tcase_create("Unit test for pico_ethsend_bcast"); - TCase *TCase_pico_ethsend_dispatch = tcase_create("Unit test for pico_ethsend_dispatch"); - TCase *TCase_calc_score = tcase_create("Unit test for calc_score"); - TCase *TCase_stack_generic = tcase_create("GENERIC stack initialization unit test"); - - - tcase_add_test(TCase_pico_ll_receive, tc_pico_ll_receive); - 
suite_add_tcase(s, TCase_pico_ll_receive); - tcase_add_test(TCase_pico_ll_check_bcast, tc_pico_ll_check_bcast); - suite_add_tcase(s, TCase_pico_ll_check_bcast); - tcase_add_test(TCase_destination_is_bcast, tc_destination_is_bcast); - suite_add_tcase(s, TCase_destination_is_bcast); - tcase_add_test(TCase_destination_is_mcast, tc_destination_is_mcast); - suite_add_tcase(s, TCase_destination_is_mcast); - tcase_add_test(TCase_pico_eth, tc_pico_eth); - suite_add_tcase(s, TCase_pico_eth); - tcase_add_test(TCase_pico_ethsend_local, tc_pico_ethsend_local); - suite_add_tcase(s, TCase_pico_ethsend_local); - tcase_add_test(TCase_pico_ethsend_bcast, tc_pico_ethsend_bcast); - suite_add_tcase(s, TCase_pico_ethsend_bcast); - tcase_add_test(TCase_pico_ethsend_dispatch, tc_pico_ethsend_dispatch); - suite_add_tcase(s, TCase_pico_ethsend_dispatch); - tcase_add_test(TCase_calc_score, tc_calc_score); - suite_add_tcase(s, TCase_calc_score); - tcase_add_test(TCase_stack_generic, tc_stack_generic); - suite_add_tcase(s, TCase_stack_generic); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_strings.c b/kernel/picotcp/test/unit/modunit_pico_strings.c deleted file mode 100644 index b0d5104..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_strings.c +++ /dev/null @@ -1,59 +0,0 @@ -#include "modules/pico_strings.c" -#include "check.h" - -Suite *pico_suite(void); - -START_TEST(tc_get_string_terminator_position) -{ - char buf[6] = "unit"; - get_string_terminator_position(NULL,0); - fail_if(get_string_terminator_position(buf,2) != 0); - fail_if(get_string_terminator_position(buf,6) != &buf[4]); -} -END_TEST -START_TEST(tc_pico_strncasecmp) -{ - fail_if(pico_strncasecmp("unit","UNIT",4) != 0); - fail_if(pico_strncasecmp("unit1","UNIT2",5) != -1); - 
fail_if(pico_strncasecmp("unit2","UNIT1",5) != 1); -} -END_TEST -START_TEST(tc_num2string) -{ - char buf[20]; - fail_if(num2string(-1,NULL,1) != -1); - fail_if(num2string(1,NULL,1) != -1); - fail_if(num2string(1,buf,1) != -1); - fail_if(num2string(1,buf,3) != 2); - fail_if(num2string(11,buf,3) != 3); - fail_if(num2string(112,buf,4) != 4); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_get_string_terminator_position = tcase_create("Unit test for get_string_terminator_position"); - TCase *TCase_num2string = tcase_create("Unit test for num2string"); - TCase *TCase_pico_strncasecmp = tcase_create("Unit test for pico_strncasecmp"); - - tcase_add_test(TCase_get_string_terminator_position, tc_get_string_terminator_position); - suite_add_tcase(s, TCase_get_string_terminator_position); - tcase_add_test(TCase_num2string,tc_num2string); - suite_add_tcase(s, TCase_num2string); - tcase_add_test(TCase_pico_strncasecmp,tc_pico_strncasecmp); - suite_add_tcase(s, TCase_pico_strncasecmp); - - return s; -} -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_tcp.c b/kernel/picotcp/test/unit/modunit_pico_tcp.c deleted file mode 100644 index f2b19ca..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_tcp.c +++ /dev/null @@ -1,863 +0,0 @@ -#include "pico_tcp.h" -#include "pico_config.h" -#include "pico_eth.h" -#include "pico_socket.h" -#include "pico_stack.h" -#include "pico_socket.h" -#include "pico_queue.h" -#include "pico_tree.h" -#include "modules/pico_tcp.c" -#include "check.h" - -Suite *pico_suite(void); - -static uint32_t timers_added = 0; -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - IGNORE_PARAMETER(expire); - IGNORE_PARAMETER(timer); - IGNORE_PARAMETER(arg); - return 
++timers_added; -} - -START_TEST(tc_input_segment_compare) -{ - struct tcp_input_segment A = { - .seq = 0xFFFFFFFF - }; - struct tcp_input_segment B = { - .seq = 0xFFFFFFFe - }; - struct tcp_input_segment a = { - .seq = 0x01 - }; - struct tcp_input_segment b = { - .seq = 0x02 - }; - - fail_if(input_segment_compare(&A, &B) <= 0); - fail_if(input_segment_compare(&a, &b) >= 0); - fail_if(input_segment_compare(&A, &b) >= 0); - fail_if(input_segment_compare(&A, &A) != 0); -} -END_TEST -START_TEST(tc_tcp_input_segment) -{ - /* TODO: test this: static struct tcp_input_segment *segment_from_frame(struct pico_frame *f) */ - struct pico_frame *f = pico_frame_alloc(60); - struct tcp_input_segment *seg; - - fail_if(!f); - f->payload = f->start; - f->payload_len = 60; - f->transport_hdr = f->payload; - f->transport_len = (uint16_t)(f->payload_len - 40); - memset(f->payload, 'c', f->payload_len); - ((struct pico_tcp_hdr *)((f)->transport_hdr))->seq = long_be(0xdeadbeef); - - seg = segment_from_frame(f); - fail_if(!seg); - fail_if(seg->seq != 0xdeadbeef); - fail_if(seg->payload_len != f->payload_len); - fail_if(memcmp(seg->payload, f->payload, f->payload_len) != 0); - -#ifdef PICO_FAULTY - printf("Testing with faulty memory in segment_from_frame (1)\n"); - pico_set_mm_failure(1); - seg = segment_from_frame(f); - fail_if(seg); - - printf("Testing with faulty memory in segment_from_frame (2)\n"); - pico_set_mm_failure(2); - seg = segment_from_frame(f); - fail_if(seg); -#endif - printf("Testing segment_from_frame with empty payload\n"); - f->payload_len = 0; - seg = segment_from_frame(f); - fail_if(seg); - -} -END_TEST -START_TEST(tc_segment_compare) -{ - /* TODO: test this: static int segment_compare(void *ka, void *kb) */ - struct pico_frame *a = pico_frame_alloc(40); - struct pico_frame *b = pico_frame_alloc(60); - a->transport_hdr = a->start; - b->transport_hdr = b->start; - - ((struct pico_tcp_hdr *)((b)->transport_hdr))->seq = long_be(0xaa00); - ((struct pico_tcp_hdr 
*)((a)->transport_hdr))->seq = long_be(0xffffaa00); - fail_if(segment_compare(a, b) >= 0); - fail_if(segment_compare(a, a) != 0); - - -} -END_TEST -START_TEST(tc_tcp_discard_all_segments) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)pico_tcp_open(PICO_PROTO_IPV4); - struct pico_frame *f = pico_frame_alloc(80); - struct tcp_input_segment *is; - fail_if(!t); - fail_if(!f); - - printf("Testing enqueuing bogus frame\n"); - f->buffer_len = 0; - fail_if(pico_enqueue_segment(&t->tcpq_out, f) >= 0); - f->buffer_len = 80; - f->transport_hdr = f->start; - f->transport_len = (uint16_t)(f->buffer_len - 40); - f->payload = f->start + 40; - f->payload_len = 40; - memset(f->payload, 'c', f->payload_len); - is = segment_from_frame(f); - fail_if(!is); - is->payload_len = 0; - fail_if(pico_enqueue_segment(&t->tcpq_in, is) >= 0); - is->payload_len = 40; - - /* Successfull cases */ - fail_if(pico_enqueue_segment(&t->tcpq_out, f) <= 0); - fail_if(pico_enqueue_segment(&t->tcpq_in, is) <= 0); - - /* Fail because size exceeded. Must return 0. */ - t->tcpq_out.max_size = 50; - t->tcpq_in.max_size = 50; - fail_if(pico_enqueue_segment(&t->tcpq_out, f) != 0); - fail_if(pico_enqueue_segment(&t->tcpq_in, is) != 0); - - -#ifdef PICO_FAULTY - /* Fail because the tree cannot allocate a new node. 
Should return 0 */ - printf("Testing with faulty memory (1)\n"); - pico_set_mm_failure(1); - fail_if(pico_enqueue_segment(&t->tcpq_out, f) > 0); - pico_set_mm_failure(1); - fail_if(pico_enqueue_segment(&t->tcpq_in, is) > 0); - - printf("Testing input segment conversion with faulty mm(1)\n"); - pico_set_mm_failure(1); - is = segment_from_frame(f); - fail_if(is); - printf("Testing input segment conversion with faulty mm(2)\n"); - pico_set_mm_failure(2); - is = segment_from_frame(f); - fail_if(is); -#endif - - /* Discard all segments */ - fail_if(t->tcpq_out.size == 0); - fail_if(t->tcpq_out.frames == 0); - tcp_discard_all_segments(&t->tcpq_out); - fail_if(t->tcpq_out.size != 0); - fail_if(t->tcpq_out.frames != 0); - - fail_if(t->tcpq_in.size == 0); - fail_if(t->tcpq_in.frames == 0); - fail_if(pico_tcp_queue_in_is_empty(&t->sock)); - - tcp_discard_all_segments(&t->tcpq_in); - fail_if(t->tcpq_in.size != 0); - fail_if(t->tcpq_in.frames != 0); - fail_unless(pico_tcp_queue_in_is_empty(&t->sock)); - - - /* Testing next_segment with NULLS */ - fail_if(next_segment(NULL, NULL) != NULL); -} -END_TEST - -START_TEST(tc_release_until) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)pico_tcp_open(PICO_PROTO_IPV6); - struct pico_frame *f; - uint32_t i = 0; - int ret = 0; - struct tcp_input_segment *is; - fail_if(!t); - ret = release_until(&t->tcpq_out, 0); - fail_unless(ret == 0); - - /* Test with output queue */ - for (i = 0; i < 32; i++) { - f = pico_frame_alloc(84); - fail_if(!f); - f->transport_hdr = f->start; - f->transport_len = (uint16_t)f->buffer_len; - f->payload_len = f->transport_len; - ((struct pico_tcp_hdr *)((f)->transport_hdr))->seq = long_be(0xaa00 + f->buffer_len * i); - printf("inserting frame seq = %08x len = %d\n", 0xaa00 + f->buffer_len * i, f->buffer_len); - fail_if(pico_enqueue_segment(&t->tcpq_out, f) <= 0); - } - ret = release_until(&t->tcpq_out, 0xaa00 + f->buffer_len * 30); - printf("Release until %08x\n", 0xaa00 + f->buffer_len * 30); - 
fail_if(ret != 30); - printf("Ret is %d\n", ret); - printf("Remaining is %d\n", t->tcpq_out.frames); - fail_if(t->tcpq_out.frames != 2); - - /* Test with input queue */ - for (i = 0; i < 32; i++) { - f = pico_frame_alloc(84); - fail_if(!f); - f->transport_hdr = f->start; - f->transport_len = (uint16_t)f->buffer_len; - f->payload_len = f->transport_len; - f->payload = f->start; - ((struct pico_tcp_hdr *)((f)->transport_hdr))->seq = long_be(0xaa00 + f->buffer_len * i); - is = segment_from_frame(f); - fail_if(!is); - printf("inserting Input frame seq = %08x len = %d\n", long_be(is->seq), is->payload_len); - fail_if(!is); - fail_if(pico_enqueue_segment(&t->tcpq_in, is) <= 0); - } - ret = release_until(&t->tcpq_in, 0xaa00 + f->buffer_len * 30); - printf("Release until %08x\n", 0xaa00 + f->buffer_len * 30); - fail_if(ret != 30); - printf("Ret is %d\n", ret); - printf("Remaining is %d\n", t->tcpq_out.frames); - fail_if(t->tcpq_out.frames != 2); -} -END_TEST - -START_TEST(tc_release_all_until) -{ - struct pico_socket_tcp *t = (struct pico_socket_tcp *)pico_tcp_open(PICO_PROTO_IPV4); - struct pico_frame *f; - uint32_t i = 0; - int ret = 0; - struct tcp_input_segment *is; - pico_time tm; - fail_if(!t); - ret = release_all_until(&t->tcpq_out, 0, &tm); - fail_unless(ret == 0); - - /* Test with output queue */ - for (i = 0; i < 32; i++) { - f = pico_frame_alloc(84); - fail_if(!f); - f->transport_hdr = f->start; - f->transport_len = (uint16_t)f->buffer_len; - f->payload_len = f->transport_len; - ((struct pico_tcp_hdr *)((f)->transport_hdr))->seq = long_be(0xaa00 + f->buffer_len * i); - printf("inserting frame seq = %08x len = %d\n", 0xaa00 + f->buffer_len * i, f->buffer_len); - fail_if(pico_enqueue_segment(&t->tcpq_out, f) <= 0); - } - ret = release_all_until(&t->tcpq_out, 0xaa00 + f->buffer_len * 30, &tm); - printf("Release until %08x\n", 0xaa00 + f->buffer_len * 30); - fail_if(ret != 30); - printf("Ret is %d\n", ret); - printf("Remaining is %d\n", t->tcpq_out.frames); - 
fail_if(t->tcpq_out.frames != 2); - - /* Test with input queue */ - for (i = 0; i < 32; i++) { - f = pico_frame_alloc(84); - fail_if(!f); - f->transport_hdr = f->start; - f->transport_len = (uint16_t)f->buffer_len; - f->payload_len = f->transport_len; - f->payload = f->start; - ((struct pico_tcp_hdr *)((f)->transport_hdr))->seq = long_be(0xaa00 + f->buffer_len * i); - is = segment_from_frame(f); - fail_if(!is); - printf("inserting Input frame seq = %08x len = %d\n", long_be(is->seq), is->payload_len); - fail_if(!is); - fail_if(pico_enqueue_segment(&t->tcpq_in, is) <= 0); - } - ret = release_all_until(&t->tcpq_in, 0xaa00 + f->buffer_len * 30, &tm); - printf("Release until %08x\n", 0xaa00 + f->buffer_len * 30); - fail_if(ret != 30); - printf("Ret is %d\n", ret); - printf("Remaining is %d\n", t->tcpq_out.frames); - fail_if(t->tcpq_out.frames != 2); - - /* Test enqueue_segment with NULL segment */ - fail_if(pico_enqueue_segment(NULL, NULL) != -1); - - -} -END_TEST -START_TEST(tc_tcp_send_fin) -{ - /* TODO: test this: static void tcp_send_fin(struct pico_socket_tcp *t); */ -} -END_TEST -START_TEST(tc_pico_tcp_process_out) -{ - /* TODO: test this: static int pico_tcp_process_out(struct pico_protocol *self, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_pico_paws) -{ - pico_paws(); - /* Nothing to test for a random function...*/ -} -END_TEST - - -START_TEST(tc_tcp_add_options) -{ - /* TODO: test this: static void tcp_add_options(struct pico_socket_tcp *ts, struct pico_frame *f, uint16_t flags, uint16_t optsiz) */ - struct pico_socket_tcp ts = { }; - struct pico_frame *f = pico_frame_alloc(100); - uint16_t flags = 0; - uint16_t optsiz = 50; - uint8_t *frame_opt_buff; - int i; - struct tcp_sack_block *a, *b, *c; - uint32_t al = 0xa0, - ar = 0xaf, - bl = 0xb0, - br = 0xbf, - cl = 0xc0, - cr = 0xcf; - f->transport_hdr = f->start; - f->transport_len = (uint16_t)f->buffer_len; - f->payload_len = f->transport_len; - frame_opt_buff = f->transport_hdr + PICO_SIZE_TCPHDR; - - 
/* Window scale only */ - printf("Testing window scale option\n"); - ts.wnd_scale = 66; - tcp_add_options(&ts, f, flags, optsiz); - fail_if(frame_opt_buff[0] != PICO_TCP_OPTION_WS); - fail_if(frame_opt_buff[1] != PICO_TCPOPTLEN_WS); - fail_if(frame_opt_buff[2] != 66); - for (i = 3; i < optsiz - 1; i++) - fail_if(frame_opt_buff[i] != PICO_TCP_OPTION_NOOP); - fail_if(frame_opt_buff[optsiz - 1] != PICO_TCP_OPTION_END); - - /* MSS + SACK_OK + WS + TIMESTAMPS */ - printf("Testing full SYN options\n"); - flags = PICO_TCP_SYN; - ts.wnd_scale = 66; - ts.mss = 0xAA88; - tcp_add_options(&ts, f, flags, optsiz); - fail_if(frame_opt_buff[0] != PICO_TCP_OPTION_MSS); - fail_if(frame_opt_buff[1] != PICO_TCPOPTLEN_MSS); - fail_if(frame_opt_buff[2] != 0xAA); - fail_if(frame_opt_buff[3] != 0x88); - fail_if(frame_opt_buff[4] != PICO_TCP_OPTION_SACK_OK); - fail_if(frame_opt_buff[5] != PICO_TCPOPTLEN_SACK_OK); - fail_if(frame_opt_buff[6] != PICO_TCP_OPTION_WS); - fail_if(frame_opt_buff[7] != PICO_TCPOPTLEN_WS); - fail_if(frame_opt_buff[8] != 66); - fail_if(frame_opt_buff[9] != PICO_TCP_OPTION_TIMESTAMP); - fail_if(frame_opt_buff[10] != PICO_TCPOPTLEN_TIMESTAMP); - /* Timestamps: up to byte 18 */ - for (i = 19; i < optsiz - 1; i++) - fail_if(frame_opt_buff[i] != PICO_TCP_OPTION_NOOP); - fail_if(frame_opt_buff[optsiz - 1] != PICO_TCP_OPTION_END); - - /* Testing SACKs */ - printf("Testing full SACK options\n"); - a = PICO_ZALLOC(sizeof (struct tcp_sack_block)); - b = PICO_ZALLOC(sizeof (struct tcp_sack_block)); - c = PICO_ZALLOC(sizeof (struct tcp_sack_block)); - a->left = al; - a->right = ar; - a->next = b; - b->left = bl; - b->right = br; - b->next = c; - c->left = cl; - c->right = cr; - c->next = NULL; - - ts.sack_ok = 1; - ts.sacks = a; - flags = PICO_TCP_ACK; - tcp_add_options(&ts, f, flags, optsiz); - fail_if(frame_opt_buff[0] != PICO_TCP_OPTION_WS); - fail_if(frame_opt_buff[1] != PICO_TCPOPTLEN_WS); - fail_if(frame_opt_buff[2] != 66); - fail_if(frame_opt_buff[3] != 
PICO_TCP_OPTION_SACK); - fail_if(frame_opt_buff[4] != PICO_TCPOPTLEN_SACK + 6 * (sizeof(uint32_t))); - fail_if(memcmp(frame_opt_buff + 5, &al, 4) != 0); - fail_if(memcmp(frame_opt_buff + 9, &ar, 4) != 0); - fail_if(memcmp(frame_opt_buff + 13, &bl, 4) != 0); - fail_if(memcmp(frame_opt_buff + 17, &br, 4) != 0); - fail_if(memcmp(frame_opt_buff + 21, &cl, 4) != 0); - fail_if(memcmp(frame_opt_buff + 25, &cr, 4) != 0); - fail_if(ts.sacks != NULL); - for (i = 29; i < optsiz - 1; i++) - fail_if(frame_opt_buff[i] != PICO_TCP_OPTION_NOOP); - fail_if(frame_opt_buff[optsiz - 1] != PICO_TCP_OPTION_END); - - - - - - -} -END_TEST -START_TEST(tc_tcp_options_size_frame) -{ - /* TODO: test this: static uint16_t tcp_options_size_frame(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_add_options_frame) -{ - /* TODO: test this: static void tcp_add_options_frame(struct pico_socket_tcp *ts, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_send_ack) -{ - /* TODO: test this: static void tcp_send_ack(struct pico_socket_tcp *t); */ -} -END_TEST -START_TEST(tc_tcp_set_space) -{ - /* TODO: test this: static void tcp_set_space(struct pico_socket_tcp *t) */ -} -END_TEST -START_TEST(tc_tcp_options_size) -{ - /* TODO: test this: static uint16_t tcp_options_size(struct pico_socket_tcp *t, uint16_t flags) */ -} -END_TEST -START_TEST(tc_tcp_process_sack) -{ - /* TODO: test this: static void tcp_process_sack(struct pico_socket_tcp *t, uint32_t start, uint32_t end) */ -} -END_TEST -START_TEST(tc_tcp_rcv_sack) -{ - /* TODO: test this: static void tcp_rcv_sack(struct pico_socket_tcp *t, uint8_t *opt, int len) */ -} -END_TEST -START_TEST(tc_tcp_parse_options) -{ - /* TODO: test this: static void tcp_parse_options(struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_send) -{ - /* TODO: test this: static int tcp_send(struct pico_socket_tcp *ts, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_sock_stats) -{ - /* TODO: test this: static void sock_stats(uint32_t when, void *arg) */ -} 
-END_TEST -START_TEST(tc_initconn_retry) -{ - /* TODO: test this: static void initconn_retry(pico_time when, void *arg) */ -} -END_TEST -START_TEST(tc_tcp_send_synack) -{ - /* TODO: test this: static int tcp_send_synack(struct pico_socket *s) */ -} -END_TEST -START_TEST(tc_tcp_send_empty) -{ - /* TODO: test this: static void tcp_send_empty(struct pico_socket_tcp *t, uint16_t flags, int is_keepalive) */ -} -END_TEST -START_TEST(tc_tcp_send_probe) -{ - /* TODO: test this: static void tcp_send_probe(struct pico_socket_tcp *t) */ -} -END_TEST -START_TEST(tc_tcp_send_rst) -{ - /* TODO: test this: static int tcp_send_rst(struct pico_socket *s, struct pico_frame *fr) */ -} -END_TEST -START_TEST(tc_tcp_nosync_rst) -{ - /* TODO: test this: static int tcp_nosync_rst(struct pico_socket *s, struct pico_frame *fr) */ -} -END_TEST -START_TEST(tc_tcp_sack_prepare) -{ - /* TODO: test this: static void tcp_sack_prepare(struct pico_socket_tcp *t) */ -} -END_TEST -START_TEST(tc_tcp_data_in) -{ - /* TODO: test this: static int tcp_data_in(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_ack_advance_una) -{ - /* TODO: test this: static int tcp_ack_advance_una(struct pico_socket_tcp *t, struct pico_frame *f, pico_time *timestamp) */ -} -END_TEST -START_TEST(tc_time_diff) -{ - /* TODO: test this: static uint16_t time_diff(pico_time a, pico_time b) */ -} -END_TEST -START_TEST(tc_tcp_rtt) -{ - /* TODO: test this: static void tcp_rtt(struct pico_socket_tcp *t, uint32_t rtt) */ - -} -END_TEST -START_TEST(tc_tcp_congestion_control) -{ - /* TODO: test this: static void tcp_congestion_control(struct pico_socket_tcp *t) */ -} -END_TEST -START_TEST(tc_add_retransmission_timer) -{ - /* TODO: test this: static void add_retransmission_timer(struct pico_socket_tcp *t, pico_time next_ts); */ -} -END_TEST -START_TEST(tc_tcp_first_timeout) -{ - /* TODO: test this: static void tcp_first_timeout(struct pico_socket_tcp *t) */ -} -END_TEST -START_TEST(tc_tcp_rto_xmit) -{ - /* 
TODO: test this: static int tcp_rto_xmit(struct pico_socket_tcp *t, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_next_zerowindow_probe) -{ - /* TODO: test this: static void tcp_next_zerowindow_probe(struct pico_socket_tcp *t) */ -} -END_TEST -START_TEST(tc_tcp_is_allowed_to_send) -{ - /* TODO: test this: static int tcp_is_allowed_to_send(struct pico_socket_tcp *t) */ -} -END_TEST -START_TEST(tc_tcp_retrans_timeout) -{ - /* TODO: test this: static void tcp_retrans_timeout(pico_time val, void *sock) */ -} -END_TEST -START_TEST(tc_tcp_retrans) -{ - /* TODO: test this: static int tcp_retrans(struct pico_socket_tcp *t, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_ack_dbg) -{ - /* TODO: test this: static void tcp_ack_dbg(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_ack) -{ - /* TODO: test this: static int tcp_ack(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_finwaitack) -{ - /* TODO: test this: static int tcp_finwaitack(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_deltcb) -{ - /* TODO: test this: static void tcp_deltcb(pico_time when, void *arg) */ -} -END_TEST -START_TEST(tc_tcp_finwaitfin) -{ - /* TODO: test this: static int tcp_finwaitfin(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_closewaitack) -{ - /* TODO: test this: static int tcp_closewaitack(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_lastackwait) -{ - /* TODO: test this: static int tcp_lastackwait(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_syn) -{ - /* TODO: test this: static int tcp_syn(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_set_init_point) -{ - /* TODO: test this: static void tcp_set_init_point(struct pico_socket *s) */ -} -END_TEST -START_TEST(tc_tcp_synack) -{ - /* TODO: test this: static int tcp_synack(struct pico_socket *s, struct 
pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_first_ack) -{ - /* TODO: test this: static int tcp_first_ack(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_closewait) -{ - /* TODO: test this: static int tcp_closewait(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_fin) -{ - /* TODO: test this: static int tcp_fin(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_rcvfin) -{ - /* TODO: test this: static int tcp_rcvfin(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_finack) -{ - /* TODO: test this: static int tcp_finack(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_force_closed) -{ - /* TODO: test this: static void tcp_force_closed(struct pico_socket *s) */ -} -END_TEST -START_TEST(tc_tcp_wakeup_pending) -{ - /* TODO: test this: static void tcp_wakeup_pending(struct pico_socket *s, uint16_t ev) */ -} -END_TEST -START_TEST(tc_tcp_rst) -{ - /* TODO: test this: static int tcp_rst(struct pico_socket *s, struct pico_frame *f) */ -} -END_TEST -START_TEST(tc_tcp_halfopencon) -{ - /* TODO: test this: static int tcp_halfopencon(struct pico_socket *s, struct pico_frame *fr) */ -} -END_TEST -START_TEST(tc_tcp_closeconn) -{ - /* TODO: test this: static int tcp_closeconn(struct pico_socket *s, struct pico_frame *fr) */ -} -END_TEST -START_TEST(tc_invalid_flags) -{ - /* TODO: test this: static uint8_t invalid_flags(struct pico_socket *s, uint8_t flags) */ -} -END_TEST -START_TEST(tc_checkLocalClosing) -{ - /* TODO: test this: static int checkLocalClosing(struct pico_socket *s) */ -} -END_TEST -START_TEST(tc_checkRemoteClosing) -{ - /* TODO: test this: static int checkRemoteClosing(struct pico_socket *s) */ -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *TCase_input_segment_compare = tcase_create("Unit test for input_segment_compare"); - TCase *TCase_tcp_input_segment = 
tcase_create("Unit test for tcp_input_segment"); - TCase *TCase_segment_compare = tcase_create("Unit test for segment_compare"); - TCase *TCase_tcp_discard_all_segments = tcase_create("Unit test for tcp_discard_all_segments"); - TCase *TCase_release_until = tcase_create("Unit test for release_until"); - TCase *TCase_release_all_until = tcase_create("Unit test for release_all_until"); - TCase *TCase_tcp_send_fin = tcase_create("Unit test for tcp_send_fin"); - TCase *TCase_pico_tcp_process_out = tcase_create("Unit test for pico_tcp_process_out"); - TCase *TCase_pico_paws = tcase_create("Unit test for pico_paws"); - TCase *TCase_tcp_add_options = tcase_create("Unit test for tcp_add_options"); - TCase *TCase_tcp_options_size_frame = tcase_create("Unit test for tcp_options_size_frame"); - TCase *TCase_tcp_add_options_frame = tcase_create("Unit test for tcp_add_options_frame"); - TCase *TCase_tcp_send_ack = tcase_create("Unit test for tcp_send_ack"); - TCase *TCase_tcp_set_space = tcase_create("Unit test for tcp_set_space"); - TCase *TCase_tcp_options_size = tcase_create("Unit test for tcp_options_size"); - TCase *TCase_tcp_process_sack = tcase_create("Unit test for tcp_process_sack"); - TCase *TCase_tcp_rcv_sack = tcase_create("Unit test for tcp_rcv_sack"); - TCase *TCase_tcp_parse_options = tcase_create("Unit test for tcp_parse_options"); - TCase *TCase_tcp_send = tcase_create("Unit test for tcp_send"); - TCase *TCase_sock_stats = tcase_create("Unit test for sock_stats"); - TCase *TCase_initconn_retry = tcase_create("Unit test for initconn_retry"); - TCase *TCase_tcp_send_synack = tcase_create("Unit test for tcp_send_synack"); - TCase *TCase_tcp_send_empty = tcase_create("Unit test for tcp_send_empty"); - TCase *TCase_tcp_send_probe = tcase_create("Unit test for tcp_send_probe"); - TCase *TCase_tcp_send_rst = tcase_create("Unit test for tcp_send_rst"); - TCase *TCase_tcp_nosync_rst = tcase_create("Unit test for tcp_nosync_rst"); - TCase *TCase_tcp_sack_prepare = 
tcase_create("Unit test for tcp_sack_prepare"); - TCase *TCase_tcp_data_in = tcase_create("Unit test for tcp_data_in"); - TCase *TCase_tcp_ack_advance_una = tcase_create("Unit test for tcp_ack_advance_una"); - TCase *TCase_time_diff = tcase_create("Unit test for time_diff"); - TCase *TCase_tcp_rtt = tcase_create("Unit test for tcp_rtt"); - TCase *TCase_tcp_congestion_control = tcase_create("Unit test for tcp_congestion_control"); - TCase *TCase_add_retransmission_timer = tcase_create("Unit test for add_retransmission_timer"); - TCase *TCase_tcp_first_timeout = tcase_create("Unit test for tcp_first_timeout"); - TCase *TCase_tcp_rto_xmit = tcase_create("Unit test for tcp_rto_xmit"); - TCase *TCase_tcp_next_zerowindow_probe = tcase_create("Unit test for tcp_next_zerowindow_probe"); - TCase *TCase_tcp_is_allowed_to_send = tcase_create("Unit test for tcp_is_allowed_to_send"); - TCase *TCase_tcp_retrans_timeout = tcase_create("Unit test for tcp_retrans_timeout"); - TCase *TCase_tcp_retrans = tcase_create("Unit test for tcp_retrans"); - TCase *TCase_tcp_ack_dbg = tcase_create("Unit test for tcp_ack_dbg"); - TCase *TCase_tcp_ack = tcase_create("Unit test for tcp_ack"); - TCase *TCase_tcp_finwaitack = tcase_create("Unit test for tcp_finwaitack"); - TCase *TCase_tcp_deltcb = tcase_create("Unit test for tcp_deltcb"); - TCase *TCase_tcp_finwaitfin = tcase_create("Unit test for tcp_finwaitfin"); - TCase *TCase_tcp_closewaitack = tcase_create("Unit test for tcp_closewaitack"); - TCase *TCase_tcp_lastackwait = tcase_create("Unit test for tcp_lastackwait"); - TCase *TCase_tcp_syn = tcase_create("Unit test for tcp_syn"); - TCase *TCase_tcp_set_init_point = tcase_create("Unit test for tcp_set_init_point"); - TCase *TCase_tcp_synack = tcase_create("Unit test for tcp_synack"); - TCase *TCase_tcp_first_ack = tcase_create("Unit test for tcp_first_ack"); - TCase *TCase_tcp_closewait = tcase_create("Unit test for tcp_closewait"); - TCase *TCase_tcp_fin = tcase_create("Unit test for 
tcp_fin"); - TCase *TCase_tcp_rcvfin = tcase_create("Unit test for tcp_rcvfin"); - TCase *TCase_tcp_finack = tcase_create("Unit test for tcp_finack"); - TCase *TCase_tcp_force_closed = tcase_create("Unit test for tcp_force_closed"); - TCase *TCase_tcp_wakeup_pending = tcase_create("Unit test for tcp_wakeup_pending"); - TCase *TCase_tcp_rst = tcase_create("Unit test for tcp_rst"); - TCase *TCase_tcp_halfopencon = tcase_create("Unit test for tcp_halfopencon"); - TCase *TCase_tcp_closeconn = tcase_create("Unit test for tcp_closeconn"); - TCase *TCase_invalid_flags = tcase_create("Unit test for invalid_flags"); - TCase *TCase_checkLocalClosing = tcase_create("Unit test for checkLocalClosing"); - TCase *TCase_checkRemoteClosing = tcase_create("Unit test for checkRemoteClosing"); - - - tcase_add_test(TCase_input_segment_compare, tc_input_segment_compare); - suite_add_tcase(s, TCase_input_segment_compare); - tcase_add_test(TCase_tcp_input_segment, tc_tcp_input_segment); - suite_add_tcase(s, TCase_tcp_input_segment); - tcase_add_test(TCase_segment_compare, tc_segment_compare); - suite_add_tcase(s, TCase_segment_compare); - tcase_add_test(TCase_tcp_discard_all_segments, tc_tcp_discard_all_segments); - suite_add_tcase(s, TCase_tcp_discard_all_segments); - tcase_add_test(TCase_release_until, tc_release_until); - suite_add_tcase(s, TCase_release_until); - tcase_add_test(TCase_release_all_until, tc_release_all_until); - suite_add_tcase(s, TCase_release_all_until); - tcase_add_test(TCase_tcp_send_fin, tc_tcp_send_fin); - suite_add_tcase(s, TCase_tcp_send_fin); - tcase_add_test(TCase_pico_tcp_process_out, tc_pico_tcp_process_out); - suite_add_tcase(s, TCase_pico_tcp_process_out); - tcase_add_test(TCase_pico_paws, tc_pico_paws); - suite_add_tcase(s, TCase_pico_paws); - tcase_add_test(TCase_tcp_add_options, tc_tcp_add_options); - suite_add_tcase(s, TCase_tcp_add_options); - tcase_add_test(TCase_tcp_options_size_frame, tc_tcp_options_size_frame); - suite_add_tcase(s, 
TCase_tcp_options_size_frame); - tcase_add_test(TCase_tcp_add_options_frame, tc_tcp_add_options_frame); - suite_add_tcase(s, TCase_tcp_add_options_frame); - tcase_add_test(TCase_tcp_send_ack, tc_tcp_send_ack); - suite_add_tcase(s, TCase_tcp_send_ack); - tcase_add_test(TCase_tcp_set_space, tc_tcp_set_space); - suite_add_tcase(s, TCase_tcp_set_space); - tcase_add_test(TCase_tcp_options_size, tc_tcp_options_size); - suite_add_tcase(s, TCase_tcp_options_size); - tcase_add_test(TCase_tcp_process_sack, tc_tcp_process_sack); - suite_add_tcase(s, TCase_tcp_process_sack); - tcase_add_test(TCase_tcp_rcv_sack, tc_tcp_rcv_sack); - suite_add_tcase(s, TCase_tcp_rcv_sack); - tcase_add_test(TCase_tcp_parse_options, tc_tcp_parse_options); - suite_add_tcase(s, TCase_tcp_parse_options); - tcase_add_test(TCase_tcp_send, tc_tcp_send); - suite_add_tcase(s, TCase_tcp_send); - tcase_add_test(TCase_sock_stats, tc_sock_stats); - suite_add_tcase(s, TCase_sock_stats); - tcase_add_test(TCase_initconn_retry, tc_initconn_retry); - suite_add_tcase(s, TCase_initconn_retry); - tcase_add_test(TCase_tcp_send_synack, tc_tcp_send_synack); - suite_add_tcase(s, TCase_tcp_send_synack); - tcase_add_test(TCase_tcp_send_empty, tc_tcp_send_empty); - suite_add_tcase(s, TCase_tcp_send_empty); - tcase_add_test(TCase_tcp_send_probe, tc_tcp_send_probe); - suite_add_tcase(s, TCase_tcp_send_probe); - tcase_add_test(TCase_tcp_send_rst, tc_tcp_send_rst); - suite_add_tcase(s, TCase_tcp_send_rst); - tcase_add_test(TCase_tcp_nosync_rst, tc_tcp_nosync_rst); - suite_add_tcase(s, TCase_tcp_nosync_rst); - tcase_add_test(TCase_tcp_sack_prepare, tc_tcp_sack_prepare); - suite_add_tcase(s, TCase_tcp_sack_prepare); - tcase_add_test(TCase_tcp_data_in, tc_tcp_data_in); - suite_add_tcase(s, TCase_tcp_data_in); - tcase_add_test(TCase_tcp_ack_advance_una, tc_tcp_ack_advance_una); - suite_add_tcase(s, TCase_tcp_ack_advance_una); - tcase_add_test(TCase_time_diff, tc_time_diff); - suite_add_tcase(s, TCase_time_diff); - 
tcase_add_test(TCase_tcp_rtt, tc_tcp_rtt); - suite_add_tcase(s, TCase_tcp_rtt); - tcase_add_test(TCase_tcp_congestion_control, tc_tcp_congestion_control); - suite_add_tcase(s, TCase_tcp_congestion_control); - tcase_add_test(TCase_add_retransmission_timer, tc_add_retransmission_timer); - suite_add_tcase(s, TCase_add_retransmission_timer); - tcase_add_test(TCase_tcp_first_timeout, tc_tcp_first_timeout); - suite_add_tcase(s, TCase_tcp_first_timeout); - tcase_add_test(TCase_tcp_rto_xmit, tc_tcp_rto_xmit); - suite_add_tcase(s, TCase_tcp_rto_xmit); - tcase_add_test(TCase_tcp_next_zerowindow_probe, tc_tcp_next_zerowindow_probe); - suite_add_tcase(s, TCase_tcp_next_zerowindow_probe); - tcase_add_test(TCase_tcp_is_allowed_to_send, tc_tcp_is_allowed_to_send); - suite_add_tcase(s, TCase_tcp_is_allowed_to_send); - tcase_add_test(TCase_tcp_retrans_timeout, tc_tcp_retrans_timeout); - suite_add_tcase(s, TCase_tcp_retrans_timeout); - tcase_add_test(TCase_tcp_retrans, tc_tcp_retrans); - suite_add_tcase(s, TCase_tcp_retrans); - tcase_add_test(TCase_tcp_ack_dbg, tc_tcp_ack_dbg); - suite_add_tcase(s, TCase_tcp_ack_dbg); - tcase_add_test(TCase_tcp_ack, tc_tcp_ack); - suite_add_tcase(s, TCase_tcp_ack); - tcase_add_test(TCase_tcp_finwaitack, tc_tcp_finwaitack); - suite_add_tcase(s, TCase_tcp_finwaitack); - tcase_add_test(TCase_tcp_deltcb, tc_tcp_deltcb); - suite_add_tcase(s, TCase_tcp_deltcb); - tcase_add_test(TCase_tcp_finwaitfin, tc_tcp_finwaitfin); - suite_add_tcase(s, TCase_tcp_finwaitfin); - tcase_add_test(TCase_tcp_closewaitack, tc_tcp_closewaitack); - suite_add_tcase(s, TCase_tcp_closewaitack); - tcase_add_test(TCase_tcp_lastackwait, tc_tcp_lastackwait); - suite_add_tcase(s, TCase_tcp_lastackwait); - tcase_add_test(TCase_tcp_syn, tc_tcp_syn); - suite_add_tcase(s, TCase_tcp_syn); - tcase_add_test(TCase_tcp_set_init_point, tc_tcp_set_init_point); - suite_add_tcase(s, TCase_tcp_set_init_point); - tcase_add_test(TCase_tcp_synack, tc_tcp_synack); - suite_add_tcase(s, TCase_tcp_synack); 
- tcase_add_test(TCase_tcp_first_ack, tc_tcp_first_ack); - suite_add_tcase(s, TCase_tcp_first_ack); - tcase_add_test(TCase_tcp_closewait, tc_tcp_closewait); - suite_add_tcase(s, TCase_tcp_closewait); - tcase_add_test(TCase_tcp_fin, tc_tcp_fin); - suite_add_tcase(s, TCase_tcp_fin); - tcase_add_test(TCase_tcp_rcvfin, tc_tcp_rcvfin); - suite_add_tcase(s, TCase_tcp_rcvfin); - tcase_add_test(TCase_tcp_finack, tc_tcp_finack); - suite_add_tcase(s, TCase_tcp_finack); - tcase_add_test(TCase_tcp_force_closed, tc_tcp_force_closed); - suite_add_tcase(s, TCase_tcp_force_closed); - tcase_add_test(TCase_tcp_wakeup_pending, tc_tcp_wakeup_pending); - suite_add_tcase(s, TCase_tcp_wakeup_pending); - tcase_add_test(TCase_tcp_rst, tc_tcp_rst); - suite_add_tcase(s, TCase_tcp_rst); - tcase_add_test(TCase_tcp_halfopencon, tc_tcp_halfopencon); - suite_add_tcase(s, TCase_tcp_halfopencon); - tcase_add_test(TCase_tcp_closeconn, tc_tcp_closeconn); - suite_add_tcase(s, TCase_tcp_closeconn); - tcase_add_test(TCase_invalid_flags, tc_invalid_flags); - suite_add_tcase(s, TCase_invalid_flags); - tcase_add_test(TCase_checkLocalClosing, tc_checkLocalClosing); - suite_add_tcase(s, TCase_checkLocalClosing); - tcase_add_test(TCase_checkRemoteClosing, tc_checkRemoteClosing); - suite_add_tcase(s, TCase_checkRemoteClosing); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_pico_tftp.c b/kernel/picotcp/test/unit/modunit_pico_tftp.c deleted file mode 100644 index 3d6cfd1..0000000 --- a/kernel/picotcp/test/unit/modunit_pico_tftp.c +++ /dev/null @@ -1,375 +0,0 @@ -#include -#include -#include -#include -#include "modules/pico_tftp.c" -#include "check.h" - - -Suite *pico_suite(void); -int tftp_user_cb(struct pico_tftp_session *session, uint16_t err, uint8_t *block, int32_t len, void *arg); -/* MOCKS 
*/ -static int called_pico_socket_close = 0; -static uint16_t expected_opcode = 0; -static int called_user_cb = 0; -static int called_sendto = 0; -static uint32_t called_pico_timer_add = 0; -static int called_pico_timer_cancel = 0; -static struct pico_socket example_socket; -static struct pico_tftp_session example_session; - -int pico_socket_close(struct pico_socket *s) -{ - fail_if(s != example_session.socket); - called_pico_socket_close++; - return 0; -} - -int pico_socket_sendto(struct pico_socket *s, const void *buf, const int len, void *dst, uint16_t remote_port) -{ - const struct pico_tftp_hdr *h = (const struct pico_tftp_hdr *)buf; - fail_if(s != &example_socket); - fail_if(short_be(h->opcode) != expected_opcode); - fail_if(len <= 0); - (void)dst; - (void)remote_port; - called_sendto++; - return 0; -} - -int tftp_user_cb(struct pico_tftp_session *session, uint16_t err, uint8_t *block, int32_t len, void *arg) -{ - (void)session; - (void)err; - (void)block; - (void)len; - (void)arg; - called_user_cb++; - return 0; -} - -uint32_t pico_timer_add(pico_time expire, void (*timer)(pico_time, void *), void *arg) -{ - (void)expire; - (void)timer; - (void)arg; - - return ++called_pico_timer_add; -} - -void pico_timer_cancel(uint32_t t) -{ - (void)t; - called_pico_timer_cancel++; -} - -/* TESTS */ - -/* START_TEST(tc_check_opcode) */ -/* { */ -/* / * TODO: test this: static int check_opcode(struct pico_tftp_hdr *th) * / */ -/* struct pico_tftp_hdr th; */ -/* th.opcode = 0; */ -/* fail_unless(check_opcode(&th) == -1); */ -/* th.opcode = short_be(PICO_TFTP_RRQ); */ -/* fail_unless(check_opcode(&th) == 0); */ -/* th.opcode = short_be(0xFF); */ -/* fail_unless(check_opcode(&th) == -1); */ -/* } */ -/* END_TEST */ - - -START_TEST(tc_find_session_by_socket) -{ - tftp_sessions = (struct pico_tftp_session *)PICO_ZALLOC(sizeof(struct pico_tftp_session)); - tftp_sessions->socket = &example_socket; - tftp_sessions->next = (struct pico_tftp_session *)PICO_ZALLOC(sizeof(struct 
pico_tftp_session)); - tftp_sessions->socket = NULL; - tftp_sessions->next = NULL; - fail_if(find_session_by_socket(&example_socket) != tftp_sessions->next); -} -END_TEST - -START_TEST(tc_tftp_finish) -{ - tftp_sessions = 0; - - /* Test case: client */ - example_session.socket = &example_socket; - called_pico_socket_close = 0; - tftp_finish(&example_session); - fail_if(!called_pico_socket_close); - - /* Test eval_finish() len is 5*/ - example_session.socket = &example_socket; - called_pico_socket_close = 0; - tftp_eval_finish(&example_session, 5); - fail_if(example_session.state != TFTP_STATE_CLOSING); - fail_if(!called_pico_socket_close); - - /* Test eval_finish() len is PICO_TFTP_TOTAL_BLOCK_SIZE */ - example_session.socket = &example_socket; - called_pico_socket_close = 0; - tftp_eval_finish(&example_session, PICO_TFTP_TOTAL_BLOCK_SIZE); - fail_if(called_pico_socket_close); -} -END_TEST - -START_TEST(tc_tftp_send_ack) -{ - example_session.socket = &example_socket; -#ifdef PICO_FAULTY - /* send_ack must not segfault when out of memory */ - pico_set_mm_failure(1); - tftp_send_ack(&example_session); - fail_if(called_sendto > 0); -#endif - expected_opcode = PICO_TFTP_ACK; - tftp_send_ack(&example_session); - fail_if(called_sendto < 1); - -} -END_TEST - -START_TEST(tc_tftp_send_req) -{ - /* Not needed. The tftp_send_rx_req and tftp_send_tx_req cover this. 
*/ -} -END_TEST - -START_TEST(tc_tftp_send_rx_req) -{ - char filename[14] = "some filename"; - - example_session.socket = &example_socket; - called_user_cb = 0; - called_pico_socket_close = 0; - called_sendto = 0; -#ifdef PICO_FAULTY - example_session.callback = tftp_user_cb; - - /* send_req must call error cb when out of memory */ - pico_set_mm_failure(1); - tftp_send_rx_req(&example_session, NULL, 0, filename); - fail_if(called_user_cb < 1); - fail_if(called_sendto > 0); -#endif - expected_opcode = PICO_TFTP_RRQ; - tftp_send_rx_req(&example_session, NULL, 0, NULL); - fail_if(called_sendto > 0); /* Calling with filename = NULL: not good */ - - tftp_send_rx_req(&example_session, NULL, 0, filename); - fail_if(called_sendto < 0); -} -END_TEST - -START_TEST(tc_tftp_send_tx_req) -{ - char filename[14] = "some filename"; - - example_session.socket = &example_socket; - called_user_cb = 0; - called_pico_socket_close = 0; - called_sendto = 0; -#ifdef PICO_FAULTY - example_session.callback = tftp_user_cb; - - /* send_req must call error cb when out of memory */ - pico_set_mm_failure(1); - tftp_send_tx_req(&example_session, NULL, 0, filename); - fail_if(called_user_cb < 1); - fail_if(called_sendto > 0); -#endif - expected_opcode = PICO_TFTP_WRQ; - tftp_send_tx_req(&example_session, NULL, 0, NULL); - fail_if(called_sendto > 0); /* Calling with filename = NULL: not good */ - - tftp_send_tx_req(&example_session, NULL, 0, filename); - fail_if(called_sendto < 0); -} -END_TEST - -START_TEST(tc_tftp_send_error) -{ - char longtext[1024]; - example_session.socket = &example_socket; - called_user_cb = 0; - called_pico_socket_close = 0; - - /* Sending empty msg */ - called_sendto = 0; - expected_opcode = PICO_TFTP_ERROR; - tftp_send_error(&example_session, NULL, 0, 0, NULL); - fail_if(called_sendto < 1); - /* Sending some msg */ - called_sendto = 0; - expected_opcode = PICO_TFTP_ERROR; - tftp_send_error(&example_session, NULL, 0, 0, "some text here"); - fail_if(called_sendto < 1); - - 
/* sending some very long msg */ - memset(longtext, 'a', 1023); - longtext[1023] = (char)0; - called_sendto = 0; - expected_opcode = PICO_TFTP_ERROR; - tftp_send_error(&example_session, NULL, 0, 0, longtext); - fail_if(called_sendto < 1); -} -END_TEST - -START_TEST(tc_tftp_send_data) -{ - example_session.state = 0; - example_session.socket = &example_socket; - called_sendto = 0; - expected_opcode = PICO_TFTP_DATA; - tftp_send_data(&example_session, (const uint8_t*)"buffer", strlen("buffer")); - fail_if(called_sendto < 1); - fail_if(example_session.state != TFTP_STATE_WAIT_LAST_ACK); -} -END_TEST - -START_TEST(tc_pico_tftp_abort) -{ - int ret; - server.listen_socket = NULL; - - /*first case: no session and no listening socket*/ - ret = pico_tftp_abort(NULL, TFTP_ERR_EUSR, "test"); - fail_if(ret != -1); - /*second case: no session but listening socket*/ - server.listen_socket = example_session.socket = &example_socket; - pico_tftp_abort(NULL, TFTP_ERR_EUSR, "test"); - fail_if(ret != -1); - /*tirdh case: session non into list*/ - ret = pico_tftp_abort(&example_session, TFTP_ERR_EUSR, "test"); - fail_if(ret != -1); -} -END_TEST - -/* Receiving functions */ - -START_TEST(tc_tftp_data) -{ - /* TODO: test this: static void tftp_data(uint8_t *block, uint32_t len, union pico_address *a, uint16_t port) */ -} -END_TEST -START_TEST(tc_tftp_ack) -{ - /* TODO: test this: static void tftp_ack(uint8_t *block, uint32_t len, union pico_address *a, uint16_t port) */ -} -END_TEST -START_TEST(tc_tftp_timeout) -{ - /* TODO: test this: static void tftp_timeout(pico_time t) */ -} -END_TEST -START_TEST(tc_tftp_req) -{ - /* TODO: test this: static void tftp_req(uint8_t *block, uint32_t len, union pico_address *a, uint16_t port) */ -} -END_TEST -START_TEST(tc_tftp_data_err) -{ - /* TODO: test this: static void tftp_data_err(uint8_t *block, uint32_t len, union pico_address *a, uint16_t port) */ -} -END_TEST -START_TEST(tc_tftp_fsm_timeout) -{ - /* TODO: test this: static void 
tftp_fsm_timeout(pico_time now, void *arg) */ -} -END_TEST -START_TEST(tc_tftp_receive) -{ - /* TODO: test this: static void tftp_receive(uint8_t *block, uint32_t r, union pico_address *a, uint16_t port) */ -} -END_TEST -START_TEST(tc_tftp_cb) -{ - /* TODO: test this: static void tftp_cb(uint16_t ev, struct pico_socket *s) */ -} -END_TEST -START_TEST(tc_tftp_socket_open) -{ - /* TODO: test this: static int tftp_socket_open(uint16_t family, union pico_address *a, uint16_t port) */ - fail_if(tftp_socket_open(0xFFFF, 21) != NULL); - fail_if(tftp_socket_open(0xFFFF, 0xFFFF) != NULL); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - -/* TCase *TCase_check_opcode = tcase_create("Unit test for check_opcode"); */ - TCase *TCase_find_session_by_socket = tcase_create("Unit test for find_session_by_socket"); - TCase *TCase_tftp_finish = tcase_create("Unit test for tftp_finish"); - TCase *TCase_tftp_send_ack = tcase_create("Unit test for tftp_send_ack"); - TCase *TCase_tftp_send_req = tcase_create("Unit test for tftp_send_req"); - TCase *TCase_tftp_send_rx_req = tcase_create("Unit test for tftp_send_rx_req"); - TCase *TCase_tftp_send_tx_req = tcase_create("Unit test for tftp_send_tx_req"); - TCase *TCase_tftp_send_error = tcase_create("Unit test for tftp_send_error"); - TCase *TCase_tftp_send_data = tcase_create("Unit test for tftp_send_data"); - TCase *Tcase_pico_tftp_abort = tcase_create("Unit test for pico_tftp_abort"); - TCase *TCase_tftp_data = tcase_create("Unit test for tftp_data"); - TCase *TCase_tftp_ack = tcase_create("Unit test for tftp_ack"); - TCase *TCase_tftp_timeout = tcase_create("Unit test for tftp_timeout"); - TCase *TCase_tftp_req = tcase_create("Unit test for tftp_req"); - TCase *TCase_tftp_data_err = tcase_create("Unit test for tftp_data_err"); - TCase *TCase_tftp_fsm_timeout = tcase_create("Unit test for tftp_fsm_timeout"); - TCase *TCase_tftp_receive = tcase_create("Unit test for tftp_receive"); - TCase *TCase_tftp_cb = 
tcase_create("Unit test for tftp_cb"); - TCase *TCase_tftp_socket_open = tcase_create("Unit test for tftp_socket_open"); - - -/* tcase_add_test(TCase_check_opcode, tc_check_opcode); */ -/* suite_add_tcase(s, TCase_check_opcode); */ - tcase_add_test(TCase_find_session_by_socket, tc_find_session_by_socket); - suite_add_tcase(s, TCase_find_session_by_socket); - tcase_add_test(TCase_tftp_finish, tc_tftp_finish); - suite_add_tcase(s, TCase_tftp_finish); - tcase_add_test(TCase_tftp_send_ack, tc_tftp_send_ack); - suite_add_tcase(s, TCase_tftp_send_ack); - tcase_add_test(TCase_tftp_send_req, tc_tftp_send_req); - suite_add_tcase(s, TCase_tftp_send_req); - tcase_add_test(TCase_tftp_send_rx_req, tc_tftp_send_rx_req); - suite_add_tcase(s, TCase_tftp_send_rx_req); - tcase_add_test(TCase_tftp_send_tx_req, tc_tftp_send_tx_req); - suite_add_tcase(s, TCase_tftp_send_tx_req); - tcase_add_test(TCase_tftp_send_error, tc_tftp_send_error); - suite_add_tcase(s, TCase_tftp_send_error); - tcase_add_test(TCase_tftp_send_data, tc_tftp_send_data); - suite_add_tcase(s, TCase_tftp_send_data); - tcase_add_test(TCase_tftp_data, tc_tftp_data); - suite_add_tcase(s, Tcase_pico_tftp_abort); - tcase_add_test(Tcase_pico_tftp_abort, tc_pico_tftp_abort); - suite_add_tcase(s, TCase_tftp_data); - tcase_add_test(TCase_tftp_ack, tc_tftp_ack); - suite_add_tcase(s, TCase_tftp_ack); - tcase_add_test(TCase_tftp_timeout, tc_tftp_timeout); - suite_add_tcase(s, TCase_tftp_timeout); - tcase_add_test(TCase_tftp_req, tc_tftp_req); - suite_add_tcase(s, TCase_tftp_req); - tcase_add_test(TCase_tftp_data_err, tc_tftp_data_err); - suite_add_tcase(s, TCase_tftp_data_err); - tcase_add_test(TCase_tftp_fsm_timeout, tc_tftp_fsm_timeout); - suite_add_tcase(s, TCase_tftp_fsm_timeout); - tcase_add_test(TCase_tftp_receive, tc_tftp_receive); - suite_add_tcase(s, TCase_tftp_receive); - tcase_add_test(TCase_tftp_cb, tc_tftp_cb); - suite_add_tcase(s, TCase_tftp_cb); - tcase_add_test(TCase_tftp_socket_open, tc_tftp_socket_open); - 
suite_add_tcase(s, TCase_tftp_socket_open); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_queue.c b/kernel/picotcp/test/unit/modunit_queue.c deleted file mode 100644 index 9d13aa3..0000000 --- a/kernel/picotcp/test/unit/modunit_queue.c +++ /dev/null @@ -1,84 +0,0 @@ -#include "pico_frame.h" -#include "pico_queue.h" -#include "stack/pico_frame.c" -#include "pico_stack.h" -#include "check.h" - - -Suite *pico_suite(void); - -struct pico_queue q1 = { - 0 -}, q2 = { - 0 -}; - -START_TEST(tc_q) -{ - struct pico_frame *f0 = pico_frame_alloc(100); - struct pico_frame *f1 = pico_frame_alloc(100); - struct pico_frame *f2 = pico_frame_alloc(100); - struct pico_frame *f3 = pico_frame_alloc(100); - struct pico_frame *f4 = pico_frame_alloc(100); - - pico_queue_protect(&q1); - - q1.max_frames = 4; - q2.max_size = 4 * 100; - - fail_if (pico_enqueue(&q1, pico_frame_copy(f0)) < 0); - fail_if (pico_enqueue(&q1, pico_frame_copy(f1)) < 0); - fail_if (pico_enqueue(&q1, pico_frame_copy(f2)) < 0); - fail_if (pico_enqueue(&q1, pico_frame_copy(f3)) < 0); - fail_if (pico_enqueue(&q1, pico_frame_copy(f4)) >= 0); - - fail_if (pico_enqueue(&q2, pico_frame_copy(f0)) < 0); - fail_if (pico_enqueue(&q2, pico_frame_copy(f1)) < 0); - fail_if (pico_enqueue(&q2, pico_frame_copy(f2)) < 0); - fail_if (pico_enqueue(&q2, pico_frame_copy(f3)) < 0); - fail_if (pico_enqueue(&q2, pico_frame_copy(f4)) >= 0); - - fail_if((pico_dequeue(&q1))->buffer != f0->buffer); - fail_if((pico_dequeue(&q1))->buffer != f1->buffer); - fail_if((pico_dequeue(&q1))->buffer != f2->buffer); - fail_if((pico_dequeue(&q1))->buffer != f3->buffer); - fail_if(pico_queue_peek(&q1) != NULL); - fail_if(pico_dequeue(&q1) != NULL); - fail_if(q1.size != 0); - fail_if(q1.frames != 0); - - - pico_queue_empty(&q2); - 
fail_if(q2.size != 0); - fail_if(q2.frames != 0); - fail_if(pico_queue_peek(&q2) != NULL); - fail_if(pico_dequeue(&q2) != NULL); - - pico_queue_deinit(&q1); - pico_queue_deinit(&q2); - - -} -END_TEST - - -Suite *pico_suite(void) -{ - Suite *s = suite_create("Packet Queues"); - - TCase *TCase_q = tcase_create("Unit test for pico_queue.c"); - tcase_add_test(TCase_q, tc_q); - suite_add_tcase(s, TCase_q); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/modunit_seq.c b/kernel/picotcp/test/unit/modunit_seq.c deleted file mode 100644 index 0c1c2c4..0000000 --- a/kernel/picotcp/test/unit/modunit_seq.c +++ /dev/null @@ -1,59 +0,0 @@ -#include "pico_tcp.c" -#include - -Suite *pico_suite(void); - -START_TEST(tc_seq_compare) -{ - uint32_t big_a = 0xFFFFFF0alu; - uint32_t big_b = 0xFFFFFF0blu; - uint32_t small_a = 0xalu; - uint32_t small_b = 0xblu; - uint32_t under_thresh = 0x7ffffffflu; - uint32_t over_thresh = 0x80000000lu; - uint32_t zero = 0lu; - - fail_if(pico_seq_compare(small_a, small_b) >= 0); - fail_if(pico_seq_compare(small_b, small_a) <= 0); - - fail_if(pico_seq_compare(over_thresh, under_thresh) <= 0); - fail_if(pico_seq_compare(under_thresh, over_thresh) >= 0); - - fail_if(pico_seq_compare(small_a, big_b) <= 0); - fail_if(pico_seq_compare(big_b, small_a) >= 0); - - fail_if(pico_seq_compare(small_a, zero) <= 0); - fail_if(pico_seq_compare(zero, small_a) >= 0); - - fail_if(pico_seq_compare(big_a, zero) >= 0); - fail_if(pico_seq_compare(zero, big_a) <= 0); - - fail_if(pico_seq_compare(big_a, big_b) >= 0); - fail_if(pico_seq_compare(big_b, big_a) <= 0); - - fail_if(pico_seq_compare(big_a, big_a) != 0); - fail_if(pico_seq_compare(zero, zero) != 0); - -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("pico tcp sequence numbers"); - TCase 
*TCase_seq_compare = tcase_create("Unit test for pico_seq_compare"); - tcase_add_test(TCase_seq_compare, tc_seq_compare); - suite_add_tcase(s, TCase_seq_compare); - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} - diff --git a/kernel/picotcp/test/unit/unit_arp.c b/kernel/picotcp/test/unit/unit_arp.c deleted file mode 100644 index c6684bf..0000000 --- a/kernel/picotcp/test/unit/unit_arp.c +++ /dev/null @@ -1,213 +0,0 @@ -#include "pico_ethernet.c" - -static struct pico_frame *init_frame(struct pico_device *dev) -{ - struct pico_frame *f = pico_frame_alloc(PICO_SIZE_ETHHDR + PICO_SIZE_ARPHDR); - f->net_hdr = f->buffer + PICO_SIZE_ETHHDR; - f->datalink_hdr = f->buffer; - f->dev = dev; - - return f; -} - -START_TEST (arp_update_max_arp_reqs_test) -{ - pico_stack_init(); - max_arp_reqs = 0; - usleep((PICO_ARP_INTERVAL + 1) * 1000); - pico_stack_tick(); - fail_unless(max_arp_reqs > 0); - - max_arp_reqs = PICO_ARP_MAX_RATE; - usleep((PICO_ARP_INTERVAL + 1) * 1000); - pico_stack_tick(); - fail_unless(max_arp_reqs == PICO_ARP_MAX_RATE); -} -END_TEST - -START_TEST (arp_compare_test) -{ - struct pico_arp a, b; - char ipstr[] = "192.168.1.1"; - - memset(&a, 0, sizeof(a)); - pico_string_to_ipv4(ipstr, &b.ipv4.addr); - - fail_unless(arp_compare(&a, &b) == -1); - fail_unless(arp_compare(&b, &a) == 1); - fail_unless(arp_compare(&a, &a) == 0); -} -END_TEST - -START_TEST (arp_lookup_test) -{ - struct pico_ip4 ip; - struct pico_eth *eth = NULL; - char ipstr[] = "192.168.1.1"; - struct pico_arp entry; - - eth = pico_arp_lookup(&ip); - fail_unless(eth == NULL); - - pico_string_to_ipv4(ipstr, &ip.addr); - entry.ipv4 = ip; - - pico_stack_init(); - fail_unless(pico_arp_add_entry(&entry) == 0); - entry.arp_status = PICO_ARP_STATUS_STALE; - eth = pico_arp_lookup(&ip); - fail_unless(eth == NULL); - 
pico_tree_delete(&arp_tree, &entry); -} -END_TEST - -START_TEST (arp_expire_test) -{ - struct pico_arp entry; - entry.arp_status = PICO_ARP_STATUS_REACHABLE; - entry.timestamp = 0; - - arp_expire(PICO_ARP_TIMEOUT, &entry); - fail_unless(entry.arp_status == PICO_ARP_STATUS_STALE); -} -END_TEST - -START_TEST(tc_pico_arp_queue) -{ - struct pico_ip4 addr = { - .addr = 0xaabbccdd - }; - int i; - struct pico_frame *f = pico_frame_alloc(sizeof(struct pico_ipv4_hdr)); - struct pico_ipv4_hdr *h = (struct pico_ipv4_hdr *) f->buffer; - fail_if(!f); - f->net_hdr = (uint8_t *)h; - h->dst.addr = addr.addr; - - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) { - fail_if(frames_queued[i] != NULL); - } - pico_arp_unreachable(&addr); - for (i = 0; i < PICO_ND_MAX_FRAMES_QUEUED; i++) { - fail_if(frames_queued[i] != NULL); - } - pico_arp_postpone(f); - fail_if(frames_queued[0]->buffer != f->buffer); - pico_arp_unreachable(&addr); - PICO_FREE(f); -} -END_TEST - - - -START_TEST (arp_receive_test) -{ - struct mock_device *mock; - struct pico_frame *f = NULL; - struct pico_arp_hdr *ah = NULL; - struct pico_eth_hdr *eh = NULL; - uint8_t macaddr1[6] = { - 0, 0, 0, 0xa, 0xb, 0xf - }; - uint8_t macaddr2[6] = { - 0, 0, 0, 0xc, 0xd, 0xf - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct pico_ip4 ip1 = { - .addr = long_be(0x0A2800AA) - }; - struct pico_ip4 ip2 = { - .addr = long_be(0x0A2800AB) - }; - - pico_stack_init(); - - /* Create mock device */ - mock = pico_mock_create(macaddr1); - fail_if(!mock, "MOCK DEVICE creation failed"); - fail_if(pico_ipv4_link_add(mock->dev, ip1, netmask), "add link to mock device failed"); - - /* Normal ARP request */ - f = init_frame(mock->dev); - fail_if(!f, "FRAME INIT failed"); - eh = (struct pico_eth_hdr *) f->datalink_hdr; - ah = (struct pico_arp_hdr *) f->net_hdr; - - memcpy(eh->saddr, macaddr2, PICO_SIZE_ETH); - memcpy(eh->daddr, PICO_ETHADDR_ALL, PICO_SIZE_ETH); - eh->proto = PICO_IDETH_ARP; - - ah->htype = PICO_ARP_HTYPE_ETH; 
- ah->ptype = PICO_IDETH_IPV4; - ah->hsize = PICO_SIZE_ETH; - ah->psize = PICO_SIZE_IP4; - ah->opcode = PICO_ARP_REQUEST; - memcpy(ah->s_mac, macaddr2, PICO_SIZE_ETH); - ah->src.addr = ip2.addr; - ah->dst.addr = ip1.addr; - fail_unless(pico_arp_receive(f) == 0); - - /* net_hdr is a nullpointer */ - f = init_frame(mock->dev); - fail_if(!f, "FRAME INIT failed"); - f->net_hdr = NULL; - fail_unless(pico_arp_receive(f) == -1); - - /* wrong hardware type */ - f = init_frame(mock->dev); - fail_if(!f, "FRAME INIT failed"); - ah = (struct pico_arp_hdr *) f->net_hdr; - ah->htype = 0; - fail_unless(pico_arp_receive(f) == -1); - - /* wrong protocol type */ - f = init_frame(mock->dev); - fail_if(!f, "FRAME INIT failed"); - ah = (struct pico_arp_hdr *) f->net_hdr; - ah->ptype = 0; - fail_unless(pico_arp_receive(f) == -1); - - /* source mac address is multicast */ - f = init_frame(mock->dev); - fail_if(!f, "FRAME INIT failed"); - ah = (struct pico_arp_hdr *) f->net_hdr; - ah->s_mac[0] = 0x01; - fail_unless(pico_arp_receive(f) == -1); - pico_ipv4_link_del(mock->dev, ip1); -} -END_TEST - -START_TEST (arp_get_test) -{ - struct pico_frame *f = NULL; - struct mock_device *mock; - struct pico_ipv4_hdr *hdr = NULL; - struct pico_eth *eth = NULL; - uint8_t macaddr[6] = { - 0, 0, 0xa, 0xa, 0xb, 0xf - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct pico_ip4 ip = { - .addr = long_be(0x0A28000B) - }; - - mock = pico_mock_create(macaddr); - fail_if(!mock, "MOCK DEVICE creation failed"); - fail_if(pico_ipv4_link_add(mock->dev, ip, netmask), "add link to mock device failed"); - - f = pico_frame_alloc(PICO_SIZE_ETHHDR + sizeof(struct pico_ipv4_hdr)); - f->net_hdr = f->start + PICO_SIZE_ETHHDR; - f->datalink_hdr = f->start; - f->dev = mock->dev; - - hdr = (struct pico_ipv4_hdr *) f->net_hdr; - hdr->dst.addr = ip.addr; - eth = pico_arp_get(f); - fail_unless(eth == &mock->dev->eth->mac); - pico_ipv4_link_del(mock->dev, ip); -} -END_TEST diff --git 
a/kernel/picotcp/test/unit/unit_dhcp.c b/kernel/picotcp/test/unit/unit_dhcp.c deleted file mode 100644 index 08124b9..0000000 --- a/kernel/picotcp/test/unit/unit_dhcp.c +++ /dev/null @@ -1,554 +0,0 @@ - -static struct pico_dhcp_client_cookie*dhcp_client_ptr; - -void callback_dhcpclient(void*cli, int code); -int generate_dhcp_msg(uint8_t *buf, uint32_t *len, uint8_t type); - -void callback_dhcpclient(void*cli, int code) -{ - struct pico_ip4 gateway; - char gw_txt_addr[30]; - IGNORE_PARAMETER(cli); - - if(code == PICO_DHCP_SUCCESS) { - gateway = pico_dhcp_get_gateway(&dhcp_client_ptr); - pico_ipv4_to_string(gw_txt_addr, gateway.addr); - } - - printf("callback happened with code %d!\n", code); -} - -int generate_dhcp_msg(uint8_t *buf, uint32_t *len, uint8_t type) -{ - if(type == DHCP_MSG_TYPE_DISCOVER) { - uint8_t buffer[] = { - 0x01, 0x01, 0x06, 0x00, 0x0c, 0x10, - 0x53, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x0a, 0x0b, 0x0f, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x01, 0x37, 0x07, 0x01, - 0x1c, 0x02, 0x03, 0x0c, 0x3a, 0x3b, 0x39, 0x02, 0x02, 0x40, 0xff, 0x00 - }; - *len = sizeof(buffer); - memcpy(&(buf[0]), buffer, *len); - }else if(type == DHCP_MSG_TYPE_OFFER) { - return 1; - }else if(type == DHCP_MSG_TYPE_REQUEST) { - uint32_t i = 0; - uint8_t buffer1[] = { - /* 0x63,0x82,0x53,0x63,// MAGIC COOCKIE */ - /* 0x35,0x01,0x03, // DHCP REQUEST */ - /* 0x36,0x04,0x00,0x00,0x00,0x00 // SERVER ID */ - 0x32, 0x04, buf[0x3a], buf[0x3b], buf[0x3c], buf[0x3e], /* requested ip */ - 0x37, 0x04, 0x01, 0x03, 0x06, 0x2a, /* Parameter list */ - 0x3d, 0x07, 0x01, buf[0x06], buf[0x07], buf[0x08], buf[0x09], buf[0x0a], buf[0x0b], /* Client id */ - 0xff - }; - - buf[0x02a] = 0x01; /* change to boot request */ - buf[0x11c] = 0x03; /* request */ - - memcpy(&(buf[0x123]), &(buffer1[0]), sizeof(buffer1)); - *len = sizeof(buffer1) + 0x123; - for(i = *len; i < 0x150; i++) { - buf[i + 10] = 0x00; - } - return 0; - }else if(type == DHCP_MSG_TYPE_ACK) { - return 1; - } - - return 0; -} - -START_TEST (test_dhcp_server_api) -{ -/************************************************************************ - * Check if dhcp recv works correctly if - * MAC address of client is not in arp table yet - * Status : Done - ************************************************************************/ - - struct mock_device *mock; - uint8_t macaddr1[6] = { - 0xc1, 0, 0, 0xa, 0xb, 0xf - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct pico_ip4 serverip = { - .addr = long_be(0x0A28000A) - }; - uint8_t buf[600] = { - 0 - }; - /* Declaration test 1 */ - struct pico_dhcp_server_setting s1 = { - 0 - }; - /* Declaration test 2 */ - 
struct pico_dhcp_server_setting s2 = { - 0 - }; - - printf("*********************** starting %s * \n", __func__); - - /* Create mock device */ - mock = pico_mock_create(macaddr1); - fail_if(!mock, "MOCK DEVICE creation failed"); - fail_if(pico_mock_network_read(mock, buf, BUFLEN), "data on network that shouldn't be there"); - fail_if(pico_ipv4_link_add(mock->dev, serverip, netmask), "add link to mock device failed"); - - /* test 0 */ - /* Clear error code */ - pico_err = PICO_ERR_NOERR; - /* Test 0 statements */ - fail_unless(pico_dhcp_server_initiate(NULL), "DHCP_SERVER> initiate succeeded after pointer to dev == NULL"); - fail_unless(pico_err == PICO_ERR_EINVAL, "DHCP_SERVER> initiate succeeded without PICO_ERR_EINVAL after wrong parameter"); - - /* test 1 */ - /* Clear error code */ - pico_err = PICO_ERR_NOERR; - /* Store data in settings */ - s1.server_ip.addr = long_be(0x0A28000F); /* make sure this IP is not assigned */ - /* Test 1 statements */ - fail_unless(pico_dhcp_server_initiate(&s1), "DHCP_SERVER> initiate succeeded after pointer to dev == NULL"); - fail_unless(pico_err == PICO_ERR_EINVAL, "DHCP_SERVER> initiate succeeded without PICO_ERR_EINVAL after wrong parameter"); - - /* test 2 */ - /* Clear error code */ - pico_err = PICO_ERR_NOERR; - /* Store data in settings */ - s2.server_ip = serverip; - /* Test 2 statements */ - fail_if(pico_dhcp_server_initiate(&s2), "DHCP_SERVER> failed after correct parameter"); -} -END_TEST - -START_TEST (test_dhcp) -{ -/************************************************************************ - * Check if all states (offer, bound) are changed correctly - * and if response messages are replied correctly - * Status : Done - *************************************************************************/ - struct mock_device*mock; - struct pico_dhcp_server_setting s = { - 0 - }; - struct pico_ip4 xid = { - .addr = long_be(0x00003d1d) - }; - uint8_t macaddr1[6] = { - 0xc1, 0, 0, 0xa, 0xb, 0xf - }; - uint8_t macaddr2[6] = { - 
0xc6, 0, 0, 0xa, 0xb, 0xf - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct pico_ip4 serverip = { - .addr = long_be(0x0A28000A) - }; - struct pico_socket sock = { }; - struct pico_dhcp_server_negotiation *dn = NULL; - struct pico_ip4 *stored_ipv4 = NULL; - uint32_t len = 0; - int network_read = 0; - uint8_t *buf; - uint8_t printbufactive = 0; - - buf = PICO_ZALLOC(600); - - printf("*********************** starting %s * \n", __func__); - - /*Insert custom values in buffer*/ - fail_if(generate_dhcp_msg(buf, &len, DHCP_MSG_TYPE_DISCOVER), "DHCP_SERVER->failed to generate buffer"); - memcpy(&(buf[4]), &(xid.addr), sizeof(struct pico_ip4)); - memcpy(&(buf[28]), &(macaddr1[0]), sizeof(struct pico_ip4)); - printbuf(&(buf[0]), len, "DHCP-DISCOVER packet", printbufactive); - - /*Initiate test setup*/ - pico_stack_init(); - - /* Create mock device */ - mock = pico_mock_create(macaddr2); - fail_if(!mock, "MOCK DEVICE creation failed"); - fail_if(pico_mock_network_read(mock, buf, BUFLEN), "data on network that shouldn't be there"); - fail_if(pico_ipv4_link_add(mock->dev, serverip, netmask), "add link to mock device failed"); - - s.server_ip = serverip; - - fail_if(pico_dhcp_server_initiate(&s), "DHCP_SERVER> server initiation failed"); - - dn = pico_dhcp_server_find_negotiation(xid.addr); - fail_unless(dn == NULL, "DCHP SERVER -> negotiation data available befor discover msg recvd"); - - /* simulate reception of a DISCOVER packet */ - sock.local_addr.ip4 = serverip; - pico_dhcp_server_recv(&sock, buf, len); - - tick_it(3); - - /* check if negotiation data is stored */ - dn = pico_dhcp_server_find_negotiation(xid.addr); - fail_if(dn == NULL, "DCHP SERVER -> no negotiation stored after discover msg recvd"); - - /* check if new ip is in ARP cache */ - stored_ipv4 = pico_arp_reverse_lookup(&dn->hwaddr); - fail_if(stored_ipv4 == NULL, "DCHP SERVER -> new address is not inserted in ARP"); - fail_unless(stored_ipv4->addr == dn->ciaddr.addr, "DCHP SERVER 
-> new ip not stored in negotiation data"); - - /* check if state is changed and reply is received */ - network_read = pico_mock_network_read(mock, buf, BUFLEN); - fail_unless(network_read > 0, "received msg on network of %u bytes", network_read); - printbuf(&(buf[0]), (uint32_t)network_read, "DHCP-OFFER msg", printbufactive); - fail_unless(buf[0x011c] == 0x02, "No DHCP offer received after discovery"); - fail_unless(dn->state == PICO_DHCP_STATE_OFFER, "DCHP SERVER -> negotiation state not changed to OFFER"); - - /*change offer to request*/ - fail_if(generate_dhcp_msg(buf, &len, DHCP_MSG_TYPE_REQUEST), "DHCP_SERVER->failed to generate buffer"); - printbuf(&(buf[0x2a]), len - 0x2a, "request buffer", printbufactive); - - /* simulate reception of a offer packet */ - pico_dhcp_server_recv(&sock, &(buf[0x2a]), len - 0x2a); - fail_unless(dn->state == PICO_DHCP_STATE_BOUND, "DCHP SERVER -> negotiation state not changed to BOUND"); - - tick_it(3); - - /* check if state is changed and reply is received */ - do { - network_read = pico_mock_network_read(mock, buf, BUFLEN); - } while (buf[0] == 0x33); - printf("Received message: %d bytes\n", network_read); - fail_unless(network_read > 0, "received msg on network of %d bytes", network_read); - printbuf(&(buf[0]), (uint32_t)network_read, "DHCP-ACK msg", printbufactive); - fail_unless(buf[0x11c] == 0x05, "No DHCP ACK received after discovery"); -} -END_TEST - - -START_TEST (test_dhcp_server_ipninarp) -{ -/************************************************************************ - * Check if dhcp recv works correctly if - * MAC address of client is not in arp table yet - * Status : Done - *************************************************************************/ - struct mock_device*mock; - struct pico_dhcp_server_setting s = { - 0 - }; - struct pico_ip4 xid = { - .addr = long_be(0x00003d1d) - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct pico_ip4 serverip = { - .addr = long_be(0x0A28000A) - }; - 
struct pico_socket sock = { }; - struct pico_dhcp_server_negotiation *dn = NULL; - struct pico_ip4 *stored_ipv4 = NULL; - unsigned char macaddr1[6] = { - 0xc1, 0, 0, 0xa, 0xb, 0xf - }; - uint32_t len = 0; - uint8_t buf[600] = { - 0 - }; - uint8_t printbufactive = 0; - - printf("*********************** starting %s * \n", __func__); - - /*Insert custom values in buffer*/ - fail_if(generate_dhcp_msg(buf, &len, DHCP_MSG_TYPE_DISCOVER), "DHCP_SERVER->failed to generate buffer"); - memcpy(&(buf[4]), &(xid.addr), sizeof(struct pico_ip4)); - memcpy(&(buf[28]), &(macaddr1[0]), sizeof(struct pico_ip4)); - printbuf(&(buf[0]), len, "DHCP-DISCOVER packet", printbufactive); - - /*Initiate test setup*/ - pico_stack_init(); - - /* Create mock device */ - mock = pico_mock_create(macaddr1); - fail_if(!mock, "MOCK DEVICE creation failed"); - fail_if(pico_mock_network_read(mock, buf, BUFLEN), "data on network that shouldn't be there"); - fail_if(pico_ipv4_link_add(mock->dev, serverip, netmask), "add link to mock device failed"); - s.server_ip = serverip; - - fail_if(pico_dhcp_server_initiate(&s), "DHCP_SERVER> server initiation failed"); - - dn = pico_dhcp_server_find_negotiation(xid.addr); - fail_unless(dn == NULL, "DCHP SERVER -> negotiation data available before discover msg recvd"); - - /* simulate reception of a DISCOVER packet */ - sock.local_addr.ip4 = serverip; - pico_dhcp_server_recv(&sock, buf, len); - - /* check if negotiation data is stored */ - dn = pico_dhcp_server_find_negotiation(xid.addr); - fail_if(dn == NULL, "DCHP SERVER -> no negotiation stored after discover msg recvd"); - - /* check if new ip is in ARP cache */ - stored_ipv4 = pico_arp_reverse_lookup(&dn->hwaddr); - fail_if(stored_ipv4 == NULL, "DCHP SERVER -> new address is not inserted in ARP"); - fail_unless(stored_ipv4->addr == dn->ciaddr.addr, "DCHP SERVER -> new ip not stored in negotiation data"); - - /* check if new ip is in ARP cache */ - fail_if(pico_arp_reverse_lookup(&dn->hwaddr) == NULL, "DCHP 
SERVER -> new address is not inserted in ARP"); -} -END_TEST - -START_TEST (test_dhcp_server_ipinarp) -{ -/************************************************************************ - * Check if dhcp recv works correctly if - * MAC address of client is allready in arp table - * Status : Done - *************************************************************************/ - struct mock_device*mock; - struct pico_dhcp_server_setting s = { - 0 - }; - struct pico_ip4 ipv4address = { - .addr = long_be(0x0a280067) - }; - struct pico_ip4 xid = { - .addr = long_be(0x00003d1d) - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct pico_ip4 serverip = { - .addr = long_be(0x0A28000A) - }; - struct pico_socket sock = { }; - struct pico_ip4 *stored_ipv4 = NULL; - struct pico_dhcp_server_negotiation *dn = NULL; - struct pico_eth *arp_resp = NULL; - unsigned char macaddr1[6] = { - 0xc1, 0, 0, 0xa, 0xb, 0xf - }; - uint32_t len = 0; - uint8_t buf[600] = { - 0 - }; - - printf("*********************** starting %s * \n", __func__); - - /*Insert custom values in buffer*/ - fail_if(generate_dhcp_msg(buf, &len, DHCP_MSG_TYPE_DISCOVER), "DHCP_SERVER->failed to generate buffer"); - memcpy(&(buf[28]), &(macaddr1[0]), sizeof(struct pico_ip4)); - memcpy(&(buf[4]), &(xid.addr), sizeof(struct pico_ip4)); - - /* Create mock device */ - mock = pico_mock_create(macaddr1); - fail_if(!mock, "MOCK DEVICE creation failed"); - fail_if(pico_ipv4_link_add(mock->dev, serverip, netmask), "add link to mock device failed"); - s.server_ip = serverip; - - /*Initiate test setup*/ - pico_stack_init(); - pico_arp_create_entry(&(macaddr1[0]), ipv4address, s.dev); - - fail_if(pico_dhcp_server_initiate(&s), "DHCP_SERVER> server initiation failed"); - - /* simulate reception of a DISCOVER packet */ - sock.local_addr.ip4 = serverip; - pico_dhcp_server_recv(&sock, buf, len); - - /* check if negotiation data is stored */ - dn = pico_dhcp_server_find_negotiation(xid.addr); - fail_if(dn == NULL, "DCHP 
SERVER -> no negotiation stored after discover msg recvd"); - - /* check if new ip is in ARP cache */ - stored_ipv4 = pico_arp_reverse_lookup(&dn->hwaddr); - fail_if(stored_ipv4 == NULL, "DCHP SERVER -> new address is not inserted in ARP"); - fail_unless(stored_ipv4->addr == dn->ciaddr.addr, "DCHP SERVER -> new ip not stored in negotiation data"); - - /* check if new ip is in ARP cache */ - arp_resp = pico_arp_lookup(&ipv4address); - fail_if(arp_resp == NULL, "DCHP SERVER -> address unavailable in arp cache"); -} -END_TEST - -#if 0 -START_TEST (test_dhcp_client) -{ - struct mock_device*mock; - uint32_t dhcp_hdr_offset = PICO_SIZE_ETHHDR + PICO_SIZE_IP4HDR + PICO_UDPHDR_SIZE; - unsigned char macaddr1[6] = { - 0xc1, 0, 0, 0xa, 0xb, 0xf - }; - struct pico_ip4 address = { - 0 - }; - struct pico_ip4 yiaddr = { - .addr = long_be(0xC0A8000A) - }; - struct pico_ip4 gateway = { - 0 - }; - struct pico_ip4 router = { - .addr = long_be(0xC0A800FE) - }; - uint8_t buf[BUFLEN] = { - 0 - }; - uint8_t offer_buf1[] = { - 0x00, 0x00, 0x00, 0x00, 0xC0, 0xA8, 0x00, 0x01 - }; - uint8_t offer_buf2[] = { - 0x63, 0x82, 0x53, 0x63, 0x35, 0x01, 0x02, 0x01, 0x04, 0xff, 0xff, 0xff, 0x00, 0x3a, 0x04, 0x00, 0x00, 0x07, 0x08, 0x3b, 0x04, 0x00, 0x00, 0x0c, 0x4e, 0x33, 0x04, 0x00, 0x00, 0x0e, 0x10, 0x36, 0x04, 0xc0, 0xa8, 0x00, 0x01, 0xff - }; - uint8_t routeropt_buf[] = { - PICO_DHCPOPT_ROUTER, 0x04, 0xC0, 0xA8, 0x00, 0xFE, 0xFF - }; - int type = 0; - uint8_t printbufactive = 0; - uint32_t len = 0; - uint32_t xid = 0; - struct pico_dhcp_client_cookie *cli = NULL; - - pico_stack_init(); - - /* Create mock device */ - mock = pico_mock_create(macaddr1); - fail_if(!mock, "MOCK DEVICE creation failed"); - fail_if(pico_mock_network_read(mock, buf, BUFLEN), "data on network that shouldn't be there"); - - /* initiate negotiation -> change state to */ - pico_dhcp_initiate_negotiation(mock->dev, &callback_dhcpclient, &xid); - cli = get_cookie_by_xid(xid); - dhcp_client_ptr = cli; - fail_if(cli == NULL, 
"initiate fail"); - fail_unless(cli->state == DHCPSTATE_DISCOVER, "Not in discover state after init negotiate"); - fail_if(pico_mock_network_read(mock, buf, BUFLEN), "data on network that shouldn't be there"); - - /* push discover msg on network */ - tick_it(3); - - /* read discover message from network */ - len = pico_mock_network_read(mock, buf, BUFLEN ); - fail_unless(len, "No msg received on network!"); - printbuf(&(buf[0]), len, "DHCP-DISCOVER packet", printbufactive); - fail_unless(buf[0x011c] == 0x01, "No DHCP Discover received after initiate negotiation"); - mock_print_protocol(buf); - fail_if(pico_mock_network_read(mock, buf, BUFLEN), "data on network that shouldn't be there"); - - /* check API access functions */ - address = pico_dhcp_get_address(cli); - fail_unless(address.addr == 0, "Client address gets value at init -> should get it from dhcp server"); - - gateway = pico_dhcp_get_gateway(cli); - fail_unless(gateway.addr == 0, "Gateway gets value at init -> should get it from dhcp server "); - - /* Change received discovery msg to offer offer msg */ - buf[0x2a] = 0x02; - memcpy(&(buf[0x3a]), &(offer_buf1[0]), sizeof(offer_buf1)); - memcpy(&(buf[0x3a]), &(yiaddr.addr), sizeof(struct pico_ip4)); - memcpy(&(buf[0x116]), &(offer_buf2[0]), sizeof(offer_buf2)); - memcpy(&(buf[0x13b]), &(routeropt_buf[0]), sizeof(routeropt_buf)); - memcpy(&(buf[0x13d]), &(router.addr), sizeof(struct pico_ip4)); - printbuf(&(buf[dhcp_hdr_offset]), len - dhcp_hdr_offset, "DHCP-OFFER message", printbufactive); - - /* generate dhcp type from msg */ - type = pico_dhcp_verify_and_identify_type(&(buf[dhcp_hdr_offset]), len - dhcp_hdr_offset, cli); - fail_if(type == 0, "unkown DHCP type"); - - /* simulate reception of a DHCP server offer */ - pico_dhcp_state_machine(type, cli, &(buf[dhcp_hdr_offset]), len - dhcp_hdr_offset); - fail_if(cli->state == DHCPSTATE_DISCOVER, "still in discover state after dhcp server offer"); - fail_unless(cli->state == DHCPSTATE_REQUEST, "not in REQUEST 
state after dhcp server offer"); - - address = pico_dhcp_get_address(cli); - fail_unless(address.addr == yiaddr.addr, "Client address incorrect => yiaddr or pico_dhcp_get_address incorrect"); - gateway = pico_dhcp_get_gateway(cli); - fail_unless(gateway.addr == router.addr, "Gateway incorrect! => routeroption or pico_dhcp_get_gateway incorrect"); - tick_it(3); - - len = pico_mock_network_read(mock, buf, BUFLEN); - fail_unless(len, "received msg on network of %d bytes", len); - printbuf(&(buf[0]), len, "DHCP-REQUEST packet", printbufactive); - fail_unless(buf[0x011c] == 0x03, "No DHCP request received after offer"); - -} -END_TEST -#endif - -START_TEST (test_dhcp_client_api) -{ -/************************************************************************ - * Check API of pico_dhcp_initiate_negotiation - * Status : Done - ************************************************************************/ - - /* Declaration test 0 */ - uint32_t xid0 = 0; - struct pico_dhcp_client_cookie *cli0 = NULL; - /* Declaration test 1 */ - uint32_t xid1 = 0; - struct pico_dhcp_client_cookie *cli1 = NULL; - - printf("*********************** starting %s * \n", __func__); - - /* test 0 */ - /* Clear error code */ - pico_err = PICO_ERR_NOERR; - /* Test 0 statements */ - pico_dhcp_initiate_negotiation(NULL, NULL, &xid0); - cli0 = pico_dhcp_client_find_cookie(xid0); - fail_unless(cli0 == NULL, "DHCP_CLIENT> initiate succeeded after pointer to dev == NULL"); - fail_unless(pico_err == PICO_ERR_EINVAL, "DHCP_SERVER> initiate succeeded without PICO_ERR_EINVAL after wrong parameter"); - - /* test 1 */ - /* Clear error code */ - pico_err = PICO_ERR_NOERR; - /* Test 1 statements */ - pico_dhcp_initiate_negotiation(NULL, &callback_dhcpclient, &xid1); - cli1 = pico_dhcp_client_find_cookie(xid1); - fail_unless(cli1 == NULL, "DHCP_CLIENT> initiate succeeded after pointer to dev == NULL"); - fail_unless(pico_err == PICO_ERR_EINVAL, "DHCP_SERVER> initiate succeeded without PICO_ERR_EINVAL after wrong 
parameter"); - -#if 0 - /* not testable since we do not have a stub for the pico_socket_sendto */ - /* Declaration test 2 */ - uint32_t xid2 = 0; - struct pico_dhcp_client_cookie *cli2 = NULL; - struct pico_device *dev2; - struct mock_device *mock2 = NULL; - - /* test 2 */ - /* Create device */ - dev2 = pico_null_create("dummy"); - mock2 = pico_mock_create(NULL); - fail_if(mock2 == NULL, "No device created"); - /* Clear error code */ - pico_err = PICO_ERR_NOERR; - /* Test 2 statements */ - xid2 = pico_dhcp_initiate_negotiation(dev2, &callback_dhcpclient); - cli2 = get_cookie_by_xid(xid2); - fail_if(cli2 == NULL, "DHCP_CLIENT: error initiating: %s", strerror(pico_err)); - xid2 = pico_dhcp_initiate_negotiation(mock2->dev, &callback_dhcpclient); - cli2 = get_cookie_by_xid(xid2); - fail_if(cli2 == NULL, "DHCP_CLIENT: error initiating: %s", strerror(pico_err)); - xid2 = pico_dhcp_initiate_negotiation(dev2, &callback_dhcpclient); - cli2 = get_cookie_by_xid(xid2); - fail_if(cli2 == NULL, "DHCP_CLIENT: error initiating: %s", strerror(pico_err)); -#endif -} -END_TEST diff --git a/kernel/picotcp/test/unit/unit_dns.c b/kernel/picotcp/test/unit/unit_dns.c deleted file mode 100644 index 7b3e218..0000000 --- a/kernel/picotcp/test/unit/unit_dns.c +++ /dev/null @@ -1,88 +0,0 @@ -void cb_dns(char *ip, void *arg); - -void cb_dns(char *ip, void *arg) -{ - if (!ip) { - /* Error occured */ - printf("DNS error getaddr\n"); - return; - } - - /* Do something */ - printf("DNS -> %s\n", ip); - PICO_FREE(ip); - if (arg) - PICO_FREE(arg); -} - - -START_TEST (test_dns) -{ - int ret; - char url[] = "www.google.com"; - char ip[] = "8.8.4.4"; - struct pico_ip4 ns; - - ns.addr = long_be(0x0a00280a); /* 10.40.0.10 */ - - pico_stack_init(); - - printf("START DNS TEST\n"); - - /* testing nameserver API */ - ret = pico_dns_client_nameserver(NULL, PICO_DNS_NS_ADD); - fail_if(ret == 0, "dns> dns_client_nameserver add error"); - - ret = pico_dns_client_nameserver(NULL, PICO_DNS_NS_DEL); - fail_if(ret == 
0, "dns> dns_client_nameserver del error"); - - ret = pico_dns_client_nameserver(NULL, 99); - fail_if(ret == 0, "dns> dns_client_nameserver wrong code"); - - ret = pico_dns_client_nameserver(NULL, 0xFF); - fail_if(ret == 0, "dns> dns_client_nameserver wrong code"); - - ret = pico_dns_client_nameserver(&ns, PICO_DNS_NS_DEL); /* delete non added ns */ - fail_if(ret == 0, "dns> dns_client_nameserver del error"); - - ret = pico_dns_client_nameserver(&ns, 99); - fail_if(ret == 0, "dns> dns_client_nameserver wrong code"); - - ret = pico_dns_client_nameserver(&ns, PICO_DNS_NS_ADD); /* add correct one */ - fail_if(ret < 0, "dns> dns_client_nameserver add error: %s", strerror(pico_err)); - - ret = pico_dns_client_nameserver(&ns, 99); - fail_if(ret == 0, "dns> dns_client_nameserver wrong code"); - - ret = pico_dns_client_nameserver(&ns, PICO_DNS_NS_DEL); - fail_if(ret < 0, "dns> dns_client_nameserver del error: %s", strerror(pico_err)); - - ret = pico_dns_client_nameserver(&ns, PICO_DNS_NS_ADD); /* add correct one */ - fail_if(ret < 0, "dns> dns_client_nameserver add error: %s", strerror(pico_err)); - - ret = pico_dns_client_nameserver(&ns, PICO_DNS_NS_ADD); /* add correct one again */ - fail_if(ret < 0, "dns> dns_client_nameserver add double failed"); - - /* testing getaddr API */ - /* not testable since we do not have a stub for the pico_socket_send */ - /* ret = pico_dns_client_getaddr(url, cb_dns, NULL); / * ask correct one * / */ - /* fail_if(ret < 0, "dns> dns_client_getaddr: %s",strerror(pico_err)); */ - - ret = pico_dns_client_getaddr(NULL, cb_dns, NULL); - fail_if(ret == 0, "dns> dns_client_getaddr: no url"); - - ret = pico_dns_client_getaddr(url, NULL, NULL); - fail_if(ret == 0, "dns> dns_client_getaddr: no cb"); - - /* testing getname API */ - /* not testable since we do not have a stub for the pico_socket_send */ - /* ret = pico_dns_client_getname(ip, cb_dns, NULL); / * ask correct one * / */ - /* fail_if(ret < 0, "dns> dns_client_getname: 
%s",strerror(pico_err)); */ - - ret = pico_dns_client_getname(NULL, cb_dns, NULL); - fail_if(ret == 0, "dns> dns_client_getname: no ip"); - - ret = pico_dns_client_getname(ip, NULL, NULL); - fail_if(ret == 0, "dns> dns_client_getname: no cb"); -} -END_TEST diff --git a/kernel/picotcp/test/unit/unit_icmp4.c b/kernel/picotcp/test/unit/unit_icmp4.c deleted file mode 100644 index 086a41c..0000000 --- a/kernel/picotcp/test/unit/unit_icmp4.c +++ /dev/null @@ -1,401 +0,0 @@ - -#include "pico_icmp4.h" -#define NUM_PING 1 -int ping_test_var = 0; - -void cb_ping(struct pico_icmp4_stats *s); -void icmp4_unreach_socket_cb(uint16_t ev, struct pico_socket *s); - -void cb_ping(struct pico_icmp4_stats *s) -{ - char host[30]; - pico_ipv4_to_string(host, s->dst.addr); - if (s->err == 0) { - dbg("%lu bytes from %s: icmp_req=%lu ttl=64 time=%lu ms\n", s->size, host, s->seq, s->time); - if (s->seq == NUM_PING) { - ping_test_var++; - } - - fail_if (s->seq > NUM_PING); - } else { - dbg("PING %lu to %s: Error %d\n", s->seq, host, s->err); - exit(1); - } -} - -START_TEST (test_icmp4_ping) -{ - struct pico_ip4 local = { - 0 - }; - struct pico_ip4 remote = { - 0 - }; - struct pico_ip4 netmask = { - 0 - }; - struct mock_device *mock = NULL; - char local_address[] = { - "192.168.1.102" - }; - char remote_address[] = { - "192.168.1.103" - }; - uint16_t interval = 1000; - uint16_t timeout = 5000; - uint8_t size = 48; - - int bufferlen = 80; - uint8_t buffer[bufferlen]; - int len; - uint8_t temp_buf[4]; - printf("*********************** starting %s * \n", __func__); - - pico_string_to_ipv4(local_address, &(local.addr)); - pico_string_to_ipv4("255.255.255.0", &(netmask.addr)); - - pico_string_to_ipv4(remote_address, &(remote.addr)); - pico_string_to_ipv4("255.255.255.0", &(netmask.addr)); - - pico_stack_init(); - - mock = pico_mock_create(NULL); - fail_if(mock == NULL, "No device created"); - - pico_ipv4_link_add(mock->dev, local, netmask); - - fail_if(pico_icmp4_ping(local_address, NUM_PING, 
interval, timeout, size, cb_ping) < 0); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - fail_if(ping_test_var != 1); - - pico_icmp4_ping(remote_address, NUM_PING, interval, timeout, size, cb_ping); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - /* get the packet from the mock_device */ - memset(buffer, 0, (size_t)bufferlen); - len = pico_mock_network_read(mock, buffer, bufferlen); - fail_if(len < 20); - /* inspect it */ - fail_unless(mock_ip_protocol(mock, buffer, len) == 1); - fail_unless(mock_icmp_type(mock, buffer, len) == 8); - fail_unless(mock_icmp_code(mock, buffer, len) == 0); - fail_unless(pico_checksum(buffer + 20, (uint32_t)(len - 20)) == 0); - - /* cobble up a reply */ - buffer[20] = 0; /* type 0 : reply */ - memcpy(temp_buf, buffer + 12, 4); - memcpy(buffer + 12, buffer + 16, 4); - memcpy(buffer + 16, temp_buf, 4); - - /* using the mock-device because otherwise I have to put everything in a pico_frame correctly myself. */ - pico_mock_network_write(mock, buffer, len); - /* check if it is received */ - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - fail_unless(ping_test_var == 2); - - /* repeat but make it an invalid reply... 
*/ - - pico_icmp4_ping(remote_address, NUM_PING, interval, timeout, size, cb_ping); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - /* get the packet from the mock_device */ - memset(buffer, 0, (size_t)bufferlen); - len = pico_mock_network_read(mock, buffer, bufferlen); - /* inspect it */ - fail_unless(mock_ip_protocol(mock, buffer, len) == 1); - fail_unless(mock_icmp_type(mock, buffer, len) == 8); - fail_unless(mock_icmp_code(mock, buffer, len) == 0); - fail_unless(pico_checksum(buffer + 20, (uint32_t)(len - 20)) == 0); - - /* cobble up a reply */ - buffer[20] = 0; /* type 0 : reply */ - memcpy(temp_buf, buffer + 12, 4); - memcpy(buffer + 12, buffer + 16, 4); - memcpy(buffer + 16, temp_buf, 4); - buffer[26] = (uint8_t)~buffer[26]; /* flip some bits in the sequence number, to see if the packet gets ignored properly */ - - /* using the mock-device because otherwise I have to put everything in a pico_frame correctly myself. */ - pico_mock_network_write(mock, buffer, len); - /* check if it is received */ - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - fail_unless(ping_test_var == 2); -} -END_TEST - - -START_TEST (test_icmp4_incoming_ping) -{ - int bufferlen = 76; - uint8_t buffer[76] = { - 0x45, 0x00, 0x00, 0x4c, - 0x91, 0xc3, 0x40, 0x00, - 0x40, 0x01, 0x24, 0xd0, - 0xc0, 0xa8, 0x01, 0x66, - 0xc0, 0xa8, 0x01, 0x64, - 0x08, 0x00, 0x66, 0x3c, - 0x91, 0xc2, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00 - }; - int buffer2len = 76; - int len; - int cntr = 0; - uint8_t buffer2[bufferlen]; - struct pico_ip4 local = { - .addr = long_be(0xc0a80164) - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct mock_device*mock; - struct pico_ipv4_hdr 
*hdr = (struct pico_ipv4_hdr *) buffer; - printf("*********************** starting %s * \n", __func__); - - pico_stack_init(); - - mock = pico_mock_create(NULL); - fail_if(mock == NULL, "No device created"); - - pico_ipv4_link_add(mock->dev, local, netmask); - - hdr->crc = 0; - hdr->crc = short_be(pico_checksum(hdr, PICO_SIZE_IP4HDR)); - pico_mock_network_write(mock, buffer, bufferlen); - /* check if it is received */ - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - - len = pico_mock_network_read(mock, buffer2, buffer2len); - /* inspect it */ - - while(cntr < len) { - printf("0x%02x ", buffer2[cntr]); - cntr++; - if(cntr % 4 == 0) - printf("\n"); - } - fail_unless(len == buffer2len, "ping reply lenght does not match, expected len: %d, got: %d", buffer2len, len); - fail_unless(mock_ip_protocol(mock, buffer2, len) == 1); - fail_unless(mock_icmp_type(mock, buffer2, len) == 0); - fail_unless(mock_icmp_code(mock, buffer2, len) == 0); - fail_unless(pico_checksum(buffer2 + 20, (uint32_t)(len - 20)) == 0); - -} -END_TEST - -START_TEST (test_icmp4_unreachable_send) -{ - struct pico_ip4 local = { - .addr = long_be(0x0a280064) - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct mock_device*mock; - int len = 0; - int bufferlen = 80; - uint8_t buffer2[bufferlen]; - - uint8_t buffer[32] = { - 0x45, 0x00, 0x00, 0x20, 0x91, 0xc0, 0x40, 0x00, - 0x40, 0x11, 0x94, 0xb4, 0x0a, 0x28, 0x00, 0x05, - 0x0a, 0x28, 0x00, 0x04, 0x15, 0xb3, 0x15, 0xb3, - 0x00, 0x0c, 0x00, 0x00, 'e', 'l', 'l', 'o' - }; - - /* fake packet with bad upper-layer-protocol */ - uint8_t buffer3[20] = { - 0x45, 0x00, 0x00, 0x14, 0x91, 0xc0, 0x40, 0x00, - 0x40, 0xff, 0x94, 0xb4, 0x0a, 0x28, 0x00, 0x05, - 0x0a, 0x28, 0x00, 0x04 - }; - - struct pico_frame*f = PICO_ZALLOC(sizeof(struct pico_frame)); - printf("*********************** starting %s * \n", __func__); - - f->net_hdr = buffer; - f->buffer = buffer; - - 
pico_stack_init(); - - mock = pico_mock_create(NULL); - fail_if(mock == NULL, "No device created"); - - pico_ipv4_link_add(mock->dev, local, netmask); - - - fail_if(pico_icmp4_dest_unreachable(f)); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - len = pico_mock_network_read(mock, buffer2, bufferlen); - - fail_unless(len == 56, "len is indeed %d\n", len); - fail_unless(mock_ip_protocol(mock, buffer2, len) == 1); - fail_unless(mock_icmp_type(mock, buffer2, len) == 3); /* destination unreachable */ - fail_unless(mock_icmp_code(mock, buffer2, len) == 1); /* host unreachable */ - fail_unless(pico_checksum(buffer2 + 20, (uint32_t)(len - 20)) == 0); - - - fail_if(pico_icmp4_port_unreachable(f)); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - len = pico_mock_network_read(mock, buffer2, bufferlen); - - fail_unless(len == 56); - fail_unless(mock_ip_protocol(mock, buffer2, len) == 1); - fail_unless(mock_icmp_type(mock, buffer2, len) == 3); /* destination unreachable */ - fail_unless(mock_icmp_code(mock, buffer2, len) == 3); /* port unreachable */ - fail_unless(pico_checksum(buffer2 + 20, (uint32_t)(len - 20)) == 0); - - - fail_if(pico_icmp4_proto_unreachable(f)); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - len = pico_mock_network_read(mock, buffer2, bufferlen); - - fail_unless(len == 56); - fail_unless(mock_ip_protocol(mock, buffer2, len) == 1); - fail_unless(mock_icmp_type(mock, buffer2, len) == 3); /* destination unreachable */ - fail_unless(mock_icmp_code(mock, buffer2, len) == 2); /* proto unreachable */ - fail_unless(pico_checksum(buffer2 + 20, (uint32_t)(len - 20)) == 0); - - - fail_if(pico_icmp4_ttl_expired(f)); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - len = pico_mock_network_read(mock, buffer2, bufferlen); - - fail_unless(len == 56); - fail_unless(mock_ip_protocol(mock, buffer2, len) == 1); - fail_unless(mock_icmp_type(mock, buffer2, len) == 11); /* ttl expired */ - 
fail_unless(mock_icmp_code(mock, buffer2, len) == 0); - fail_unless(pico_checksum(buffer2 + 20, (uint32_t)(len - 20)) == 0); - - f->net_hdr = buffer3; - f->buffer = buffer3; - - fail_if(pico_icmp4_proto_unreachable(f)); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - - len = pico_mock_network_read(mock, buffer2, bufferlen); - - fail_unless(len == 48); /* Buffer 3 is shorter, reply is shorter too... */ - fail_unless(mock_ip_protocol(mock, buffer2, len) == 1); - fail_unless(mock_icmp_type(mock, buffer2, len) == 3); /* destination unreachable */ - fail_unless(mock_icmp_code(mock, buffer2, len) == 2); /* proto unreachable */ - fail_unless(pico_checksum(buffer2 + 20, (uint32_t)(len - 20)) == 0); -} -END_TEST - -int icmp4_socket_unreach_status = 0; -void icmp4_unreach_socket_cb(uint16_t ev, struct pico_socket *s) -{ - IGNORE_PARAMETER(s); - - if (ev == PICO_SOCK_EV_ERR) { - icmp4_socket_unreach_status = 1; - } -} - -START_TEST (test_icmp4_unreachable_recv) -{ - struct pico_ip4 local = { - .addr = long_be(0x0a280064) - }; - struct pico_ip4 remote = { - .addr = long_be(0x0a280065) - }; - struct pico_ip4 netmask = { - .addr = long_be(0xffffff00) - }; - struct mock_device*mock; - struct pico_socket*sock; - uint16_t port = short_be(7777); - - /* put a host unreachable in the queue, run a few stack ticks */ - uint8_t buffer[] = { - 0x45, 0x00, 0x00, 0x20, - 0x91, 0xc0, 0x40, 0x00, - 0x40, 0x01, 0x94, 0xb4, - 0x0a, 0x28, 0x00, 0x65, - 0x0a, 0x28, 0x00, 0x64, - 0x03, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - }; - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) buffer; - - printf("*********************** starting %s * \n", __func__); - pico_stack_init(); - - mock = pico_mock_create(NULL); - fail_if(mock == NULL, "No device created"); - - pico_ipv4_link_add(mock->dev, local, 
netmask); - - /* open a socket */ - sock = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, &icmp4_unreach_socket_cb); - fail_if(sock == NULL); - fail_if(pico_socket_bind(sock, &local, &port)); - pico_socket_connect(sock, &remote, port); - pico_socket_write(sock, "fooo", 4); - /* see if my callback was called with the proper code */ - - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - /* filling in the IP header and first 8 bytes */ - hdr->crc = 0; - hdr->crc = short_be(pico_checksum(hdr, PICO_SIZE_IP4HDR)); - printf("read %d bytes\n", pico_mock_network_read(mock, buffer + 28, 28)); - - printf("wrote %d bytes\n", pico_mock_network_write(mock, buffer, 56)); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - fail_unless(icmp4_socket_unreach_status == 1); -} -END_TEST diff --git a/kernel/picotcp/test/unit/unit_ipv4.c b/kernel/picotcp/test/unit/unit_ipv4.c deleted file mode 100644 index a708688..0000000 --- a/kernel/picotcp/test/unit/unit_ipv4.c +++ /dev/null @@ -1,852 +0,0 @@ - -START_TEST (test_ipv4) -{ - #define IP_TST_SIZ 256 - uint32_t i; - - struct pico_device *dev[IP_TST_SIZ]; - char devname[8]; - struct pico_ip4 a[IP_TST_SIZ], d[IP_TST_SIZ], *source[IP_TST_SIZ], nm16, nm32, gw[IP_TST_SIZ], r[IP_TST_SIZ], ret; - struct pico_ipv4_link *l[IP_TST_SIZ]; - - char ipstr[] = "192.168.1.1"; - struct pico_ip4 ipaddr; - - struct pico_frame *f_NULL = NULL; - struct pico_ip4 *dst_NULL = NULL; - - pico_stack_init(); - - nm16.addr = long_be(0xFFFF0000); - nm32.addr = long_be(0xFFFFFFFF); - - /*link_add*/ - for (i = 0; i < IP_TST_SIZ; i++) { - snprintf(devname, 8, "nul%d", i); - dev[i] = pico_null_create(devname); - a[i].addr = long_be(0x0a000001u + (i << 16)); - d[i].addr = long_be(0x0a000002u + (i << 16)); - fail_if(pico_ipv4_link_add(dev[i], a[i], nm16) != 0, "Error adding link"); - } - /*link_find + link_get + route_add*/ - for (i = 0; i < IP_TST_SIZ; i++) { - gw[i].addr = long_be(0x0a0000f0u + (i << 16)); - r[i].addr = long_be(0x0c00001u + (i 
<< 16)); - fail_unless(pico_ipv4_link_find(&a[i]) == dev[i], "Error finding link"); - l[i] = pico_ipv4_link_get(&a[i]); - fail_if(l[i] == NULL, "Error getting link"); - fail_if(pico_ipv4_route_add(r[i], nm32, gw[i], 1, l[i]) != 0, "Error adding route"); - fail_if(pico_ipv4_route_add(d[i], nm32, gw[i], 1, l[i]) != 0, "Error adding route"); - } - /*get_gateway + source_find*/ - for (i = 0; i < IP_TST_SIZ; i++) { - ret = pico_ipv4_route_get_gateway(&r[i]); - fail_if(ret.addr != gw[i].addr, "Error get gateway: returned wrong route"); - source[i] = pico_ipv4_source_find(&d[i]); - fail_if(source[i]->addr != a[i].addr, "Error find source: returned wrong route"); - } - /*route_del + link_del*/ - for (i = 0; i < IP_TST_SIZ; i++) { - fail_if(pico_ipv4_route_del(r[i], nm32, 1) != 0, "Error deleting route"); - fail_if(pico_ipv4_link_del(dev[i], a[i]) != 0, "Error deleting link"); - } - /*string_to_ipv4 + ipv4_to_string*/ - pico_string_to_ipv4(ipstr, &(ipaddr.addr)); - fail_if(ipaddr.addr != long_be(0xc0a80101), "Error string to ipv4"); - memset(ipstr, 0, 12); - pico_ipv4_to_string(ipstr, ipaddr.addr); - fail_if(strncmp(ipstr, "192.168.1.1", 11) != 0, "Error ipv4 to string"); - - /*valid_netmask*/ - fail_if(pico_ipv4_valid_netmask(long_be(nm32.addr)) != 32, "Error checking netmask"); - - /*is_unicast*/ - fail_if((pico_ipv4_is_unicast(long_be(0xc0a80101))) != 1, "Error checking unicast"); - fail_if((pico_ipv4_is_unicast(long_be(0xe0000001))) != 0, "Error checking unicast"); - - /*rebound*/ - fail_if(pico_ipv4_rebound(f_NULL) != -1, "Error rebound frame"); - - /*frame_push*/ - fail_if(pico_ipv4_frame_push(f_NULL, dst_NULL, PICO_PROTO_TCP) != -1, "Error push frame"); -} -END_TEST - -START_TEST (test_nat_enable_disable) -{ - struct pico_ipv4_link link = { - .address = {.addr = long_be(0x0a320001)} - }; /* 10.50.0.1 */ - struct pico_frame *f = pico_ipv4_alloc(&pico_proto_ipv4, NULL, PICO_UDPHDR_SIZE); - struct pico_ipv4_hdr *net = (struct pico_ipv4_hdr *)f->net_hdr; - struct 
pico_udp_hdr *udp = (struct pico_udp_hdr *)f->transport_hdr; - const char *raw_data = "ello"; - - net->vhl = 0x45; /* version = 4, hdr len = 5 (32-bit words) */ - net->tos = 0; - net->len = short_be(32); /* hdr + data (bytes) */ - net->id = short_be(0x91c0); - net->frag = short_be(0x4000); /* don't fragment flag, offset = 0 */ - net->ttl = 64; - net->proto = 17; /* UDP */ - net->crc = 0; - net->src.addr = long_be(0x0a280008); /* 10.40.0.8 */ - net->dst.addr = long_be(0x0a320001); /* 10.50.0.1 */ - - udp->trans.sport = short_be(5555); - udp->trans.dport = short_be(6667); - udp->len = 12; - udp->crc = 0; - - f->payload = f->transport_hdr + PICO_UDPHDR_SIZE; - memcpy(f->payload, raw_data, 4); - - printf(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> NAT ENABLE/DISABLE TEST\n"); - pico_stack_init(); - - fail_if(pico_ipv4_nat_enable(&link)); - fail_unless(nat_link->address.addr == link.address.addr); - fail_unless(pico_ipv4_nat_is_enabled(&link.address)); - - fail_if(pico_ipv4_nat_outbound(f, &net->dst)); - pico_ipv4_nat_table_cleanup(pico_tick, NULL); - - fail_if(pico_ipv4_nat_disable()); - fail_if(pico_ipv4_nat_is_enabled(&link.address)); -} -END_TEST - -START_TEST (test_nat_translation) -{ - struct pico_ipv4_link link = { - .address = {.addr = long_be(0x0a320001)} - }; /* 10.50.0.1 */ - struct pico_frame *f = pico_ipv4_alloc(&pico_proto_ipv4, NULL, PICO_UDPHDR_SIZE); - struct pico_ipv4_hdr *net = (struct pico_ipv4_hdr *)f->net_hdr; - struct pico_udp_hdr *udp = (struct pico_udp_hdr *)f->transport_hdr; - struct pico_ip4 src_ori = { - .addr = long_be(0x0a280008) - }; /* 10.40.0.8 */ - struct pico_ip4 dst_ori = { - .addr = long_be(0x0a320009) - }; /* 10.50.0.9 */ - struct pico_ip4 nat = { - .addr = long_be(0x0a320001) - }; /* 10.50.0.9 */ - const char *raw_data = "ello"; - uint16_t sport_ori = short_be(5555); - uint16_t dport_ori = short_be(6667); - uint16_t nat_port = 0; - - net->vhl = 0x45; /* version = 4, hdr len = 5 (32-bit words) */ - net->tos = 0; - net->len = 
short_be(32); /* hdr + data (bytes) */ - net->id = short_be(0x91c0); - net->frag = short_be(0x4000); /* don't fragment flag, offset = 0 */ - net->ttl = 64; - net->proto = 17; /* UDP */ - net->crc = 0; - net->src = src_ori; - net->dst = dst_ori; - - udp->trans.sport = sport_ori; - udp->trans.dport = dport_ori; - udp->len = 12; - udp->crc = 0; - - f->payload = f->transport_hdr + PICO_UDPHDR_SIZE; - memcpy(f->payload, raw_data, 4); - - printf(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> NAT TRANSLATION TEST\n"); - pico_stack_init(); - fail_if(pico_ipv4_nat_enable(&link)); - - /* perform outbound translation, check if source IP got translated */ - fail_if(pico_ipv4_nat_outbound(f, &nat_link->address)); - fail_if(net->src.addr != link.address.addr, "source address not translated"); - - /* perform outbound translation of same packet, check if source IP and PORT got translated the same as previous packet */ - nat_port = udp->trans.sport; - net->src = src_ori; /* restore original src */ - udp->trans.sport = sport_ori; /* restore original sport */ - fail_if(pico_ipv4_nat_outbound(f, &nat_link->address)); - fail_if(net->src.addr != link.address.addr, "source address not translated"); - fail_if(udp->trans.sport != nat_port, "frames with the same source IP, source PORT and PROTO did not get translated the same"); - - /* perform outbound translation of packet with changed source PORT, check if source PORT got translated differently as previous packet */ - nat_port = udp->trans.sport; - net->src = src_ori; /* restore original src */ - udp->trans.sport = short_be(5556); /* change sport */ - fail_if(pico_ipv4_nat_outbound(f, &nat_link->address)); - fail_if(net->src.addr != link.address.addr, "source address not translated"); - fail_if(udp->trans.sport == short_be(sport_ori), "two frames with different sport get translated the same"); - - /* perform inbound translation of previous packet, check if destination IP and PORT got translated to the original source IP and PORT */ - nat_port 
= udp->trans.sport; - net->src = dst_ori; - net->dst = nat; - udp->trans.sport = sport_ori; - udp->trans.dport = nat_port; - fail_if(pico_ipv4_nat_inbound(f, &nat_link->address)); - fail_if(net->dst.addr != src_ori.addr, "destination address not translated correctly"); - fail_if(udp->trans.dport != short_be(5556), "ports not translated correctly"); - pico_ipv4_nat_table_cleanup(pico_tick, NULL); - - fail_if(pico_ipv4_nat_disable()); -} -END_TEST - -START_TEST (test_nat_port_forwarding) -{ - struct pico_ipv4_link link = { - .address = {.addr = long_be(0x0a320001)} - }; /* 10.50.0.1 */ - struct pico_frame *f = pico_ipv4_alloc(&pico_proto_ipv4, NULL, PICO_UDPHDR_SIZE); - struct pico_ipv4_hdr *net = (struct pico_ipv4_hdr *)f->net_hdr; - struct pico_udp_hdr *udp = (struct pico_udp_hdr *)f->transport_hdr; - struct pico_ip4 src_addr = { - .addr = long_be(0x0a280008) - }; /* 10.40.0.8 */ - struct pico_ip4 dst_addr = { - .addr = long_be(0x0a320009) - }; /* 10.50.0.9 */ - struct pico_ip4 nat_addr = { - .addr = long_be(0x0a320001) - }; /* 10.50.0.9 */ - const char *raw_data = "ello"; - uint16_t sport_ori = short_be(5555); - uint16_t fport_pub = short_be(80); - uint16_t fport_priv = short_be(8080); - - net->vhl = 0x45; /* version = 4, hdr len = 5 (32-bit words) */ - net->tos = 0; - net->len = short_be(32); /* hdr + data (bytes) */ - net->id = short_be(0x91c0); - net->frag = short_be(0x4000); /* don't fragment flag, offset = 0 */ - net->ttl = 64; - net->proto = 17; /* UDP */ - net->crc = 0; - net->src = dst_addr; - net->dst = nat_addr; - - udp->trans.sport = sport_ori; - udp->trans.dport = fport_pub; - udp->len = 12; - udp->crc = 0; - - f->payload = f->transport_hdr + PICO_UDPHDR_SIZE; - memcpy(f->payload, raw_data, 4); - - printf(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> NAT PORT FORWARD TEST\n"); - pico_stack_init(); - fail_if(pico_ipv4_nat_enable(&link)); - - fail_if(pico_ipv4_port_forward(nat_addr, fport_pub, src_addr, fport_priv, 17, PICO_NAT_PORT_FORWARD_ADD)); - - 
fail_if(pico_ipv4_nat_inbound(f, &nat_link->address)); - fail_if(net->dst.addr != src_addr.addr, "destination address not translated correctly"); - fail_if(udp->trans.dport != fport_priv, "destination port not translated correctly"); - - fail_if(pico_ipv4_port_forward(nat_addr, fport_pub, src_addr, fport_priv, 17, PICO_NAT_PORT_FORWARD_DEL)); - pico_ipv4_nat_table_cleanup(pico_tick, NULL); -} -END_TEST - -START_TEST (test_ipfilter) -{ - struct pico_device *dev = NULL; - uint8_t proto = 0, tos = 0; - uint16_t sport = 0, dport = 0; - int8_t priority = 0; - int ret = 0; - - struct pico_ip4 src_addr = { - 0 - }; - struct pico_ip4 saddr_netmask = { - 0 - }; - struct pico_ip4 dst_addr = { - 0 - }; - struct pico_ip4 daddr_netmask = { - 0 - }; - - enum filter_action action = 1; - - uint32_t filter_id1; - - /* 192.168.1.2:16415 -> 192.168.1.109:1222 [sending a TCP syn] */ - uint8_t ipv4_buf[] = { - 0x00, 0x02, 0xf7, 0xf1, 0x79, 0x33, 0xe0, 0xdb, 0x55, - 0xd4, 0xb6, 0x27, 0x08, 0x00, 0x45, 0x00, 0x00, 0x28, - 0x00, 0x01, 0x00, 0x00, 0x40, 0x06, 0xf7, 0x0f, 0xc0, - 0xa8, 0x01, 0x02, 0xc0, 0xa8, 0x01, 0x6d, 0x40, 0x1f, - 0x04, 0xc6, 0x00, 0xb1, 0x56, 0x5a, 0x00, 0x00, 0x00, - 0x00, 0x50, 0x02, 0x20, 0x00, 0x70, 0x32, 0x00, 0x00 - }; - - struct pico_frame *f; - - printf("IP Filter> Adding a new filter...\n"); - filter_id1 = pico_ipv4_filter_add(dev, proto, &src_addr, &saddr_netmask, &dst_addr, &daddr_netmask, sport, dport, priority, tos, action); - fail_if(filter_id1 <= 0, "Error adding filter\n"); - printf("filter_id1 = %d\n", filter_id1); - - printf("IP Filter> Trying to add the same filter...\n"); - filter_id1 = pico_ipv4_filter_add(dev, proto, &src_addr, &saddr_netmask, &dst_addr, &daddr_netmask, sport, dport, priority, tos, action); - fail_if(ret > 0, "Error adding filter\n"); - - printf("IP Filter> Deleting added filter...\n"); - ret = pico_ipv4_filter_del(filter_id1); - fail_if(ret != 0, "Error deleting the filter\n"); - - printf("IP Filter> Trying to delete the same 
filter\n"); - ret = pico_ipv4_filter_del(filter_id1); - fail_if(ret != -1, "Deleting non existing filter failed\n"); - - f = (struct pico_frame *)PICO_ZALLOC(200); - f->buffer = PICO_ZALLOC(20); - f->usage_count = PICO_ZALLOC(sizeof(uint32_t)); - f->buffer = ipv4_buf; - f->net_hdr = ipv4_buf + 14u; /* shifting to IP layer */ - f->transport_hdr = ipv4_buf + 34u; /* shifting to Transport layer */ - - /* adding exact filter */ - pico_string_to_ipv4("192.168.1.109", &src_addr.addr); - pico_string_to_ipv4("255.255.255.255", &saddr_netmask.addr); - sport = 1222u; - filter_id1 = pico_ipv4_filter_add(dev, proto, &src_addr, &saddr_netmask, &dst_addr, &daddr_netmask, sport, dport, priority, tos, FILTER_REJECT); - fail_if(filter_id1 <= 0, "Error adding exact filter\n"); - printf("Filter is added\n"); - sync(); - sleep(1); - - ret = ipfilter(f); - fail_if(ret != 1, "Frame wasn't filtered\n"); - - printf("IP Filter> Deleting added filter...\n"); - ret = pico_ipv4_filter_del(filter_id1); - fail_if(ret != 0, "Error deleting the filter\n"); - - printf("IP Filter> Adding masked filter...\n"); - pico_string_to_ipv4("192.168.1.7", &src_addr.addr); - pico_string_to_ipv4("255.255.255.0", &saddr_netmask.addr); - sport = 1222u; - - filter_id1 = pico_ipv4_filter_add(dev, proto, &src_addr, &saddr_netmask, &dst_addr, &daddr_netmask, sport, dport, priority, tos, FILTER_DROP); - fail_if(filter_id1 <= 0, "Error adding masked filter\n"); - - f = (struct pico_frame *)PICO_ZALLOC(200); - f->buffer = PICO_ZALLOC(20); - f->usage_count = PICO_ZALLOC(sizeof(uint32_t)); - f->buffer = ipv4_buf; - f->net_hdr = ipv4_buf + 14u; /* shifting to IP layer */ - f->transport_hdr = ipv4_buf + 34u; /* shifting to Transport layer */ - ret = ipfilter(f); - fail_if(ret != 1, "Mask filter failed to filter\n"); - - printf("IP Filter> Deleting added filter...\n"); - ret = pico_ipv4_filter_del(filter_id1); - fail_if(ret != 0, "Error deleting the filter\n"); - - printf("IP Filter> Adding bad filter..\n"); - 
pico_string_to_ipv4("191.1.1.7", &src_addr.addr); - pico_string_to_ipv4("255.255.255.0", &saddr_netmask.addr); - sport = 1991u; - filter_id1 = pico_ipv4_filter_add(dev, proto, &src_addr, &saddr_netmask, &dst_addr, &daddr_netmask, sport, dport, priority, tos, FILTER_DROP); - fail_if(filter_id1 <= 0, "Error adding bad filter\n"); - - f = (struct pico_frame *)PICO_ZALLOC(200); - f->buffer = PICO_ZALLOC(20); - f->usage_count = PICO_ZALLOC(sizeof(uint32_t)); - f->buffer = ipv4_buf; - f->net_hdr = ipv4_buf + 14u; /* shifting to IP layer */ - f->transport_hdr = ipv4_buf + 34u; /* shifting to Transport layer */ - ret = ipfilter(f); - fail_if(ret != 0, "Filter shouldn't have filtered this frame\n"); - - printf("IP Filter> Deleting added filter...\n"); - ret = pico_ipv4_filter_del(filter_id1); - fail_if(ret != 0, "Error deleting the filter\n"); - -} -END_TEST - -#ifdef PICO_SUPPORT_MCAST -START_TEST (test_igmp_sockopts) -{ - int i = 0, j = 0, k = 0, ret = 0; - struct pico_socket *s, *s1 = NULL; - struct pico_device *dev = NULL; - union pico_address *source = NULL; - union pico_address inaddr_dst = { - 0 - }, inaddr_incorrect = { - 0 - }, inaddr_uni = { - 0 - }, inaddr_null = { - 0 - }, netmask = { - 0 - }; - union pico_address inaddr_link[2] = {0}; - union pico_address inaddr_mcast[8] = {0}; - union pico_address inaddr_source[8] = {0}; - struct pico_ip_mreq _mreq = {0}, mreq[16] = {0}; - struct pico_ip_mreq_source mreq_source[128] = {0}; - struct pico_tree_node *index = NULL; - - int ttl = 64; - int getttl = 0; - int loop = 9; - int getloop = 0; - union pico_address mcast_def_link = { - 0 - }; - - pico_stack_init(); - - printf("START IGMP SOCKOPTS TEST\n"); - - pico_string_to_ipv4("224.7.7.7", &inaddr_dst.ip4.addr); - pico_string_to_ipv4("10.40.0.2", &inaddr_uni.ip4.addr); - pico_string_to_ipv4("224.8.8.8", &inaddr_incorrect.ip4.addr); - pico_string_to_ipv4("0.0.0.0", &inaddr_null.ip4.addr); - - pico_string_to_ipv4("10.40.0.1", &inaddr_link[0].ip4.addr); /* 0 */ - 
pico_string_to_ipv4("10.50.0.1", &inaddr_link[1].ip4.addr); /* 1 */ - - pico_string_to_ipv4("232.1.1.0", &inaddr_mcast[0].ip4.addr); /* 0 */ - pico_string_to_ipv4("232.2.2.1", &inaddr_mcast[1].ip4.addr); /* 1 */ - pico_string_to_ipv4("232.3.3.2", &inaddr_mcast[2].ip4.addr); /* 2 */ - pico_string_to_ipv4("232.4.4.3", &inaddr_mcast[3].ip4.addr); /* 3 */ - pico_string_to_ipv4("232.5.5.4", &inaddr_mcast[4].ip4.addr); /* 4 */ - pico_string_to_ipv4("232.6.6.5", &inaddr_mcast[5].ip4.addr); /* 5 */ - pico_string_to_ipv4("232.7.7.6", &inaddr_mcast[6].ip4.addr); /* 6 */ - pico_string_to_ipv4("232.8.8.7", &inaddr_mcast[7].ip4.addr); /* 7 */ - - pico_string_to_ipv4("10.40.1.0", &inaddr_source[0].ip4.addr); /* 0 */ - pico_string_to_ipv4("10.40.1.1", &inaddr_source[1].ip4.addr); /* 1 */ - pico_string_to_ipv4("10.40.1.2", &inaddr_source[2].ip4.addr); /* 2 */ - pico_string_to_ipv4("10.40.1.3", &inaddr_source[3].ip4.addr); /* 3 */ - pico_string_to_ipv4("10.40.1.4", &inaddr_source[4].ip4.addr); /* 4 */ - pico_string_to_ipv4("10.40.1.5", &inaddr_source[5].ip4.addr); /* 5 */ - pico_string_to_ipv4("10.40.1.6", &inaddr_source[6].ip4.addr); /* 6 */ - pico_string_to_ipv4("10.40.1.7", &inaddr_source[7].ip4.addr); /* 7 */ - - /* 00 01 02 03 04 05 06 07 | 10 11 12 13 14 15 16 17 */ - for (i = 0; i < 16; i++) { - mreq[i].mcast_link_addr = inaddr_link[i / 8]; - mreq[i].mcast_group_addr = inaddr_mcast[i % 8]; - } - /* 000 001 002 003 004 005 006 007 | 010 011 012 013 014 015 016 017 */ - for (i = 0; i < 16; i++) { - for (j = 0; j < 8; j++) { - /* printf(">>>>> mreq_source[%d]: link[%d] mcast[%d] source[%d]\n", (i*8)+j, i/8, i%8, j); */ - mreq_source[(i * 8) + j].mcast_link_addr = inaddr_link[i / 8]; - mreq_source[(i * 8) + j].mcast_group_addr = inaddr_mcast[i % 8]; - mreq_source[(i * 8) + j].mcast_source_addr = inaddr_source[j]; - } - } - dev = pico_null_create("dummy0"); - netmask.ip4.addr = long_be(0xFFFF0000); - ret = pico_ipv4_link_add(dev, inaddr_link[0].ip4, netmask.ip4); - fail_if(ret < 
0, "link add failed"); - - dev = pico_null_create("dummy1"); - netmask.ip4.addr = long_be(0xFFFF0000); - ret = pico_ipv4_link_add(dev, inaddr_link[1].ip4, netmask.ip4); - fail_if(ret < 0, "link add failed"); - - s = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, NULL); - fail_if(s == NULL, "UDP socket open failed"); - s1 = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, NULL); - fail_if(s1 == NULL, "UDP socket open failed"); - - /* argument validation tests */ - printf("IGMP SETOPTION ARGUMENT VALIDATION TEST\n"); - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_IF, &mcast_def_link); - fail_if(ret == 0, "unsupported PICO_IP_MULTICAST_IF succeeded\n"); - ret = pico_socket_getoption(s, PICO_IP_MULTICAST_IF, &mcast_def_link); - fail_if(ret == 0, "unsupported PICO_IP_MULTICAST_IF succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_TTL, &ttl); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_TTL failed\n"); - - ret = pico_socket_getoption(s, PICO_IP_MULTICAST_TTL, &getttl); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_TTL failed\n"); - fail_if(getttl != ttl, "setoption ttl != getoption ttl\n"); - - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_LOOP, &loop); - fail_if(ret == 0, "PICO_IP_MULTICAST_LOOP succeeded with invalid (not 0 or 1) loop value\n"); - loop = 0; - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_LOOP, &loop); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_LOOP failed disabling\n"); - ret = pico_socket_getoption(s, PICO_IP_MULTICAST_LOOP, &getloop); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_LOOP failed getting value\n"); - fail_if(getloop != loop, "setoption loop != getoption loop\n"); - _mreq.mcast_group_addr = inaddr_dst; - _mreq.mcast_link_addr = inaddr_link[0]; - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "supported PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "supported PICO_IP_DROP_MEMBERSHIP 
failed\n"); - _mreq.mcast_group_addr = inaddr_dst; - _mreq.mcast_link_addr = inaddr_null; - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed with valid NULL (use default) link address\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed with valid NULL (use default) link address\n"); - _mreq.mcast_group_addr = inaddr_uni; - _mreq.mcast_link_addr = inaddr_link[0]; - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded with invalid (unicast) group address\n"); - _mreq.mcast_group_addr = inaddr_null; - _mreq.mcast_link_addr = inaddr_link[0]; - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded with invalid (NULL) group address\n"); - _mreq.mcast_group_addr = inaddr_dst; - _mreq.mcast_link_addr = inaddr_uni; - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded with invalid link address\n"); - _mreq.mcast_group_addr = inaddr_incorrect; - _mreq.mcast_link_addr = inaddr_link[0]; - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (not added) group address\n"); - _mreq.mcast_group_addr = inaddr_uni; - _mreq.mcast_link_addr = inaddr_link[0]; - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (unicast) group address\n"); - _mreq.mcast_group_addr = inaddr_null; - _mreq.mcast_link_addr = inaddr_link[0]; - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (NULL) group address\n"); - _mreq.mcast_group_addr = inaddr_dst; - _mreq.mcast_link_addr = inaddr_uni; - ret = pico_socket_setoption(s, 
PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (unicast) link address\n"); - - /* flow validation tests */ - printf("IGMP SETOPTION FLOW VALIDATION TEST\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed with err %s\n", strerror(pico_err)); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); 
- ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, 
"PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret 
< 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_UNBLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - /* stress tests */ - printf("IGMP SETOPTION STRESS TEST\n"); - for (k = 0; k < 2; k++) { - /* ADD for even combinations of group and link, ADD_SOURCE for uneven */ - for (i = 0; i < 16; i++) { - if (i % 2) { - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[i]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - for (j = 0; j < 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - } - } else { - for (j = 0; j < 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - } - } - } - /* UNBLOCK and DROP for even combinations, DROP_SOURCE for uneven */ - for (i = 0; i < 16; i++) { - if (i % 2) { - for (j = 0; j < 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_UNBLOCK_SOURCE failed\n"); - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[i]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - } else { - for (j = 0; j 
< 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - } - } - } - /* everything should be cleanup up, next iteration will fail if not */ - } - /* filter validation tests */ - printf("IGMP SETOPTION FILTER VALIDATION TEST\n"); - /* INCLUDE + INCLUDE expected filter: source of 0 and 1*/ - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - i = 0; - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 2) - fail("MCASTFilter (INCLUDE + INCLUDE) too many elements\n"); - - source = index->keyValue; - if (source->ip4.addr == mreq_source[0].mcast_source_addr.ip4.addr) { /* OK */ - } - else if (source->ip4.addr == mreq_source[1].mcast_source_addr.ip4.addr) { /* OK */ - } - else { - fail("MCASTFilter (INCLUDE + INCLUDE) incorrect\n"); - } - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - /* INCLUDE + EXCLUDE expected filter: source of 2 */ - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, 
PICO_IP_BLOCK_SOURCE, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[2]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - i = 0; - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 1) - fail("MCASTFilter (INCLUDE + EXCLUDE) too many elements\n"); - - source = index->keyValue; - if (source->ip4.addr == mreq_source[2].mcast_source_addr.ip4.addr) { /* OK */ - } - else { - fail("MCASTFilter (INCLUDE + EXCLUDE) incorrect\n"); - } - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - /* EXCLUDE + INCLUDE expected filter: source of 0 and 1 */ - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - i = 0; - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 2) - fail("MCASTFilter (EXCLUDE + INCLUDE) too many elements\n"); - - source = index->keyValue; - if (source->ip4.addr == mreq_source[0].mcast_source_addr.ip4.addr) 
{ /* OK */ - } - else if (source->ip4.addr == mreq_source[1].mcast_source_addr.ip4.addr) { /* OK */ - } - else { - fail("MCASTFilter (EXCLUDE + INCLUDE) incorrect\n"); - } - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - /* EXCLUDE + EXCLUDE expected filter: source of 3 and 4 */ - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[5]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[6]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - i = 0; - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 2) - fail("MCASTFilter (EXCLUDE + EXCLUDE) too many elements\n"); - - source = index->keyValue; - if (source->ip4.addr == mreq_source[3].mcast_source_addr.ip4.addr) { /* OK */ - } - 
else if (source->ip4.addr == mreq_source[4].mcast_source_addr.ip4.addr) { /* OK */ - } - else { - fail("MCASTFilter (EXCLUDE + EXCLUDE) incorrect\n"); - } - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - - ret = pico_socket_close(s); - fail_if(ret < 0, "socket close failed: %s\n", strerror(pico_err)); - ret = pico_socket_close(s1); - fail_if(ret < 0, "socket close failed: %s\n", strerror(pico_err)); -} -END_TEST -#endif - -START_TEST (test_slaacv4) -{ - uint32_t tmp; - struct pico_device *dev; - struct mock_device *mock; - char ip_addr[20]; - uint8_t macaddr1[6] = { - 0xc3, 0, 0, 0xa, 0xc, 0xf - }; - - - - /* verify min boundary*/ - tmp = SLAACV4_CREATE_IPV4(0); - pico_ipv4_to_string(ip_addr, tmp); - printf("IP address generated by slaac: %s\n", ip_addr); - - fail_if(long_be(tmp) < (long_be(SLAACV4_NETWORK) | SLAACV4_MINRANGE)); - - /* verify max boundary*/ - tmp = SLAACV4_CREATE_IPV4(0x00FD); - fail_if(long_be(tmp) > (long_be(SLAACV4_NETWORK) | 0x0000FEFF)); - - /* verify case where dev->eth is NULL */ - dev = pico_null_create("dummy"); - tmp = pico_slaacv4_getip(dev, 0); - fail_if(long_be(tmp) != (long_be(SLAACV4_NETWORK) | SLAACV4_MINRANGE)); - /* verify nominal case; two runs of slaacv4_get_ip need to return same value */ - mock = pico_mock_create(macaddr1); - tmp = pico_slaacv4_getip(mock->dev, 0); - fail_if(tmp != pico_slaacv4_getip(mock->dev, 0)); - -} -END_TEST diff --git a/kernel/picotcp/test/unit/unit_ipv6.c b/kernel/picotcp/test/unit/unit_ipv6.c deleted file mode 100644 index 1d918f1..0000000 --- a/kernel/picotcp/test/unit/unit_ipv6.c +++ /dev/null @@ -1,760 +0,0 @@ - -#ifdef PICO_SUPPORT_IPV6 -START_TEST (test_ipv6) -{ - char ipstr[40] = { - 0 - }; - char ipstr0[] = "2001:0db8:130f:0000:0000:09c0:876a:130b"; - char ipstr0_t[] = 
"2001:0db8:130f:0000:0000:09c0:876a:130b"; - char ipstr1[] = "2001:db8:130f:0000:0000:09c0:876a:130b"; - char ipstr1_t[] = "2001:0db8:130f:0000:0000:09c0:876a:130b"; - char ipstr2[] = "2001:b8:130f:0000:0000:09c0:876a:130b"; - char ipstr2_t[] = "2001:00b8:130f:0000:0000:09c0:876a:130b"; - char ipstr3[] = "2001:8:130f:0000:0000:09c0:876a:130b"; - char ipstr3_t[] = "2001:0008:130f:0000:0000:09c0:876a:130b"; - char ipstr4[] = "2001:8:130f:0:0:09c0:876a:130b"; - char ipstr4_t[] = "2001:0008:130f:0000:0000:09c0:876a:130b"; - char ipstr5[] = "2001::8:130f:09c0:876a:130b"; - char ipstr5_t[] = "2001:0000:0000:0008:130f:09c0:876a:130b"; - char ipstr6[] = "2001::8:09c0:876a:130b"; - char ipstr6_t[] = "2001:0000:0000:0000:0008:09c0:876a:130b"; - char ipstr7[] = "2001::8:876a:130b"; - char ipstr7_t[] = "2001:0000:0000:0000:0000:0008:876a:130b"; - char ipstr8[] = "2001::876a:130b"; - char ipstr8_t[] = "2001:0000:0000:0000:0000:0000:876a:130b"; - char ipstr9[] = "ff01::1"; - char ipstr9_t[] = "ff01:0000:0000:0000:0000:0000:0000:0001"; - char ipstr10[] = "::1"; - char ipstr10_t[] = "0000:0000:0000:0000:0000:0000:0000:0001"; - char ipstr11[] = "fe80::"; - char ipstr11_t[] = "fe80:0000:0000:0000:0000:0000:0000:0000"; - char ipstr12[] = "::"; - char ipstr12_t[] = "0000:0000:0000:0000:0000:0000:0000:0000"; - char ipstr13[] = "2001:8:130f::09c0::130b"; /* invalid */ - char ipstr14[] = "2001:8:xxxx::09c0:130b"; /* invalid */ - char ipstr15[] = "2001:8:$$$$::09c0:130b"; /* invalid */ - char ipstr16[] = "2001:8:!@#$::%^&*:()0b"; /* invalid */ - char ipstr17[] = "2001:1"; /* invalid */ - char ipstr18[] = "20010db8:130f:0000:0000:09c0:876a:130b"; /* invalid */ - char ipstr19[] = "20010db8130f0000000009c0876a130b"; /* invalid */ - char ipstr20[] = "2001;0db8;130f;0000;0000;09c0;876a;130b"; /* invalid */ - uint8_t iphex0[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x0d, 0xb8, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x09, 0xc0, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex1[PICO_SIZE_IP6] = { - 0x20, 0x01, 
0x0d, 0xb8, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x09, 0xc0, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex2[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x00, 0xb8, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x09, 0xc0, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex3[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x00, 0x08, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x09, 0xc0, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex4[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x00, 0x08, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x09, 0xc0, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex5[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x13, 0x0f, 0x09, 0xc0, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex6[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x09, 0xc0, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex7[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex8[PICO_SIZE_IP6] = { - 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, 0x6a, 0x13, 0x0b - }; - uint8_t iphex9[PICO_SIZE_IP6] = { - 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 - }; - uint8_t iphex10[PICO_SIZE_IP6] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 - }; - uint8_t iphex11[PICO_SIZE_IP6] = { - 0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - }; - uint8_t iphex12[PICO_SIZE_IP6] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - }; - struct pico_ip6 iphex_a = {{ 0x20, 0x01, 0x0d, 0xb8, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}; - struct pico_ip6 iphex_r = {{ 0x40, 0x02, 0x0d, 0xb8, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}; - struct pico_ip6 iphex_gw = {{ 0x20, 0x01, 0x0d, 0xb8, 0x13, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f }}; 
- struct pico_ip6 nm64 = {{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}; - struct pico_ip6 nm128 = {{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}; - struct pico_ip6 ipaddr = {{0}}; - - struct pico_ip6 _gw, r[IP_TST_SIZ], a[IP_TST_SIZ], gw[IP_TST_SIZ], *source[IP_TST_SIZ]; - struct pico_device *dev[IP_TST_SIZ]; - struct pico_ipv6_link *l[IP_TST_SIZ]; - struct pico_ipv6_link *_link = NULL; - struct pico_ipv6_route *_route = NULL; - char devname[8]; - int ret = 0; - int i = 0; - - pico_stack_init(); - - /* pico_string_to_ipv6 and pico_ipv6_to_string */ - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr0); - pico_string_to_ipv6(ipstr0, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex0, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr0_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr1); - pico_string_to_ipv6(ipstr1, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex1, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr1_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr2); - pico_string_to_ipv6(ipstr2, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex2, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr2_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr3); - pico_string_to_ipv6(ipstr3, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex3, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, 
ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr3_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr4); - pico_string_to_ipv6(ipstr4, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex4, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr4_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr5); - pico_string_to_ipv6(ipstr5, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex5, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr5_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr6); - pico_string_to_ipv6(ipstr6, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex6, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr6_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr7); - pico_string_to_ipv6(ipstr7, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex7, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr7_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr8); - pico_string_to_ipv6(ipstr8, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex8, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr8_t, 40) != 0, "Error ipv6 to string"); - - 
printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr9); - pico_string_to_ipv6(ipstr9, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex9, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr9_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr10); - pico_string_to_ipv6(ipstr10, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex10, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr10_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr11); - pico_string_to_ipv6(ipstr11, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex11, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr11_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 valid conversion of %s\n", ipstr12); - pico_string_to_ipv6(ipstr12, ipaddr.addr); - fail_if(memcmp(ipaddr.addr, iphex12, PICO_SIZE_IP6), "Error string to ipv6"); - pico_ipv6_to_string(ipstr, ipaddr.addr); - printf("pico_ipv6_to_string valid conversion to %s\n", ipstr); - fail_if(strncmp(ipstr, ipstr12_t, 40) != 0, "Error ipv6 to string"); - - printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr13); - ret = pico_string_to_ipv6(ipstr13, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr14); - ret = pico_string_to_ipv6(ipstr14, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr15); - ret = pico_string_to_ipv6(ipstr15, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - 
printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr16); - ret = pico_string_to_ipv6(ipstr16, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr17); - ret = pico_string_to_ipv6(ipstr17, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr18); - ret = pico_string_to_ipv6(ipstr18, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr19); - ret = pico_string_to_ipv6(ipstr19, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - printf("pico_string_to_ipv6 invalid conversion of %s\n", ipstr20); - ret = pico_string_to_ipv6(ipstr20, ipaddr.addr); - fail_if(ret == 0, "Error string to ipv6"); - - /*link_add*/ - for (i = 0; i < 10; ++i) { - snprintf(devname, 8, "nul%d", i); - dev[i] = pico_null_create(devname); - a[i] = iphex_a; - a[i].addr[4] = (uint8_t)(a[i].addr[4] + i); - fail_if(pico_ipv6_link_add(dev[i], a[i], nm64) == NULL, "Error adding link"); - } - /*link_find + link_get + route_add*/ - for (i = 0; i < 10; ++i) { - gw[i] = iphex_gw; - gw[i].addr[4] = (uint8_t)(gw[i].addr[4] + i); - fail_unless(pico_ipv6_link_find(&a[i]) == dev[i], "Error finding link"); - l[i] = pico_ipv6_link_get(&a[i]); - fail_if(l[i] == NULL, "Error getting link"); - r[i] = iphex_r; - r[i].addr[4] = (uint8_t)(r[i].addr[4] + i); - fail_if(pico_ipv6_route_add(r[i], nm128, a[i], 1, l[i]) != 0, "Error adding route"); - } - /*get_gateway*/ - for (i = 0; i < 10; i++) { - _gw = pico_ipv6_route_get_gateway(&r[i]); - fail_if(memcmp(_gw.addr, a[i].addr, PICO_SIZE_IP6) != 0, "Error get gateway: returned wrong route"); - source[i] = pico_ipv6_source_find(&r[i]); - fail_if(memcmp(source[i]->addr, a[i].addr, PICO_SIZE_IP6) != 0, "Error find source: returned wrong route"); - } - /*route_del + link_del*/ - for (i = 0; i < 10; i++) { - fail_if(pico_ipv6_route_del(r[i], nm128, 
a[i], 1, l[i]) != 0, "Error deleting route"); - fail_if(pico_ipv6_link_del(dev[i], a[i]) != 0, "Error deleting link"); - } - /* add 2 links to dev[0] */ - _link = pico_ipv6_link_add(dev[0], a[0], nm64); - fail_if (!_link, "Error adding link"); - _link = pico_ipv6_link_add(dev[0], a[1], nm64); - fail_if (!_link, "Error adding link"); - /* add 2 routes to each of the links */ - ret = pico_ipv6_route_add(r[0], nm128, a[0], 1, l[0]); - fail_if(ret != 0, "Error adding route"); - ret = pico_ipv6_route_add(r[1], nm128, a[0], 1, l[0]); - fail_if(ret != 0, "Error adding route"); - ret = pico_ipv6_route_add(r[2], nm128, a[1], 1, l[1]); - fail_if(ret != 0, "Error adding route"); - ret = pico_ipv6_route_add(r[3], nm128, a[1], 1, l[1]); - fail_if(ret != 0, "Error adding route"); - - /* add 2 links to dev[1] */ - _link = pico_ipv6_link_add(dev[1], a[8], nm64); - fail_if (!_link, "Error adding link"); - _link = pico_ipv6_link_add(dev[1], a[9], nm64); - fail_if (!_link, "Error adding link"); - /* add 2 routes to each of the links */ - ret = pico_ipv6_route_add(r[6], nm128, a[8], 1, l[8]); - fail_if(ret != 0, "Error adding route"); - ret = pico_ipv6_route_add(r[7], nm128, a[8], 1, l[8]); - fail_if(ret != 0, "Error adding route"); - ret = pico_ipv6_route_add(r[8], nm128, a[9], 1, l[9]); - fail_if(ret != 0, "Error adding route"); - ret = pico_ipv6_route_add(r[9], nm128, a[9], 1, l[9]); - fail_if(ret != 0, "Error adding route"); - - /* destroy device, should clean up all links and routes */ - pico_device_destroy(dev[0]); - _link = pico_ipv6_link_get(&a[0]); - fail_if(_link != NULL, "Error destroying device"); - _link = pico_ipv6_link_get(&a[1]); - fail_if(_link != NULL, "Error destroying device"); - _link = pico_ipv6_link_get(&a[8]); - fail_if(_link == NULL, "Error destroying device"); - _link = pico_ipv6_link_get(&a[9]); - fail_if(_link == NULL, "Error destroying device"); - - _route = pico_ipv6_route_find(&r[0]); - fail_if(_route != NULL, "Error destroying device"); - _route = 
pico_ipv6_route_find(&r[1]); - fail_if(_route != NULL, "Error destroying device"); - _route = pico_ipv6_route_find(&r[2]); - fail_if(_route != NULL, "Error destroying device"); - _route = pico_ipv6_route_find(&r[3]); - fail_if(_route != NULL, "Error destroying device"); - - _route = pico_ipv6_route_find(&r[6]); - fail_if(_route == NULL, "Error destroying device"); - _route = pico_ipv6_route_find(&r[7]); - fail_if(_route == NULL, "Error destroying device"); - _route = pico_ipv6_route_find(&r[8]); - fail_if(_route == NULL, "Error destroying device"); - _route = pico_ipv6_route_find(&r[9]); - fail_if(_route == NULL, "Error destroying device"); -} -END_TEST - -#ifdef PICO_SUPPORT_MCAST -START_TEST (test_mld_sockopts) -{ - int i = 0, j = 0, k = 0, ret = 0; - struct pico_socket *s, *s1 = NULL; - struct pico_device *dev = NULL; - union pico_address *source = NULL; - union pico_address inaddr_dst = { - 0 - }, inaddr_incorrect = { - 0 - }, inaddr_uni = { - 0 - }, inaddr_null = { - 0 - }; - struct pico_ip6 netmask = {{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}; - - union pico_address inaddr_link[2] = {0}; - union pico_address inaddr_mcast[8] = {0}; - union pico_address inaddr_source[8] = {0}; - struct pico_ip_mreq _mreq = {0}, mreq[16] = {0}; - struct pico_ip_mreq_source mreq_source[128] = {0}; - struct pico_tree_node *index = NULL; - struct pico_ipv6_link *ret_link = NULL; - int ttl = 64; - int getttl = 0; - int loop = 9; - int getloop = 0; - struct pico_ip6 mcast_def_link = { - 0 - }; - - pico_stack_init(); - - printf("START MLD SOCKOPTS TEST\n"); - - pico_string_to_ipv6("ff00:0:0:0:0:0:e007:707", inaddr_dst.ip6.addr); - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:2", inaddr_uni.ip6.addr); - pico_string_to_ipv6("ff00:0:0:0:0:0:e008:808", inaddr_incorrect.ip6.addr); - pico_string_to_ipv6("::", inaddr_null.ip6.addr); - - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:0001", inaddr_link[0].ip6.addr); /* 0 */ - 
pico_string_to_ipv6("fe80:0:0:0:0:0:a32:0001", inaddr_link[1].ip6.addr); /* 1 */ - - pico_string_to_ipv6("ff00:0:0:0:0:0:e801:100", inaddr_mcast[0].ip6.addr); /* 0 */ - pico_string_to_ipv6("ff00:0:0:0:0:0:e802:201", inaddr_mcast[1].ip6.addr); /* 1 */ - pico_string_to_ipv6("ff00:0:0:0:0:0:e803:302", inaddr_mcast[2].ip6.addr); /* 2 */ - pico_string_to_ipv6("ff00:0:0:0:0:0:e803:403", inaddr_mcast[3].ip6.addr); /* 3 */ - pico_string_to_ipv6("ff00:0:0:0:0:0:e803:504", inaddr_mcast[4].ip6.addr); /* 4 */ - pico_string_to_ipv6("ff00:0:0:0:0:0:e803:605", inaddr_mcast[5].ip6.addr); /* 5 */ - pico_string_to_ipv6("ff00:0:0:0:0:0:e803:706", inaddr_mcast[6].ip6.addr); /* 6 */ - pico_string_to_ipv6("ff00:0:0:0:0:0:e803:807", inaddr_mcast[7].ip6.addr); /* 7 */ - - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:100", inaddr_source[0].ip6.addr); /* 0 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:101", inaddr_source[1].ip6.addr); /* 1 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:102", inaddr_source[2].ip6.addr); /* 2 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:103", inaddr_source[3].ip6.addr); /* 3 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:104", inaddr_source[4].ip6.addr); /* 4 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:105", inaddr_source[5].ip6.addr); /* 5 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:106", inaddr_source[6].ip6.addr); /* 6 */ - pico_string_to_ipv6("fe80:0:0:0:0:0:a28:107", inaddr_source[7].ip6.addr); /* 7 */ - - /* 00 01 02 03 04 05 06 07 | 10 11 12 13 14 15 16 17 */ - for (i = 0; i < 16; i++) { - mreq[i].mcast_link_addr = inaddr_link[i / 8]; - mreq[i].mcast_group_addr = inaddr_mcast[i % 8]; - } - /* 000 001 002 003 004 005 006 007 | 010 011 012 013 014 015 016 017 */ - for (i = 0; i < 16; i++) { - for (j = 0; j < 8; j++) { - /* printf(">>>>> mreq_source[%d]: link[%d] mcast[%d] source[%d]\n", (i*8)+j, i/8, i%8, j); */ - mreq_source[(i * 8) + j].mcast_link_addr = inaddr_link[i / 8]; - mreq_source[(i * 8) + j].mcast_group_addr = inaddr_mcast[i % 8]; - mreq_source[(i * 
8) + j].mcast_source_addr = inaddr_source[j]; - } - } - dev = pico_null_create("dummy0"); - ret_link = pico_ipv6_link_add(dev, inaddr_link[0].ip6, netmask); - fail_if(ret_link == NULL, "link add failed"); - dev = pico_null_create("dummy1"); - ret_link = pico_ipv6_link_add(dev, inaddr_link[1].ip6, netmask); - fail_if(ret_link == NULL, "link add failed"); - - - s = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_UDP, NULL); - fail_if(s == NULL, "UDP socket open failed"); - s1 = pico_socket_open(PICO_PROTO_IPV6, PICO_PROTO_UDP, NULL); - fail_if(s1 == NULL, "UDP socket open failed"); - - - /* argument validation tests */ - printf("MLD SETOPTION ARGUMENT VALIDATION TEST\n"); - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_IF, &mcast_def_link); - fail_if(ret == 0, "unsupported PICO_IP_MULTICAST_IF succeeded\n"); - ret = pico_socket_getoption(s, PICO_IP_MULTICAST_IF, &mcast_def_link); - fail_if(ret == 0, "unsupported PICO_IP_MULTICAST_IF succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_TTL, &ttl); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_TTL failed\n"); - - ret = pico_socket_getoption(s, PICO_IP_MULTICAST_TTL, &getttl); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_TTL failed\n"); - fail_if(getttl != ttl, "setoption ttl != getoption ttl\n"); - - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_LOOP, &loop); - fail_if(ret == 0, "PICO_IP_MULTICAST_LOOP succeeded with invalid (not 0 or 1) loop value\n"); - loop = 0; - ret = pico_socket_setoption(s, PICO_IP_MULTICAST_LOOP, &loop); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_LOOP failed disabling\n"); - ret = pico_socket_getoption(s, PICO_IP_MULTICAST_LOOP, &getloop); - fail_if(ret < 0, "supported PICO_IP_MULTICAST_LOOP failed getting value\n"); - fail_if(getloop != loop, "setoption loop != getoption loop\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_dst.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_link[0].ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, 
PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "supported PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "supported PICO_IP_DROP_MEMBERSHIP failed\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_dst.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_null.ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed with valid NULL (use default) link address\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed with valid NULL (use default) link address\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_uni.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_link[0].ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded with invalid (unicast) group address\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_null.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_link[0].ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded with invalid (NULL) group address\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_dst.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_uni.ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded with invalid link address\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_incorrect.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_link[0].ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (not added) group address\n"); - 
memcpy(&_mreq.mcast_group_addr, &inaddr_uni.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_link[0].ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (unicast) group address\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_null.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_link[0].ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (NULL) group address\n"); - memcpy(&_mreq.mcast_group_addr, &inaddr_dst.ip6, sizeof(struct pico_ip6)); - memcpy(&_mreq.mcast_link_addr, &inaddr_uni.ip6, sizeof(struct pico_ip6)); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &_mreq); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded with invalid (unicast) link address\n"); - /* flow validation tests */ - printf("MLD SETOPTION FLOW VALIDATION TEST\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed with err %s\n", strerror(pico_err)); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); 
- - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret == 0, "PICO_IP_DROP_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, 
"PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, 
&mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret == 0, "PICO_IP_ADD_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_UNBLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_BLOCK_SOURCE succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_UNBLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret == 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP succeeded\n"); - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - /* stress tests */ - - printf("MLD SETOPTION STRESS TEST\n"); - for (k = 0; k < 2; k++) { - /* ADD for even combinations of group and link, ADD_SOURCE for uneven */ - for (i = 0; i < 16; i++) { - if (i % 2) { - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[i]); - fail_if(ret < 0, 
"PICO_IP_ADD_MEMBERSHIP failed\n"); - for (j = 0; j < 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - } - } else { - for (j = 0; j < 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - } - } - } - /* UNBLOCK and DROP for even combinations, DROP_SOURCE for uneven */ - for (i = 0; i < 16; i++) { - if (i % 2) { - for (j = 0; j < 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_UNBLOCK_SOURCE, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_UNBLOCK_SOURCE failed\n"); - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[i]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - } else { - for (j = 0; j < 8; j++) { - ret = pico_socket_setoption(s, PICO_IP_DROP_SOURCE_MEMBERSHIP, &mreq_source[(i * 8) + j]); - fail_if(ret < 0, "PICO_IP_DROP_SOURCE_MEMBERSHIP failed\n"); - } - } - } - /* everything should be cleanup up, next iteration will fail if not */ - } - /* filter validation tests */ - printf("MLD SETOPTION FILTER VALIDATION TEST\n"); - /* INCLUDE + INCLUDE expected filter: source of 0 and 1*/ - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - i = 0; - - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 2) - fail("MCASTFilter (INCLUDE + INCLUDE) too many elements\n"); - - source = index->keyValue; - if (memcmp(&source->ip6, &mreq_source[0].mcast_source_addr, sizeof(struct pico_ip6)) == 0) { /* OK */ - } - else if (memcmp(&source->ip6, 
&mreq_source[1].mcast_source_addr, sizeof(struct pico_ip6)) == 0) { /* OK */ - } - else { - fail("MCASTFilter (INCLUDE + INCLUDE) incorrect\n"); - } - } - - - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - /* INCLUDE + EXCLUDE expected filter: source of 2 */ - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[2]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - i = 0; - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 1) - fail("MCASTFilter (INCLUDE + EXCLUDE) too many elements\n"); - - source = index->keyValue; - if (memcmp(&source->ip6, &mreq_source[2].mcast_source_addr, sizeof(struct pico_ip6)) == 0) { /* OK */ - } - else { - fail("MCASTFilter (INCLUDE + EXCLUDE) incorrect\n"); - } - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - /* EXCLUDE + INCLUDE expected filter: source of 0 and 1 */ - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 
0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_SOURCE_MEMBERSHIP, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_ADD_SOURCE_MEMBERSHIP failed\n"); - i = 0; - - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 2) - fail("MCASTFilter (EXCLUDE + INCLUDE) too many elements\n"); - - source = index->keyValue; - if (memcmp(&source->ip6, &mreq_source[0].mcast_source_addr, sizeof(struct pico_ip6)) == 0) { /* OK */ - } - else if (memcmp(&source->ip6, &mreq_source[1].mcast_source_addr, sizeof(struct pico_ip6)) == 0) { /* OK */ - } - else { - fail("MCASTFilter (EXCLUDE + INCLUDE) incorrect\n"); - } - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - /* EXCLUDE + EXCLUDE expected filter: source of 3 and 4 */ - ret = pico_socket_setoption(s, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[0]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[1]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = 
pico_socket_setoption(s, PICO_IP_BLOCK_SOURCE, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_ADD_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_ADD_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[3]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[4]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[5]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_BLOCK_SOURCE, &mreq_source[6]); - fail_if(ret < 0, "PICO_IP_BLOCK_SOURCE failed\n"); - i = 0; - pico_tree_foreach(index, &MCASTFilter) - { - if (++i > 2) - fail("MCASTFilter (EXCLUDE + EXCLUDE) too many elements\n"); - - source = index->keyValue; - if (memcmp(&source->ip6, &mreq_source[3].mcast_source_addr, sizeof(struct pico_ip6) == 0)) { /* OK */ - } - else if (memcmp(&source->ip6, &mreq_source[4].mcast_source_addr, sizeof(struct pico_ip6)) == 0) { /* OK */ - } - else { - fail("MCASTFilter (EXCLUDE + EXCLUDE) incorrect\n"); - } - } - ret = pico_socket_setoption(s, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - ret = pico_socket_setoption(s1, PICO_IP_DROP_MEMBERSHIP, &mreq[0]); - fail_if(ret < 0, "PICO_IP_DROP_MEMBERSHIP failed\n"); - - - ret = pico_socket_close(s); - fail_if(ret < 0, "socket close failed: %s\n", strerror(pico_err)); - ret = pico_socket_close(s1); - fail_if(ret < 0, "socket close failed: %s\n", strerror(pico_err)); -} -END_TEST -#endif - - -#endif diff --git a/kernel/picotcp/test/unit/unit_mem_manager.c b/kernel/picotcp/test/unit/unit_mem_manager.c deleted file mode 100644 index 309b633..0000000 --- a/kernel/picotcp/test/unit/unit_mem_manager.c +++ /dev/null @@ -1,2158 +0,0 @@ -/* PicoTCP unit test platform */ -/* How does it works: - * 1. 
Define your unit test function as described in the check manual - * 2. Add your test to the suite in the pico_suite() function - */ - -#include "pico_mm.c" -#include "pico_tree.c" -#include - -volatile pico_err_t pico_err; - -START_TEST (test_compare_slab_keys) -{ - - uint32_t len1 = 1200; - uint32_t len2 = 1600; - uint32_t len3 = 1600; - uint32_t*lenptr1; - uint32_t*lenptr2; - uint32_t*lenptr3; - uint32_t**doublelenptr1; - uint32_t**doublelenptr2; - uint32_t**doublelenptr3; - struct pico_mem_block*block1; - struct pico_mem_block*block2; - struct pico_mem_block*block3; - struct pico_mem_slab_node*node1; - struct pico_mem_slab_node*node2; - struct pico_mem_slab_node*node3; - - /* Dependencies: none */ - printf("\n***************Running test_compare_slab_keys***************\n\n"); - /* Scenario's to test: */ - /* >Compare a large size with a small size */ - /* >Compare a small size with a large size */ - /* >Compare equal sizes */ - /* >Finally, compare with int pointers and with slab_nodes */ - - block1 = pico_zalloc(sizeof(struct pico_mem_block)); - block1->internals.heap_block.size = 1200; - block2 = pico_zalloc(sizeof(struct pico_mem_block)); - block2->internals.heap_block.size = 1600; - block3 = pico_zalloc(sizeof(struct pico_mem_block)); - block3->internals.heap_block.size = 1600; - node1 = pico_zalloc(sizeof(struct pico_mem_slab_node)); - node1->slab = block1; - node2 = pico_zalloc(sizeof(struct pico_mem_slab_node)); - node2->slab = block2; - node3 = pico_zalloc(sizeof(struct pico_mem_slab_node)); - node3->slab = block3; - - lenptr1 = &len1; - lenptr2 = &len2; - lenptr3 = &len3; - doublelenptr1 = &lenptr1; - doublelenptr2 = &lenptr2; - doublelenptr3 = &lenptr3; - - ck_assert(compare_slab_keys(&node1, &node2) > 0); - ck_assert(compare_slab_keys(&node2, &node3) == 0); - ck_assert(compare_slab_keys(&node2, &node1) < 0); - - ck_assert(compare_slab_keys(&doublelenptr1, &doublelenptr2) > 0); - ck_assert(compare_slab_keys(&doublelenptr2, &doublelenptr3) == 0); - 
ck_assert(compare_slab_keys(&doublelenptr2, &doublelenptr1) < 0); - - ck_assert(compare_slab_keys(&doublelenptr1, &node1) == 0); - ck_assert(compare_slab_keys(&node3, &doublelenptr1) < 0); - - pico_free(block1); - pico_free(block2); - pico_free(block3); - pico_free(node1); - pico_free(node2); - pico_free(node3); -} -END_TEST - -START_TEST (test_manager_extra_alloc) -{ - - uint8_t*byteptr; - uint8_t*byteptr1; - uint8_t*byteptr2; - struct pico_mem_block*block; - size_t sizeLeft; - size_t size = 50; - - uint8_t*data0; - uint8_t*data1; - uint8_t*data2; - uint8_t*data3; - - struct pico_mem_manager_extra*heap_page; - struct pico_mem_manager_extra*heap_page2; - - - /* Dependencies: */ - /* >pico_zalloc */ - printf("\n***************Running test_manager_extra_alloc***************\n\n"); - /* Scenario's to test: */ - /* Page with enough space in it passed, space should not be split up further */ - /* Page with not enough space in it passed, manager isn't allowed to alloc further space */ - /* Page with not enough space in it passed, manager is allowed to alloc further space */ - /* Page with enough space in it passed, space should be split up further */ - - sizeLeft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager_extra); - - /* Housekeeping of extra manager page */ - heap_page = pico_zalloc(PICO_MEM_PAGE_SIZE); - heap_page->blocks = 2; - heap_page->timestamp = 12345; - - heap_page->next = NULL; - /* Housekeeping of manager page */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->manager_extra = heap_page; - manager->used_size = 2 * PICO_MEM_PAGE_SIZE; - manager->size = 10 * PICO_MEM_PAGE_SIZE; - manager->first_page = NULL; -/* - byteptr = (uint8_t*) (heap_page+1); - block = (pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = 100; - sizeLeft -= sizeof(pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - byteptr += sizeof(pico_mem_block); - byteptr 
+= block->internals.heap_block.size; - */ - /* First block in extra manager page, unusable due to too small size */ - byteptr = (uint8_t*) (heap_page + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(size / 2); - sizeLeft -= sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - - /* Second block in extra manager page, free with more than enough size */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(100 - size / 2); - sizeLeft -= sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - byteptr1 = byteptr; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - - /* Third block in extra manager page, not free, takes up remainder of the space, minus the space of one extra block */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - /* Size of this block is thus that only one of the two testblocks will fit in the page */ - block->internals.heap_block.size = (uint32_t)(sizeLeft - 2 * sizeof(struct pico_mem_block) - size); - sizeLeft -= sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - byteptr2 = byteptr; - - /* Fourth block in extra manager page, free, large enough size */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - - /* Second block will be used */ - 
data0 = _pico_mem_manager_extra_alloc(heap_page, size); - /* Fourth block will be used */ - data1 = _pico_mem_manager_extra_alloc(heap_page, size); - /* Limit the space */ - manager->size = 2 * PICO_MEM_PAGE_SIZE; - /* No more space for another block, no more space for another page, function should return NULL */ - data2 = _pico_mem_manager_extra_alloc(heap_page, size); - ck_assert(data2 == NULL); - /* Allow more space */ - manager->size = 10 * PICO_MEM_PAGE_SIZE; - /* New page will be allocated, first block in it will be used, the space will be split up properly */ - data2 = _pico_mem_manager_extra_alloc(heap_page, size); - heap_page2 = manager->manager_extra; - ck_assert(heap_page2 != heap_page); - ck_assert(heap_page2->next == heap_page); - ck_assert(manager->used_size == 3 * PICO_MEM_PAGE_SIZE); - data3 = _pico_mem_manager_extra_alloc(heap_page2, size); - - /* Check the buildup of page 1 */ - ck_assert(heap_page->blocks == 4); - ck_assert(heap_page->timestamp == 0); - - block = (struct pico_mem_block*) byteptr1; - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.size == 100 - size / 2); - ck_assert(data0 == (byteptr1 + sizeof(struct pico_mem_block))); - - block = (struct pico_mem_block*) byteptr2; - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.size == size); - ck_assert(data1 == (byteptr2 + sizeof(struct pico_mem_block))); - - /* Check the buildup of page 2 */ - ck_assert(heap_page2->blocks == 2); - ck_assert(heap_page2->timestamp == 0); - - byteptr = (uint8_t*) (heap_page2 + 1); - sizeLeft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager_extra); - block = (struct pico_mem_block*) byteptr; - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.size 
== size); - ck_assert(data2 == (byteptr + sizeof(struct pico_mem_block))); - sizeLeft -= sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - - block = (struct pico_mem_block*) byteptr; - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.size == size); - ck_assert(data3 == (byteptr + sizeof(struct pico_mem_block))); - sizeLeft -= sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - - block = (struct pico_mem_block*) byteptr; - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block->internals.heap_block.size == sizeLeft - sizeof(struct pico_mem_block)); - - /* DEPENDENCY ON CLEANUP */ - pico_mem_deinit(); -} -END_TEST - -START_TEST (test_page0_zalloc) -{ - uint8_t*byteptr; - struct pico_mem_block*block; - size_t size1 = 50; - uint8_t*temp; - size_t sizeLeft; - struct pico_mem_manager_extra*heap_page; - - /* Dependencies: */ - /* >pico_zalloc */ - /* >_pico_mem_manager_extra_alloc() */ - printf("\n***************Running test_page0_zalloc***************\n\n"); - - /* Scenario's to test: */ - /* Empty block somewhere that doesn't fit the needed space */ - /* Empty block somewhere that exactly fits the needed space */ - /* NOTE: Splitting up isn't implemented, assessed as not necessary */ - /* Large empty block in the middle of the heap */ - /* Empty space at the end that needs splitting up */ - - sizeLeft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager); - - /* Memory manager housekeeping */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - byteptr = (uint8_t*) (manager + 1); - /* Block 1: not free, size1 */ - block = (struct pico_mem_block*) byteptr; - 
block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)size1; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - /* Block 2: free, size1/2 */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(size1 / 2); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - /* Block 3: free, size1 */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)size1; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - /* Block 4: free, size1*2 */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(size1 * 2); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - /* Rest of the heap space (free) */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - - pico_mem_page0_zalloc(size1); - pico_mem_page0_zalloc(size1); - pico_mem_page0_zalloc((size_t)size1); - sizeLeft -= sizeof(struct pico_mem_block); - sizeLeft 
-= size1; - - /* Check buildup of heap space */ - byteptr = (uint8_t*) (manager + 1); - block = (struct pico_mem_block*) byteptr; - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_FREE); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block->internals.heap_block.size == sizeLeft - sizeof(struct pico_mem_block)); - - /* Now, fill up the rest of the space minus a few bytes, so that the space can't be split up further */ - /* pico_mem_page0_zalloc(sizeLeft - sizeof(struct pico_mem_block) - 3); */ - pico_mem_page0_zalloc(sizeLeft - sizeof(struct pico_mem_block) - 3); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.size == sizeLeft - sizeof(struct pico_mem_block)); - - pico_free(manager); - - /* Extra scenario's: */ - /* No more space left in the main heap, a second page doesn't exist yet */ - /* No more space left in the main heap, a second heap_page exists (space left doesn't matter, extra_alloc handles that) */ - - /* Manager 
housekeeping */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->manager_extra = NULL; - manager->used_size = PICO_MEM_PAGE_SIZE; - manager->size = 10 * PICO_MEM_PAGE_SIZE; - sizeLeft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager); - - /* Heap space full */ - byteptr = (uint8_t*) (manager + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - - /* Limit manager space */ - manager->size = PICO_MEM_PAGE_SIZE; - /* Try to alloc another block, no more space in manager heap, no more pages can be created: NULL should be returned */ - temp = pico_mem_page0_zalloc(size1); - ck_assert(temp == NULL); - /* Allow more space usage */ - manager->size = 10 * PICO_MEM_PAGE_SIZE; - /* Alloc 2 more blocks */ - pico_mem_page0_zalloc(size1); - pico_mem_page0_zalloc(size1); - - /* Check extra manager page housekeeping */ - ck_assert(manager->manager_extra != NULL); - heap_page = manager->manager_extra; - - ck_assert(heap_page->blocks == 2); - ck_assert(heap_page->next == NULL); - ck_assert(heap_page->timestamp == 0); - ck_assert(manager->used_size == 2 * PICO_MEM_PAGE_SIZE); - - /* Check extra manager page heap */ - block = (struct pico_mem_block*) (heap_page + 1); - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.size == size1); - byteptr = (uint8_t*) block; - byteptr += sizeof(struct pico_mem_block); - byteptr += size1; - block = (struct pico_mem_block*) byteptr; - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.size == size1); - - pico_free(manager->manager_extra); - pico_free(manager); -} -END_TEST - -START_TEST (test_init_page) -{ - uint8_t*byteptr; - uint32_t*lenptr; - 
uint32_t**doublelenptr = &lenptr; - int vlag = 0; - int i; - struct pico_mem_block*intermediate_heap_block; - struct pico_mem_page*page0; - struct pico_mem_block*block; - struct pico_mem_page*page1; - struct pico_mem_page*page2; - struct pico_mem_block*slab; - struct pico_tree_node*tree_node; - struct pico_mem_slab_node*slab_node; - uint32_t slabsize1; - uint32_t slabsize2; - - /* Dependencies: */ - /* >picotree_findNode */ - /* >pico_mem_page0_zalloc */ - printf("\n***************Running test_init_page***************\n\n"); - - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - page0 = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->first_page = page0; - manager->size = 10 * PICO_MEM_PAGE_SIZE; - manager->used_size = 2 * PICO_MEM_PAGE_SIZE; - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - manager->manager_extra = NULL; - page0->next_page = NULL; - - block = (struct pico_mem_block*) (manager + 1); - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block); - - page1 = pico_zalloc(PICO_MEM_PAGE_SIZE); - page2 = pico_zalloc(PICO_MEM_PAGE_SIZE); - - slabsize1 = PICO_MEM_DEFAULT_SLAB_SIZE; - /* Slabsize 975 => 4 slab blocks fit in the page with 44 heap size */ - /* with a minimum heap size of 100, one slab block will be used as heapspace */ - slabsize2 = 975; - - _pico_mem_init_page(page1, slabsize1); - _pico_mem_init_page(page2, slabsize2); - - /* Check the housekeeping of page1 */ - ck_assert(page1->slab_size == slabsize1); - ck_assert(page1->slabs_max == (PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + slabsize1)); - ck_assert(page1->slabs_free == page1->slabs_max); - ck_assert(page1->timestamp == 0); - ck_assert(page1->heap_max_size == (PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct 
pico_mem_block) - page1->slabs_max * (sizeof(struct pico_mem_block) + slabsize1))); - ck_assert(page1->heap_max_free_space == page1->heap_max_size); - - /* Check the housekeeping of page2 */ - ck_assert(page2->slab_size == slabsize2); - ck_assert(page2->slabs_max == (PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + slabsize2)); - ck_assert(page2->slabs_free == page2->slabs_max); - ck_assert(page2->timestamp == 0); - ck_assert(page2->heap_max_size == (PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - page2->slabs_max * (sizeof(struct pico_mem_block) + slabsize2))); - ck_assert(page2->heap_max_free_space == page2->heap_max_size); - - /* Check the housekeeping of the manager, and the page linked list */ - ck_assert(manager->first_page == page2); - ck_assert(page2->next_page == page1); - ck_assert(page1->next_page == page0); - ck_assert(page0->next_page == NULL); - - /* Check the slab_node double linked list for all slabs of page1 */ - byteptr = (uint8_t*) (page1 + 1); - byteptr += sizeof(struct pico_mem_block); - byteptr += page1->heap_max_size; - slab = (struct pico_mem_block*) byteptr; - - lenptr = &slabsize1; - tree_node = pico_tree_findNode(&manager->tree, &doublelenptr); - ck_assert(tree_node != NULL); - slab_node = tree_node->keyValue; - while(slab_node != NULL) - { - if(slab_node->slab == slab) - { - vlag = 1; - break; - } - - slab_node = slab_node->next; - } - ck_assert(vlag != 0); - vlag = 0; - for(i = 0; i < page1->slabs_max - 1; i++) - { - byteptr += sizeof(struct pico_mem_block); - byteptr += page1->slab_size; - slab = (struct pico_mem_block*) byteptr; - slab_node = tree_node->keyValue; - while(slab_node != NULL) - { - if(slab_node->slab == slab) - { - vlag = 1; - break; - } - - slab_node = slab_node->next; - } - ck_assert(vlag != 0); - vlag = 0; - } - /* Check the slab_node double linked list for all slabs of page2 */ - byteptr = 
(uint8_t*) (page2 + 1); - byteptr += sizeof(struct pico_mem_block); - byteptr += page2->heap_max_size; - slab = (struct pico_mem_block*) byteptr; - - lenptr = &slabsize2; - tree_node = pico_tree_findNode(&manager->tree, &doublelenptr); - ck_assert(tree_node != NULL); - slab_node = tree_node->keyValue; - while(slab_node != NULL) - { - if(slab_node->slab == slab) - { - vlag = 1; - break; - } - - slab_node = slab_node->next; - } - ck_assert(vlag != 0); - vlag = 0; - for(i = 0; i < page2->slabs_max - 1; i++) - { - byteptr += sizeof(struct pico_mem_block); - byteptr += page2->slab_size; - slab = (struct pico_mem_block*) byteptr; - slab_node = tree_node->keyValue; - while(slab_node != NULL) - { - if(slab_node->slab == slab) - { - vlag = 1; - break; - } - - slab_node = slab_node->next; - } - ck_assert(vlag != 0); - vlag = 0; - } - /* DEPENDENCY ON CLEANUP */ - pico_mem_deinit(); - - /* Extra scenario: Managerheap almost full (enough space for a slab_node, but not the necessary tree node), try to init a page */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - page0 = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->first_page = page0; - manager->size = 3 * PICO_MEM_PAGE_SIZE; - manager->used_size = 3 * PICO_MEM_PAGE_SIZE; - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - manager->manager_extra = NULL; - page0->next_page = NULL; - - block = (struct pico_mem_block*) (manager + 1); - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block) - (sizeof(struct pico_mem_block) + sizeof(struct pico_tree_node)) - (sizeof(struct pico_mem_block) + sizeof(struct pico_mem_slab_node)); - byteptr = (uint8_t*) (block + 1); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - 
block->internals.heap_block.size = sizeof(struct pico_mem_slab_node); - intermediate_heap_block = block; - byteptr = (uint8_t*) (block + 1); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = sizeof(struct pico_tree_node); - - page1 = pico_zalloc(PICO_MEM_PAGE_SIZE); - _pico_mem_init_page(page1, slabsize1); - - /* Check the housekeeping of page1 */ - ck_assert(page1->slab_size == slabsize1); - ck_assert(page1->slabs_max == 0); - ck_assert(page1->slabs_free == page1->slabs_max); - ck_assert(page1->timestamp == 0); - ck_assert(page1->heap_max_size == (PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - 2 * (sizeof(struct pico_mem_block) + slabsize1))); - ck_assert(page1->heap_max_free_space == page1->heap_max_size); - ck_assert(page1->next_page == page0); - ck_assert(intermediate_heap_block->internals.heap_block.free == HEAP_BLOCK_FREE); - - /* Extra scenario: Managerheap almost full (enough space for a slab_node and a tree node), try to init a page */ - manager->first_page = page0; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - _pico_mem_init_page(page1, slabsize1); - - /* Check the housekeeping of page1 */ - ck_assert(page1->slab_size == slabsize1); - ck_assert(page1->slabs_max == 1); - ck_assert(page1->slabs_free == page1->slabs_max); - ck_assert(page1->timestamp == 0); - ck_assert(page1->heap_max_size == (PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - 2 * (sizeof(struct pico_mem_block) + slabsize1))); - ck_assert(page1->heap_max_free_space == page1->heap_max_size); - ck_assert(page1->next_page == page0); - ck_assert(intermediate_heap_block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - - /* DEPENDENCY ON CLEANUP */ - pico_mem_deinit(); -} 
-END_TEST - -START_TEST (test_mem_init_whitebox) -{ - struct pico_mem_page*page; - int amountOfSlabs; - - /* Dependencies: */ - /* >pico_zalloc */ - /* >_pico_mem_init_page */ - /* >PICO_FREE */ - printf("\n***************Running test test_mem_init_whitebox***************\n\n"); - - /* No manager should be instantiated */ - ck_assert(manager == NULL); - - /* Init memory segment that is too small */ - pico_err = 0; - pico_mem_init(2); - ck_assert(pico_err == PICO_ERR_ENOMEM); - ck_assert(manager == NULL); - - /* Init 10 pages of memory */ - pico_err = 0; - pico_mem_init(10 * PICO_MEM_PAGE_SIZE); - ck_assert(pico_err == 0); - - ck_assert(manager != NULL); - ck_assert(manager->first_page != NULL); - ck_assert(manager->manager_extra == NULL); - ck_assert(manager->size == 10 * PICO_MEM_PAGE_SIZE); - ck_assert(manager->used_size == 2 * PICO_MEM_PAGE_SIZE); - - page = manager->first_page; - amountOfSlabs = (PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block)) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(page->heap_max_size == (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - ((size_t)amountOfSlabs) * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - ck_assert(page->heap_max_free_space == page->heap_max_size); - ck_assert(page->next_page == NULL); - ck_assert(page->slab_size == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(page->slabs_max == amountOfSlabs); - /* printf("free = %u ?= %u = max\n", page->slabs_free, page->slabs_max); */ - ck_assert(page->slabs_free == page->slabs_max); - - pico_mem_deinit(); -} -END_TEST - -START_TEST (test_free_and_merge_heap_block) -{ - - uint8_t*byteptr; - struct pico_mem_block*block; - uint16_t size = 50; - uint32_t sizeLeft; - struct pico_mem_block*block1; - struct pico_mem_block*block2; - struct pico_mem_block*block3; - struct pico_mem_block*block4; - struct pico_mem_page*page; - - /* Dependencies: none */ - 
printf("\n***************Running test_free_and_merge_heap_block***************\n\n"); - - /* Scenario's to test: Structure: |block1|block2|block3|block4|-------|slabs */ - /* free block1 (no merging) */ - /* free block2, check whitespace in block1 (merging before the block) */ - /* free block4, check whitespace after block3 (merging after the block) */ - /* free block3, check whitespace in block1 (merging before and after the block) */ - - - page = pico_zalloc(PICO_MEM_PAGE_SIZE); - page->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page->slabs_max = ((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE)); - page->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - page->heap_max_free_space = page->heap_max_size; - sizeLeft = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - - /* Block 1: */ - byteptr = (uint8_t*) (page + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block1 = block; - /* Block 2: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block2 = block; - /* Block 
3: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block3 = block; - /* Block 4: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block4 = block; - /* Free space: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - /* Slab block 1 housekeeping */ - block = (struct pico_mem_block*) byteptr; - block->type = SLAB_BLOCK_TYPE; - /* Rest: don't care */ - - /* Free Block1: */ - _pico_mem_free_and_merge_heap_block(page, block1); - ck_assert(block1->type == HEAP_BLOCK_TYPE); - ck_assert(block1->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block1->internals.heap_block.size == size); - - /* Free Block2: */ - _pico_mem_free_and_merge_heap_block(page, block2); - ck_assert(block1->type == HEAP_BLOCK_TYPE); - ck_assert(block1->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block1->internals.heap_block.size == sizeof(struct pico_mem_block) + (size_t)(2 * size)); - - /* Free Block4: */ - _pico_mem_free_and_merge_heap_block(page, block4); - ck_assert(block4->type == HEAP_BLOCK_TYPE); - 
ck_assert(block4->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block4->internals.heap_block.size == sizeof(struct pico_mem_block) + size + sizeLeft); - - /* Free Block3: */ - _pico_mem_free_and_merge_heap_block(page, block3); - ck_assert(block1->type == HEAP_BLOCK_TYPE); - ck_assert(block1->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block1->internals.heap_block.size == page->heap_max_size); - /* printf("page->heap_max_size=%u ?= block1.size=%u\n", page->heap_max_size, block1->internals.heap_block.size); */ - - pico_free(page); - - /* Additional scenario to test: |block1|block2|block3|slabs */ - page = pico_zalloc(PICO_MEM_PAGE_SIZE); - page->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page->slabs_max = ((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE)); - page->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - page->heap_max_free_space = page->heap_max_size; - sizeLeft = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - - /* Block 1: */ - byteptr = (uint8_t*) (page + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block) - 2 * (sizeof(struct pico_mem_block) + size)); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block1 = block; - /* Block 2: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = 
HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block2 = block; - /* Block 3: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block3 = block; - /* Slab block 1 housekeeping */ - block = (struct pico_mem_block*) byteptr; - block->type = SLAB_BLOCK_TYPE; - /* Rest: don't care */ - - /* Free block3 */ - _pico_mem_free_and_merge_heap_block(page, block3); - ck_assert(block3->type == HEAP_BLOCK_TYPE); - ck_assert(block3->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block3->internals.heap_block.size == size); - - /* Free block2 */ - _pico_mem_free_and_merge_heap_block(page, block2); - ck_assert(block2->type == HEAP_BLOCK_TYPE); - ck_assert(block2->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block2->internals.heap_block.size == size + sizeof(struct pico_mem_block) + size); - - /* Free block1 */ - _pico_mem_free_and_merge_heap_block(page, block1); - ck_assert(block1->type == HEAP_BLOCK_TYPE); - ck_assert(block1->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block1->internals.heap_block.size == page->heap_max_free_space); - - pico_free(page); -} -END_TEST - -START_TEST (test_determine_max_free_space) -{ - uint32_t temp; - uint8_t*byteptr; - struct pico_mem_block*block; - uint16_t size = 50; - uint32_t sizeLeft; - struct pico_mem_block*block1; - struct pico_mem_block*block2; - struct pico_mem_block*block3; - struct pico_mem_block*block4; - struct pico_mem_page*page; - - /* Dependencies: none */ - 
printf("\n***************Running test_determine_max_free_space***************\n\n"); - - /* Scenario's to test: Structure: |size 50 f|size 100 nf|size 25 f|size 75 nf|nf|slabs */ - /* block4 with size 100 becomes f, previous max free size 50 */ - - /* Page housekeeping */ - page = pico_zalloc(PICO_MEM_PAGE_SIZE); - page->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page->slabs_max = ((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE)); - page->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - page->heap_max_free_space = size; - sizeLeft = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - - /* Block 1: */ - byteptr = (uint8_t*) (page + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block1 = block; - /* Block 2: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(2 * size); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block2 = block; - /* Block 3: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = 
size / 2; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block3 = block; - /* Block 4: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(3 * size / 2); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block4 = block; - /* Rest of the space */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - /* Slab block 1 housekeeping */ - block = (struct pico_mem_block*) byteptr; - block->type = SLAB_BLOCK_TYPE; - /* Rest: don't care */ - - ck_assert(page->heap_max_free_space == size); - _pico_mem_determine_max_free_space(page); - ck_assert(page->heap_max_free_space == 3 * size / 2); - - /* All blocks full: max_free_space = 0 */ - block1->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block2->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block3->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block4->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - temp = _pico_mem_determine_max_free_space(page); - ck_assert(temp == 0); - ck_assert(page->heap_max_free_space == 0); - - pico_free(page); -} -END_TEST - -START_TEST (test_free_slab_block) -{ - - struct pico_mem_page*page0 = pico_zalloc(PICO_MEM_PAGE_SIZE); - struct pico_mem_page*page1 = pico_zalloc(PICO_MEM_PAGE_SIZE); - struct pico_mem_slab_node*slab_node; - struct 
pico_mem_slab_node*original_slab_node; - struct pico_mem_block*block; - struct pico_mem_block*slab_block1; - struct pico_mem_block*original_slab_block; - struct pico_mem_block*slab_block2; - struct pico_tree_node*tree_node; - uint32_t size = 900; - uint32_t*lenptr; - uint32_t**doublelenptr; - uint8_t*byteptr; - - - /* Dependencies: */ - /* >pico_mem_page0_zalloc */ - /* >pico_tree_findNode */ - /* >pico_tree_insert */ - printf("\n***************Running test_free_slab_block***************\n\n"); - - /* Scenario's to test: */ - /* Freeing a block with an existing pico_tree_node */ - /* Freeing a block without an existing pico_tree_node */ - - /* Manager and page housekeepings */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - - - manager->first_page = page0; - manager->size = 10 * PICO_MEM_PAGE_SIZE; - manager->used_size = 3 * PICO_MEM_PAGE_SIZE; - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - manager->manager_extra = NULL; - page0->next_page = page1; - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - - page1->next_page = NULL; - page1->slab_size = size; - page1->slabs_max = 4; - page1->slabs_free = 0; - lenptr = &size; - doublelenptr = &lenptr; - - /* Manager heap space available */ - block = (struct pico_mem_block*) (manager + 1); - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block); - - /* Page 0: one slab free (slab_node exists in tree, for this size) */ - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page0->slabs_max = 2; - page0->slabs_free = 1; - original_slab_block = pico_zalloc(sizeof(struct pico_mem_block)); - original_slab_block->type = SLAB_BLOCK_TYPE; - original_slab_block->internals.slab_block.page = page0; - original_slab_node = pico_mem_page0_zalloc(sizeof(struct pico_mem_slab_node)); - original_slab_block->internals.slab_block.slab_node = original_slab_node; - 
original_slab_node->slab = original_slab_block; - original_slab_node->prev = NULL; - original_slab_node->next = NULL; - /* pico_tree_insert(&manager->tree, original_slab_node); */ - manager_tree_insert(&manager->tree, original_slab_node); - - /* Page 0: one slab not free */ - slab_block1 = pico_zalloc(sizeof(struct pico_mem_block)); - slab_block1->type = SLAB_BLOCK_TYPE; - slab_block1->internals.slab_block.page = page0; - slab_block1->internals.slab_block.slab_node = NULL; - - /* Page 1: all slabs not free, this one will be freed (no node in the tree for this size) */ - slab_block2 = pico_zalloc(sizeof(struct pico_mem_block)); - slab_block2->type = SLAB_BLOCK_TYPE; - slab_block2->internals.slab_block.page = page1; - slab_block2->internals.slab_block.slab_node = NULL; - - /* Free slabs, check page housekeepings */ - _pico_mem_free_slab_block(slab_block1); - _pico_mem_free_slab_block(slab_block2); - ck_assert(page0->slabs_free == page0->slabs_max); - ck_assert(page1->slabs_free == 1); - - /* Check the pico_tree, two nodes should exist, one with 2 slab_nodes, the other with 1 slab_node */ - tree_node = pico_tree_findNode(&manager->tree, original_slab_node); - ck_assert(tree_node != NULL); - ck_assert(tree_node->keyValue != NULL); - slab_node = (struct pico_mem_slab_node*) tree_node->keyValue; - ck_assert(slab_node->prev == NULL); - ck_assert(slab_node->next == original_slab_node); - ck_assert(slab_node->slab == slab_block1); - ck_assert(slab_node->next->prev == slab_node); - ck_assert(slab_node->next->next == NULL); - ck_assert(slab_node->next->slab == original_slab_block); - - tree_node = pico_tree_findNode(&manager->tree, &doublelenptr); - ck_assert(tree_node != NULL); - ck_assert(tree_node->keyValue != NULL); - slab_node = (struct pico_mem_slab_node*) tree_node->keyValue; - ck_assert(slab_node->prev == NULL); - ck_assert(slab_node->next == NULL); - ck_assert(slab_node->slab == slab_block2); - - pico_free(slab_block1); - pico_free(slab_block2); - 
pico_free(original_slab_block); - pico_mem_deinit(); - - /* Extra scenario: Managerheap almost full (enough space for a slab_node, but not the necessary tree node), try to free the slab block */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - page0 = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->first_page = page0; - manager->size = 2 * PICO_MEM_PAGE_SIZE; - manager->used_size = 2 * PICO_MEM_PAGE_SIZE; - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - manager->manager_extra = NULL; - page0->next_page = NULL; - - block = (struct pico_mem_block*) (manager + 1); - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block) - (sizeof(struct pico_mem_block) + sizeof(struct pico_mem_slab_node)); - byteptr = (uint8_t*) (block + 1); - byteptr += block->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = sizeof(struct pico_mem_slab_node); - - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page0->slabs_max = 2; - page0->slabs_free = 1; - slab_block1 = pico_zalloc(sizeof(struct pico_mem_block)); - slab_block1->type = SLAB_BLOCK_TYPE; - slab_block1->internals.slab_block.page = page0; - slab_block1->internals.slab_block.slab_node = NULL; - - _pico_mem_free_slab_block(slab_block1); - - /* Check the housekeeping of page0 */ - ck_assert(page0->slab_size == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(page0->slabs_max == 2); - ck_assert(page0->slabs_free == 2); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(manager->tree.root == &LEAF); - - /* Extra scenario: Managerheap full (not enough space for the slab_node), try to free the slab block */ - block->internals.heap_block.size = sizeof(struct pico_mem_slab_node) - 1; - page0->slabs_free = 1; 
- - _pico_mem_free_slab_block(slab_block1); - - /* Check the housekeeping of page0 */ - ck_assert(page0->slab_size == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(page0->slabs_max == 2); - ck_assert(page0->slabs_free == 2); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_FREE); - - pico_free(slab_block1); - /* DEPENDENCY ON CLEANUP */ - pico_mem_deinit(); -} -END_TEST - -START_TEST (test_zero_initialize) -{ - - size_t i; - size_t size = 100; - size_t leftBound = 5; - size_t rightBound = 5; - size_t uninitialized = 0; - size_t initialized = 0; - char*bytestream; - - /* Dependencies: none */ - printf("\n***************Running test_zero_initialize***************\n\n"); - - /* Scenario's to test: */ - /* >Zero-initializing a NULL pointer */ - /* >Zero-initializing a piece of memory like this: 11111|111111111111111111111|11111 => 11111|0000000000000000000000|11111 */ - - bytestream = pico_zalloc(size); - memset(bytestream, 'a', size); - - _pico_mem_zero_initialize(bytestream + leftBound, size - leftBound - rightBound); - for(i = 0; i < size; i++) - { - if(i < leftBound || i >= size - rightBound) - { - /* printf("Bytestream[%i] = '%c' ?= '%c'\n", i, bytestream[i], 'a'); */ - ck_assert(bytestream[i] == 'a'); - uninitialized++; - } - else - { - /* printf("Bytestream[%i] = '%c' ?= '%c'\n", i, bytestream[i], 0); */ - ck_assert(bytestream[i] == 0); - initialized++; - } - } - ck_assert(uninitialized == leftBound + rightBound); - ck_assert(initialized == size - leftBound - rightBound); - - pico_free(bytestream); -} -END_TEST - -START_TEST (test_find_heap_block) -{ - - uint8_t*byteptr; - struct pico_mem_block*block; - uint16_t size = 50; - uint32_t sizeLeft; - uint8_t*noData; - uint32_t block2Size; - uint8_t*startOfData2; - uint8_t*startOfData1; - struct pico_mem_block*block2; - struct pico_mem_block*block4; - struct pico_mem_page*page; - - /* Dependencies: */ - /* pico_mem_zero_initialize */ - /* pico_mem_determine_max_free_space */ - printf("\n***************Running 
test_find_heap_block***************\n\n"); - - /* Scenario's to test: Structure: [size 25 f| size 50 nf | size 60 f | size 50 nf | free space] */ - /* >Searching for a heap block of len > max_free_space */ - /* >Searching for a heap block of len < max_free_space, block cannot be split up in smaller blocks */ - /* >Searching for a heap block of len < max_free_space, block split up in smaller blocks */ - - /* Page housekeeping */ - page = pico_zalloc(PICO_MEM_PAGE_SIZE); - page->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page->slabs_max = ((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE)); - page->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - page->heap_max_free_space = page->heap_max_size; - sizeLeft = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - (page->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - - /* Block 0: */ - byteptr = (uint8_t*) (page + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = size / 2; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - /* Block 1: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - /* Block 2: */ - block = (struct pico_mem_block*) 
byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(size + size / 5); - block2Size = block->internals.heap_block.size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block2 = block; - /* Block 3: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - /* Free space: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - block4 = block; - /* Slab block 1 housekeeping */ - block = (struct pico_mem_block*) byteptr; - block->type = SLAB_BLOCK_TYPE; - /* Rest: don't care */ - - page->heap_max_free_space = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - noData = _pico_mem_find_heap_block(page, PICO_MEM_DEFAULT_SLAB_SIZE); - startOfData1 = _pico_mem_find_heap_block(page, size); - startOfData2 = _pico_mem_find_heap_block(page, size); - - ck_assert(noData == NULL); - - ck_assert(block2->type == HEAP_BLOCK_TYPE); - ck_assert(block2->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - ck_assert(block2->internals.heap_block.size == block2Size); - ck_assert((uint8_t*) (block2 + 1) == startOfData1); - - ck_assert(block4->type == HEAP_BLOCK_TYPE); - ck_assert(block4->internals.heap_block.free == 
HEAP_BLOCK_NOT_FREE); - ck_assert(block4->internals.heap_block.size == size); - ck_assert((uint8_t*) (block4 + 1) == startOfData2); - - byteptr = (uint8_t*) (block4 + 1); - byteptr += block4->internals.heap_block.size; - sizeLeft -= block4->internals.heap_block.size; - block = (struct pico_mem_block*) byteptr; - ck_assert(block->type == HEAP_BLOCK_TYPE); - ck_assert(block->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block->internals.heap_block.size == sizeLeft - sizeof(struct pico_mem_block)); - - pico_free(page); -} -END_TEST - -START_TEST (test_find_slab) -{ - uint8_t*startOfData2; - uint8_t*noData; - uint32_t size = 900; - uint8_t*startOfData1; - struct pico_mem_block*slab_block1; - struct pico_mem_block*slab_block2; - struct pico_mem_page*page0; - struct pico_mem_block*block; - struct pico_mem_slab_node*slab_node1; - struct pico_mem_slab_node*slab_node2; - - /* Dependencies: */ - /* pico_tree_findNode */ - /* pico_tree_delete */ - /* pico_mem_zero_initialize */ - /* pico_mem_page0_free */ - printf("\n***************Running test_find_slab***************\n\n"); - - /* Scenario's to test */ - /* >The size you request has no slab_nodes available, it returns NULL */ - /* >The size you request has multiple slab nodes available, it returns one */ - /* >The size you request has one slab node available, it returns it and deletes the tree_node */ - - /* Manager housekeeping */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - page0 = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->first_page = page0; - manager->size = 10 * PICO_MEM_PAGE_SIZE; - manager->used_size = 2 * PICO_MEM_PAGE_SIZE; - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - manager->manager_extra = NULL; - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - - /* Manager heap: free */ - block = (struct pico_mem_block*) (manager + 1); - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - 
sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block); - - /* Page 0 housekeeping */ - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page0->slabs_max = 2; - page0->slabs_free = 2; - page0->timestamp = 12345; - page0->next_page = NULL; - - /* Build tree with two slab nodes */ - slab_block1 = pico_zalloc(sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE); - slab_block1->type = SLAB_BLOCK_TYPE; - slab_block1->internals.slab_block.page = page0; - slab_node1 = pico_mem_page0_zalloc(sizeof(struct pico_mem_slab_node)); - slab_block1->internals.slab_block.slab_node = slab_node1; - slab_node1->slab = slab_block1; - slab_node1->prev = NULL; - /* pico_tree_insert(&manager->tree, slab_node1); */ - manager_tree_insert(&manager->tree, slab_node1); - - slab_block2 = pico_zalloc(sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE); - slab_block2->type = SLAB_BLOCK_TYPE; - slab_block2->internals.slab_block.page = page0; - slab_node2 = pico_mem_page0_zalloc(sizeof(struct pico_mem_slab_node)); - slab_node1->next = slab_node2; - slab_block2->internals.slab_block.slab_node = slab_node2; - slab_node2->slab = slab_block2; - slab_node2->prev = slab_node1; - slab_node2->next = NULL; - - /* Find slab with a size for which no tree_node exists */ - noData = _pico_mem_find_slab(size); - /* Find the existing slabs */ - startOfData1 = _pico_mem_find_slab(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(page0->slabs_free == 1); - ck_assert(page0->timestamp == 0); - startOfData2 = _pico_mem_find_slab(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(page0->slabs_free == 0); - - ck_assert(noData == NULL); - /* printf("startOfData1 = %p ?= %p\n", startOfData1, ((uint8_t*) (slab_block1)) + sizeof(pico_mem_block)); */ - ck_assert(startOfData1 == ((uint8_t*) (slab_block1)) + sizeof(struct pico_mem_block)); - /* printf("startOfData2 = %p ?= %p\n", startOfData2, ((uint8_t*) (slab_block2)) + sizeof(pico_mem_block)); */ - ck_assert(startOfData2 == ((uint8_t*) (slab_block2)) + sizeof(struct 
pico_mem_block)); - - /* printf("root=%p, LEAF=%p\n", manager->tree.root, &LEAF); */ - /* TODO: ????? */ - ck_assert(manager->tree.root == &LEAF); - - /* DEPENDENCY ON CLEANUP */ - pico_mem_deinit(); - pico_free(slab_block1); - pico_free(slab_block2); -} -END_TEST - -START_TEST (test_free) -{ - uint8_t*byteptr; - uint32_t sizeLeft2; - uint32_t sizeLeft1; - uint32_t size = 50; - struct pico_mem_block*block; - struct pico_mem_block*block1; - struct pico_mem_block*block2; - struct pico_mem_page*page1; - struct pico_mem_page*page0; - struct pico_mem_block*slab_block1; - - /* Dependencies */ - /* >_pico_mem_free_slab_block */ - /* >_pico_mem_free_and_merge_heap_block */ - /* >_pico_mem_determine_max_free_space */ - - printf("\n***************Running test_free***************\n\n"); - /* Scenario's: */ - /* Request to free a slab block: pico_mem_free_slab_block must be called => cover one case, if it works, then the forwarding has happened correctly */ - /* Request to free a heap block: correct page must be determined and the corresponding heap functions must be called => cover 2 cases in different pages to verify the page search */ - - /* Manager housekeeping */ - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - page0 = pico_zalloc(PICO_MEM_PAGE_SIZE); - page1 = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->first_page = page0; - manager->size = 10 * PICO_MEM_PAGE_SIZE; - manager->used_size = 3 * PICO_MEM_PAGE_SIZE; - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - manager->manager_extra = NULL; - - /* Manager heap: free */ - block = (struct pico_mem_block*) (manager + 1); - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block); - - /* Page 0 housekeeping */ - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page0->timestamp = 12345; - page0->next_page = page1; - page0->slab_size = 
PICO_MEM_DEFAULT_SLAB_SIZE; - page0->slabs_max = ((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE)); - page0->slabs_free = page0->slabs_max; - page0->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page0->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - page0->heap_max_free_space = page0->heap_max_size; - sizeLeft1 = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - (page0->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - - /* Page 1 housekeeping */ - page1->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page1->timestamp = 12345; - page1->next_page = NULL; - page1->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page1->slabs_max = ((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE)); - page1->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page1->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - page1->heap_max_free_space = page1->heap_max_size; - sizeLeft2 = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - (page1->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - - /* Set up the slab block */ - slab_block1 = pico_zalloc(sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE); - slab_block1->type = SLAB_BLOCK_TYPE; - slab_block1->internals.slab_block.page = page0; - slab_block1->internals.slab_block.slab_node = NULL; - page0->slabs_free--; - /* - pico_mem_slab_node* slab_node1 = pico_mem_page0_zalloc(sizeof(pico_mem_slab_node)); - slab_block1->internals.slab_block.slab_node = slab_node1; - slab_node1->slab = slab_block1; - slab_node1->prev = NULL; - slab_node1->next = NULL; - 
pico_tree_insert(&manager->tree, slab_node1); - */ - - /* Set up the two heap blocks */ - /* Block 1: */ - byteptr = (uint8_t*) (page0 + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft1 -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft1 -= block->internals.heap_block.size; - block1 = block; - /* Free space: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft1 - sizeof(struct pico_mem_block)); - sizeLeft1 -= (uint32_t)sizeof(struct pico_mem_block); - byteptr += (uint32_t)sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - /* Block 2: */ - byteptr = (uint8_t*) (page1 + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft2 -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft2 -= block->internals.heap_block.size; - block2 = block; - /* Free space: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft2 - sizeof(struct pico_mem_block)); - sizeLeft2 -= (uint32_t)sizeof(struct pico_mem_block); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - - /* Free the slab block and check it */ - ck_assert(page0->slabs_free == page0->slabs_max - 1); - pico_mem_free(slab_block1 + 1); - ck_assert(page0->slabs_free == page0->slabs_max); - - /* Free heap block 1 and check it */ - 
pico_mem_free(block1 + 1); - ck_assert(block1->type == HEAP_BLOCK_TYPE); - ck_assert(block1->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block1->internals.heap_block.size != size); - ck_assert(block2->internals.heap_block.free == HEAP_BLOCK_NOT_FREE); - - /* Free heap block 2 and check it */ - pico_mem_free(block2 + 1); - ck_assert(block2->type == HEAP_BLOCK_TYPE); - ck_assert(block2->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(block2->internals.heap_block.size != size); - - /* DEPENDENCY ON CLEANUP */ - pico_mem_deinit(); - pico_free(slab_block1); -} -END_TEST - -START_TEST (test_determine_slab_size) -{ - uint32_t slab_size = 1000; - uint32_t slab_size2 = 1400; - size_t result; - - /* Dependencies: */ - /* >_pico_mem_reset_slab_statistics */ - printf("\n***************Running test_determine_slab_size***************\n\n"); - /* Scenario's to test: */ - /* 1: Asking for another slabsize 3 times => switch slab size */ - /* 2: Asking for a bigger slab size => return the bigger slab size, but don't switch the default yet */ - /* 3: After 3 times, switch the size again */ - - _pico_mem_reset_slab_statistics(); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == 1200); - ck_assert(slab_size_global == 1200); - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == 1200); - ck_assert(slab_size_global == 1200); - result = 
_pico_mem_determine_slab_size(slab_size); - ck_assert(result == 1200); - ck_assert(slab_size_global == 1200); - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == 1200); - ck_assert(slab_size_global == 1200); - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == 1200); - ck_assert(slab_size_global == 1200); - - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1200); - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1200); - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1200); - - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1400); - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1400); - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1400); - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1400); - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1400); - - result = _pico_mem_determine_slab_size(slab_size); - ck_assert(result == 1400); - ck_assert(slab_size_global == 1400); - result = _pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == 1400); - result = _pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == 1400); - result = _pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == 1400); - - result = 
_pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - result = _pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - result = _pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - result = _pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - result = _pico_mem_determine_slab_size(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); - - result = _pico_mem_determine_slab_size(slab_size2); - ck_assert(result == PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(slab_size_global == PICO_MEM_DEFAULT_SLAB_SIZE); -} -END_TEST - -START_TEST (test_zalloc) -{ - - uint8_t*byteptr; - uint32_t oldHeapSize; - uint32_t sizeLeft1; - uint32_t size = 50; - uint32_t slabsize = 1200; - struct pico_mem_block*block; - struct pico_mem_block*slab_block; - struct pico_mem_page*page2; - struct pico_mem_page*page0; - struct pico_mem_page*page1; - struct pico_mem_slab_node*slab_node1; - - /* Dependencies: */ - /* >_pico_mem_determine_slab_size */ - /* >_pico_mem_find_slab */ - /* >_pico_zalloc */ - /* >_pico_mem_init_page */ - /* >_pico_mem_find_heap_block */ - printf("\n***************Running test_zalloc***************\n\n"); - /* Scenario's to test: */ - /* >0: Manager NULL or len>PICO_MAX_SLAB_SIZE */ - /* >1: Alloc for a slab: 1 exists */ - /* >2: Alloc for a slab: none exists but new page can be created */ - /* >3: Alloc for a slab: none exists and no new pages can be created */ - /* >4: Alloc for a heap block: 1 exists in a page somewhere */ 
- /* >5: Alloc for a heap block: none exists but new page can be created */ - /* >6: Alloc for a heap block: none exists and no new pages can be created, and a slab block is free (then we know the correct function is called, no need to test the case of a non-existing slab) */ - /* >7: Another default slabsize; a new page must be created with this size */ - /* >8: Request for a heap size of less than the minimum object size must still result in an allocation of the minimum object size */ - - - /* Scenario 0, part 1: manager = NULL */ - printf("SCENARIO 0\n"); - byteptr = pico_mem_zalloc(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(byteptr == NULL); - - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - page0 = pico_zalloc(PICO_MEM_PAGE_SIZE); - manager->first_page = page0; - manager->size = 3 * PICO_MEM_PAGE_SIZE; - manager->used_size = 2 * PICO_MEM_PAGE_SIZE; - manager->tree.compare = compare_slab_keys; - manager->tree.root = &LEAF; - manager->manager_extra = NULL; - - - block = (struct pico_mem_block*) (manager + 1); - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_FREE; - block->internals.heap_block.size = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager) - sizeof(struct pico_mem_block); - - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page0->timestamp = 12345; - page0->next_page = NULL; - page0->slab_size = PICO_MEM_DEFAULT_SLAB_SIZE; - page0->slabs_max = ((PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - PICO_MIN_HEAP_SIZE) / (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE)); - page0->slabs_free = page0->slabs_max; - page0->heap_max_size = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - sizeof(struct pico_mem_block) - (page0->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - /* page0->heap_max_free_space = page0->heap_max_size; */ - page0->heap_max_free_space = 0; - sizeLeft1 = (uint32_t)(PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_page) - 
(page0->slabs_max * (sizeof(struct pico_mem_block) + PICO_MEM_DEFAULT_SLAB_SIZE))); - - /* Set up the blocks */ - /* Block 1: */ - byteptr = (uint8_t*) (page0 + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft1 -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft1 -= block->internals.heap_block.size; - /* Free space: */ - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft1 - sizeof(struct pico_mem_block)); - sizeLeft1 -= (uint32_t)sizeof(struct pico_mem_block); - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - /* Slab block 1: */ - slab_block = (struct pico_mem_block*) byteptr; - slab_block->type = SLAB_BLOCK_TYPE; - slab_block->internals.slab_block.page = page0; - slab_block->internals.slab_block.slab_node = NULL; - byteptr += sizeof(struct pico_mem_block); - byteptr += PICO_MEM_DEFAULT_SLAB_SIZE; - page0->slabs_free--; - /* Slab slab_block 2: */ - slab_block = (struct pico_mem_block*) byteptr; - /* TODO: INVALID WRITE HERE: */ - slab_block->type = SLAB_BLOCK_TYPE; - slab_block->internals.slab_block.page = page0; - slab_node1 = pico_mem_page0_zalloc(sizeof(struct pico_mem_slab_node)); - slab_node1->slab = slab_block; - slab_node1->next = NULL; - slab_node1->prev = NULL; - slab_block->internals.slab_block.slab_node = slab_node1; - /* DEPENDENCY */ - /* pico_tree_insert(&manager->tree, slab_node1); */ - manager_tree_insert(&manager->tree, slab_node1); - - /* Scenario 0, part 2: len>PICO_MAX_SLAB_SIZE */ - byteptr = pico_mem_zalloc(PICO_MAX_SLAB_SIZE + 1); - ck_assert(byteptr == NULL); - /* Scenario 1: Ask for an existing slab block */ - printf("SCENARIO 
1\n"); - byteptr = pico_mem_zalloc(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(byteptr == (uint8_t*) (slab_block + 1)); - /* Scenario 2: Ask for another slab block; a new page can be created */ - printf("SCENARIO 2\n"); - byteptr = pico_mem_zalloc(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(manager->used_size == 3 * PICO_MEM_PAGE_SIZE); - page1 = manager->first_page; - ck_assert(page1->next_page == page0); - ck_assert((uint8_t*) page1 < byteptr); - ck_assert(byteptr < ((uint8_t*) page1) + PICO_MEM_PAGE_SIZE); - /* Setup for scenario 3: */ - pico_mem_zalloc(PICO_MEM_DEFAULT_SLAB_SIZE); - /* Scenario 3: Ask for another slab block: no new page can be created, NULL should be returned */ - printf("SCENARIO 3\n"); - byteptr = pico_mem_zalloc(PICO_MEM_DEFAULT_SLAB_SIZE); - ck_assert(byteptr == NULL); - ck_assert(manager->used_size == 3 * PICO_MEM_PAGE_SIZE); - /* Scenario 4: Ask for an existing heap block */ - printf("SCENARIO 4\n"); - byteptr = pico_mem_zalloc(page1->heap_max_free_space); - /* TODO: Why? */ - /* byteptr = pico_mem_zalloc(page1->heap_max_free_space%4); */ - ck_assert(page1->heap_max_free_space == 0); - ck_assert((uint8_t*) page1 < byteptr); - ck_assert(byteptr < ((uint8_t*) page1) + PICO_MEM_PAGE_SIZE); - /* Setup for scenario 5: */ - manager->size += PICO_MEM_PAGE_SIZE; - /* Scenario 5; Ask for a heap block: none are available but a new page can be created */ - printf("SCENARIO 5\n"); - byteptr = pico_mem_zalloc(page1->heap_max_size); - /* TODO: Why? 
*/ - /* byteptr = pico_mem_zalloc(page1->heap_max_size%4); */ - ck_assert(manager->used_size == 4 * PICO_MEM_PAGE_SIZE); - page2 = manager->first_page; - ck_assert(page2->next_page == page1); - ck_assert(page2->heap_max_free_space == 0); - ck_assert((uint8_t*) page2 < byteptr); - ck_assert(byteptr < ((uint8_t*) page2) + PICO_MEM_PAGE_SIZE); - /* Scenario 6: Ask for a heap block: none are available and no new page can be created, but a slab block is available */ - printf("SCENARIO 6\n"); - byteptr = pico_mem_zalloc(page1->heap_max_size); - ck_assert((uint8_t*) page2 < byteptr); - ck_assert(byteptr < ((uint8_t*) page2) + PICO_MEM_PAGE_SIZE); - /* Scenario 7: A new page with a new slabsize must be created */ - printf("SCENARIO 7\n"); - manager->size += 3 * PICO_MEM_PAGE_SIZE; - byteptr = pico_mem_zalloc(slabsize); - block = (struct pico_mem_block*) (byteptr - sizeof(struct pico_mem_block)); - ck_assert(block->internals.slab_block.page->slab_size == PICO_MEM_DEFAULT_SLAB_SIZE); - byteptr = pico_mem_zalloc(slabsize); - block = (struct pico_mem_block*) (byteptr - sizeof(struct pico_mem_block)); - ck_assert(block->internals.slab_block.page->slab_size == PICO_MEM_DEFAULT_SLAB_SIZE); - byteptr = pico_mem_zalloc(slabsize); - block = (struct pico_mem_block*) (byteptr - sizeof(struct pico_mem_block)); - ck_assert(block->internals.slab_block.page->slab_size == PICO_MEM_DEFAULT_SLAB_SIZE); - /* At this point, a new page should be created with the correct size */ - byteptr = pico_mem_zalloc(slabsize); - block = (struct pico_mem_block*) (byteptr - sizeof(struct pico_mem_block)); - ck_assert(block->internals.slab_block.page->slab_size < PICO_MEM_DEFAULT_SLAB_SIZE); - /* Scenario 8: A request for a heap block of less than PICO_MEM_MINIMUM_OBJECT_SIZE will have its size enlargened */ - printf("SCENARIO 8\n"); - oldHeapSize = manager->first_page->heap_max_free_space; - byteptr = pico_mem_zalloc(1); - ck_assert(oldHeapSize == manager->first_page->heap_max_free_space + sizeof(struct 
pico_mem_block) + PICO_MEM_MINIMUM_OBJECT_SIZE); - - /* - //TODO: Testing of profiling - struct profiling_data profiling_struct; - pico_mem_profile_collect_data(&profiling_struct); - printf("Struct: \n\tfree_heap_space = %u\n\tfree_slab_space = %u\n\tused_heap_space = %u\n\tused_slab_space = %u\n", profiling_struct.free_heap_space, profiling_struct.free_slab_space, profiling_struct.used_heap_space, profiling_struct.used_slab_space); - pico_mem_profile_scan_data(); - */ - - pico_mem_deinit(); -} -END_TEST - -START_TEST (test_page0_free) -{ - - uint32_t sizeLeft; - uint8_t*byteptr; - struct pico_mem_block*block; - struct pico_mem_block*block1; - struct pico_mem_block*block2; - struct pico_mem_block*block3; - struct pico_mem_manager_extra*heap_page; - struct pico_mem_manager_extra*heap_page2; - uint32_t size = 50; - uint32_t blockAmount = 5; - uint32_t blockAmount2 = 8; - - /* Dependencies: none */ - printf("\n***************Running test_page0_free***************\n\n"); - /* Scenario's to test: */ - /* >1: Freeing a block in the manager heap */ - /* >2: Freeing a block in an extra manager page */ - - manager = pico_zalloc(PICO_MEM_PAGE_SIZE); - - sizeLeft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager); - byteptr = (uint8_t*) (manager + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block1 = block; - - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - - heap_page = pico_zalloc(PICO_MEM_PAGE_SIZE); - heap_page2 = pico_zalloc(PICO_MEM_PAGE_SIZE); - 
manager->manager_extra = heap_page; - heap_page->next = heap_page2; - heap_page->blocks = blockAmount; - heap_page2->blocks = blockAmount2; - heap_page2->next = NULL; - - sizeLeft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager_extra); - byteptr = (uint8_t*) (heap_page + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block2 = block; - - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - - sizeLeft = PICO_MEM_PAGE_SIZE - sizeof(struct pico_mem_manager_extra); - byteptr = (uint8_t*) (heap_page2 + 1); - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = size; - byteptr += sizeof(struct pico_mem_block); - byteptr += block->internals.heap_block.size; - sizeLeft -= (uint32_t)sizeof(struct pico_mem_block); - sizeLeft -= block->internals.heap_block.size; - block3 = block; - - block = (struct pico_mem_block*) byteptr; - block->type = HEAP_BLOCK_TYPE; - block->internals.heap_block.free = HEAP_BLOCK_NOT_FREE; - block->internals.heap_block.size = (uint32_t)(sizeLeft - sizeof(struct pico_mem_block)); - - /* Scenario 1 */ - pico_mem_page0_free(block1 + 1); - ck_assert(block1->type == HEAP_BLOCK_TYPE); - ck_assert(block1->internals.heap_block.free == HEAP_BLOCK_FREE); - - /* Scenario 2: */ - pico_mem_page0_free(block2 + 1); - ck_assert(block2->type == HEAP_BLOCK_TYPE); - ck_assert(block2->internals.heap_block.free == HEAP_BLOCK_FREE); - 
ck_assert(heap_page->blocks == blockAmount - 1); - - pico_mem_page0_free(block3 + 1); - ck_assert(block3->type == HEAP_BLOCK_TYPE); - ck_assert(block3->internals.heap_block.free == HEAP_BLOCK_FREE); - ck_assert(heap_page2->blocks == blockAmount2 - 1); - - /* Cleanup */ - pico_free(manager); - pico_free(heap_page); - pico_free(heap_page2); -} -END_TEST - -START_TEST (test_cleanup) -{ - /* Dependencies: */ - /* >pico_tree_findNode */ - /* >pico_tree_delete */ - /* >pico_mem_page0_free */ - /* >PICO_FREE */ - uint32_t timestamp = 1; - struct pico_mem_page*page; - struct pico_mem_manager_extra*heap_page; - - printf("\n***************Running test_cleanup***************\n\n"); - - - timestamp = 1000; - /* Initialized manager has 1 completely empty page */ - pico_mem_init(21 * PICO_MEM_PAGE_SIZE); - /* Page 2: extra page with 1 slab occupied */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, PICO_MEM_DEFAULT_SLAB_SIZE); - page->slabs_free--; - /* Page 3: 1 extra page with some heap occupied */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, PICO_MEM_DEFAULT_SLAB_SIZE); - page->heap_max_free_space--; - /* Page 4: 1 extra page with old timestamp */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, PICO_MEM_DEFAULT_SLAB_SIZE); - page->timestamp = 500; - /* Page 5: 1 extra page with recent timestamp */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, PICO_MEM_DEFAULT_SLAB_SIZE); - page->timestamp = 950; - /* Page 6: 1 extra page with wrong timestamp */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, PICO_MEM_DEFAULT_SLAB_SIZE); - page->timestamp = 1500; - /* Page 7: 1 extra page with same timestamp */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += 
PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, PICO_MEM_DEFAULT_SLAB_SIZE); - page->timestamp = 1000; - /* Page 8: 1 extra page with 1 slab occupied, slabsize 1200 */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, 1200); - page->slabs_free--; - /* Page 9: 1 extra empty page, slabsize 1200 */ - page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - _pico_mem_init_page(page, 1200); - /* 1 empty extra manager page */ - heap_page = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - manager->manager_extra = heap_page; - heap_page->blocks = 0; - heap_page->timestamp = 0; - /* 1 non-empty extra manager page */ - heap_page->next = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - heap_page = heap_page->next; - heap_page->blocks = 1; - heap_page->timestamp = 0; - /* 1 empty extra manager page with old timestamp */ - heap_page->next = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - heap_page = heap_page->next; - heap_page->blocks = 0; - heap_page->timestamp = 500; - /* 1 empty manager page with recent timestamp */ - heap_page->next = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - heap_page = heap_page->next; - heap_page->blocks = 0; - heap_page->timestamp = 950; - /* 1 empty manager page with wrong timestamp */ - heap_page->next = malloc(PICO_MEM_PAGE_SIZE); - manager->used_size += PICO_MEM_PAGE_SIZE; - heap_page = heap_page->next; - heap_page->blocks = 0; - heap_page->timestamp = 1500; - /* END OF MANAGER PAGES */ - heap_page->next = NULL; - - /* Run 1 cleanup */ - pico_mem_cleanup(timestamp); - /* Check all pages and manager pages */ - page = manager->first_page; - /* Page 9: empty with timestamp 0 */ - ck_assert(page->timestamp == timestamp); - page = page->next_page; - /* Page 8: not empty, 1 slab free, slabsize 1200 */ - ck_assert(page->timestamp == 0); - ck_assert(page->slab_size == 
1200); - ck_assert(page->slabs_free == page->slabs_max - 1); - page = page->next_page; - /* Page 7: empty with same timestamp */ - ck_assert(page->timestamp == timestamp); - page = page->next_page; - /* Page 6: empty with wrong timestamp */ - ck_assert(page->timestamp == timestamp); - page = page->next_page; - /* Page 5: empty with recent timestamp */ - ck_assert(page->timestamp != timestamp); - page = page->next_page; - /* Page 4: empty with old timestamp: removed */ - /* Page 3: not empty with 1B less heap */ - ck_assert(page->timestamp == 0); - ck_assert(page->heap_max_free_space == page->heap_max_size - 1); - page = page->next_page; - /* Page 2: not empty with 1 slab occupied */ - ck_assert(page->timestamp == 0); - ck_assert(page->slabs_free == page->slabs_max - 1); - page = page->next_page; - /* Page 1: was empty with timestamp 0 */ - ck_assert(page->timestamp == timestamp); - ck_assert(page->next_page == NULL); - /* Check all manager pages */ - heap_page = manager->manager_extra; - /* Page 1: was empty with timestamp 0 */ - ck_assert(heap_page->timestamp == timestamp); - heap_page = heap_page->next; - /* Page 2: not empty with 1 block occupied */ - ck_assert(heap_page->blocks == 1); - ck_assert(heap_page->timestamp == 0); - heap_page = heap_page->next; - /* Page 3: empty with old timestamp: removed */ - /* Page 4: empty with recent timestamp */ - ck_assert(heap_page->timestamp != timestamp); - heap_page = heap_page->next; - /* Page 5: empty with wrong timestamp */ - ck_assert(heap_page->timestamp == timestamp); - ck_assert(heap_page->next == NULL); - - /* Advance timestamp, run another cleanup */ - timestamp += 500; - pico_mem_cleanup(timestamp); - /* Check all pages and manager pages */ - page = manager->first_page; - /* Page 9: empty with timestamp 0 */ - /* Page 8: not empty with 1 slab occupied, slabsize 1200 */ - ck_assert(page->timestamp == 0); - ck_assert(page->slab_size == 1200); - ck_assert(page->slabs_free == page->slabs_max - 1); - page = 
page->next_page; - /* Page 7: empty with same timestamp */ - /* Page 6: empty with wrong timestamp */ - /* Page 5: empty with recent timestamp */ - /* Page 4: empty with old timestamp: removed */ - /* Page 3: not empty with 1B less heap */ - ck_assert(page->timestamp == 0); - ck_assert(page->heap_max_free_space == page->heap_max_size - 1); - page = page->next_page; - /* Page 2: not empty with 1 slab occupied */ - ck_assert(page->timestamp == 0); - ck_assert(page->slabs_free == page->slabs_max - 1); - ck_assert(page->next_page == NULL); - /* Page 1: was empty with timestamp 0 */ - /* Check all manager pages */ - heap_page = manager->manager_extra; - /* Page 1: was empty with timestamp 0 */ - /* Page 2: not empty with 1 block occupied */ - ck_assert(heap_page->blocks == 1); - ck_assert(heap_page->timestamp == 0); - ck_assert(heap_page->next == NULL); - /* Page 3: empty with old timestamp: removed */ - /* Page 4: empty with recent timestamp */ - /* Page 5: empty with wrong timestamp */ - - /* Still in use: manager page + 1 extra manager page + 3 normal pages */ - ck_assert(manager->used_size == 5 * PICO_MEM_PAGE_SIZE); - - /* Free extra manager page */ - manager->manager_extra->blocks = 0; - - timestamp += 500; - pico_mem_cleanup(timestamp); - ck_assert(manager->used_size == 5 * PICO_MEM_PAGE_SIZE); - ck_assert(manager->manager_extra->timestamp == timestamp); - timestamp += 500; - pico_mem_cleanup(timestamp); - ck_assert(manager->used_size == 4 * PICO_MEM_PAGE_SIZE); - ck_assert(manager->manager_extra == NULL); - - /* Free normal pages */ - page = manager->first_page; - page->slabs_free = page->slabs_max; - page = page->next_page; - page->heap_max_free_space = page->heap_max_size; - page = page->next_page; - page->slabs_free = page->slabs_max; - - timestamp += 500; - pico_mem_cleanup(timestamp); - ck_assert(manager->used_size == 4 * PICO_MEM_PAGE_SIZE); - page = manager->first_page; - ck_assert(page->timestamp == timestamp); - page = page->next_page; - 
ck_assert(page->timestamp == timestamp); - page = page->next_page; - ck_assert(page->timestamp == timestamp); - - timestamp += 500; - pico_mem_cleanup(timestamp); - ck_assert(manager->used_size == PICO_MEM_PAGE_SIZE); - - pico_mem_deinit(); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *mm = tcase_create("Memory_Manager"); - - tcase_add_test(mm, test_compare_slab_keys); - tcase_add_test(mm, test_manager_extra_alloc); - tcase_add_test(mm, test_page0_zalloc); - tcase_add_test(mm, test_init_page); - tcase_add_test(mm, test_mem_init_whitebox ); - tcase_add_test(mm, test_free_and_merge_heap_block); - tcase_add_test(mm, test_determine_max_free_space); - tcase_add_test(mm, test_free_slab_block); - tcase_add_test(mm, test_zero_initialize); - tcase_add_test(mm, test_find_heap_block); - tcase_add_test(mm, test_find_slab); - tcase_add_test(mm, test_free); - tcase_add_test(mm, test_determine_slab_size); - tcase_add_test(mm, test_zalloc); - tcase_add_test(mm, test_page0_free); - tcase_add_test(mm, test_cleanup); - suite_add_tcase(s, mm); - - return s; -} - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/unit/unit_mocks.c b/kernel/picotcp/test/unit/unit_mocks.c deleted file mode 100644 index 2f1381d..0000000 --- a/kernel/picotcp/test/unit/unit_mocks.c +++ /dev/null @@ -1,71 +0,0 @@ -#define BUFLEN (576 + 14 + 20 + 8) - -int mock_print_protocol(uint8_t *buf); -int printbuf(uint8_t *buf, uint32_t len, const char *str, uint8_t printbufactive); -int tick_it(uint32_t nticks); - -int mock_print_protocol(uint8_t *buf) -{ - uint8_t pnr = buf[0x17]; /* protocol number */ - - printf("transport protocol: %s\n", - (pnr == PICO_PROTO_ICMP4 ? "icmp4" : - (pnr == PICO_PROTO_IGMP ? "igmp" : - (pnr == PICO_PROTO_TCP ? "tcp" : - (pnr == PICO_PROTO_UDP ? 
"udp" : - (pnr == PICO_PROTO_ICMP6 ? "icmp6" : - "unknown proto")))))); - return 0; -} - -int printbuf(uint8_t *buf, uint32_t len, const char *str, uint8_t printbufactive) -{ - uint8_t printMethod = 0; - uint32_t cntr = 0; - uint32_t cntr2 = 0; - if((printbufactive) && (printMethod == 0)) { - printf("\n%s:\n", str); - for(cntr = 0; cntr < len; cntr++) { - if((cntr % 8) == 0 && cntr != 0) - printf(" "); - - if((cntr % 16) == 0 && cntr != 0) - printf("\n"); - - if((cntr % 16) == 0) - printf("%03x0 ", cntr2++); - - printf("%02x ", buf[cntr]); - } - printf("\n"); - }else if((printbufactive) && (printMethod == 1)) { - printf("\n%s:\n", str); - printf("Buf = {"); - for(cntr = 0; cntr < len; cntr++) { - if(cntr != 0) - printf(","); - - if((cntr % 16 == 0) && (cntr != 0)) - printf("\n"); - - printf("0x%02x", buf[cntr]); - } - printf("}\n"); - } - - return 0; -} - -#define BUFLEN (576 + 14 + 20 + 8) -#define DHCP_MSG_TYPE_DISCOVER (1) -#define DHCP_MSG_TYPE_OFFER (2) -#define DHCP_MSG_TYPE_REQUEST (3) -#define DHCP_MSG_TYPE_ACK (4) -int tick_it(uint32_t nticks) -{ - uint32_t i = 0; - for (i = 0; i < nticks; i++) { - pico_stack_tick(); - } - return 0; -} diff --git a/kernel/picotcp/test/unit/unit_rbtree.c b/kernel/picotcp/test/unit/unit_rbtree.c deleted file mode 100644 index feb9eeb..0000000 --- a/kernel/picotcp/test/unit/unit_rbtree.c +++ /dev/null @@ -1,93 +0,0 @@ -/* RB tree unit test */ -typedef struct -{ - int value; -}elem; - -int compare(void *a, void *b); - -int compare(void *a, void *b) -{ - return ((elem *)a)->value - ((elem *)b)->value; -} - -static PICO_TREE_DECLARE(test_tree, compare); -static PICO_TREE_DECLARE(test_tree2, compare); -#define RBTEST_SIZE 20000 - -START_TEST (test_rbtree2) -{ - struct pico_tree_node *s; - elem *e; - int i, last; - struct timeval start, end; - gettimeofday(&start, 0); - - srand48(RBTEST_SIZE); /* use test-size as salt */ - for (i = 0; i < (RBTEST_SIZE >> 1); i++) - { - e = malloc(sizeof(elem)); - e->value = (int)lrand48() % 
RBTEST_SIZE; - if (pico_tree_findKey(&test_tree2, e)) { - free(e); - } else { - pico_tree_insert(&test_tree2, e); - } - } - gettimeofday(&end, 0); - printf("Rbtree test 2 inserted %d entries in %d milliseconds\n", RBTEST_SIZE, - (int)((end.tv_sec - start.tv_sec) * 1000 + (end.tv_usec - start.tv_usec) / 1000)); - last = 0; - pico_tree_foreach(s, &test_tree2){ - fail_if (last > ((elem *)(s->keyValue))->value, "error"); - last = ((elem *)(s->keyValue))->value; - } - - gettimeofday(&end, 0); - printf("Rbtree test 2 duration with %d entries: %d milliseconds\n", RBTEST_SIZE, - (int)((end.tv_sec - start.tv_sec) * 1000 + (end.tv_usec - start.tv_usec) / 1000)); - printf("Test finished.\n"); -} -END_TEST - -START_TEST (test_rbtree) -{ - struct pico_tree_node *s, *tmp; - elem t, *e; - int i; - struct timeval start, end; - printf("Started test...\n"); - gettimeofday(&start, 0); - - for (i = 0; i < (RBTEST_SIZE >> 1); i++) { - e = malloc(sizeof(elem)); - e->value = i; - pico_tree_insert(&test_tree, e); - /* RB_INSERT(rbtree, &RBTREE, e); */ - e = malloc(sizeof(elem)); - e->value = (RBTEST_SIZE - 1) - i; - pico_tree_insert(&test_tree, e); - } - i = 0; - pico_tree_foreach(s, &test_tree){ - fail_if (i++ != ((elem *)(s->keyValue))->value, "error"); - } - t.value = RBTEST_SIZE >> 2; - - e = pico_tree_findKey(&test_tree, &t); - fail_if(!e, "Search failed..."); - fail_if(e->value != t.value, "Wrong element returned..."); - - pico_tree_foreach_reverse_safe(s, &test_tree, tmp){ - fail_if(!s, "Reverse safe returned null"); - e = (elem *)pico_tree_delete(&test_tree, s->keyValue); - free(e); - } - - fail_if(!pico_tree_empty(&test_tree), "Not empty"); - gettimeofday(&end, 0); - printf("Rbtree test duration with %d entries: %d milliseconds\n", RBTEST_SIZE, - (int)((end.tv_sec - start.tv_sec) * 1000 + (end.tv_usec - start.tv_usec) / 1000)); - printf("Test finished...\n"); -} -END_TEST diff --git a/kernel/picotcp/test/unit/unit_socket.c b/kernel/picotcp/test/unit/unit_socket.c deleted file 
mode 100644 index ca040bd..0000000 --- a/kernel/picotcp/test/unit/unit_socket.c +++ /dev/null @@ -1,521 +0,0 @@ - -int pico_aodv_init(void) -{ - return 0; -} -START_TEST (test_socket) -{ - int ret = 0; - uint16_t port_be = 0, porta, proto, port_got; - char buf[] = "test"; - struct pico_socket *sk_tcp, *sk_udp, *s, *sl, *sa; - struct pico_device *dev; - struct pico_ip4 inaddr_dst, inaddr_link, inaddr_incorrect, inaddr_uni, inaddr_null, netmask, orig, inaddr_got; - - int getnodelay = -1; - int nodelay = -1; - int count = 0; - - uint32_t getsocket_buffer = 0; - uint32_t socket_buffer = 0; - - pico_stack_init(); - - printf("START SOCKET TEST\n"); - - pico_string_to_ipv4("224.7.7.7", &inaddr_dst.addr); - pico_string_to_ipv4("10.40.0.2", &inaddr_link.addr); - pico_string_to_ipv4("224.8.8.8", &inaddr_incorrect.addr); - pico_string_to_ipv4("0.0.0.0", &inaddr_null.addr); - pico_string_to_ipv4("10.40.0.3", &inaddr_uni.addr); - - dev = pico_null_create("dummy"); - netmask.addr = long_be(0xFFFF0000); - ret = pico_ipv4_link_add(dev, inaddr_link, netmask); - fail_if(ret < 0, "socket> error adding link"); - - - /* socket_open passing wrong parameters */ - s = pico_socket_open(PICO_PROTO_IPV4, 99, NULL); - fail_if(s != NULL, "Error got socket wrong parameters"); - - s = pico_socket_open(PICO_PROTO_IPV4, 0xFFFF, NULL); - fail_if(s != NULL, "Error got socket"); - - s = pico_socket_open(99, PICO_PROTO_UDP, NULL); - fail_if(s != NULL, "Error got socket"); - - s = pico_socket_open(0xFFFF, PICO_PROTO_UDP, NULL); - fail_if(s != NULL, "Error got socket"); - - - sk_tcp = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, NULL); - fail_if(sk_tcp == NULL, "socket> tcp socket open failed"); - - - port_be = short_be(5555); - /* socket_bind passing wrong parameters */ - ret = pico_socket_bind(NULL, &inaddr_link, &port_be); - fail_if(ret == 0, "socket> tcp socket bound wrong parameter"); - ret = pico_socket_bind(sk_tcp, NULL, &port_be); - fail_if(ret == 0, "socket> tcp socket bound wrong 
parameter"); - ret = pico_socket_bind(sk_tcp, &inaddr_link, NULL); - fail_if(ret == 0, "socket> tcp socket bound wrong parameter"); - /* socket_getname passing wrong parameters */ - ret = pico_socket_getname(NULL, &inaddr_link, &port_be, &proto); - fail_if(ret == 0, "socket> tcp socket getname with wrong parameter"); - ret = pico_socket_getname(sk_tcp, NULL, &port_be, &proto); - fail_if(ret == 0, "socket> tcp socket getname with wrong parameter"); - ret = pico_socket_getname(sk_tcp, &inaddr_link, NULL, &proto); - fail_if(ret == 0, "socket> tcp socket getname with wrong parameter"); - ret = pico_socket_getname(sk_tcp, &inaddr_link, &port_be, NULL); - fail_if(ret == 0, "socket> tcp socket getname with wrong parameter"); - /* socket_bind passing correct parameters */ - ret = pico_socket_bind(sk_tcp, &inaddr_link, &port_be); - fail_if(ret < 0, "socket> tcp socket bind failed"); - count = pico_count_sockets(PICO_PROTO_TCP); - printf("Count: %d\n", count); - fail_unless(count == 1); - count = pico_count_sockets(0); - printf("Count: %d\n", count); - fail_unless(count == 1); - - sk_udp = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_UDP, NULL); - fail_if(sk_udp == NULL, "socket> udp socket open failed"); - - port_be = short_be(5555); - ret = pico_socket_bind(sk_udp, &inaddr_link, &port_be); - fail_if(ret < 0, "socket> udp socket bind failed"); - - fail_if (pico_count_sockets(PICO_PROTO_UDP) != 1); - fail_if (pico_count_sockets(0) != 2); - - - ret = pico_socket_getname(sk_udp, &inaddr_got, &port_got, &proto); - fail_if(ret < 0, "socket> udp socket getname failed"); - fail_if(inaddr_got.addr != inaddr_link.addr, "Getname: Address is different"); - fail_if(port_be != port_got, "Getname: Port is different"); - fail_if(proto != PICO_PROTO_IPV4, "Getname: proto is wrong"); - - /* socket_close passing wrong parameter */ - ret = pico_socket_close(NULL); - fail_if(ret == 0, "Error socket close with wrong parameters"); - - - /* socket_connect passing wrong parameters */ - ret = 
pico_socket_connect(sk_udp, NULL, port_be); - fail_if(ret == 0, "Error socket connect with wrong parameters"); - ret = pico_socket_connect(NULL, &inaddr_dst, port_be); - fail_if(ret == 0, "Error socket connect with wrong parameters"); - - /* socket_connect passing correct parameters */ - ret = pico_socket_connect(sk_udp, &inaddr_dst, port_be); - fail_if(ret < 0, "Error socket connect"); - ret = pico_socket_connect(sk_tcp, &inaddr_dst, port_be); - fail_if(ret < 0, "Error socket connect"); - - - /* testing listening socket */ - sl = pico_socket_open(PICO_PROTO_IPV4, PICO_PROTO_TCP, NULL); - fail_if(sl == NULL, "socket> tcp socket open failed"); - port_be = short_be(6666); - ret = pico_socket_bind(sl, &inaddr_link, &port_be); - fail_if(ret < 0, "socket> tcp socket bind failed"); - /* socket_listen passing wrong parameters */ - ret = pico_socket_listen(sl, 0); - fail_if(ret == 0, "Error socket tcp socket listen done, wrong parameter"); - ret = pico_socket_listen(NULL, 10); - fail_if(ret == 0, "Error socket tcp socket listen done, wrong parameter"); - /* socket_listen passing correct parameters */ - ret = pico_socket_listen(sl, 10); - fail_if(ret < 0, "socket> tcp socket listen failed: %s", strerror(pico_err)); - - /* socket_accept passing wrong parameters */ - sa = pico_socket_accept(sl, &orig, NULL); - fail_if(sa != NULL, "Error socket tcp socket accept wrong argument"); - sa = pico_socket_accept(sl, NULL, &porta); - fail_if(sa != NULL, "Error socket tcp socket accept wrong argument"); - /* socket_accept passing correct parameters */ - sa = pico_socket_accept(sl, &orig, &porta); - fail_if(sa == NULL && pico_err != PICO_ERR_EAGAIN, "socket> tcp socket accept failed: %s", strerror(pico_err)); - - ret = pico_socket_close(sl); - fail_if(ret < 0, "socket> tcp socket close failed: %s\n", strerror(pico_err)); - - - /* testing socket read/write */ - /* socket_write passing wrong parameters */ - ret = pico_socket_write(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, 
"Error socket write succeeded, wrong argument\n"); - ret = pico_socket_write(sk_tcp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket write succeeded, wrong argument\n"); - ret = pico_socket_write(sk_tcp, (void *)buf, 0); - fail_if(ret > 0, "Error socket write succeeded, wrong argument\n"); - /* socket_write passing correct parameters */ - ret = pico_socket_write(sk_tcp, (void *)buf, sizeof(buf)); - fail_if(ret < 0, "socket> tcp socket write failed: %s\n", strerror(pico_err)); - /* socket_read passing wrong parameters */ - ret = pico_socket_read(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, "Error socket read succeeded, wrong argument\n"); - ret = pico_socket_read(sk_tcp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket read succeeded, wrong argument\n"); - ret = pico_socket_read(sk_tcp, (void *)buf, 0); - fail_if(ret > 0, "Error socket read succeeded, wrong argument\n"); - /* socket_read passing correct parameters */ - ret = pico_socket_read(sk_tcp, (void *)buf, sizeof(buf)); - fail_if(ret < 0, "socket> tcp socket read failed, ret = %d: %s\n", ret, strerror(pico_err)); /* tcp_recv returns 0 when no frame !? 
*/ - - - /* send/recv */ - /* socket_send passing wrong parameters */ - ret = pico_socket_send(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, "Error socket send succeeded, wrong argument\n"); - ret = pico_socket_send(sk_tcp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket send succeeded, wrong argument\n"); - ret = pico_socket_send(sk_tcp, (void *)buf, 0); - fail_if(ret > 0, "Error socket send succeeded, wrong argument\n"); - /* socket_write passing correct parameters */ - ret = pico_socket_send(sk_tcp, (void *)buf, sizeof(buf)); - fail_if(ret <= 0, "socket> tcp socket send failed: %s\n", strerror(pico_err)); - /* socket_recv passing wrong parameters */ - ret = pico_socket_recv(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, "Error socket recv succeeded, wrong argument\n"); - ret = pico_socket_recv(sk_tcp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket recv succeeded, wrong argument\n"); - ret = pico_socket_recv(sk_tcp, (void *)buf, 0); - fail_if(ret > 0, "Error socket recv succeeded, wrong argument\n"); - /* socket_recv passing correct parameters */ - ret = pico_socket_recv(sk_tcp, (void *)buf, sizeof(buf)); - fail_if(ret < 0, "socket> tcp socket recv failed, ret = %d: %s\n", ret, strerror(pico_err)); /* tcp_recv returns 0 when no frame !? 
*/ - - - /* sendto/recvfrom */ - /* socket_sendto passing wrong parameters */ - ret = pico_socket_sendto(NULL, (void *)buf, sizeof(buf), &inaddr_dst, port_be); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_tcp, NULL, sizeof(buf), &inaddr_dst, port_be); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_tcp, (void *)buf, 0, &inaddr_dst, port_be); - fail_if(ret > 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_tcp, (void *)buf, sizeof(buf), NULL, port_be); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_tcp, (void *)buf, sizeof(buf), &inaddr_dst, 0xFFFF); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - /* socket_write passing correct parameters */ - ret = pico_socket_sendto(sk_tcp, (void *)buf, sizeof(buf), &inaddr_dst, short_be(5555)); - fail_if(ret <= 0, "socket> udp socket sendto failed, ret = %d: %s\n", ret, strerror(pico_err)); - /* socket_recvfrom passing wrong parameters */ - ret = pico_socket_recvfrom(NULL, (void *)buf, sizeof(buf), &orig, &porta); - fail_if(ret >= 0, "Error socket recvfrom succeeded, wrong argument\n"); - ret = pico_socket_recvfrom(sk_tcp, NULL, sizeof(buf), &orig, &porta); - fail_if(ret >= 0, "Error socket recvfrom succeeded, wrong argument\n"); - ret = pico_socket_recvfrom(sk_tcp, (void *)buf, 0, &orig, &porta); - fail_if(ret > 0, "Error socket recvfrom succeeded, wrong argument\n"); - ret = pico_socket_recvfrom(sk_tcp, (void *)buf, sizeof(buf), NULL, &porta); - fail_if(ret > 0, "Error socket recvfrom succeeded, wrong argument\n"); - ret = pico_socket_recvfrom(sk_tcp, (void *)buf, sizeof(buf), &orig, NULL); - fail_if(ret > 0, "Error socket recvfrom succeeded, wrong argument\n"); - /* socket_recvfrom passing correct parameters */ - ret = pico_socket_recvfrom(sk_tcp, (void *)buf, sizeof(buf), &orig, &porta); - fail_if(ret 
!= 0, "socket> tcp socket recvfrom failed, ret = %d: %s\n", ret, strerror(pico_err)); /* tcp_recv returns -1 when no frame !? */ - - - /* testing socket read/write */ - /* socket_write passing wrong parameters */ - ret = pico_socket_write(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, "Error socket write succeeded, wrong argument\n"); - ret = pico_socket_write(sk_udp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket write succeeded, wrong argument\n"); - ret = pico_socket_write(sk_udp, (void *)buf, 0); - fail_if(ret > 0, "Error socket write succeeded, wrong argument\n"); - /* socket_write passing correct parameters */ - ret = pico_socket_write(sk_udp, (void *)buf, sizeof(buf)); - fail_if(ret < 0, "socket> tcp socket write failed: %s\n", strerror(pico_err)); - /* socket_read passing wrong parameters */ - ret = pico_socket_read(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, "Error socket read succeeded, wrong argument\n"); - ret = pico_socket_read(sk_udp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket read succeeded, wrong argument\n"); - ret = pico_socket_read(sk_udp, (void *)buf, 0); - fail_if(ret > 0, "Error socket read succeeded, wrong argument\n"); - ret = pico_socket_read(sk_udp, (void *)buf, 0xFFFF + 1); - fail_if(ret >= 0, "Error socket read succeeded while len was > 0xFFFF"); - /* socket_read passing correct parameters */ - ret = pico_socket_read(sk_udp, (void *)buf, sizeof(buf)); - fail_if(ret != 0, "socket> udp socket read failed, ret = %d: %s\n", ret, strerror(pico_err)); - - - /* send/recv */ - /* socket_send passing wrong parameters */ - ret = pico_socket_send(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, "Error socket send succeeded, wrong argument\n"); - ret = pico_socket_send(sk_udp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket send succeeded, wrong argument\n"); - ret = pico_socket_send(sk_udp, (void *)buf, 0); - fail_if(ret > 0, "Error socket send succeeded, wrong argument\n"); - /* socket_write passing 
correct parameters */ - ret = pico_socket_send(sk_udp, (void *)buf, sizeof(buf)); - fail_if(ret <= 0, "socket> tcp socket send failed: %s\n", strerror(pico_err)); - /* socket_recv passing wrong parameters */ - ret = pico_socket_recv(NULL, (void *)buf, sizeof(buf)); - fail_if(ret == 0, "Error socket recv succeeded, wrong argument\n"); - ret = pico_socket_recv(sk_udp, NULL, sizeof(buf)); - fail_if(ret == 0, "Error socket recv succeeded, wrong argument\n"); - ret = pico_socket_recv(sk_udp, (void *)buf, 0); - fail_if(ret > 0, "Error socket recv succeeded, wrong argument\n"); - ret = pico_socket_recv(sk_udp, (void *)buf, 0xFFFF + 1); - fail_if(ret >= 0, "Error socket recv succeeded while len was > 0xFFFF"); - /* socket_recv passing correct parameters */ - ret = pico_socket_recv(sk_udp, (void *)buf, sizeof(buf)); - fail_if(ret != 0, "socket> udp socket recv failed, ret = %d: %s\n", ret, strerror(pico_err)); - - - /* sendto/recvfrom */ - /* socket_sendto passing wrong parameters */ - ret = pico_socket_sendto(NULL, (void *)buf, sizeof(buf), &inaddr_dst, port_be); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_udp, NULL, sizeof(buf), &inaddr_dst, port_be); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_udp, (void *)buf, 0, &inaddr_dst, port_be); - fail_if(ret > 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_udp, (void *)buf, sizeof(buf), NULL, port_be); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - ret = pico_socket_sendto(sk_udp, (void *)buf, sizeof(buf), &inaddr_dst, 0xFFFF); - fail_if(ret >= 0, "Error socket sendto succeeded, wrong argument\n"); - /* socket_write passing correct parameters */ - ret = pico_socket_sendto(sk_udp, (void *)buf, sizeof(buf), &inaddr_dst, short_be(5555)); - fail_if(ret <= 0, "socket> udp socket sendto failed, ret = %d: %s\n", ret, strerror(pico_err)); - /* socket_recvfrom 
passing wrong parameters */ - ret = pico_socket_recvfrom(NULL, (void *)buf, sizeof(buf), &orig, &porta); - fail_if(ret >= 0, "Error socket recvfrom succeeded, wrong argument\n"); - ret = pico_socket_recvfrom(sk_udp, NULL, sizeof(buf), &orig, &porta); - fail_if(ret >= 0, "Error socket recvfrom succeeded, wrong argument\n"); - ret = pico_socket_recvfrom(sk_udp, (void *)buf, 0xFFFF + 1, &orig, &porta); - fail_if(ret >= 0, "Error socket recvfrom succeeded while len was > 0xFFFF"); - /* socket_recvfrom passing correct parameters */ - ret = pico_socket_recvfrom(sk_udp, (void *)buf, 0, &orig, &porta); - fail_if(ret != 0, "socket> udp socket recvfrom failed, ret = %d: %s\n", ret, strerror(pico_err)); - ret = pico_socket_recvfrom(sk_udp, (void *)buf, sizeof(buf), &orig, &porta); - fail_if(ret != 0, "socket> udp socket recvfrom failed, ret = %d: %s\n", ret, strerror(pico_err)); - - /* temporary fix, until Nagle problems are analyzed and fixed */ - { - nodelay = 0; - ret = pico_socket_setoption(sk_tcp, PICO_TCP_NODELAY, &nodelay); - } - - /* setoption/getoption */ - ret = pico_socket_getoption(sk_tcp, PICO_TCP_NODELAY, &getnodelay); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_TCP_NODELAY failed (err = %s)\n", strerror(pico_err)); - fail_if(getnodelay != 0, "socket> socket_setoption: default PICO_TCP_NODELAY != 0 (nagle disabled by default)\n"); - - nodelay = 1; - ret = pico_socket_setoption(sk_tcp, PICO_TCP_NODELAY, &nodelay); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_TCP_NODELAY failed\n"); - ret = pico_socket_getoption(sk_tcp, PICO_TCP_NODELAY, &getnodelay); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_TCP_NODELAY failed\n"); - fail_if(getnodelay == 0, "socket> socket_setoption: PICO_TCP_NODELAY is off (expected: on!)\n"); - - nodelay = 0; - ret = pico_socket_setoption(sk_tcp, PICO_TCP_NODELAY, &nodelay); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_TCP_NODELAY failed\n"); - ret = 
pico_socket_getoption(sk_tcp, PICO_TCP_NODELAY, &getnodelay); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_TCP_NODELAY failed\n"); - fail_if(getnodelay != 0, "socket> socket_setoption: PICO_TCP_NODELAY is on (expected: off!)\n"); - - - /* Set/get recv buffer (TCP) */ - ret = pico_socket_getoption(sk_tcp, PICO_SOCKET_OPT_RCVBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - fail_if(getsocket_buffer != PICO_DEFAULT_SOCKETQ, - "socket> socket_setoption: default PICO_SOCKET_OPT_SNDBUF != DEFAULT\n"); - - socket_buffer = PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_tcp, PICO_SOCKET_OPT_RCVBUF, &socket_buffer); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - ret = pico_socket_getoption(sk_tcp, PICO_SOCKET_OPT_RCVBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_RCVBUF is != than expected\n"); - - socket_buffer = 2 * PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_tcp, PICO_SOCKET_OPT_RCVBUF, &socket_buffer); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - ret = pico_socket_getoption(sk_tcp, PICO_SOCKET_OPT_RCVBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_RCVBUF is != than expected\n"); - - /* Set/get send buffer (TCP) */ - ret = pico_socket_getoption(sk_tcp, PICO_SOCKET_OPT_SNDBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - fail_if(getsocket_buffer != PICO_DEFAULT_SOCKETQ, - "socket> socket_setoption: default PICO_SOCKET_OPT_SNDBUF != DEFAULT got: %d exp: %d\n", getsocket_buffer, PICO_DEFAULT_SOCKETQ); 
- - socket_buffer = PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_tcp, PICO_SOCKET_OPT_SNDBUF, &socket_buffer); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - ret = pico_socket_getoption(sk_tcp, PICO_SOCKET_OPT_SNDBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_SNDBUF is != than expected\n"); - - socket_buffer = 2 * PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_tcp, PICO_SOCKET_OPT_SNDBUF, &socket_buffer); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - ret = pico_socket_getoption(sk_tcp, PICO_SOCKET_OPT_SNDBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_SNDBUF is != than expected\n"); - - /* Set/get recv buffer (UDP) */ - ret = pico_socket_getoption(sk_udp, PICO_SOCKET_OPT_RCVBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - fail_if(getsocket_buffer != PICO_DEFAULT_SOCKETQ, - "socket> socket_setoption: default PICO_SOCKET_OPT_SNDBUF != DEFAULT\n"); - - socket_buffer = PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_udp, PICO_SOCKET_OPT_RCVBUF, &socket_buffer); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - ret = pico_socket_getoption(sk_udp, PICO_SOCKET_OPT_RCVBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_RCVBUF is != than expected\n"); - - socket_buffer = 2 * PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_udp, PICO_SOCKET_OPT_RCVBUF, &socket_buffer); - 
fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - ret = pico_socket_getoption(sk_udp, PICO_SOCKET_OPT_RCVBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_RCVBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_RCVBUF is != than expected\n"); - - /* Set/get send buffer (UDP) */ - ret = pico_socket_getoption(sk_udp, PICO_SOCKET_OPT_SNDBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - fail_if(getsocket_buffer != PICO_DEFAULT_SOCKETQ, - "socket> socket_setoption: default PICO_SOCKET_OPT_SNDBUF != DEFAULT\n"); - - socket_buffer = PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_udp, PICO_SOCKET_OPT_SNDBUF, &socket_buffer); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - ret = pico_socket_getoption(sk_udp, PICO_SOCKET_OPT_SNDBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_SNDBUF is != than expected\n"); - - socket_buffer = 2 * PICO_DEFAULT_SOCKETQ; - ret = pico_socket_setoption(sk_udp, PICO_SOCKET_OPT_SNDBUF, &socket_buffer); - fail_if(ret < 0, "socket> socket_setoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - ret = pico_socket_getoption(sk_udp, PICO_SOCKET_OPT_SNDBUF, &getsocket_buffer); - fail_if(ret < 0, "socket> socket_getoption: supported PICO_SOCKET_OPT_SNDBUF failed\n"); - fail_if(getsocket_buffer != socket_buffer, "UDP socket> socket_setoption: PICO_SOCKET_OPT_SNDBUF is != than expected\n"); - - /* Close sockets, eventually. 
*/ - ret = pico_socket_close(sk_tcp); - fail_if(ret < 0, "socket> tcp socket close failed: %s\n", strerror(pico_err)); - ret = pico_socket_close(sk_udp); - fail_if(ret < 0, "socket> udp socket close failed: %s\n", strerror(pico_err)); -} -END_TEST - -#ifdef PICO_SUPPORT_CRC_FAULTY_UNIT_TEST -START_TEST (test_crc_check) -{ - uint8_t buffer[64] = { - 0x45, 0x00, 0x00, 0x40, /* start of IP hdr */ - 0x91, 0xc3, 0x40, 0x00, - 0x40, 0x11, 0x24, 0xcf, /* last 2 bytes are CRC */ - 0xc0, 0xa8, 0x01, 0x66, - 0xc0, 0xa8, 0x01, 0x64, /* end of IP hdr */ - 0x15, 0xb3, 0x1F, 0x90, /* start of UDP/TCP hdr */ - 0x00, 0x2c, 0x27, 0x22, /* end of UDP hdr */ - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x0b, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, /* end of TCP hdr */ - 0x01, 0x23, 0x45, 0x67, /* start of data */ - 0x89, 0xab, 0xcd, 0xef, - 0xc0, 0xca, 0xc0, 0x1a - }; - struct pico_frame *f = NULL; - struct pico_ipv4_hdr *hdr = (struct pico_ipv4_hdr *) buffer; - struct pico_udp_hdr *udp_hdr = NULL; - struct pico_tcp_hdr *tcp_hdr = NULL; - uint32_t *f_usage_count = NULL; - uint8_t *f_buffer = NULL; - int ret = -1; - - printf("START CRC TEST\n"); - pico_stack_init(); - - /* IPv4 CRC unit tests */ - /* Allocated memory will not be freed when pico_ipv4_crc_check fails */ - f = calloc(1, sizeof(struct pico_frame)); - f_usage_count = calloc(1, sizeof(uint32_t)); - f_buffer = calloc(1, sizeof(uint8_t)); - f->net_hdr = buffer; - f->net_len = PICO_SIZE_IP4HDR; - f->transport_hdr = buffer + PICO_SIZE_IP4HDR; - f->transport_len = sizeof(buffer) - PICO_SIZE_IP4HDR; - f->usage_count = f_usage_count; - f->buffer = f_buffer; - *(f->usage_count) = 512; - - hdr->crc = 0; - printf(">>>>>>>>>>>>>>>>>>>>> CRC VALUE = %X\n", pico_checksum(hdr, PICO_SIZE_IP4HDR)); - hdr->crc = short_be(0x24CF); /* Make check pass */ - ret = pico_ipv4_crc_check(f); - fail_if(ret == 0, "correct IPv4 checksum got rejected\n"); - hdr->crc = short_be(0x8899); /* 
Make check fail */ - ret = pico_ipv4_crc_check(f); - fail_if(ret == 1, "incorrect IPv4 checksum got accepted\n"); - - /* UDP CRC unit tests */ - /* Allocated memory will be freed when pico_transport_crc_check fails */ - f = calloc(1, sizeof(struct pico_frame)); - f_usage_count = calloc(1, sizeof(uint32_t)); - f_buffer = calloc(1, sizeof(uint8_t)); - f->net_hdr = buffer; - f->transport_hdr = buffer + PICO_SIZE_IP4HDR; - f->transport_len = sizeof(buffer) - PICO_SIZE_IP4HDR; - f->usage_count = f_usage_count; - f->buffer = f_buffer; - *(f->usage_count) = 1; - hdr->proto = 0x11; /* UDP */ - hdr->crc = short_be(0x24cf); /* Set IPv4 CRC correct */ - udp_hdr = (struct pico_udp_hdr *) f->transport_hdr; - - /* udp_hdr->crc = 0; */ - /* printf(">>>>>>>>>>>>>>>>>>>>> UDP CRC VALUE = %X\n", pico_udp_checksum_ipv4(f)); */ - ret = pico_transport_crc_check(f); - fail_if(ret == 0, "correct UDP checksum got rejected\n"); - udp_hdr->crc = 0; - ret = pico_transport_crc_check(f); - fail_if(ret == 0, "UDP checksum of 0 did not get ignored\n"); - udp_hdr->crc = short_be(0x8899); /* Make check fail */ - ret = pico_transport_crc_check(f); - fail_if(ret == 1, "incorrect UDP checksum got accepted\n"); - - /* TCP CRC unit tests */ - /* Allocated memory will be freed when pico_transport_crc_check fails */ - f = calloc(1, sizeof(struct pico_frame)); - f_usage_count = calloc(1, sizeof(uint32_t)); - f_buffer = calloc(1, sizeof(uint8_t)); - f->net_hdr = buffer; - f->transport_hdr = buffer + PICO_SIZE_IP4HDR; - f->transport_len = sizeof(buffer) - PICO_SIZE_IP4HDR; - f->usage_count = f_usage_count; - f->buffer = f_buffer; - *(f->usage_count) = 1; - hdr->proto = 0x06; /* TCP */ - hdr->crc = short_be(0x24cf); /* Set IPv4 CRC correct */ - tcp_hdr = (struct pico_tcp_hdr *) f->transport_hdr; - tcp_hdr->seq = long_be(0x002c2722); /* Set sequence number correct */ - - /* tcp_hdr = 0; */ - /* printf(">>>>>>>>>>>>>>>>>>>>> TCP CRC VALUE = %X\n", pico_tcp_checksum_ipv4(f)); */ - tcp_hdr->crc = 
short_be(0x0016); /* Set correct TCP CRC */ - ret = pico_transport_crc_check(f); - fail_if(ret == 0, "correct TCP checksum got rejected\n"); - tcp_hdr->crc = short_be(0x8899); /* Make check fail */ - ret = pico_transport_crc_check(f); - fail_if(ret == 1, "incorrect TCP checksum got accepted\n"); -} -END_TEST -#endif diff --git a/kernel/picotcp/test/unit/unit_timer.c b/kernel/picotcp/test/unit/unit_timer.c deleted file mode 100644 index e00c811..0000000 --- a/kernel/picotcp/test/unit/unit_timer.c +++ /dev/null @@ -1,40 +0,0 @@ -#define EXISTING_TIMERS 7 - - -START_TEST (test_timers) -{ - uint32_t T[128]; - int i; - struct pico_timer_ref *tref; - pico_stack_init(); - for (i = 0; i < 128; i++) { - pico_time expire = (pico_time)(999999 + i); - void (*timer)(pico_time, void *) =(void (*)(pico_time, void *))0xff00 + i; - void *arg = ((void*)0xaa00 + i); - - T[i] = pico_timer_add(expire, timer, arg); - printf("New timer %u\n", T[i]); - } - for (i = 0; i < 128; i++) { - void (*timer)(pico_time, void *) =(void (*)(pico_time, void *))0xff00 + i; - void *arg = ((void*)0xaa00 + i); - - fail_if((uint32_t)(i + 1) > Timers->n); - tref = heap_get_element(Timers, (uint32_t)i + EXISTING_TIMERS); - fail_unless(tref->id == T[i]); - fail_unless(tref->tmr->timer == timer); - fail_unless(tref->tmr->arg == arg); - } - for (i = 127; i >= 0; i--) { - printf("Deleting timer %d \n", i ); - pico_timer_cancel(T[i]); - printf("Deleted timer %d \n", i ); - tref = heap_get_element(Timers, (uint32_t)i + EXISTING_TIMERS); - fail_unless(tref->tmr == NULL); - } - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); - pico_stack_tick(); -} -END_TEST diff --git a/kernel/picotcp/test/units.c b/kernel/picotcp/test/units.c deleted file mode 100644 index b043b30..0000000 --- a/kernel/picotcp/test/units.c +++ /dev/null @@ -1,233 +0,0 @@ -/* PicoTCP unit test platform */ -/* How does it works: - * 1. Define your unit test function as described in the check manual - * 2. 
Add your test to the suite in the pico_suite() function - */ - - -/* Inclusion of all the modules to test */ -/* This allow direct access to static functions, and also - * by compiling this, the namespace is checked for clashes in - * static symbols. - */ -#include "pico_device.c" -#include "pico_frame.c" -#include "pico_stack.c" -#include "pico_protocol.c" -#include "pico_802154.c" -#include "pico_6lowpan.c" -#include "pico_6lowpan_ll.c" -#include "pico_ipv4.c" -#include "pico_socket.c" -#include "pico_socket_multicast.c" -#include "pico_socket_tcp.c" -#include "pico_socket_udp.c" -#include "pico_dev_null.c" -#include "pico_dev_mock.c" -#include "pico_udp.c" -#include "pico_tcp.c" -#include "pico_arp.c" -#include "pico_icmp4.c" -#include "pico_dns_client.c" -#include "pico_dns_common.c" -#include "pico_dhcp_common.c" -#include "pico_dhcp_server.c" -#include "pico_dhcp_client.c" -#include "pico_nat.c" -#include "pico_ipfilter.c" -#include "pico_tree.c" -#include "pico_slaacv4.c" -#include "pico_hotplug_detection.c" -#ifdef PICO_SUPPORT_MCAST -#include "pico_mcast.c" -#include "pico_igmp.c" -#endif -#ifdef PICO_SUPPORT_IPV6 -#include "pico_ipv6.c" -#include "pico_ipv6_nd.c" -#include "pico_icmp6.c" -#ifdef PICO_SUPPORT_MCAST -#include "pico_mld.c" -#endif -#endif - - -/* Include Check. */ -#include - -/* Inclusion of unit submodules. - * Historically, this code has been part of - * the units.c file. - * Moved for readability of the units. 
- */ -#include "unit_mocks.c" -#include "unit_ipv4.c" -#include "unit_icmp4.c" -#include "unit_dhcp.c" -#include "unit_dns.c" -#include "unit_rbtree.c" -#include "unit_socket.c" -#include "unit_timer.c" -#include "unit_arp.c" -#include "unit_ipv6.c" - -Suite *pico_suite(void); - -START_TEST (test_frame) -{ - struct pico_frame *f1; - struct pico_frame *cpy; - struct pico_frame *deepcpy; - - f1 = pico_frame_alloc(200); - f1->payload = f1->buffer + 32; - f1->net_hdr = f1->buffer + 16; - cpy = pico_frame_copy(f1); - deepcpy = pico_frame_deepcopy(f1); - fail_unless(*f1->usage_count == 2); - fail_unless(*deepcpy->usage_count == 1); - pico_frame_discard(f1); - fail_unless(*cpy->usage_count == 1); - pico_frame_discard(cpy); - fail_unless(*deepcpy->usage_count == 1); - pico_frame_discard(deepcpy); -} -END_TEST - -START_TEST (test_tick) -{ - pico_tick = (uint64_t)-1; - fail_if(pico_tick != 0xFFFFFFFFFFFFFFFF, "Failed to assign (uint64_t)-1 to pico_tick\n"); -} -END_TEST - -Suite *pico_suite(void) -{ - Suite *s = suite_create("PicoTCP"); - - TCase *ipv4 = tcase_create("IPv4"); - TCase *icmp = tcase_create("ICMP4"); - TCase *dhcp = tcase_create("DHCP"); - TCase *dns = tcase_create("DNS"); - TCase *rb = tcase_create("RB TREE"); - TCase *rb2 = tcase_create("RB TREE 2"); - TCase *socket = tcase_create("SOCKET"); - TCase *nat = tcase_create("NAT"); - TCase *ipfilter = tcase_create("IPFILTER"); -#ifdef PICO_SUPPORT_CRC_FAULTY_UNIT_TEST - TCase *crc = tcase_create("CRC"); -#endif - -#ifdef PICO_SUPPORT_MCAST - TCase *igmp = tcase_create("IGMP"); -#endif -#ifdef PICO_SUPPORT_IPV6 - TCase *ipv6 = tcase_create("IPv6"); -#ifdef PICO_SUPPORT_MCAST - TCase *mld = tcase_create("MLD"); -#endif -#endif - - TCase *frame = tcase_create("FRAME"); - TCase *timers = tcase_create("TIMERS"); - TCase *slaacv4 = tcase_create("SLAACV4"); - TCase *tick = tcase_create("pico_tick"); - TCase *arp = tcase_create("ARP"); - tcase_add_test(ipv4, test_ipv4); - tcase_set_timeout(ipv4, 20); - suite_add_tcase(s, 
ipv4); - - tcase_add_test(icmp, test_icmp4_ping); - tcase_add_test(icmp, test_icmp4_incoming_ping); - tcase_add_test(icmp, test_icmp4_unreachable_send); - tcase_add_test(icmp, test_icmp4_unreachable_recv); - suite_add_tcase(s, icmp); - - /* XXX: rewrite test_dhcp_client due to architectural changes to support multiple devices */ - /* tcase_add_test(dhcp, test_dhcp_client); */ - tcase_add_test(dhcp, test_dhcp_client_api); - - tcase_add_test(dhcp, test_dhcp_server_ipinarp); - tcase_add_test(dhcp, test_dhcp_server_ipninarp); - tcase_add_test(dhcp, test_dhcp_server_api); - tcase_add_test(dhcp, test_dhcp); - suite_add_tcase(s, dhcp); - - tcase_add_test(dns, test_dns); - suite_add_tcase(s, dns); - - tcase_add_test(rb, test_rbtree); - tcase_set_timeout(rb, 120); - suite_add_tcase(s, rb); - - tcase_add_test(rb2, test_rbtree2); - tcase_set_timeout(rb2, 20); - suite_add_tcase(s, rb2); - - tcase_add_test(socket, test_socket); - suite_add_tcase(s, socket); - - tcase_add_test(nat, test_nat_enable_disable); - tcase_add_test(nat, test_nat_translation); - tcase_add_test(nat, test_nat_port_forwarding); - tcase_set_timeout(nat, 30); - suite_add_tcase(s, nat); - - tcase_add_test(ipfilter, test_ipfilter); - tcase_set_timeout(ipfilter, 10); - suite_add_tcase(s, ipfilter); - -#ifdef PICO_SUPPORT_CRC_FAULTY_UNIT_TEST - tcase_add_test(crc, test_crc_check); - suite_add_tcase(s, crc); -#endif - -#ifdef PICO_SUPPORT_MCAST - tcase_add_test(igmp, test_igmp_sockopts); - suite_add_tcase(s, igmp); -#endif - - tcase_add_test(frame, test_frame); - suite_add_tcase(s, frame); - - tcase_add_test(timers, test_timers); - suite_add_tcase(s, timers); - - tcase_add_test(slaacv4, test_slaacv4); - suite_add_tcase(s, slaacv4); - - tcase_add_test(tick, test_tick); - suite_add_tcase(s, tick); - -#ifdef PICO_SUPPORT_IPV6 - tcase_add_test(ipv6, test_ipv6); - suite_add_tcase(s, ipv6); -#ifdef PICO_SUPPORT_MCAST - tcase_add_test(mld, test_mld_sockopts); - suite_add_tcase(s, mld); -#endif -#endif - - 
tcase_add_test(arp, arp_update_max_arp_reqs_test); - tcase_add_test(arp, arp_compare_test); - tcase_add_test(arp, arp_lookup_test); - tcase_add_test(arp, arp_expire_test); - tcase_add_test(arp, arp_receive_test); - tcase_add_test(arp, arp_get_test); - tcase_add_test(arp, tc_pico_arp_queue); - suite_add_tcase(s, arp); - return s; -} - - - -int main(void) -{ - int fails; - Suite *s = pico_suite(); - SRunner *sr = srunner_create(s); - srunner_run_all(sr, CK_NORMAL); - fails = srunner_ntests_failed(sr); - srunner_free(sr); - return fails; -} diff --git a/kernel/picotcp/test/units.sh b/kernel/picotcp/test/units.sh deleted file mode 100755 index 87fd02c..0000000 --- a/kernel/picotcp/test/units.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -rm -f /tmp/pico-mem-report-* - -ASAN_OPTIONS="detect_leaks=0" ./build/test/units || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_fragments.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_pico_stack.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_802154.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_6lowpan.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_ethernet.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_pico_protocol.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_pico_frame.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_seq.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_tcp.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_dev_loop.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_dns_client.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_dns_common.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_sntp_client.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_ipv6_nd.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_mdns.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" 
./build/test/modunit_dns_sd.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_ipfilter.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_queue.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_tftp.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_aodv.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_dev_ppp.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_mld.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_igmp.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_hotplug_detection.elf || exit 1 -ASAN_OPTIONS="detect_leaks=0" ./build/test/modunit_strings.elf || exit 1 - -MAXMEM=`cat /tmp/pico-mem-report-* | sort -r -n |head -1` -echo -echo -echo -echo "MAX memory used: $MAXMEM" -rm -f /tmp/pico-mem-report-* - -echo "SUCCESS!" && exit 0 diff --git a/kernel/picotcp/test/vde_sock_start.sh b/kernel/picotcp/test/vde_sock_start.sh deleted file mode 100755 index cf0100e..0000000 --- a/kernel/picotcp/test/vde_sock_start.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -gksu "vdecmd -s /tmp/pico.mgmt shutdown" -gksu "vdecmd -s /tmp/pico1.mgmt shutdown" -gksu "vde_switch -t pic0 -s /tmp/pic0.ctl -m 777 -M /tmp/pico.mgmt -d -hub" -gksu "vde_switch -t pic1 -x -s /tmp/pic1.ctl -m 777 -M /tmp/pico1.mgmt -d -hub" - -# we prefer to use ip over ifconfig (which is obsolete), but the script has to work when ip is not available as well -USINGIP=1 -command -v ip >/dev/null 2>&1 || USINGIP=0 - -if [ $USINGIP -eq 1 ]; -then - gksu "ip addr add 10.40.0.1/24 dev pic0" - gksu "ip addr add 10.50.0.1/24 dev pic1" -else - gksu "ifconfig pic0 10.40.0.1 netmask 255.255.255.0" - gksu "ifconfig pic1 10.50.0.1 netmask 255.255.255.0" -fi -#ping 10.40.0.3 & - - diff --git a/kernel/picotcp/test/vde_sock_start_user.sh b/kernel/picotcp/test/vde_sock_start_user.sh deleted file mode 100755 index 624cfc9..0000000 --- a/kernel/picotcp/test/vde_sock_start_user.sh +++ /dev/null @@ 
-1,15 +0,0 @@ -#!/bin/bash -function help(){ - echo 'Cmd line arguments can be:' - echo 'start: to start the vde setup for the autotest.' - echo 'stop: to cleanup the vde setup for the autotest.' - exit -} - -function start_vde(){ - vde_switch -s /tmp/pic0.ctl -m 777 -M /tmp/pico.mgmt -d -hub - vde_switch -s /tmp/pic1.ctl -m 777 -M /tmp/pici.mgmt -d -hub -} - -start_vde - diff --git a/kernel/picotcp/uncrustify.cfg b/kernel/picotcp/uncrustify.cfg deleted file mode 100644 index ba664df..0000000 --- a/kernel/picotcp/uncrustify.cfg +++ /dev/null @@ -1,1579 +0,0 @@ -# Uncrustify 0.60 - -# -# General options -# - -# The type of line endings -newlines = lf # auto/lf/crlf/cr - -# The original size of tabs in the input -input_tab_size = 4 # number - -# The size of tabs in the output (only used if align_with_tabs=true) -output_tab_size = 4 # number - -# The ASCII value of the string escape char, usually 92 (\) or 94 (^). (Pawn) -string_escape_char = 92 # number - -# Alternate string escape char for Pawn. Only works right before the quote char. -string_escape_char2 = 0 # number - -# Allow interpreting '>=' and '>>=' as part of a template in 'void f(list>=val);'. -# If true (default), 'assert(x<0 && y>=3)' will be broken. -# Improvements to template detection may make this option obsolete. -tok_split_gte = false # false/true - -# Control what to do with the UTF-8 BOM (recommend 'remove') -utf8_bom = ignore # ignore/add/remove/force - -# If the file contains bytes with values between 128 and 255, but is not UTF-8, then output as UTF-8 -utf8_byte = false # false/true - -# Force the output encoding to UTF-8 -utf8_force = false # false/true - -# -# Indenting -# - -# The number of columns to indent per level. -# Usually 2, 3, 4, or 8. -indent_columns = 4 # number - -# The continuation indent. If non-zero, this overrides the indent of '(' and '=' continuation indents. -# For FreeBSD, this is set to 4. 
Negative value is absolute and not increased for each ( level -indent_continue = 0 # number - -# How to use tabs when indenting code -# 0=spaces only -# 1=indent with tabs to brace level, align with spaces -# 2=indent and align with tabs, using spaces when not on a tabstop -indent_with_tabs = 0 # number - -# Comments that are not a brace level are indented with tabs on a tabstop. -# Requires indent_with_tabs=2. If false, will use spaces. -indent_cmt_with_tabs = false # false/true - -# Whether to indent strings broken by '\' so that they line up -indent_align_string = false # false/true - -# The number of spaces to indent multi-line XML strings. -# Requires indent_align_string=True -indent_xml_string = 0 # number - -# Spaces to indent '{' from level -indent_brace = 0 # number - -# Whether braces are indented to the body level -indent_braces = false # false/true - -# Disabled indenting function braces if indent_braces is true -indent_braces_no_func = false # false/true - -# Disabled indenting class braces if indent_braces is true -indent_braces_no_class = false # false/true - -# Disabled indenting struct braces if indent_braces is true -indent_braces_no_struct = false # false/true - -# Indent based on the size of the brace parent, i.e. 'if' => 3 spaces, 'for' => 4 spaces, etc. -indent_brace_parent = false # false/true - -# Whether the 'namespace' body is indented -indent_namespace = false # false/true - -# The number of spaces to indent a namespace block -indent_namespace_level = 0 # number - -# If the body of the namespace is longer than this number, it won't be indented. -# Requires indent_namespace=true. 
Default=0 (no limit) -indent_namespace_limit = 0 # number - -# Whether the 'extern "C"' body is indented -indent_extern = false # false/true - -# Whether the 'class' body is indented -indent_class = false # false/true - -# Whether to indent the stuff after a leading class colon -indent_class_colon = false # false/true - -# Virtual indent from the ':' for member initializers. Default is 2 -indent_ctor_init_leading = 2 # number - -# Additional indenting for constructor initializer list -indent_ctor_init = 0 # number - -# False=treat 'else\nif' as 'else if' for indenting purposes -# True=indent the 'if' one level -indent_else_if = false # false/true - -# Amount to indent variable declarations after a open brace. neg=relative, pos=absolute -indent_var_def_blk = 0 # number - -# Indent continued variable declarations instead of aligning. -indent_var_def_cont = false # false/true - -# True: force indentation of function definition to start in column 1 -# False: use the default behavior -indent_func_def_force_col1 = false # false/true - -# True: indent continued function call parameters one indent level -# False: align parameters under the open paren -indent_func_call_param = false # false/true - -# Same as indent_func_call_param, but for function defs -indent_func_def_param = false # false/true - -# Same as indent_func_call_param, but for function protos -indent_func_proto_param = false # false/true - -# Same as indent_func_call_param, but for class declarations -indent_func_class_param = false # false/true - -# Same as indent_func_call_param, but for class variable constructors -indent_func_ctor_var_param = false # false/true - -# Same as indent_func_call_param, but for templates -indent_template_param = false # false/true - -# Double the indent for indent_func_xxx_param options -indent_func_param_double = false # false/true - -# Indentation column for standalone 'const' function decl/proto qualifier -indent_func_const = 0 # number - -# Indentation column for standalone 
'throw' function decl/proto qualifier -indent_func_throw = 0 # number - -# The number of spaces to indent a continued '->' or '.' -# Usually set to 0, 1, or indent_columns. -indent_member = 0 # number - -# Spaces to indent single line ('//') comments on lines before code -indent_sing_line_comments = 0 # number - -# If set, will indent trailing single line ('//') comments relative -# to the code instead of trying to keep the same absolute column -indent_relative_single_line_comments = false # false/true - -# Spaces to indent 'case' from 'switch' -# Usually 0 or indent_columns. -indent_switch_case = 0 # number - -# Spaces to shift the 'case' line, without affecting any other lines -# Usually 0. -indent_case_shift = 0 # number - -# Spaces to indent '{' from 'case'. -# By default, the brace will appear under the 'c' in case. -# Usually set to 0 or indent_columns. -indent_case_brace = 0 # number - -# Whether to indent comments found in first column -indent_col1_comment = false # false/true - -# How to indent goto labels -# >0 : absolute column where 1 is the leftmost column -# <=0 : subtract from brace indent -indent_label = 1 # number - -# Same as indent_label, but for access specifiers that are followed by a colon -indent_access_spec = 1 # number - -# Indent the code after an access specifier by one level. -# If set, this option forces 'indent_access_spec=0' -indent_access_spec_body = false # false/true - -# If an open paren is followed by a newline, indent the next line so that it lines up after the open paren (not recommended) -indent_paren_nl = false # false/true - -# Controls the indent of a close paren after a newline. 
-# 0: Indent to body level -# 1: Align under the open paren -# 2: Indent to the brace level -indent_paren_close = 0 # number - -# Controls the indent of a comma when inside a paren.If TRUE, aligns under the open paren -indent_comma_paren = false # false/true - -# Controls the indent of a BOOL operator when inside a paren.If TRUE, aligns under the open paren -indent_bool_paren = false # false/true - -# If 'indent_bool_paren' is true, controls the indent of the first expression. If TRUE, aligns the first expression to the following ones -indent_first_bool_expr = false # false/true - -# If an open square is followed by a newline, indent the next line so that it lines up after the open square (not recommended) -indent_square_nl = false # false/true - -# Don't change the relative indent of ESQL/C 'EXEC SQL' bodies -indent_preserve_sql = false # false/true - -# Align continued statements at the '='. Default=True -# If FALSE or the '=' is followed by a newline, the next line is indent one tab. -indent_align_assign = true # false/true - -# Indent OC blocks at brace level instead of usual rules. -indent_oc_block = false # false/true - -# Indent OC blocks in a message relative to the parameter name. -# 0=use indent_oc_block rules, 1+=spaces to indent -indent_oc_block_msg = 0 # number - -# Minimum indent for subsequent parameters -indent_oc_msg_colon = 0 # number - -# -# Spacing options -# - -# Add or remove space around arithmetic operator '+', '-', '/', '*', etc -sp_arith = add # ignore/add/remove/force - -# Add or remove space around assignment operator '=', '+=', etc -sp_assign = add # ignore/add/remove/force - -# Add or remove space around '=' in C++11 lambda capture specifications. Overrides sp_assign -sp_cpp_lambda_assign = ignore # ignore/add/remove/force - -# Add or remove space after the capture specification in C++11 lambda. 
-sp_cpp_lambda_paren = ignore # ignore/add/remove/force - -# Add or remove space around assignment operator '=' in a prototype -sp_assign_default = add # ignore/add/remove/force - -# Add or remove space before assignment operator '=', '+=', etc. Overrides sp_assign. -sp_before_assign = add # ignore/add/remove/force - -# Add or remove space after assignment operator '=', '+=', etc. Overrides sp_assign. -sp_after_assign = add # ignore/add/remove/force - -# Add or remove space around assignment '=' in enum -sp_enum_assign = add # ignore/add/remove/force - -# Add or remove space before assignment '=' in enum. Overrides sp_enum_assign. -sp_enum_before_assign = add # ignore/add/remove/force - -# Add or remove space after assignment '=' in enum. Overrides sp_enum_assign. -sp_enum_after_assign = add # ignore/add/remove/force - -# Add or remove space around preprocessor '##' concatenation operator. Default=Add -sp_pp_concat = add # ignore/add/remove/force - -# Add or remove space after preprocessor '#' stringify operator. Also affects the '#@' charizing operator. -sp_pp_stringify = ignore # ignore/add/remove/force - -# Add or remove space before preprocessor '#' stringify operator as in '#define x(y) L#y'. 
-sp_before_pp_stringify = ignore # ignore/add/remove/force - -# Add or remove space around boolean operators '&&' and '||' -sp_bool = add # ignore/add/remove/force - -# Add or remove space around compare operator '<', '>', '==', etc -sp_compare = add # ignore/add/remove/force - -# Add or remove space inside '(' and ')' -sp_inside_paren = remove # ignore/add/remove/force - -# Add or remove space between nested parens -sp_paren_paren = remove # ignore/add/remove/force - -# Whether to balance spaces inside nested parens -sp_balance_nested_parens = false # false/true - -# Add or remove space between ')' and '{' -sp_paren_brace = ignore # ignore/add/remove/force - -# Add or remove space before pointer star '*' -sp_before_ptr_star = ignore # ignore/add/remove/force - -# Add or remove space before pointer star '*' that isn't followed by a variable name -# If set to 'ignore', sp_before_ptr_star is used instead. -sp_before_unnamed_ptr_star = ignore # ignore/add/remove/force - -# Add or remove space between pointer stars '*' -sp_between_ptr_star = ignore # ignore/add/remove/force - -# Add or remove space after pointer star '*', if followed by a word. -sp_after_ptr_star = remove # ignore/add/remove/force - -# Add or remove space after a pointer star '*', if followed by a func proto/def. -sp_after_ptr_star_func = ignore # ignore/add/remove/force - -# Add or remove space after a pointer star '*', if followed by an open paren (function types). -sp_ptr_star_paren = ignore # ignore/add/remove/force - -# Add or remove space before a pointer star '*', if followed by a func proto/def. -sp_before_ptr_star_func = ignore # ignore/add/remove/force - -# Add or remove space before a reference sign '&' -sp_before_byref = ignore # ignore/add/remove/force - -# Add or remove space before a reference sign '&' that isn't followed by a variable name -# If set to 'ignore', sp_before_byref is used instead. 
-sp_before_unnamed_byref = ignore # ignore/add/remove/force - -# Add or remove space after reference sign '&', if followed by a word. -sp_after_byref = remove # ignore/add/remove/force - -# Add or remove space after a reference sign '&', if followed by a func proto/def. -sp_after_byref_func = remove # ignore/add/remove/force - -# Add or remove space before a reference sign '&', if followed by a func proto/def. -sp_before_byref_func = ignore # ignore/add/remove/force - -# Add or remove space between type and word. Default=Force -sp_after_type = force # ignore/add/remove/force - -# Add or remove space before the paren in the D constructs 'template Foo(' and 'class Foo('. -sp_before_template_paren = ignore # ignore/add/remove/force - -# Add or remove space in 'template <' vs 'template<'. -# If set to ignore, sp_before_angle is used. -sp_template_angle = ignore # ignore/add/remove/force - -# Add or remove space before '<>' -sp_before_angle = ignore # ignore/add/remove/force - -# Add or remove space inside '<' and '>' -sp_inside_angle = ignore # ignore/add/remove/force - -# Add or remove space after '<>' -sp_after_angle = ignore # ignore/add/remove/force - -# Add or remove space between '<>' and '(' as found in 'new List();' -sp_angle_paren = ignore # ignore/add/remove/force - -# Add or remove space between '<>' and a word as in 'List m;' -sp_angle_word = ignore # ignore/add/remove/force - -# Add or remove space between '>' and '>' in '>>' (template stuff C++/C# only). Default=Add -sp_angle_shift = add # ignore/add/remove/force - -# Permit removal of the space between '>>' in 'foo >' (C++11 only). Default=False -# sp_angle_shift cannot remove the space without this option. 
-sp_permit_cpp11_shift = false # false/true - -# Add or remove space before '(' of 'if', 'for', 'switch', and 'while' -sp_before_sparen = ignore # ignore/add/remove/force - -# Add or remove space inside if-condition '(' and ')' -sp_inside_sparen = ignore # ignore/add/remove/force - -# Add or remove space before if-condition ')'. Overrides sp_inside_sparen. -sp_inside_sparen_close = ignore # ignore/add/remove/force - -# Add or remove space before if-condition '('. Overrides sp_inside_sparen. -sp_inside_sparen_open = ignore # ignore/add/remove/force - -# Add or remove space after ')' of 'if', 'for', 'switch', and 'while' -sp_after_sparen = ignore # ignore/add/remove/force - -# Add or remove space between ')' and '{' of 'if', 'for', 'switch', and 'while' -sp_sparen_brace = ignore # ignore/add/remove/force - -# Add or remove space between 'invariant' and '(' in the D language. -sp_invariant_paren = ignore # ignore/add/remove/force - -# Add or remove space after the ')' in 'invariant (C) c' in the D language. -sp_after_invariant_paren = ignore # ignore/add/remove/force - -# Add or remove space before empty statement ';' on 'if', 'for' and 'while' -sp_special_semi = ignore # ignore/add/remove/force - -# Add or remove space before ';'. Default=Remove -sp_before_semi = remove # ignore/add/remove/force - -# Add or remove space before ';' in non-empty 'for' statements -sp_before_semi_for = ignore # ignore/add/remove/force - -# Add or remove space before a semicolon of an empty part of a for statement. -sp_before_semi_for_empty = ignore # ignore/add/remove/force - -# Add or remove space after ';', except when followed by a comment. Default=Add -sp_after_semi = remove # ignore/add/remove/force - -# Add or remove space after ';' in non-empty 'for' statements. Default=Force -sp_after_semi_for = force # ignore/add/remove/force - -# Add or remove space after the final semicolon of an empty part of a for statement: for ( ; ; ). 
-sp_after_semi_for_empty = ignore # ignore/add/remove/force - -# Add or remove space before '[' (except '[]') -sp_before_square = ignore # ignore/add/remove/force - -# Add or remove space before '[]' -sp_before_squares = ignore # ignore/add/remove/force - -# Add or remove space inside a non-empty '[' and ']' -sp_inside_square = ignore # ignore/add/remove/force - -# Add or remove space after ',' -sp_after_comma = add # ignore/add/remove/force - -# Add or remove space before ',' -sp_before_comma = remove # ignore/add/remove/force - -# Add or remove space between an open paren and comma: '(,' vs '( ,' -sp_paren_comma = force # ignore/add/remove/force - -# Add or remove space before the variadic '...' when preceded by a non-punctuator -sp_before_ellipsis = ignore # ignore/add/remove/force - -# Add or remove space after class ':' -sp_after_class_colon = ignore # ignore/add/remove/force - -# Add or remove space before class ':' -sp_before_class_colon = ignore # ignore/add/remove/force - -# Add or remove space before case ':'. Default=Remove -sp_before_case_colon = remove # ignore/add/remove/force - -# Add or remove space between 'operator' and operator sign -sp_after_operator = ignore # ignore/add/remove/force - -# Add or remove space between the operator symbol and the open paren, as in 'operator ++(' -sp_after_operator_sym = ignore # ignore/add/remove/force - -# Add or remove space after C/D cast, i.e. 'cast(int)a' vs 'cast(int) a' or '(int)a' vs '(int) a' -sp_after_cast = ignore # ignore/add/remove/force - -# Add or remove spaces inside cast parens -sp_inside_paren_cast = ignore # ignore/add/remove/force - -# Add or remove space between the type and open paren in a C++ cast, i.e. 
'int(exp)' vs 'int (exp)' -sp_cpp_cast_paren = ignore # ignore/add/remove/force - -# Add or remove space between 'sizeof' and '(' -sp_sizeof_paren = ignore # ignore/add/remove/force - -# Add or remove space after the tag keyword (Pawn) -sp_after_tag = ignore # ignore/add/remove/force - -# Add or remove space inside enum '{' and '}' -sp_inside_braces_enum = ignore # ignore/add/remove/force - -# Add or remove space inside struct/union '{' and '}' -sp_inside_braces_struct = ignore # ignore/add/remove/force - -# Add or remove space inside '{' and '}' -sp_inside_braces = ignore # ignore/add/remove/force - -# Add or remove space inside '{}' -sp_inside_braces_empty = ignore # ignore/add/remove/force - -# Add or remove space between return type and function name -# A minimum of 1 is forced except for pointer return types. -sp_type_func = ignore # ignore/add/remove/force - -# Add or remove space between function name and '(' on function declaration -sp_func_proto_paren = ignore # ignore/add/remove/force - -# Add or remove space between function name and '(' on function definition -sp_func_def_paren = ignore # ignore/add/remove/force - -# Add or remove space inside empty function '()' -sp_inside_fparens = ignore # ignore/add/remove/force - -# Add or remove space inside function '(' and ')' -sp_inside_fparen = ignore # ignore/add/remove/force - -# Add or remove space inside the first parens in the function type: 'void (*x)(...)' -sp_inside_tparen = ignore # ignore/add/remove/force - -# Add or remove between the parens in the function type: 'void (*x)(...)' -sp_after_tparen_close = ignore # ignore/add/remove/force - -# Add or remove space between ']' and '(' when part of a function call. 
-sp_square_fparen = ignore # ignore/add/remove/force - -# Add or remove space between ')' and '{' of function -sp_fparen_brace = ignore # ignore/add/remove/force - -# Add or remove space between function name and '(' on function calls -sp_func_call_paren = ignore # ignore/add/remove/force - -# Add or remove space between function name and '()' on function calls without parameters. -# If set to 'ignore' (the default), sp_func_call_paren is used. -sp_func_call_paren_empty = ignore # ignore/add/remove/force - -# Add or remove space between the user function name and '(' on function calls -# You need to set a keyword to be a user function, like this: 'set func_call_user _' in the config file. -sp_func_call_user_paren = ignore # ignore/add/remove/force - -# Add or remove space between a constructor/destructor and the open paren -sp_func_class_paren = ignore # ignore/add/remove/force - -# Add or remove space between 'return' and '(' -sp_return_paren = ignore # ignore/add/remove/force - -# Add or remove space between '__attribute__' and '(' -sp_attribute_paren = ignore # ignore/add/remove/force - -# Add or remove space between 'defined' and '(' in '#if defined (FOO)' -sp_defined_paren = ignore # ignore/add/remove/force - -# Add or remove space between 'throw' and '(' in 'throw (something)' -sp_throw_paren = ignore # ignore/add/remove/force - -# Add or remove space between 'throw' and anything other than '(' as in '@throw [...];' -sp_after_throw = ignore # ignore/add/remove/force - -# Add or remove space between 'catch' and '(' in 'catch (something) { }' -# If set to ignore, sp_before_sparen is used. -sp_catch_paren = ignore # ignore/add/remove/force - -# Add or remove space between 'version' and '(' in 'version (something) { }' (D language) -# If set to ignore, sp_before_sparen is used. -sp_version_paren = ignore # ignore/add/remove/force - -# Add or remove space between 'scope' and '(' in 'scope (something) { }' (D language) -# If set to ignore, sp_before_sparen is used. 
-sp_scope_paren = ignore # ignore/add/remove/force - -# Add or remove space between macro and value -sp_macro = ignore # ignore/add/remove/force - -# Add or remove space between macro function ')' and value -sp_macro_func = ignore # ignore/add/remove/force - -# Add or remove space between 'else' and '{' if on the same line -sp_else_brace = ignore # ignore/add/remove/force - -# Add or remove space between '}' and 'else' if on the same line -sp_brace_else = ignore # ignore/add/remove/force - -# Add or remove space between '}' and the name of a typedef on the same line -sp_brace_typedef = ignore # ignore/add/remove/force - -# Add or remove space between 'catch' and '{' if on the same line -sp_catch_brace = ignore # ignore/add/remove/force - -# Add or remove space between '}' and 'catch' if on the same line -sp_brace_catch = ignore # ignore/add/remove/force - -# Add or remove space between 'finally' and '{' if on the same line -sp_finally_brace = ignore # ignore/add/remove/force - -# Add or remove space between '}' and 'finally' if on the same line -sp_brace_finally = ignore # ignore/add/remove/force - -# Add or remove space between 'try' and '{' if on the same line -sp_try_brace = ignore # ignore/add/remove/force - -# Add or remove space between get/set and '{' if on the same line -sp_getset_brace = ignore # ignore/add/remove/force - -# Add or remove space before the '::' operator -sp_before_dc = ignore # ignore/add/remove/force - -# Add or remove space after the '::' operator -sp_after_dc = ignore # ignore/add/remove/force - -# Add or remove around the D named array initializer ':' operator -sp_d_array_colon = ignore # ignore/add/remove/force - -# Add or remove space after the '!' (not) operator. Default=Remove -sp_not = remove # ignore/add/remove/force - -# Add or remove space after the '~' (invert) operator. Default=Remove -sp_inv = remove # ignore/add/remove/force - -# Add or remove space after the '&' (address-of) operator. 
Default=Remove -# This does not affect the spacing after a '&' that is part of a type. -sp_addr = remove # ignore/add/remove/force - -# Add or remove space around the '.' or '->' operators. Default=Remove -sp_member = remove # ignore/add/remove/force - -# Add or remove space after the '*' (dereference) operator. Default=Remove -# This does not affect the spacing after a '*' that is part of a type. -sp_deref = remove # ignore/add/remove/force - -# Add or remove space after '+' or '-', as in 'x = -5' or 'y = +7'. Default=Remove -sp_sign = remove # ignore/add/remove/force - -# Add or remove space before or after '++' and '--', as in '(--x)' or 'y++;'. Default=Remove -sp_incdec = remove # ignore/add/remove/force - -# Add or remove space before a backslash-newline at the end of a line. Default=Add -sp_before_nl_cont = add # ignore/add/remove/force - -# Add or remove space after the scope '+' or '-', as in '-(void) foo;' or '+(int) bar;' -sp_after_oc_scope = ignore # ignore/add/remove/force - -# Add or remove space after the colon in message specs -# '-(int) f:(int) x;' vs '-(int) f: (int) x;' -sp_after_oc_colon = ignore # ignore/add/remove/force - -# Add or remove space before the colon in message specs -# '-(int) f: (int) x;' vs '-(int) f : (int) x;' -sp_before_oc_colon = ignore # ignore/add/remove/force - -# Add or remove space after the colon in immutable dictionary expression -# 'NSDictionary *test = @{@"foo" :@"bar"};' -sp_after_oc_dict_colon = ignore # ignore/add/remove/force - -# Add or remove space before the colon in immutable dictionary expression -# 'NSDictionary *test = @{@"foo" :@"bar"};' -sp_before_oc_dict_colon = ignore # ignore/add/remove/force - -# Add or remove space after the colon in message specs -# '[object setValue:1];' vs '[object setValue: 1];' -sp_after_send_oc_colon = ignore # ignore/add/remove/force - -# Add or remove space before the colon in message specs -# '[object setValue:1];' vs '[object setValue :1];' -sp_before_send_oc_colon = ignore 
# ignore/add/remove/force - -# Add or remove space after the (type) in message specs -# '-(int)f: (int) x;' vs '-(int)f: (int)x;' -sp_after_oc_type = ignore # ignore/add/remove/force - -# Add or remove space after the first (type) in message specs -# '-(int) f:(int)x;' vs '-(int)f:(int)x;' -sp_after_oc_return_type = ignore # ignore/add/remove/force - -# Add or remove space between '@selector' and '(' -# '@selector(msgName)' vs '@selector (msgName)' -# Also applies to @protocol() constructs -sp_after_oc_at_sel = ignore # ignore/add/remove/force - -# Add or remove space between '@selector(x)' and the following word -# '@selector(foo) a:' vs '@selector(foo)a:' -sp_after_oc_at_sel_parens = ignore # ignore/add/remove/force - -# Add or remove space inside '@selector' parens -# '@selector(foo)' vs '@selector( foo )' -# Also applies to @protocol() constructs -sp_inside_oc_at_sel_parens = ignore # ignore/add/remove/force - -# Add or remove space before a block pointer caret -# '^int (int arg){...}' vs. ' ^int (int arg){...}' -sp_before_oc_block_caret = ignore # ignore/add/remove/force - -# Add or remove space after a block pointer caret -# '^int (int arg){...}' vs. '^ int (int arg){...}' -sp_after_oc_block_caret = ignore # ignore/add/remove/force - -# Add or remove space between the receiver and selector in a message. -# '[receiver selector ...]' -sp_after_oc_msg_receiver = ignore # ignore/add/remove/force - -# Add or remove space after @property. -sp_after_oc_property = ignore # ignore/add/remove/force - -# Add or remove space around the ':' in 'b ? t : f' -sp_cond_colon = ignore # ignore/add/remove/force - -# Add or remove space around the '?' in 'b ? t : f' -sp_cond_question = ignore # ignore/add/remove/force - -# Fix the spacing between 'case' and the label. Only 'ignore' and 'force' make sense here. -sp_case_label = ignore # ignore/add/remove/force - -# Control the space around the D '..' operator. 
-sp_range = ignore # ignore/add/remove/force - -# Control the spacing after ':' in 'for (TYPE VAR : EXPR)' (Java) -sp_after_for_colon = ignore # ignore/add/remove/force - -# Control the spacing before ':' in 'for (TYPE VAR : EXPR)' (Java) -sp_before_for_colon = ignore # ignore/add/remove/force - -# Control the spacing in 'extern (C)' (D) -sp_extern_paren = ignore # ignore/add/remove/force - -# Control the space after the opening of a C++ comment '// A' vs '//A' -sp_cmt_cpp_start = ignore # ignore/add/remove/force - -# Controls the spaces between #else or #endif and a trailing comment -sp_endif_cmt = ignore # ignore/add/remove/force - -# Controls the spaces after 'new', 'delete', and 'delete[]' -sp_after_new = ignore # ignore/add/remove/force - -# Controls the spaces before a trailing or embedded comment -sp_before_tr_emb_cmt = ignore # ignore/add/remove/force - -# Number of spaces before a trailing or embedded comment -sp_num_before_tr_emb_cmt = 0 # number - -# Control space between a Java annotation and the open paren. -sp_annotation_paren = ignore # ignore/add/remove/force - -# -# Code alignment (not left column spaces/tabs) -# - -# Whether to keep non-indenting tabs -align_keep_tabs = false # false/true - -# Whether to use tabs for aligning -align_with_tabs = false # false/true - -# Whether to bump out to the next tab when aligning -align_on_tabstop = false # false/true - -# Whether to left-align numbers -align_number_left = false # false/true - -# Align variable definitions in prototypes and functions -align_func_params = false # false/true - -# Align parameters in single-line functions that have the same name. -# The function names must already be aligned with each other. -align_same_func_call_params = false # false/true - -# The span for aligning variable definitions (0=don't align) -align_var_def_span = 0 # number - -# How to align the star in variable definitions. 
-# 0=Part of the type 'void * foo;' -# 1=Part of the variable 'void *foo;' -# 2=Dangling 'void *foo;' -align_var_def_star_style = 1 # number - -# How to align the '&' in variable definitions. -# 0=Part of the type -# 1=Part of the variable -# 2=Dangling -align_var_def_amp_style = 1 # number - -# The threshold for aligning variable definitions (0=no limit) -align_var_def_thresh = 0 # number - -# The gap for aligning variable definitions -align_var_def_gap = 0 # number - -# Whether to align the colon in struct bit fields -align_var_def_colon = false # false/true - -# Whether to align any attribute after the variable name -align_var_def_attribute = false # false/true - -# Whether to align inline struct/enum/union variable definitions -align_var_def_inline = false # false/true - -# The span for aligning on '=' in assignments (0=don't align) -align_assign_span = 0 # number - -# The threshold for aligning on '=' in assignments (0=no limit) -align_assign_thresh = 0 # number - -# The span for aligning on '=' in enums (0=don't align) -align_enum_equ_span = 0 # number - -# The threshold for aligning on '=' in enums (0=no limit) -align_enum_equ_thresh = 0 # number - -# The span for aligning struct/union (0=don't align) -align_var_struct_span = 0 # number - -# The threshold for aligning struct/union member definitions (0=no limit) -align_var_struct_thresh = 0 # number - -# The gap for aligning struct/union member definitions -align_var_struct_gap = 0 # number - -# The span for aligning struct initializer values (0=don't align) -align_struct_init_span = 0 # number - -# The minimum space between the type and the synonym of a typedef -align_typedef_gap = 0 # number - -# The span for aligning single-line typedefs (0=don't align) -align_typedef_span = 0 # number - -# How to align typedef'd functions with other typedefs -# 0: Don't mix them at all -# 1: align the open paren with the types -# 2: align the function type name with the other type names -align_typedef_func = 0 # number - 
-# Controls the positioning of the '*' in typedefs. Just try it. -# 0: Align on typedef type, ignore '*' -# 1: The '*' is part of type name: typedef int *pint; -# 2: The '*' is part of the type, but dangling: typedef int *pint; -align_typedef_star_style = 0 # number - -# Controls the positioning of the '&' in typedefs. Just try it. -# 0: Align on typedef type, ignore '&' -# 1: The '&' is part of type name: typedef int &pint; -# 2: The '&' is part of the type, but dangling: typedef int &pint; -align_typedef_amp_style = 0 # number - -# The span for aligning comments that end lines (0=don't align) -align_right_cmt_span = 0 # number - -# If aligning comments, mix with comments after '}' and #endif with less than 3 spaces before the comment -align_right_cmt_mix = false # false/true - -# If a trailing comment is more than this number of columns away from the text it follows, -# it will qualify for being aligned. This has to be > 0 to do anything. -align_right_cmt_gap = 0 # number - -# Align trailing comment at or beyond column N; 'pulls in' comments as a bonus side effect (0=ignore) -align_right_cmt_at_col = 0 # number - -# The span for aligning function prototypes (0=don't align) -align_func_proto_span = 0 # number - -# Minimum gap between the return type and the function name. -align_func_proto_gap = 0 # number - -# Align function protos on the 'operator' keyword instead of what follows -align_on_operator = false # false/true - -# Whether to mix aligning prototype and variable declarations. -# If true, align_var_def_XXX options are used instead of align_func_proto_XXX options. -align_mix_var_proto = false # false/true - -# Align single-line functions with function prototypes, uses align_func_proto_span -align_single_line_func = false # false/true - -# Aligning the open brace of single-line functions. -# Requires align_single_line_func=true, uses align_func_proto_span -align_single_line_brace = false # false/true - -# Gap for align_single_line_brace. 
-align_single_line_brace_gap = 0 # number - -# The span for aligning ObjC msg spec (0=don't align) -align_oc_msg_spec_span = 0 # number - -# Whether to align macros wrapped with a backslash and a newline. -# This will not work right if the macro contains a multi-line comment. -align_nl_cont = false # false/true - -# # Align macro functions and variables together -align_pp_define_together = false # false/true - -# The minimum space between label and value of a preprocessor define -align_pp_define_gap = 0 # number - -# The span for aligning on '#define' bodies (0=don't align) -align_pp_define_span = 0 # number - -# Align lines that start with '<<' with previous '<<'. Default=true -align_left_shift = true # false/true - -# Span for aligning parameters in an Obj-C message call on the ':' (0=don't align) -align_oc_msg_colon_span = 0 # number - -# If true, always align with the first parameter, even if it is too short. -align_oc_msg_colon_first = false # false/true - -# Aligning parameters in an Obj-C '+' or '-' declaration on the ':' -align_oc_decl_colon = false # false/true - -# -# Newline adding and removing options -# - -# Whether to collapse empty blocks between '{' and '}' -nl_collapse_empty_body = false # false/true - -# Don't split one-line braced assignments - 'foo_t f = { 1, 2 };' -nl_assign_leave_one_liners = false # false/true - -# Don't split one-line braced statements inside a class xx { } body -nl_class_leave_one_liners = false # false/true - -# Don't split one-line enums: 'enum foo { BAR = 15 };' -nl_enum_leave_one_liners = false # false/true - -# Don't split one-line get or set functions -nl_getset_leave_one_liners = false # false/true - -# Don't split one-line function definitions - 'int foo() { return 0; }' -nl_func_leave_one_liners = false # false/true - -# Don't split one-line if/else statements - 'if(a) b++;' -nl_if_leave_one_liners = false # false/true - -# Don't split one-line OC messages -nl_oc_msg_leave_one_liner = false # false/true - -# Add or 
remove newlines at the start of the file -nl_start_of_file = ignore # ignore/add/remove/force - -# The number of newlines at the start of the file (only used if nl_start_of_file is 'add' or 'force' -nl_start_of_file_min = 0 # number - -# Add or remove newline at the end of the file -nl_end_of_file = ignore # ignore/add/remove/force - -# The number of newlines at the end of the file (only used if nl_end_of_file is 'add' or 'force') -nl_end_of_file_min = 0 # number - -# Add or remove newline between '=' and '{' -nl_assign_brace = ignore # ignore/add/remove/force - -# Add or remove newline between '=' and '[' (D only) -nl_assign_square = ignore # ignore/add/remove/force - -# Add or remove newline after '= [' (D only). Will also affect the newline before the ']' -nl_after_square_assign = ignore # ignore/add/remove/force - -# The number of blank lines after a block of variable definitions at the top of a function body -# 0 = No change (default) -nl_func_var_def_blk = 0 # number - -# The number of newlines before a block of typedefs -# 0 = No change (default) -nl_typedef_blk_start = 0 # number - -# The number of newlines after a block of typedefs -# 0 = No change (default) -nl_typedef_blk_end = 0 # number - -# The maximum consecutive newlines within a block of typedefs -# 0 = No change (default) -nl_typedef_blk_in = 0 # number - -# The number of newlines before a block of variable definitions not at the top of a function body -# 0 = No change (default) -nl_var_def_blk_start = 0 # number - -# The number of newlines after a block of variable definitions not at the top of a function body -# 0 = No change (default) -nl_var_def_blk_end = 0 # number - -# The maximum consecutive newlines within a block of variable definitions -# 0 = No change (default) -nl_var_def_blk_in = 0 # number - -# Add or remove newline between a function call's ')' and '{', as in: -# list_for_each(item, &list) { } -nl_fcall_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 
'enum' and '{' -nl_enum_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'struct and '{' -nl_struct_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'union' and '{' -nl_union_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'if' and '{' -nl_if_brace = ignore # ignore/add/remove/force - -# Add or remove newline between '}' and 'else' -nl_brace_else = ignore # ignore/add/remove/force - -# Add or remove newline between 'else if' and '{' -# If set to ignore, nl_if_brace is used instead -nl_elseif_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'else' and '{' -nl_else_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'else' and 'if' -nl_else_if = ignore # ignore/add/remove/force - -# Add or remove newline between '}' and 'finally' -nl_brace_finally = ignore # ignore/add/remove/force - -# Add or remove newline between 'finally' and '{' -nl_finally_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'try' and '{' -nl_try_brace = ignore # ignore/add/remove/force - -# Add or remove newline between get/set and '{' -nl_getset_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'for' and '{' -nl_for_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'catch' and '{' -nl_catch_brace = ignore # ignore/add/remove/force - -# Add or remove newline between '}' and 'catch' -nl_brace_catch = ignore # ignore/add/remove/force - -# Add or remove newline between 'while' and '{' -nl_while_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'scope (x)' and '{' (D) -nl_scope_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'unittest' and '{' (D) -nl_unittest_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'version (x)' and '{' (D) -nl_version_brace = ignore # ignore/add/remove/force - -# Add or remove newline 
between 'using' and '{' -nl_using_brace = ignore # ignore/add/remove/force - -# Add or remove newline between two open or close braces. -# Due to general newline/brace handling, REMOVE may not work. -nl_brace_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'do' and '{' -nl_do_brace = ignore # ignore/add/remove/force - -# Add or remove newline between '}' and 'while' of 'do' statement -nl_brace_while = ignore # ignore/add/remove/force - -# Add or remove newline between 'switch' and '{' -nl_switch_brace = ignore # ignore/add/remove/force - -# Add a newline between ')' and '{' if the ')' is on a different line than the if/for/etc. -# Overrides nl_for_brace, nl_if_brace, nl_switch_brace, nl_while_switch, and nl_catch_brace. -nl_multi_line_cond = false # false/true - -# Force a newline in a define after the macro name for multi-line defines. -nl_multi_line_define = false # false/true - -# Whether to put a newline before 'case' statement -nl_before_case = false # false/true - -# Add or remove newline between ')' and 'throw' -nl_before_throw = ignore # ignore/add/remove/force - -# Whether to put a newline after 'case' statement -nl_after_case = false # false/true - -# Add or remove a newline between a case ':' and '{'. Overrides nl_after_case. -nl_case_colon_brace = ignore # ignore/add/remove/force - -# Newline between namespace and { -nl_namespace_brace = ignore # ignore/add/remove/force - -# Add or remove newline between 'template<>' and whatever follows. 
-nl_template_class = ignore # ignore/add/remove/force - -# Add or remove newline between 'class' and '{' -nl_class_brace = ignore # ignore/add/remove/force - -# Add or remove newline after each ',' in the constructor member initialization -nl_class_init_args = ignore # ignore/add/remove/force - -# Add or remove newline between return type and function name in a function definition -nl_func_type_name = ignore # ignore/add/remove/force - -# Add or remove newline between return type and function name inside a class {} -# Uses nl_func_type_name or nl_func_proto_type_name if set to ignore. -nl_func_type_name_class = ignore # ignore/add/remove/force - -# Add or remove newline between function scope and name in a definition -# Controls the newline after '::' in 'void A::f() { }' -nl_func_scope_name = ignore # ignore/add/remove/force - -# Add or remove newline between return type and function name in a prototype -nl_func_proto_type_name = ignore # ignore/add/remove/force - -# Add or remove newline between a function name and the opening '(' -nl_func_paren = remove # ignore/add/remove/force - -# Add or remove newline between a function name and the opening '(' in the definition -nl_func_def_paren = remove # ignore/add/remove/force - -# Add or remove newline after '(' in a function declaration -nl_func_decl_start = remove # ignore/add/remove/force - -# Add or remove newline after '(' in a function definition -nl_func_def_start = remove # ignore/add/remove/force - -# Overrides nl_func_decl_start when there is only one parameter. -nl_func_decl_start_single = ignore # ignore/add/remove/force - -# Overrides nl_func_def_start when there is only one parameter. 
-nl_func_def_start_single = ignore # ignore/add/remove/force - -# Add or remove newline after each ',' in a function declaration -nl_func_decl_args = ignore # ignore/add/remove/force - -# Add or remove newline after each ',' in a function definition -nl_func_def_args = ignore # ignore/add/remove/force - -# Add or remove newline before the ')' in a function declaration -nl_func_decl_end = ignore # ignore/add/remove/force - -# Add or remove newline before the ')' in a function definition -nl_func_def_end = remove # ignore/add/remove/force - -# Overrides nl_func_decl_end when there is only one parameter. -nl_func_decl_end_single = ignore # ignore/add/remove/force - -# Overrides nl_func_def_end when there is only one parameter. -nl_func_def_end_single = ignore # ignore/add/remove/force - -# Add or remove newline between '()' in a function declaration. -nl_func_decl_empty = ignore # ignore/add/remove/force - -# Add or remove newline between '()' in a function definition. -nl_func_def_empty = ignore # ignore/add/remove/force - -# Whether to put each OC message parameter on a separate line -# See nl_oc_msg_leave_one_liner -nl_oc_msg_args = false # false/true - -# Add or remove newline between function signature and '{' -nl_fdef_brace = add # ignore/add/remove/force - -# Add or remove a newline between the return keyword and return expression. -nl_return_expr = ignore # ignore/add/remove/force - -# Whether to put a newline after semicolons, except in 'for' statements -nl_after_semicolon = true # false/true - -# Whether to put a newline after brace open. -# This also adds a newline before the matching brace close. -nl_after_brace_open = true # false/true - -# If nl_after_brace_open and nl_after_brace_open_cmt are true, a newline is -# placed between the open brace and a trailing single-line comment. -nl_after_brace_open_cmt = false # false/true - -# Whether to put a newline after a virtual brace open with a non-empty body. 
-# These occur in un-braced if/while/do/for statement bodies. -nl_after_vbrace_open = false # false/true - -# Whether to put a newline after a virtual brace open with an empty body. -# These occur in un-braced if/while/do/for statement bodies. -nl_after_vbrace_open_empty = false # false/true - -# Whether to put a newline after a brace close. -# Does not apply if followed by a necessary ';'. -nl_after_brace_close = false # false/true - -# Whether to put a newline after a virtual brace close. -# Would add a newline before return in: 'if (foo) a++; return;' -nl_after_vbrace_close = false # false/true - -# Control the newline between the close brace and 'b' in: 'struct { int a; } b;' -# Affects enums, unions, and structures. If set to ignore, uses nl_after_brace_close -nl_brace_struct_var = ignore # ignore/add/remove/force - -# Whether to alter newlines in '#define' macros -nl_define_macro = false # false/true - -# Whether to not put blanks after '#ifxx', '#elxx', or before '#endif' -nl_squeeze_ifdef = false # false/true - -# Add or remove blank line before 'if' -nl_before_if = ignore # ignore/add/remove/force - -# Add or remove blank line after 'if' statement -nl_after_if = add # ignore/add/remove/force - -# Add or remove blank line before 'for' -nl_before_for = ignore # ignore/add/remove/force - -# Add or remove blank line after 'for' statement -nl_after_for = remove # ignore/add/remove/force - -# Add or remove blank line before 'while' -nl_before_while = ignore # ignore/add/remove/force - -# Add or remove blank line after 'while' statement -nl_after_while = remove # ignore/add/remove/force - -# Add or remove blank line before 'switch' -nl_before_switch = ignore # ignore/add/remove/force - -# Add or remove blank line after 'switch' statement -nl_after_switch = ignore # ignore/add/remove/force - -# Add or remove blank line before 'do' -nl_before_do = ignore # ignore/add/remove/force - -# Add or remove blank line after 'do/while' statement -nl_after_do = ignore # 
ignore/add/remove/force - -# Whether to double-space commented-entries in struct/enum -nl_ds_struct_enum_cmt = false # false/true - -# Whether to double-space before the close brace of a struct/union/enum -# (lower priority than 'eat_blanks_before_close_brace') -nl_ds_struct_enum_close_brace = false # false/true - -# Add or remove a newline around a class colon. -# Related to pos_class_colon, nl_class_init_args, and pos_comma. -nl_class_colon = ignore # ignore/add/remove/force - -# Change simple unbraced if statements into a one-liner -# 'if(b)\n i++;' => 'if(b) i++;' -nl_create_if_one_liner = false # false/true - -# Change simple unbraced for statements into a one-liner -# 'for (i=0;i<5;i++)\n foo(i);' => 'for (i=0;i<5;i++) foo(i);' -nl_create_for_one_liner = false # false/true - -# Change simple unbraced while statements into a one-liner -# 'while (i<5)\n foo(i++);' => 'while (i<5) foo(i++);' -nl_create_while_one_liner = false # false/true - -# -# Positioning options -# - -# The position of arithmetic operators in wrapped expressions -pos_arith = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# The position of assignment in wrapped expressions. -# Do not affect '=' followed by '{' -pos_assign = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# The position of boolean operators in wrapped expressions -pos_bool = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# The position of comparison operators in wrapped expressions -pos_compare = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# The position of conditional (b ? 
t : f) operators in wrapped expressions -pos_conditional = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# The position of the comma in wrapped expressions -pos_comma = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# The position of the comma in the constructor initialization list -pos_class_comma = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# The position of colons between constructor and member initialization -pos_class_colon = ignore # ignore/join/lead/lead_break/lead_force/trail/trail_break/trail_force - -# -# Line Splitting options -# - -# Try to limit code width to N number of columns -code_width = 0 # number - -# Whether to fully split long 'for' statements at semi-colons -ls_for_split_full = false # false/true - -# Whether to fully split long function protos/calls at commas -ls_func_split_full = false # false/true - -# Whether to split lines as close to code_width as possible and ignore some groupings -ls_code_width = false # false/true - -# -# Blank line options -# - -# The maximum consecutive newlines -nl_max = 0 # number - -# The number of newlines after a function prototype, if followed by another function prototype -nl_after_func_proto = 0 # number - -# The number of newlines after a function prototype, if not followed by another function prototype -nl_after_func_proto_group = 0 # number - -# The number of newlines after '}' of a multi-line function body -nl_after_func_body = 0 # number - -# The number of newlines after '}' of a multi-line function body in a class declaration -nl_after_func_body_class = 0 # number - -# The number of newlines after '}' of a single line function body -nl_after_func_body_one_liner = 0 # number - -# The minimum number of newlines before a multi-line comment. -# Doesn't apply if after a brace open or another multi-line comment. 
-nl_before_block_comment = 0 # number - -# The minimum number of newlines before a single-line C comment. -# Doesn't apply if after a brace open or other single-line C comments. -nl_before_c_comment = 0 # number - -# The minimum number of newlines before a CPP comment. -# Doesn't apply if after a brace open or other CPP comments. -nl_before_cpp_comment = 0 # number - -# Whether to force a newline after a multi-line comment. -nl_after_multiline_comment = false # false/true - -# The number of newlines after '}' or ';' of a struct/enum/union definition -nl_after_struct = 0 # number - -# The number of newlines after '}' or ';' of a class definition -nl_after_class = 0 # number - -# The number of newlines before a 'private:', 'public:', 'protected:', 'signals:', or 'slots:' label. -# Will not change the newline count if after a brace open. -# 0 = No change. -nl_before_access_spec = 0 # number - -# The number of newlines after a 'private:', 'public:', 'protected:', 'signals:', or 'slots:' label. -# 0 = No change. -nl_after_access_spec = 0 # number - -# The number of newlines between a function def and the function comment. -# 0 = No change. -nl_comment_func_def = 0 # number - -# The number of newlines after a try-catch-finally block that isn't followed by a brace close. -# 0 = No change. -nl_after_try_catch_finally = 0 # number - -# The number of newlines before and after a property, indexer or event decl. -# 0 = No change. -nl_around_cs_property = 0 # number - -# The number of newlines between the get/set/add/remove handlers in C#. -# 0 = No change. -nl_between_get_set = 0 # number - -# Add or remove newline between C# property and the '{' -nl_property_brace = ignore # ignore/add/remove/force - -# Whether to remove blank lines after '{' -eat_blanks_after_open_brace = false # false/true - -# Whether to remove blank lines before '}' -eat_blanks_before_close_brace = false # false/true - -# How aggressively to remove extra newlines not in preproc. 
-# 0: No change -# 1: Remove most newlines not handled by other config -# 2: Remove all newlines and reformat completely by config -nl_remove_extra_newlines = 0 # number - -# Whether to put a blank line before 'return' statements, unless after an open brace. -nl_before_return = false # false/true - -# Whether to put a blank line after 'return' statements, unless followed by a close brace. -nl_after_return = false # false/true - -# Whether to put a newline after a Java annotation statement. -# Only affects annotations that are after a newline. -nl_after_annotation = ignore # ignore/add/remove/force - -# Controls the newline between two annotations. -nl_between_annotation = ignore # ignore/add/remove/force - -# -# Code modifying options (non-whitespace) -# - -# Add or remove braces on single-line 'do' statement -mod_full_brace_do = ignore # ignore/add/remove/force - -# Add or remove braces on single-line 'for' statement -mod_full_brace_for = ignore # ignore/add/remove/force - -# Add or remove braces on single-line function definitions. (Pawn) -mod_full_brace_function = ignore # ignore/add/remove/force - -# Add or remove braces on single-line 'if' statement. Will not remove the braces if they contain an 'else'. -mod_full_brace_if = ignore # ignore/add/remove/force - -# Make all if/elseif/else statements in a chain be braced or not. Overrides mod_full_brace_if. -# If any must be braced, they are all braced. If all can be unbraced, then the braces are removed. 
-mod_full_brace_if_chain = false # false/true - -# Don't remove braces around statements that span N newlines -mod_full_brace_nl = 0 # number - -# Add or remove braces on single-line 'while' statement -mod_full_brace_while = ignore # ignore/add/remove/force - -# Add or remove braces on single-line 'using ()' statement -mod_full_brace_using = ignore # ignore/add/remove/force - -# Add or remove unnecessary paren on 'return' statement -mod_paren_on_return = ignore # ignore/add/remove/force - -# Whether to change optional semicolons to real semicolons -mod_pawn_semicolon = false # false/true - -# Add parens on 'while' and 'if' statement around bools -mod_full_paren_if_bool = false # false/true - -# Whether to remove superfluous semicolons -mod_remove_extra_semicolon = false # false/true - -# If a function body exceeds the specified number of newlines and doesn't have a comment after -# the close brace, a comment will be added. -mod_add_long_function_closebrace_comment = 0 # number - -# If a switch body exceeds the specified number of newlines and doesn't have a comment after -# the close brace, a comment will be added. -mod_add_long_switch_closebrace_comment = 0 # number - -# If an #ifdef body exceeds the specified number of newlines and doesn't have a comment after -# the #endif, a comment will be added. -mod_add_long_ifdef_endif_comment = 0 # number - -# If an #ifdef or #else body exceeds the specified number of newlines and doesn't have a comment after -# the #else, a comment will be added. -mod_add_long_ifdef_else_comment = 0 # number - -# If TRUE, will sort consecutive single-line 'import' statements [Java, D] -mod_sort_import = false # false/true - -# If TRUE, will sort consecutive single-line 'using' statements [C#] -mod_sort_using = false # false/true - -# If TRUE, will sort consecutive single-line '#include' statements [C/C++] and '#import' statements [Obj-C] -# This is generally a bad idea, as it may break your code. 
-mod_sort_include = false # false/true - -# If TRUE, it will move a 'break' that appears after a fully braced 'case' before the close brace. -mod_move_case_break = false # false/true - -# Will add or remove the braces around a fully braced case statement. -# Will only remove the braces if there are no variable declarations in the block. -mod_case_brace = ignore # ignore/add/remove/force - -# If TRUE, it will remove a void 'return;' that appears as the last statement in a function. -mod_remove_empty_return = false # false/true - -# -# Comment modifications -# - -# Try to wrap comments at cmt_width columns -cmt_width = 0 # number - -# Set the comment reflow mode (default: 0) -# 0: no reflowing (apart from the line wrapping due to cmt_width) -# 1: no touching at all -# 2: full reflow -cmt_reflow_mode = 0 # number - -# If false, disable all multi-line comment changes, including cmt_width. keyword substitution, and leading chars. -# Default is true. -cmt_indent_multi = true # false/true - -# Whether to group c-comments that look like they are in a block -cmt_c_group = false # false/true - -# Whether to put an empty '/*' on the first line of the combined c-comment -cmt_c_nl_start = false # false/true - -# Whether to put a newline before the closing '*/' of the combined c-comment -cmt_c_nl_end = false # false/true - -# Whether to group cpp-comments that look like they are in a block -cmt_cpp_group = false # false/true - -# Whether to put an empty '/*' on the first line of the combined cpp-comment -cmt_cpp_nl_start = false # false/true - -# Whether to put a newline before the closing '*/' of the combined cpp-comment -cmt_cpp_nl_end = false # false/true - -# Whether to change cpp-comments into c-comments -cmt_cpp_to_c = true # false/true - -# Whether to put a star on subsequent comment lines -cmt_star_cont = false # false/true - -# The number of spaces to insert at the start of subsequent comment lines -cmt_sp_before_star_cont = 0 # number - -# The number of spaces to 
insert after the star on subsequent comment lines -cmt_sp_after_star_cont = 0 # number - -# For multi-line comments with a '*' lead, remove leading spaces if the first and last lines of -# the comment are the same length. Default=True -cmt_multi_check_last = false # false/true - -# The filename that contains text to insert at the head of a file if the file doesn't start with a C/C++ comment. -# Will substitute $(filename) with the current file's name. -cmt_insert_file_header = "" # string - -# The filename that contains text to insert at the end of a file if the file doesn't end with a C/C++ comment. -# Will substitute $(filename) with the current file's name. -cmt_insert_file_footer = "" # string - -# The filename that contains text to insert before a function implementation if the function isn't preceded with a C/C++ comment. -# Will substitute $(function) with the function name and $(javaparam) with the javadoc @param and @return stuff. -# Will also substitute $(fclass) with the class name: void CFoo::Bar() { ... } -cmt_insert_func_header = "" # string - -# The filename that contains text to insert before a class if the class isn't preceded with a C/C++ comment. -# Will substitute $(class) with the class name. -cmt_insert_class_header = "" # string - -# The filename that contains text to insert before a Obj-C message specification if the method isn't preceeded with a C/C++ comment. -# Will substitute $(message) with the function name and $(javaparam) with the javadoc @param and @return stuff. -cmt_insert_oc_msg_header = "" # string - -# If a preprocessor is encountered when stepping backwards from a function name, then -# this option decides whether the comment should be inserted. -# Affects cmt_insert_oc_msg_header, cmt_insert_func_header and cmt_insert_class_header. 
-cmt_insert_before_preproc = false # false/true - -# -# Preprocessor options -# - -# Control indent of preprocessors inside #if blocks at brace level 0 -pp_indent = ignore # ignore/add/remove/force - -# Whether to indent #if/#else/#endif at the brace level (true) or from column 1 (false) -pp_indent_at_level = false # false/true - -# If pp_indent_at_level=false, specifies the number of columns to indent per level. Default=1. -pp_indent_count = 1 # number - -# Add or remove space after # based on pp_level of #if blocks -pp_space = ignore # ignore/add/remove/force - -# Sets the number of spaces added with pp_space -pp_space_count = 0 # number - -# The indent for #region and #endregion in C# and '#pragma region' in C/C++ -pp_indent_region = 0 # number - -# Whether to indent the code between #region and #endregion -pp_region_indent_code = false # false/true - -# If pp_indent_at_level=true, sets the indent for #if, #else, and #endif when not at file-level -pp_indent_if = 0 # number - -# Control whether to indent the code between #if, #else and #endif when not at file-level -pp_if_indent_code = false # false/true - -# Whether to indent '#define' at the brace level (true) or from column 1 (false) -pp_define_at_level = false # false/true - -# You can force a token to be a type with the 'type' option. -# Example: -# type myfoo1 myfoo2 -# -# You can create custom macro-based indentation using macro-open, -# macro-else and macro-close. -# Example: -# macro-open BEGIN_TEMPLATE_MESSAGE_MAP -# macro-open BEGIN_MESSAGE_MAP -# macro-close END_MESSAGE_MAP -# -# You can assign any keyword to any type with the set option. -# set func_call_user _ N_ -# -# The full syntax description of all custom definition config entries -# is shown below: -# -# define custom tokens as: -# - embed whitespace in token using '' escape character, or -# put token in quotes -# - these: ' " and ` are recognized as quote delimiters -# -# type token1 token2 token3 ... 
-# ^ optionally specify multiple tokens on a single line -# define def_token output_token -# ^ output_token is optional, then NULL is assumed -# macro-open token -# macro-close token -# macro-else token -# set id token1 token2 ... -# ^ optionally specify multiple tokens on a single line -# ^ id is one of the names in token_enum.h sans the CT_ prefix, -# e.g. PP_PRAGMA -# -# all tokens are separated by any mix of ',' commas, '=' equal signs -# and whitespace (space, tab) -# -set IF IF_APPNAME diff --git a/kernel/port_picotcp/modules/pico_arp.c b/kernel/port_picotcp/modules/pico_arp.c deleted file mode 120000 index a5a90fc..0000000 --- a/kernel/port_picotcp/modules/pico_arp.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_arp.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_arp.h b/kernel/port_picotcp/modules/pico_arp.h deleted file mode 120000 index 35bb64b..0000000 --- a/kernel/port_picotcp/modules/pico_arp.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_arp.h \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_dev_loop.c b/kernel/port_picotcp/modules/pico_dev_loop.c deleted file mode 120000 index ad584cc..0000000 --- a/kernel/port_picotcp/modules/pico_dev_loop.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_dev_loop.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_dev_loop.h b/kernel/port_picotcp/modules/pico_dev_loop.h deleted file mode 120000 index 11e0920..0000000 --- a/kernel/port_picotcp/modules/pico_dev_loop.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_dev_loop.h \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_ethernet.c b/kernel/port_picotcp/modules/pico_ethernet.c deleted file mode 120000 index 516904d..0000000 --- a/kernel/port_picotcp/modules/pico_ethernet.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_ethernet.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_ethernet.h 
b/kernel/port_picotcp/modules/pico_ethernet.h deleted file mode 120000 index ff077d5..0000000 --- a/kernel/port_picotcp/modules/pico_ethernet.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_ethernet.h \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_icmp4.c b/kernel/port_picotcp/modules/pico_icmp4.c deleted file mode 120000 index a960da2..0000000 --- a/kernel/port_picotcp/modules/pico_icmp4.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_icmp4.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_icmp4.h b/kernel/port_picotcp/modules/pico_icmp4.h deleted file mode 120000 index b4e7481..0000000 --- a/kernel/port_picotcp/modules/pico_icmp4.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_icmp4.h \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_ipv4.c b/kernel/port_picotcp/modules/pico_ipv4.c deleted file mode 120000 index 8952d9c..0000000 --- a/kernel/port_picotcp/modules/pico_ipv4.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_ipv4.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_ipv4.h b/kernel/port_picotcp/modules/pico_ipv4.h deleted file mode 120000 index ab7ef28..0000000 --- a/kernel/port_picotcp/modules/pico_ipv4.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_ipv4.h \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_socket_tcp.c b/kernel/port_picotcp/modules/pico_socket_tcp.c deleted file mode 120000 index c85b835..0000000 --- a/kernel/port_picotcp/modules/pico_socket_tcp.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_socket_tcp.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_socket_tcp.h b/kernel/port_picotcp/modules/pico_socket_tcp.h deleted file mode 120000 index 4e62c3c..0000000 --- a/kernel/port_picotcp/modules/pico_socket_tcp.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_socket_tcp.h \ No newline at end of file diff --git 
a/kernel/port_picotcp/modules/pico_socket_udp.c b/kernel/port_picotcp/modules/pico_socket_udp.c deleted file mode 120000 index 3f7cc08..0000000 --- a/kernel/port_picotcp/modules/pico_socket_udp.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_socket_udp.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_socket_udp.h b/kernel/port_picotcp/modules/pico_socket_udp.h deleted file mode 120000 index 27106c1..0000000 --- a/kernel/port_picotcp/modules/pico_socket_udp.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_socket_udp.h \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_tcp.c b/kernel/port_picotcp/modules/pico_tcp.c deleted file mode 120000 index 09f4b2f..0000000 --- a/kernel/port_picotcp/modules/pico_tcp.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_tcp.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_tcp.h b/kernel/port_picotcp/modules/pico_tcp.h deleted file mode 120000 index 5879fee..0000000 --- a/kernel/port_picotcp/modules/pico_tcp.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_tcp.h \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_udp.c b/kernel/port_picotcp/modules/pico_udp.c deleted file mode 120000 index c270baa..0000000 --- a/kernel/port_picotcp/modules/pico_udp.c +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_udp.c \ No newline at end of file diff --git a/kernel/port_picotcp/modules/pico_udp.h b/kernel/port_picotcp/modules/pico_udp.h deleted file mode 120000 index 49a79a4..0000000 --- a/kernel/port_picotcp/modules/pico_udp.h +++ /dev/null @@ -1 +0,0 @@ -../../picotcp/modules/pico_udp.h \ No newline at end of file diff --git a/kernel/port_picotcp/pico_defines.h b/kernel/port_picotcp/pico_defines.h deleted file mode 100644 index 232439e..0000000 --- a/kernel/port_picotcp/pico_defines.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef PICO_DEFINES_H_ -#define PICO_DEFINES_H_ - - - -#endif // PICO_DEFINES_H_ diff --git 
a/kernel/port_picotcp/pico_mutex.c b/kernel/port_picotcp/pico_mutex.c deleted file mode 100644 index ee71b5d..0000000 --- a/kernel/port_picotcp/pico_mutex.c +++ /dev/null @@ -1,23 +0,0 @@ -#include "spinlock/spinlock.h" -#include "dlmalloc/malloc.h" - -void *pico_mutex_init(void) { - SpinLock *spinlock = dlmalloc(sizeof(*spinlock)); - spinlock_init(spinlock); - return (void *)spinlock; -} - -void pico_mutex_deinit(void *mtx) { - if (mtx != NULL) - dlfree(mtx); -} - -void pico_mutex_lock(void *mtx) { - SpinLock *spinlock = (SpinLock *)mtx; - spinlock_acquire(spinlock); -} - -void pico_mutex_unlock(void *mtx) { - SpinLock *spinlock = (SpinLock *)mtx; - spinlock_release(spinlock); -} diff --git a/kernel/proc/proc.c b/kernel/proc/proc.c index e83799b..7004043 100644 --- a/kernel/proc/proc.c +++ b/kernel/proc/proc.c @@ -13,39 +13,15 @@ #include "vfs/vfs.h" #include "bootinfo/bootinfo.h" #include "ipc/pipe/pipe.h" -#include "ipc/netsock/netsock.h" #include "sysdefs/proc.h" #include "sysdefs/fs.h" #include "time/time.h" -#include "pico_stack.h" #define PROC_REAPER_FREQ 30 uint64_t pids = 0; uint64_t sched_ticks = 0; -uint64_t uptime_ms = 0; -uint64_t last_secs = 0; - -uint64_t get_wallclock_secs(void) { - Time t; - time_get(&t); - return time_totalhours(&t) * 3600 + t.minute * 60 + t.second; -} - -void update_time(void) { - uint64_t now_s = get_wallclock_secs(); - uint64_t delta_s; - - if (now_s >= last_secs) - delta_s = now_s - last_secs; - else - delta_s = 0; - - uptime_ms += (uint64_t)delta_s * 1000; - last_secs = now_s; -} - Procs PROCS; bool proc_checkelf(uint8_t *elf) { @@ -247,9 +223,6 @@ void proc_reaper(void) { void proc_sched(void *cpustate) { hal_intr_disable(); sched_ticks++; - ipc_netsock_cleanup_dangling(); - update_time(); - pico_stack_tick(); IntrStackFrame *frame = cpustate; diff --git a/kernel/syscall/ipcnetsock.c b/kernel/syscall/ipcnetsock.c deleted file mode 100644 index 7b00998..0000000 --- a/kernel/syscall/ipcnetsock.c +++ /dev/null @@ -1,124 +0,0 
@@ -#include -#include -#include "syscall.h" -#include "spinlock/spinlock.h" -#include "ipc/netsock/netsock.h" -#include "util/util.h" -#include "errors.h" - -int32_t SYSCALL2(sys_ipc_netsockmake, net1, proto1) { - uint16_t net = net1; - uint16_t proto = proto1; - - IpcNetSock *netsock = ipc_netsockmake(net, proto, _caller_pid); - - if (netsock == NULL) { - return E_NOMEMORY; - } - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - size_t idx = 0; - IpcNetSock *ns = NULL, *nstmp = NULL; - LL_FOREACH_SAFE_IDX(IPC_NETSOCKS.netsocks, ns, nstmp, idx) { - if (ns == netsock) { - break; - } - } - spinlock_release(&IPC_NETSOCKS.spinlock); - - return idx; -} - -int32_t SYSCALL2(sys_ipc_netsocklisten, socknum1, maxlisteners1) { - size_t socknum = socknum1; - size_t maxlisteners = maxlisteners1; - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - size_t idx = 0; - IpcNetSock *ns = NULL, *nstmp = NULL; - LL_FOREACH_SAFE_IDX(IPC_NETSOCKS.netsocks, ns, nstmp, idx) { - if (idx == socknum) { - break; - } - } - spinlock_release(&IPC_NETSOCKS.spinlock); - - if (ns == NULL) { - return E_NOENTRY; - } - - return ipc_netsocklisten(ns, maxlisteners); -} - -int32_t SYSCALL1(sys_ipc_netsockpollev, socknum1) { - size_t socknum = socknum1; - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - size_t idx = 0; - IpcNetSock *ns, *nstmp; - LL_FOREACH_SAFE_IDX(IPC_NETSOCKS.netsocks, ns, nstmp, idx) { - if (idx == socknum) { - break; - } - } - spinlock_release(&IPC_NETSOCKS.spinlock); - - if (ns == NULL) { - return E_NOENTRY; - } - - spinlock_acquire(&ns->spinlock); - - IpcNetSockEventBuffer ev; - bool empty = false; - rbuft_pop(&ns->eventbuffer, &ev, &empty); - - spinlock_release(&ns->spinlock); - - if (empty) { - return E_NOTYET; - } - - return (int32_t)ev; -} - -int32_t SYSCALL1(sys_ipc_netsockdelete, socknum1) { - size_t socknum = socknum1; - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - size_t idx = 0; - IpcNetSock *ns, *nstmp; - LL_FOREACH_SAFE_IDX(IPC_NETSOCKS.netsocks, ns, nstmp, idx) { - if (idx == 
socknum) { - break; - } - } - spinlock_release(&IPC_NETSOCKS.spinlock); - - if (ns == NULL) { - return E_NOENTRY; - } - - return ipc_netsockdelete(ns); -} - -int32_t SYSCALL2(sys_ipc_netsockbindport, socknum1, port1) { - size_t socknum = socknum1; - uint16_t port = port1; - - spinlock_acquire(&IPC_NETSOCKS.spinlock); - size_t idx = 0; - IpcNetSock *ns, *nstmp; - LL_FOREACH_SAFE_IDX(IPC_NETSOCKS.netsocks, ns, nstmp, idx) { - if (idx == socknum) { - break; - } - } - spinlock_release(&IPC_NETSOCKS.spinlock); - - if (ns == NULL) { - return E_NOENTRY; - } - - return ipc_netsockbindport(ns, port); -} diff --git a/kernel/syscall/ipcnetsock.h b/kernel/syscall/ipcnetsock.h deleted file mode 100644 index 189b849..0000000 --- a/kernel/syscall/ipcnetsock.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef SYSCALL_IPCNETSOCK_H_ -#define SYSCALL_IPCNETSOCK_H_ - -#include -#include -#include "syscall.h" - -int32_t SYSCALL2(sys_ipc_netsockmake, net1, proto1); -int32_t SYSCALL2(sys_ipc_netsocklisten, socknum1, maxlisteners1); -int32_t SYSCALL1(sys_ipc_netsockpollev, socknum1); -int32_t SYSCALL1(sys_ipc_netsockdelete, socknum1); -int32_t SYSCALL2(sys_ipc_netsockbindport, socknum1, port1); - -#endif // SYSCALL_IPC_NETSOCK_H_ diff --git a/kernel/syscall/syscall.c b/kernel/syscall/syscall.c index 5511a67..93f3b82 100644 --- a/kernel/syscall/syscall.c +++ b/kernel/syscall/syscall.c @@ -12,7 +12,6 @@ #include "fs.h" #include "dev.h" #include "time.h" -#include "ipcnetsock.h" int32_t SYSCALL1(sys_debugprint, string) { char *p = (char *)string; @@ -65,10 +64,4 @@ SyscallFn SYSCALL_TABLE[SYSCALLS_MAX] = { [SYS_DEV_CMD] = &sys_dev_cmd, [SYS_TIME] = &sys_time, - - [SYS_IPC_NETSOCKMAKE] = &sys_ipc_netsockmake, - [SYS_IPC_NETSOCKLISTEN] = &sys_ipc_netsocklisten, - [SYS_IPC_NETSOCKPOLLEV] = &sys_ipc_netsockpollev, - [SYS_IPC_NETSOCKDELETE] = &sys_ipc_netsockdelete, - [SYS_IPC_NETSOCKBINDPORT] = &sys_ipc_netsockbindport, }; diff --git a/share/errors.h b/share/errors.h index 588ceb5..ef6581a 100644 --- 
a/share/errors.h +++ b/share/errors.h @@ -16,8 +16,6 @@ #define E_RESOURCEAVAIL -12 #define E_SPAWNERROR -13 #define E_NOTYET -14 -#define E_NETSOCKLISTEN -15 -#define E_NETSOCKBIND -16 #if !defined(__ASSEMBLER__) @@ -37,6 +35,7 @@ static const char *_ERROR_STRINGS[] = { "Invalid operation", "Resource already available", "Process spawn error", + "Data isn't ready yet", }; #define ERRSTRING_INDEX(ioh) ((size_t)((ioh) < 0 ? (ioh) * (-1) : (ioh))) diff --git a/share/sysdefs/ipcnetsock.h b/share/sysdefs/ipcnetsock.h deleted file mode 100644 index fc56a4a..0000000 --- a/share/sysdefs/ipcnetsock.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef SHARE_SYSDEFS_IPC_NETSOCK_H_ -#define SHARE_SYSDEFS_IPC_NETSOCK_H_ - -// matches values of picotcp - -#define NETSOCK_IPV4 0 -#define NETSOCK_TCP 6 - -#define NETSOCKEV_CLOSE 8 -#define NETSOCKEV_ERR 0x80 -#define NETSOCKEV_READ 1 -#define NETSOCKEV_WRITE 2 -#define NETSOCKEV_FINI 0x10 -#define NETSOCKEV_CONN 4 - -typedef int32_t netsock_t; -typedef int32_t netsockev_t; - -#define IS_NETSOCKEV_READY(ev) ((ev) != E_NOTYET) - -#endif // SHARE_SYSDEFS_IPC_NETSOCK_H_ diff --git a/share/sysdefs/syscall.h b/share/sysdefs/syscall.h index 9eda1af..19e01ad 100644 --- a/share/sysdefs/syscall.h +++ b/share/sysdefs/syscall.h @@ -36,10 +36,5 @@ #define SYS_DEV_STAT 35 #define SYS_DEV_CMD 36 #define SYS_TIME 38 -#define SYS_IPC_NETSOCKMAKE 39 -#define SYS_IPC_NETSOCKLISTEN 40 -#define SYS_IPC_NETSOCKPOLLEV 41 -#define SYS_IPC_NETSOCKDELETE 42 -#define SYS_IPC_NETSOCKBINDPORT 43 #endif // SHARE_HDRS_SYSCALL_H_ diff --git a/std/include/stdatomic.h b/std/include/stdatomic.h index a2185a4..98ce95b 100644 --- a/std/include/stdatomic.h +++ b/std/include/stdatomic.h @@ -38,6 +38,13 @@ #define atomic_dec(p) \ atomic_dec_explicit(p, memory_order_seq_cst) +#define atomic_fetch_add_explicit __atomic_fetch_add +#define atomic_fetch_sub_explicit __atomic_fetch_sub +#define atomic_fetch_and_explicit __atomic_fetch_and +#define atomic_fetch_xor_explicit 
__atomic_fetch_xor +#define atomic_fetch_or_explicit __atomic_fetch_or +#define atomic_fetch_nand_explicit __atomic_fetch_nand + #define atomic_bool _Atomic(bool) #define atomic_int _Atomic(int) #define atomic_uint _Atomic(unsigned int) diff --git a/ulib/system/system.c b/ulib/system/system.c index e68366b..0b3c627 100644 --- a/ulib/system/system.c +++ b/ulib/system/system.c @@ -147,22 +147,3 @@ int32_t time(Time *time) { return syscall(SYS_TIME, (uint64_t)time, 0, 0, 0, 0, 0); } -int32_t ipc_netsockmake(uint16_t net, uint16_t proto) { - return syscall(SYS_IPC_NETSOCKMAKE, (uint64_t)net, (uint64_t)proto, 0, 0, 0, 0); -} - -int32_t ipc_netsocklisten(uint64_t netsock, size_t maxlisteners) { - return syscall(SYS_IPC_NETSOCKLISTEN, (uint64_t)netsock, (uint64_t)maxlisteners, 0, 0, 0, 0); -} - -int32_t ipc_netsockpollev(uint64_t netsock) { - return syscall(SYS_IPC_NETSOCKPOLLEV, (uint64_t)netsock, 0, 0, 0, 0, 0); -} - -int32_t ipc_netsockdelete(uint64_t netsock) { - return syscall(SYS_IPC_NETSOCKDELETE, (uint64_t)netsock, 0, 0, 0, 0, 0); -} - -int32_t ipc_netsockbindport(uint64_t netsock, uint16_t port) { - return syscall(SYS_IPC_NETSOCKBINDPORT, (uint64_t)netsock, (uint64_t)port, 0, 0, 0, 0); -} diff --git a/ulib/system/system.h b/ulib/system/system.h index e7e11f0..0c90425 100644 --- a/ulib/system/system.h +++ b/ulib/system/system.h @@ -8,7 +8,6 @@ #include #include #include -#include void debugprint(const char *string); int32_t mman_map(uint8_t *addr, size_t size, uint64_t prot, uint64_t flags, uint8_t **out); @@ -45,10 +44,5 @@ int32_t dev_listsize(void); int32_t dev_stat(DevStat *devstatbuf, size_t idx); int32_t dev_cmd(Dev_t *dev, uint64_t cmd, void *buf, size_t len); int32_t time(Time *time); -int32_t ipc_netsockmake(uint16_t net, uint16_t proto); -int32_t ipc_netsocklisten(uint64_t netsock, size_t maxlisteners); -int32_t ipc_netsockpollev(uint64_t netsock); -int32_t ipc_netsockdelete(uint64_t netsock); -int32_t ipc_netsockbindport(uint64_t netsock, uint16_t 
port); #endif // ULIB_SYSTEM_SYSTEM_H_ diff --git a/user/diagdummy/diagdummy.c b/user/diagdummy/diagdummy.c index d6cffdb..815a1f1 100644 --- a/user/diagdummy/diagdummy.c +++ b/user/diagdummy/diagdummy.c @@ -1,6 +1,5 @@ #include #include "block.h" -#include "tcptest.h" void main(void) { if (argslen() == 0) { @@ -12,10 +11,6 @@ void main(void) { if (string_strcmp(cmd, "block") == 0) { diagdummy_block(); - } else if (string_strcmp(cmd, "tcp-test-server") == 0) { - diagdummy_tcptestserver(); - } else if (string_strcmp(cmd, "tcp-test-client") == 0) { - diagdummy_tcptestclient(); } else { uprintf("diagdummy: unknown cmd %s\n", cmd); } diff --git a/user/diagdummy/tcptest.c b/user/diagdummy/tcptest.c deleted file mode 100644 index 05f1979..0000000 --- a/user/diagdummy/tcptest.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include -#include - -#define MAX_CONNS 10 - -void diagdummy_tcptestserver(void) { - netsock_t netsock = ipc_netsockmake(NETSOCK_IPV4, NETSOCK_TCP); - ipc_netsockbindport(netsock, 1); - ipc_netsocklisten(netsock, MAX_CONNS); - - for (;;) { - netsockev_t ev = ipc_netsockpollev(netsock); - - if (!IS_NETSOCKEV_READY(ev)) { - schedrelease(); - } else { - } - } - - ipc_netsockdelete(netsock); -} - -void diagdummy_tcptestclient(void) { - /* netsock_t netsock = ipc_netsockmake(NETSOCK_IPV4, NETSOCK_TCP, ); */ -} diff --git a/user/diagdummy/tcptest.h b/user/diagdummy/tcptest.h deleted file mode 100644 index 0c1f94a..0000000 --- a/user/diagdummy/tcptest.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef DIAGDUMMY_TCPTEST_H_ -#define DIAGDUMMY_TCPTEST_H_ - -void diagdummy_tcptestserver(void); -void diagdummy_tcptestclient(void); - -#endif // DIAGDUMMY_TCPTEST_H_